根据直方图计算PDF

(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.208125,0.0558879,0.608348,0.212553,0.0865896, 0.728542,0,0.609512,0,0,0,0,0,0,0,0.0801339,0.140657,0.0194118,0,0,0.0634682, 0.339545,0.875902,0.8325,0.0260526,0.0711905,0.169894,0.202969,0.0761538,0,0.342055, 0.42781,0,0,0.192115,0,0,0,0,0,0,0,0,0,0,0,1.6473,0,0.232329,0,2.21329,0.748,0.0424286, 0.455439,0.210282,5.56453,0.427959,0,0.352059,0,0,0.567059,0,0,0,0.384462,1.29476, 0.0103125,0,0.0126923,1.03356,0,0,0.289785,0,0)

import gc
from sklearn.neighbors import KernelDensity


def plot_prob_density(df_lunch, field, x_start, x_end):
    """Plot a normalized histogram with a Gaussian KDE overlay and return the KDE.

    Args:
        df_lunch: Sample values passed to ``KernelDensity.fit``; the call
            below supplies a column array of shape (n_samples, 1).
        field: Text used for the x-axis label.
        x_start: x-position of the first red dashed vertical marker.
        x_end: x-position of the second red dashed vertical marker.

    Returns:
        The fitted ``KernelDensity`` estimator.
    """
    plt.figure(figsize=(10, 7))

    # Evaluation grid spanning the observed data range, as a (1000, 1) column.
    x = np.linspace(df_lunch.min(), df_lunch.max(), 1000)[:, np.newaxis]

    # Plot the data using a normalized histogram
    plt.hist(df_lunch, bins=200, density=True, label='LTV', color='blue', alpha=0.2)

    # Do kernel density estimation
    kd_lunch = KernelDensity(kernel='gaussian', bandwidth=0.00187).fit(df_lunch)

    # Plot the estimated density; score_samples returns log-density values,
    # hence the exponential.
    kd_vals_lunch = np.exp(kd_lunch.score_samples(x))
    plt.plot(x, kd_vals_lunch, color='orange')

    # Mark the interval of interest
    plt.axvline(x=x_start, color='red', linestyle='dashed')
    plt.axvline(x=x_end, color='red', linestyle='dashed')

    # Show the plots
    plt.xlabel(field, fontsize=15)
    plt.ylabel('Probability Density', fontsize=15)
    plt.legend(fontsize=15)
    plt.show()
    gc.collect()
    return kd_lunch


kd_lunch = plot_prob_density(final_df['LTV'].values.reshape(-1, 1), 'LTV', x_start=0, x_end=0.01)

def get_probability(start_value, end_value, eval_points, kd):
    """Approximate the probability mass of a fitted density on an interval.

    The density is evaluated on ``eval_points`` evenly spaced points between
    ``start_value`` and ``end_value`` and integrated with the composite
    trapezoidal rule.

    Args:
        start_value: Lower bound of the integration interval.
        end_value: Upper bound of the integration interval.
        eval_points: Number of evaluation points; must be at least 2.
        kd: Fitted estimator whose ``score_samples`` returns log-density
            values for an (n, 1) array.

    Returns:
        The approximate probability, rounded to 4 decimal places.

    Raises:
        ValueError: If ``eval_points`` is less than 2.
    """
    if eval_points < 2:
        raise ValueError('eval_points must be at least 2')

    step = (end_value - start_value) / (eval_points - 1)  # Step size
    x = np.linspace(start_value, end_value, eval_points)[:, np.newaxis]
    kd_vals = np.exp(kd.score_samples(x))  # PDF values for each x

    # Trapezoidal rule: the two end points carry half weight. Summing all N
    # values times the step would integrate over N steps instead of N - 1 and
    # over-estimate the result.
    probability = step * (kd_vals.sum() - 0.5 * (kd_vals[0] + kd_vals[-1]))
    return np.round(probability, 4)


print('Probability of LTV 0-3 tips during LUNCH time: {}\n'
      .format(get_probability(start_value=0, end_value=0.01, eval_points=100, kd=kd_lunch)))

1条回答

网友

1楼 · 发布于 2024-09-27 07:25:57

我在工作中用过一个大致类似的脚本，贴在这里，希望对你有所帮助：

import gc
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.neighbors import KernelDensity
from scipy import stats
# Sample to fit the KDE on. `beta_95` is not defined in this snippet;
# presumably an indexable collection of samples — TODO confirm.
data1 = beta_95[0]

def plot_prob_density(data1, x_start, x_end):
    """Plot a histogram of ``data1`` with a Gaussian KDE overlay and return the KDE.

    The figure uses a fixed x-range of [-20, 20] and a fixed y-range of
    [0, 0.3]. Two reference lines are drawn at the hard-coded positions
    9.95 and 1.9, plus one at ``x_end``.

    Args:
        data1: Sample values passed to ``KernelDensity.fit``; expected to be
            a column array of shape (n_samples, 1).
        x_start: Accepted for call compatibility; not used by this function.
        x_end: x-position of the red dashed line labelled ``β_{95%}``.

    Returns:
        The fitted ``KernelDensity`` estimator.
    """
    plt.figure(figsize=(4, 3.5))

    # Fixed evaluation grid for the KDE curve, as a (1000, 1) column.
    x = np.linspace(-20, 20, 1000)[:, np.newaxis]

    # Plot the data using a normalized histogram
    plt.hist(data1, bins=np.linspace(-20, 20, 40), density=True, color='r', alpha=0.4)

    # Do kernel density estimation
    kd_data1 = KernelDensity(kernel='gaussian', bandwidth=1.8).fit(data1)

    # Plot the estimated density; score_samples returns log-density values,
    # hence the exponential.
    kd_vals_data1 = np.exp(kd_data1.score_samples(x))
    plt.plot(x, kd_vals_data1, color='r', label='$N_a$', linewidth=2)

    # Reference lines at fixed positions
    plt.axvline(x=9.95, color='green', linestyle='dashed', linewidth=2.0, label='$β_o$')
    plt.axvline(x=1.9, color='black', linestyle='dashed', linewidth=2.0, label='$β_b$')

    # Raw string: "\%" is not a valid Python escape sequence and triggers a
    # warning in a normal string literal; the resulting text is unchanged.
    plt.axvline(x=x_end, color='red', linestyle='dashed', linewidth=2, label=r'$β_{95\%}$')

    # Show the plots
    plt.xlabel('Beta', fontsize=10)
    plt.ylabel('Probability Density', fontsize=10)
    plt.title('02 hours window', fontsize=12)
    plt.xlim(-20, 20)
    plt.ylim(0, 0.3)
    plt.yticks([0, 0.1, 0.2, 0.3])
    plt.legend(fontsize=12, loc='upper left', frameon=False)
    plt.show()
    gc.collect()
    return kd_data1

def get_probability(start_value, end_value, eval_points, kd):
    """Approximate the probability mass of a fitted density on an interval.

    The density is evaluated on ``eval_points`` evenly spaced points between
    ``start_value`` and ``end_value`` and integrated with the composite
    trapezoidal rule.

    Args:
        start_value: Lower bound of the integration interval.
        end_value: Upper bound of the integration interval.
        eval_points: Number of evaluation points; must be at least 2.
        kd: Fitted estimator whose ``score_samples`` returns log-density
            values for an (n, 1) array.

    Returns:
        The approximate probability, rounded to 4 decimal places.

    Raises:
        ValueError: If ``eval_points`` is less than 2.
    """
    if eval_points < 2:
        raise ValueError('eval_points must be at least 2')

    step = (end_value - start_value) / (eval_points - 1)  # Step size
    x = np.linspace(start_value, end_value, eval_points)[:, np.newaxis]
    kd_vals = np.exp(kd.score_samples(x))  # PDF values for each x

    # Trapezoidal rule: the two end points carry half weight. Summing all N
    # values times the step would integrate over N steps instead of N - 1 and
    # over-estimate the result.
    probability = step * (kd_vals.sum() - 0.5 * (kd_vals[0] + kd_vals[-1]))
    return np.round(probability, 4)

# Reshape the sample into a single column, (n_samples, 1), before fitting.
data1 = np.array(data1).reshape(-1, 1)

# Fit the KDE and draw the figure; the red marker line is placed at x = 13.
kd_data1 = plot_prob_density(data1, x_start=3.0, x_end=13)

# Integrate the fitted density over [-10, 13] on 1000 points and report it.
print('Beta-95%: {}\n'.format(get_probability(-10, 13, 1000, kd_data1)))

相关问题更多 >

编程相关推荐

热门问题

热门文章