在Python中实现广义生日悖论

import numpy as np
import numpy.random as npr
from scipy.special import comb, gammaln


def p_unique_birthdays(m, k, n):
    """Return P(exactly m distinct values) when drawing n times from [0, k).

    The inclusion-exclusion formula is

        C(k, m) * sum_{i=0}^{m} (-1)^i * C(m, i) * (m - i)^n / k^n

    The terms of the alternating sum are many orders of magnitude larger
    than the final result, so accumulating them as floats loses most (or
    all) significant digits.  Here the whole numerator is computed with
    exact Python integers and a single true division is performed at the
    end, so the returned float is correctly rounded.

    Args:
        m: Number of distinct values observed.
        k: Number of equally likely values to draw from.
        n: Number of draws (must be >= 0).

    Returns:
        The probability as a float; 0.0 for impossible outcomes
        (e.g. m > k or m > n).

    Raises:
        ValueError: If n is negative.
    """
    m, k, n = int(m), int(k), int(n)
    if n < 0:
        raise ValueError("n must be non-negative")

    # Number of surjections from the n draws onto m labelled values.
    # Python's 0 ** 0 == 1 makes the i == m term correct for n == 0 too.
    surjections = sum(
        (-1) ** i * comb(m, i, exact=True) * (m - i) ** n
        for i in range(m + 1)
    )
    favourable = comb(k, m, exact=True) * surjections
    if favourable == 0:
        # Impossible outcome; also avoids dividing by k ** n == 0 when k == 0.
        return 0.0
    # int / int is a single, correctly rounded division.
    return favourable / k ** n


def p_unique_birthdays_logs(m, k, n):
    """Return the same PMF as p_unique_birthdays, with terms built from logs.

    Each term C(m, i) * ((m - i) / k)^n is formed as exp(log(...)) using
    gammaln.  The alternating sum itself is still accumulated in floating
    point, so working in logs does not remove the cancellation between
    terms; this variant is kept for comparison with the exact version.

    Args:
        m: Number of distinct values observed.
        k: Number of equally likely values to draw from.
        n: Number of draws.

    Returns:
        The (floating-point) probability estimate.
    """
    total = 0
    for i in range(m):
        log_m_choose_i = gammaln(m + 1) - gammaln(i + 1) - gammaln(m - i + 1)
        log_power = n * (np.log(m - i) - np.log(k))
        total += (-1) ** i * np.exp(log_m_choose_i + log_power)
    return comb(k, m, exact=True) * total


def do_stuff(k, n, pmf, label="PMF", n_samples=50000):
    """Plot a PMF against a Monte Carlo histogram of distinct-value counts.

    Args:
        k: Number of equally likely values to draw from.
        n: Number of draws per experiment.
        pmf: Callable pmf(m, k, n) returning P(exactly m distinct values).
        label: Legend label for the PMF bars.
        n_samples: Number of simulated experiments.
    """
    # Imported here so the PMF functions can be used without matplotlib.
    import matplotlib.pyplot as plt

    p_ms = np.array([pmf(m, k, n) for m in range(1, n + 1)], dtype=float)
    print("Sum of probabilities:", p_ms.sum())

    samples = np.array(
        [np.unique(npr.choice(k, n, replace=True)).size
         for _ in range(n_samples)]
    )

    # The counts are integers, so use width-1 bins centred on each integer.
    # (Deriving the width from the data fails when all samples are equal.)
    bin_edges = np.arange(samples.min() - 0.5, samples.max() + 1.5)

    fig = plt.figure(figsize=(8, 5))
    ax = fig.add_subplot(111)
    ax.grid()
    ax.bar(range(1, n + 1), p_ms, color="C0", label=label)
    ax.hist(samples, bin_edges, alpha=0.5, color="C1", density=True,
            label="Samples")
    ax.legend()
    ax.set_xlabel("Unique birthdays")
    ax.set_ylabel("Normalised frequency")
    ax.set_title(f"k = {k}, n = {n}")
    # To save the figure instead: fig.savefig(f"k{k}_n{n}_{label}.png")
    plt.show()


random_seed = 1234
labels = ["PMF", "PMF (logs)"]
pmfs = [p_unique_birthdays, p_unique_birthdays_logs]


def main():
    """Compare both PMF implementations against simulation."""
    npr.seed(random_seed)
    for label, pmf in zip(labels, pmfs):
        for k, n in [(30, 20), (60, 40)]:
            do_stuff(k, n, pmf, label=label)


if __name__ == "__main__":
    main()

2条回答

网友

1楼 · 编辑于 2024-06-01 14:03:46

你是对的，这确实是数值精度（浮点舍入）方面的问题。

更改此行：

# Original line: (m-i)/k is rounded to a float first, then raised to the n-th power.
total += (-1)**i * comb(m, i, exact=True) * ((m-i)/k)**n

为此：

# Replacement line: the product and the powers are evaluated first and the
# division by k**n happens last (exact integer arithmetic up to that division,
# assuming m, i, k and n are Python ints).
total += (-1)**i * comb(m, i, exact=True) * ((m-i)**n)/(k**n)

只要强制改变运算顺序，问题就能顺利解决。原因是：原来的写法先做浮点除法 (m-i)/k，再取 n 次幂，舍入误差会被放大；改写后先用整数精确计算乘积和幂，最后只做一次除法，每一项只产生一次舍入。

要弄清楚如何修改您的“对数”（logs）版本，可能还需要花更多时间；但既然上面的更改已经修复了问题，您或许可以直接放弃“对数”版本。

希望有帮助！

网友

2楼 · 编辑于 2024-06-01 14:03:46

您可以使用内置的 decimal 模块来提高精度。

from decimal import *

# Use 10000 significant digits for the Decimal arithmetic performed below.
getcontext().prec = 10000

def factorial(n):
    """Return int(n)! as a Decimal, using the current decimal context.

    Values of n below 1 give Decimal(1), since no factors are multiplied in.
    """
    product = Decimal(1)
    for factor in range(1, int(n) + 1):
        product *= Decimal(factor)
    return product

def binomial_coefficient(n, k):
    """Return the binomial coefficient C(n, k) as a Decimal.

    Args:
        n: Size of the set being chosen from (int or Decimal).
        k: Number of items chosen (int or Decimal).

    Returns:
        n! / (k! * (n - k)!), or Decimal(0) when k is outside [0, n].
    """
    # factorial() returns 1 for negative arguments, so without this guard an
    # out-of-range k would produce a non-zero (wrong) coefficient.
    if k < 0 or k > n:
        return Decimal(0)
    return factorial(n) / factorial(k) / factorial(n - k)

def p_unique_birthdays(m, k, n):
    """Return P(exactly m distinct values) when drawing n times from k values.

    Evaluates C(k, m) * sum_{i=0}^{m} (-1)^i * C(m, i) * ((m - i) / k)^n
    with Decimal arithmetic at the current decimal context precision.

    Args:
        m: Number of distinct values observed.
        k: Number of possible values.
        n: Number of draws.

    Returns:
        The probability as a Decimal.
    """
    m = Decimal(m)
    k = Decimal(k)
    n = Decimal(n)

    # C(k, m) does not depend on i, so compute it once instead of on every
    # iteration of the sum.
    value_choices = binomial_coefficient(k, m)

    total = Decimal(0)
    for i in range(int(m) + 1):
        sign = -1 if i % 2 else 1
        total += sign * binomial_coefficient(m, i) * ((m - i) / k) ** n
    return value_choices * total

# Probability of exactly 49 distinct birthdays among 50 draws from 365 possible days.
print(p_unique_birthdays(49, 365, 50))

上面的代码输出 0.11484925，与以下 WolframAlpha 查询的结果相同： http://www.wolframalpha.com/input/?i=sum+combination(49,x)combination(365,49)++(((49-x)%2F365)%5E50)+*+(-1)%5Ex,+x%3D0+to+49

相关问题更多 >

编程相关推荐

热门问题

热门文章