提高scipy的Anderson Darling 2样本测试的性能

import numpy as np from scipy.stats import anderson_ksamp import time as t def main(): """ Test scipy's osiginal vs the simplified Anderson-Dalring tests. """ t1, t2 = 0., 0. AD_all = [] for _ in range(1000): N = np.random.randint(10, 200) aa = np.random.uniform(0., 1., N) bb = np.random.uniform(0., 1., N) s = t.time() AD = anderson_ksamp([aa, bb])[0] t1 += t.time() - s s = t.time() AD2 = anderson_ksamp_new([aa, bb]) t2 += t.time() - s # Check that both values are equal AD_all.append([AD, AD2]) AD_all = np.array(AD_all).T print((AD_all[0] == AD_all[1]).all()) print("Improvement: {:.1f}%".format(100. - 100. * t2 / t1)) def anderson_ksamp_new(samples): """ A2akN: equation 7 of Scholz and Stephens. samples : sequence of 1-D array_like Array of sample arrays. Z : array_like Sorted array of all observations. Zstar : array_like Sorted array of unique observations. k : int Number of samples. n : array_like Number of observations in each sample. N : int Total number of observations. Returns ------- A2aKN : float The A2aKN statistics of Scholz and Stephens 1987. """ k = 2 # This will always be 2 Z = np.sort(np.hstack(samples)) N = Z.size Zstar = np.unique(Z) n = np.array([sample.size for sample in samples]) A2kN = 0. Z_ssorted_left = Z.searchsorted(Zstar, 'left') if N == Zstar.size: lj = 1. else: lj = Z.searchsorted(Zstar, 'right') - Z_ssorted_left Bj = Z_ssorted_left + lj / 2. for i in np.arange(0, k): s = np.sort(samples[i]) s_ssorted_right = s.searchsorted(Zstar, side='right') Mij = s_ssorted_right.astype(float) fij = s_ssorted_right - s.searchsorted(Zstar, 'left') Mij -= fij / 2. inner = lj / float(N) * (N*Mij - Bj*n[i])**2 / (Bj*(N - Bj) - N*lj/4.) A2kN += inner.sum() / n[i] A2kN *= (N - 1.) / N H = (1. / n).sum() hs_cs = (1. / np.arange(N - 1, 1, -1)).cumsum() h = hs_cs[-1] + 1 g = (hs_cs / np.arange(2, N)).sum() a = (4*g - 6) * (k - 1) + (10 - 6*g)*H b = (2*g - 4)*k**2 + 8*h*k + (2*g - 14*h - 4)*H - 8*h + 4*g - 6 c = (6*h + 2*g - 2)*k**2 + (4*h - 4*g + 6)*k + (2*h - 6)*H + 4*h d = (2*h + 6)*k**2 - 4*h*k sigmasq = (a*N**3 + b*N**2 + c*N + d) / ((N - 1.) * (N - 2.) * (N - 3.)) m = k - 1 A2 = (A2kN - m) / np.sqrt(sigmasq) return A2

1条回答

网友

1楼 · 发布于 2024-09-29 21:49:02

通过使用np.ndarray.sort而不是np.sort进行就地排序，您可以获得轻微的改进：

Z = np.sort(np.hstack(samples))

变成：

Z = np.hstack(samples)
Z.sort()

及

s = np.sort(samples[i])

变成：

s = samples[i]
s.sort()

通过这些修改，与anderson_ksamp_new（您的版本）相比，我得到了12%的改进

此外，您可以使用np.concatenate而不是np.hstack。这与适当的排序相结合，使我的成绩提高了16%

相关问题更多 >

编程相关推荐

热门问题

热门文章