在Python中生成模拟数据，使其与一个预定义变量分别满足一系列指定的相关系数

import numpy as np
import pandas as pd
from numpy.random import multivariate_normal as mvn

# Reference variable whose mean is used as the centre of the simulated data.
refVar = [75.25, 77.93, 78.2, 61.77, 80.88, 71.95, 79.88, 65.53, 85.03, 61.72,
          60.96, 56.36, 23.16, 73.36, 64.18, 83.07, 63.25, 49.3, 78.2, 30.96]
mean_refVar = np.mean(refVar)

# Sweep the target correlations 0.0, 0.05, ..., 0.95 and write one
# tab-separated file per value.
for r in np.arange(0, 1, 0.05):
    # With unit variances the covariance equals the correlation coefficient.
    var1 = 1
    var2 = 1
    cov = r
    cov_matrix = [[var1, cov], [cov, var2]]
    # NOTE: both columns are random draws from the bivariate normal; the
    # 'refVar' column does not contain the values of the refVar list, only
    # its mean and length are used here.
    data = mvn([mean_refVar, mean_refVar], cov_matrix, size=len(refVar))
    label = str(r.round(2))
    output = 'corr_' + label + '.txt'
    df = pd.DataFrame(data, columns=['refVar', 'v' + label])
    df.to_csv(output, sep='\t', index=False)

# Ideally, instead of creating an output for each correlation, I would like to
# generate a DF with refVar and all these newly created Series

1条回答

网友

1楼 · 发布于 2024-10-01 00:20:45

参照 this answer 中的做法，我们可以按如下方式生成序列：

def rand_with_corr(refVar, corr):
    """Return a random vector with a prescribed Pearson correlation to refVar.

    The reference is centred and scaled to unit length (X). A uniform random
    vector is centred, stripped of its component along X and scaled to unit
    length (Y). Because X and Y are then orthonormal and both zero-mean, the
    combination ``Y + k * X`` with ``k = corr / sqrt(1 - corr**2)`` has
    correlation ``k / sqrt(1 + k**2) == corr`` with the reference.

    Parameters
    ----------
    refVar : array-like
        One-dimensional sequence of numbers to correlate against.
    corr : float
        Target correlation coefficient, in the closed interval [-1, 1].

    Returns
    -------
    numpy.ndarray
        Zero-mean array of the same length as ``refVar``. Its scale is
        arbitrary; only its correlation with ``refVar`` is controlled.

    Raises
    ------
    ValueError
        If ``corr`` is outside [-1, 1] (or NaN), if ``refVar`` has no
        variation, or if ``refVar`` has fewer than three elements while
        ``abs(corr) < 1`` (no non-zero orthogonal component exists then).
    """
    # The chained comparison is also False for NaN, so NaN is rejected here.
    if not -1 <= corr <= 1:
        raise ValueError("corr must be within [-1, 1], got %r" % (corr,))

    # Center and normalize refVar.
    X = np.asarray(refVar, dtype=float)
    X = X - np.mean(X)
    norm_x = np.linalg.norm(X)
    if norm_x == 0:
        raise ValueError("refVar must contain at least two distinct values")
    X = X / norm_x

    # Perfect (anti-)correlation: the general formula would divide by zero,
    # and the only valid answer is a multiple of the reference itself.
    if abs(corr) == 1:
        return X.copy() if corr > 0 else -X

    if X.size < 3:
        raise ValueError(
            "refVar needs at least 3 elements when abs(corr) < 1"
        )

    # Random sample, centred so that it has zero mean like X.
    Y = np.random.rand(X.size)
    Y = Y - Y.mean()

    # Keep only the component orthogonal to X.
    Y = Y - Y.dot(X) * X

    # Normalize Y.
    Y = Y / np.linalg.norm(Y)

    # cot(arccos(corr)) written directly; avoids the trig round trip.
    return Y + (corr / np.sqrt(1.0 - corr ** 2)) * X

# test
out = rand_with_corr(refVar, 0.05)

pd.Series(out).corr(pd.Series(refVar))
# out
# 0.050000000000000086

相关问题更多 >

编程相关推荐

热门问题

热门文章