在dataframe python中复制行

2024-09-24 02:25:49 发布

您现在位置:Python中文网/ 问答频道 /正文

大家下午好

我目前正在写一篇关于python中KMV模型的论文。我从代码here中得到灵感来解非线性方程组。Here是指向用于创建数据帧的CSV文件的链接。这是我目前掌握的代码:

所需模块的进口

from datetime import datetime
import pandas as pd
import numpy as np
import scipy.optimize as sco
from scipy.stats import norm


df = pd.DataFrame()
df = pd.read_csv("AREX.csv", sep=';', engine = "python", decimal=',')

函数来准备要运行的模型的文件

def clean():   
#    df.rename(columns ={"Date": "Date"}, inplace = True)
#    df["Date"] = pd.to_datetime(df['Date'])
    df.set_index("Date", inplace = True)
    df['AREX.O']=df['AREX.O'].astype(float)
    df.drop(['Total Short Term debt'], axis =1, inplace = True)
    return df

def preparation():
    df['e']=df['AREX.O']*df['Share Outstanding']
    df['Short Term Debt']=df['Debt']-df['Total Long term Debt']
    df['f']=df['Short Term Debt']+df['Total Long term Debt']*0.5
    df['log_ret'] = np.log(df['AREX.O']) - np.log(df['AREX.O'].shift(1))
#    df['stdev']=df['log_ret'].rolling(252).std()*m.sqrt(252)
    return df

用于求解a和sigma的算法

我只是想让代码适应我的数据帧

def algo1():

#    formatting the vaules as required
    df["f"] = df["f"].astype(float)
    df["e"] = df["e"].astype(float)


#    #computating of key input variable for the model

    df['a'] = df['f'].add(df["e"])


    #defining a function for the black Scholes equation

    def bseqn(a, debug=False):
        d1 = (np.log(a/f) + (r + 0.5*sigma_a**2)*T)/(sigma_a*np.sqrt(T))
        d2 = d1 - sigma_a*np.sqrt(T)
        y1 = e - (a*norm.cdf(d1) - np.exp(-r*T)*f*norm.cdf(d2))

        if debug:
            print("d1 = {:.6f}".format(d1))
            print("d2 = {:.6f}".format(d2))
            print("Error = {:.6f}".format('y1'))

        return y1

    #Solving the model

    time_horizon=[1]
    timesteps = range(1, len(df))

    results = np.empty((df.shape[0],len(time_horizon)))

    #looping to solve for each row

    for i, years in enumerate(time_horizon):
        T = 1
        results[:,i] = df.loc[:,'a'] 
        for i_t, t in enumerate(timesteps):  
            a = results[t-10:t,i]
            ra =np.log(a/np.roll(a,1))
            sigma_a = np.nanstd(ra) #gives initial value of sigma_a

            if i_t == 0:
                subset_timesteps = range(t-1, t+1)
                print(subset_timesteps)
            else:
                subset_timesteps = [t]

            n_its = 0
            while n_its < 10:
                n_its += 1
                for t_sub in subset_timesteps:
                    r = df.iloc[t_sub]['r'] 
                    f = df.iloc[t_sub]['f']
                    e = df.iloc[t_sub]['e']
                    sol = sco.fsolve(bseqn, results[t_sub,i]) #if I replace newton with fsolve the code works properly
                    results[t_sub,i] = sol # stores the new values of a 

                # Update sigma_a based on new values of a
                last_sigma_a = sigma_a
                a = results[t-10:t,i]
                ra = np.log(a/np.roll(a,1))
                sigma_a = np.nanstd(ra) #new val of sigma
                diff = last_sigma_a - sigma_a
                if abs(diff) < 1e-3:
                    df.loc[t_sub,'sigma_a'] = sigma_a
                    break 
            else:

                pass
    return df

运行函数

def run():   
    clean()
    preparation()
    algo1()
    print(df)
    print(list(df))
#    main_df = df.to_csv("AREX_D.csv")

输出应该在所创建的sigma\u a列上写入sigma\u a的结果,但不是这样,而是添加了一行,因此我最终得到的不是1500行,而是3000行,其中大部分是Nan值。我不明白代码是怎么要求的。。。你知道吗

我怀疑它来自这些台词:

   diff = last_sigma_a - sigma_a
                    if abs(diff) < 1e-3:
                        df.loc[t_sub,'sigma_a'] = sigma_a
                        break 

有人知道发生了什么吗?你知道吗

以下是输出的图片: The results of the sigma seemed to be moved and start from the last row instead of the samed rows as the other variables

非常感谢!你知道吗


Tags: oftheimportlogdffordatedef