我创建了一个类(存储在dataanalysis.py)来存储我将用于数据分析的大多数方法。以下是存储在此文件中的代码:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
class Trening:
    """Load one session CSV and clean / derive columns on it in place.

    The working frame lives in ``self.df``. Every processing method mutates
    that frame and, when called with ``Return=True``, also returns a frame
    (see each method); with ``Return=False`` the methods return ``None``.

    Attributes:
        path: Directory prefix that is concatenated with the file name in
            ``loadData`` (so it must end with a path separator).
        df: The working DataFrame, ``None`` until ``loadData`` is called.
        grouped: ``FINISH`` grouped by ``BIB#`` and ``COURSE``; set by
            ``groupData``.
        fastest: Rows holding the two smallest ``FINISH`` values of every
            ``BIB#`` / ``COURSE`` group; set by ``findTwoFastestRunsbyGroup``.
    """

    # Columns that arrive as text with a decimal comma and must become floats.
    _TIME_COLUMNS = ("FINISH", "INTER 1", "SECTION IM4-FINISH")
    # Substring replacements applied, in this order, to the COMMENT column.
    _COMMENT_LABELS = (
        ('1', 'COURSE 1'),
        ('2', 'COURSE 2'),
        ('9', 'STRAIGHT-GLIDING'),
    )
    # Keys that define one group of runs.
    _GROUP_KEYS = ['BIB#', 'COURSE']

    def __init__(self, Path):
        """Remember the directory prefix; no file is read yet."""
        self.path = Path
        self.df = None
        self.grouped = None
        self.fastest = None

    def getDF(self):
        """Return the working DataFrame (``None`` before ``loadData``)."""
        return self.df

    def loadData(self, fileName="pilot1.csv"):
        """Read ``self.path + fileName`` into ``self.df``.

        The first two lines of the file are skipped and ``.`` is used as the
        decimal separator by the parser.
        """
        filePath = str(self.path + fileName)
        self.df = pd.read_csv(filePath, skiprows=2, decimal=".")

    def dnfCountandReplace(self, Return=False):
        """Export the rows whose FINISH is ``'DNF'`` and drop them from the frame.

        The matching rows, with every ``'DNF'`` cell replaced by ``1``, are
        written to ``faenta.csv`` in the current working directory. In the
        working frame every ``'DNF'`` cell becomes NaN and rows without a
        FINISH value are removed.

        Returns:
            The exported DNF rows when ``Return`` is true, otherwise ``None``.
        """
        is_dnf = self.df['FINISH'] == 'DNF'
        dnf = self.df[is_dnf].replace('DNF', 1)
        dnf.to_csv('faenta.csv')
        # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
        self.df.replace('DNF', np.nan, inplace=True)
        self.df.dropna(subset=['FINISH'], inplace=True)
        return dnf if Return else None

    @staticmethod
    def _as_float(column):
        """Convert a decimal-comma text column (or a numeric one) to float."""
        if pd.api.types.is_numeric_dtype(column):
            # Already parsed as numbers by read_csv; there is no .str accessor.
            return column.astype(float)
        return column.str.replace(',', '.', regex=False).astype(float)

    def changeDataType(self, Return=False):
        """Convert the timing columns to float and label the COMMENT codes.

        ``FINISH``, ``INTER 1`` and ``SECTION IM4-FINISH`` are converted to
        float (a decimal comma is accepted). ``COMMENT`` is converted to text
        and the substrings ``1``, ``2`` and ``9`` are replaced by
        ``COURSE 1``, ``COURSE 2`` and ``STRAIGHT-GLIDING`` respectively.

        Returns:
            ``self.df`` when ``Return`` is true, otherwise ``None``.
        """
        for name in self._TIME_COLUMNS:
            self.df[name] = self._as_float(self.df[name])

        comment = self.df['COMMENT'].astype(str)
        for code, label in self._COMMENT_LABELS:
            # regex=False keeps this a literal substring replacement regardless
            # of the pandas version's default.
            comment = comment.str.replace(code, label, regex=False)
        self.df['COMMENT'] = comment
        return self.df if Return else None

    def renameCommentToCourse(self, Return=False):
        """Rename the ``COMMENT`` column to ``COURSE`` in place.

        Returns:
            ``self.df`` when ``Return`` is true, otherwise ``None``.
        """
        self.df.rename(columns={'COMMENT': 'COURSE'}, inplace=True)
        return self.df if Return else None

    def groupData(self, Return=False):
        """Group ``FINISH`` by ``BIB#`` and ``COURSE`` and keep the grouping.

        The grouped object is stored on ``self.grouped`` (it used to be
        computed and thrown away). The working frame itself is not changed.

        Returns:
            ``self.df`` when ``Return`` is true, otherwise ``None``.
        """
        self.grouped = self.df.groupby(self._GROUP_KEYS)['FINISH']
        return self.df if Return else None

    def findTwoFastestRunsbyGroup(self, Return=False):
        """Select the two smallest-FINISH rows of every ``BIB#``/``COURSE`` group.

        The selected rows are stored on ``self.fastest``; the working frame is
        left untouched. Rows without a FINISH value are ignored. FINISH should
        already be numeric (see ``changeDataType``), otherwise the ordering is
        whatever pandas uses for the column's current dtype.

        Returns:
            ``self.df`` when ``Return`` is true, otherwise ``None``.
        """
        ranked = self.df.dropna(subset=['FINISH']).sort_values(
            'FINISH', kind='stable'
        )
        # After sorting, the first two rows of each group are its two fastest.
        self.fastest = ranked.groupby(self._GROUP_KEYS, sort=False).head(2)
        return self.df if Return else None

    def parseToCSV(self):
        """Write the working frame to ``cool.csv`` in the working directory."""
        self.df.to_csv('cool.csv')

    def calculateSpeed(self, Return=False):
        """Add an ``ENTRANCESPEED`` column computed from ``INTER 1``.

        The value is ``(x2 - x1) / (INTER 1 - t1)`` with ``x2 = 2``, ``x1 = 0``
        and ``t1 = 0``. The whole column is computed in one vectorized
        assignment; the previous per-row loop repeated that same assignment
        once for every row.

        Returns:
            ``self.df`` when ``Return`` is true, otherwise ``None``.
        """
        # (x2 - x1) / (t2 - t1)
        x2 = 2
        x1 = 0
        t1 = 0
        self.df['ENTRANCESPEED'] = (x2 - x1) / (self.df['INTER 1'] - t1)
        return self.df if Return else None
在我读博士期间,我可能会运行这些类方法超过300次,但有时会以不同的顺序和组合来运行,这取决于我想要得到的结果。所以我的问题是:有没有一种方法可以把这些类方法分组或链接在一起,以便在需要时调用相关的方法?我知道Pandas有一个有趣的pipe()函数,但我不知道如何使用它。在main.py文件中,我有以下代码:
from models import Trening
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Driver for one session. The steps are order-dependent: each one works on the
# frame left behind by the previous step.

# loadData concatenates this directory string with the file name, so the
# trailing slash is required.
pilot1 = Trening("/Users/cmagelssen/Desktop/DataAnalyse/data/pilot1/trening1/")
pilot1.loadData("PILOT1_SESSION2.csv")
# Writes the 'DNF' rows to faenta.csv and drops them from the frame; the
# returned DNF rows are not used here.
pilot1.dnfCountandReplace(Return=True)
# Must run after the DNF rows are gone, because FINISH is converted to float.
X = pilot1.changeDataType(Return=True)
# groupData below groups on 'COURSE', so the rename has to come first.
pilot1.renameCommentToCourse(Return=True)
# These two calls leave the frame itself unchanged; their return values are
# discarded here.
pilot1.groupData(Return=True)
pilot1.findTwoFastestRunsbyGroup(Return=True)
# Adds the ENTRANCESPEED column.
df = pilot1.calculateSpeed(Return=True)
# NOTE: X, df and plot are the same DataFrame object (the methods return
# pilot1's frame, which is modified in place), not three separate copies.
plot = pilot1.getDF()
有人知道我如何使用pipe()将这些方法链接/组合在一起吗?
祝好,Christian
您可以尝试下面的代码,并按自己的需要自由组合各个函数。请确保:如果最后调用的是getDF,那么管道中调用的所有函数也都会修改self.df。
相关问题 更多 >
编程相关推荐