我知道这里有很多解决方案,但是我已经把它们都看完了,仍然不能解决我的问题。我试图打印每个图形的rSquared,但我得到了错误'局部变量“m”在赋值前引用。请帮忙!我知道这里的间隔太小了。我的跑步者拥有所有这些权利。谢谢!在
def readData(fileName):
hsGPA = [] #High School GPA
mathSAT = [] #Math SAT scores
crSAT = [] #Verbal SAT scores
collegeGPA = [] #College GPA
compGPA=[] #ComputerScience GPA
FullList=[]
inputFile = open(fileName, 'r', encoding = 'utf-8')
for line in inputFile:
FullList=line.split(',')
hsGPA.append(float(FullList[0]))
mathSAT.append(int(FullList[1]))
crSAT.append(int(FullList[2]))
collegeGPA.append(float(FullList[3]))
compGPA.append(float(FullList[4]))
return hsGPA, mathSAT, crSAT, collegeGPA, compGPA
def plotData(hsGPA, mathSAT, crSAT, collegeGPA,compGPA):
GPA1 = [] #High School GPA
Score1 = [] #Math SAT scores
Score2= [] #Verbal SAT scores
GPA2 = [] #College GPA
GPA3=[] #ComputerScience GPA
hsGPA, mathGPA, crSAT, collegeGPA,compGPA = readData('satFINAL.txt')
pyplot.figure(1)
pyplot.subplot(5,1,1)
for line in range(len(hsGPA)):
GPA1.append(line)
pyplot.plot(GPA1,hsGPA)
pyplot.subplot(5,1,2)
for line in range(len(mathSAT)):
Score1.append(line)
pyplot.plot(Score1,mathSAT)
pyplot.subplot(5,1,3)
for line in range(len(crSAT)):
Score2.append(line)
pyplot.plot(Score2,crSAT)
pyplot.subplot(5,1,4)
for line in range(len(collegeGPA)):
GPA2.append(line)
pyplot.plot(GPA2,collegeGPA)
pyplot.subplot(5,1,5)
for line in range(len(compGPA)):
GPA3.append(line)
pyplot.plot(GPA3,compGPA)
pyplot.show()
def LinearRegression(xList, yList):
'''
This function finds the constants in the y = mx+b, or linear regression
forumula
xList - a list of the x values
yList - a list of the y values
m - the slope f the line
b - where the line intercepts the y axis
'''
n = len(xList)
sumX = 0
sumXX = 0
sumXY = 0
sumY = 0
for index in range(n):
sumX += xList[index]
sumXY += xList[index] * yList[index]
sumXX += xList[index]**2
sumY += yList[index]
#the components needed to find m and b
m = (n*(sumXY - (sumX*sumY)))/(n*(sumXX - (sumX**2)))
b = (sumY - (m*sumX))/n
#actually implements formula
return m, b
def plotRegression(x,y, xLabel, yLabel):
pyplot.scatter(x,y)
m,b = LinearRegression(x,y)
minX = min(x)
maxX = max(x)
pyplot.plot([minX, maxX], [m * minX + b, m * maxX + b], color ='red')
pyplot.xlabel(xLabel)
pyplot.ylabel(yLabel)
pyplot.show()
def rSquared(x,y):
n = len(x)
R=0
sumS=0
sumT=0
sumY=0
for index in range(n):
a=(y[index]-((m*x[index])+b))**2
sumS = sumS+a
for index in range(len(y)):
sumY = sumY= y[index]
MeanY= sumY/(len(y))
e=(y[index]-MeanY)**2
sumT = sumT+e
m,b= LinearRegression(x, y)
RG=1-(sumS/sumT)
def main():
data = readData('satFINAL.txt')
print(data)
plotData(*data)
hsGPA, mathSAT, crSAT, collegeGPA,compGPA = data
# added ScoreT calculation here
ScoreT = [sum(x) for x in zip(mathSAT, crSAT)]
plotRegression(hsGPA,collegeGPA, 'highGPA', 'collegeGPA')
plotRegression(mathSAT,collegeGPA, 'mathSAT' , 'collegeGPA')
plotRegression(crSAT,collegeGPA, 'crSAT' , 'collegeGPA')
plotRegression(ScoreT,collegeGPA, 'Math and CR SAT' , 'collegeGPA')
plotRegression(mathSAT,crSAT, 'mathSAT', 'CR SAT')
plotRegression(mathSAT,compGPA, 'mathSAT', 'CompGPA')
plotRegression(hsGPA,compGPA, 'HsGPA', 'CompGPA')
plotRegression(ScoreT,compGPA, 'SATscore ', 'CompGPA')
print(rSquared(hsGPA,collegeGPA))
main()
很难说清楚-你的缩进是混乱的,你有大量的代码,而且你还没有给出错误跟踪(这实际上可以识别错误所在的行!)-但是看起来,在
rSquared
的定义中,您在给m
赋值之前调用a=(y[index]-((m*x[index])+b))**2
。在编辑:我浏览并重构了很多重复的代码到循环中;现在希望它更具可读性。我还交叉检查了
linear_regression
函数与scipy.stats.linregress
得到了相同的结果;我已经验证了r_squared
,所以您应该检查一下。在相关问题 更多 >
编程相关推荐