Python散点图p

2024-09-30 16:19:32 发布

您现在位置:Python中文网/ 问答频道 /正文

从“如何像计算机科学家一样思考”课程中得到这个问题:

将数据文件 labdata.txt 解释为每一行包含一个 x,y 坐标对。编写一个名为 plotRegression 的函数,该函数从该文件中读取数据,并根据以下公式使用 turtle(海龟)绘制这些点和最佳拟合线:

$$y = \bar{y} + m\,(x - \bar{x})$$

$$m = \frac{\sum x_i y_i - n\,\bar{x}\,\bar{y}}{\sum x_i^2 - n\,\bar{x}^2}$$

http://interactivepython.org/runestone/static/thinkcspy/Files/Exercises.html?lastPosition=1308

我的代码似乎不起作用,我不知道为什么。看起来 Python 把数据当成了 str 而不是 float。

def plotregression(t):
    """Attempt to plot the points in labdata.txt and a best-fit line with turtle t.

    This is the version posted in the question as not working; the
    NOTE(review) comments below mark the lines that prevent it from running.
    """
    labfile = open('labdata.txt','r')
    sumx = 0
    sumy = 0
    count = 0
    sumprod = 0
    sumsqrx =0
    # sumsqrnx is accumulated below but never read afterwards.
    sumsqrnx = 0
    for i in labfile:
        # split() yields a list of strings, one per whitespace-separated field.
        points = i.split()
        print (points)
        t.up()
        # NOTE(review): points[0] and points[1] are still str here, not numbers.
        t.setpos(points[0],points[1])
        t.stamp()
        sumx = sumx + int(points[0])
        sumy = sumy + int(points[1])
        # NOTE(review): points[0] is a str, so this is string repetition rather
        # than a numeric product; the addition on the next line then mixes an
        # int (sumprod) with a str (prod).
        prod = points[0]*int(points[1])
        sumprod = sumprod + prod
        count += 1
        sqrx = int(points[0])**2
        sumsqrx = sumsqrx + sqrx
        sqrnx = int(points[0])**(-2)
        sumsqrnx = sumsqrnx + sqrnx

    avgx = sumx/count
    avgy = sumy/count

    # NOTE(review): count(...) is call syntax applied to an integer, not a
    # multiplication, and the denominator is not parenthesised, so only
    # sumsqrx divides the numerator.
    m = (sumprod - count(avgx*avgy))/sumsqrx- (count(avgx**2))
    print(m)
    # NOTE(review): labfile was already iterated to the end by the loop above
    # and is neither reopened nor rewound before this second pass.
    for bestline in labfile:
        line = bestline.split()
        # NOTE(review): m(...) is call syntax applied to a number.
        y= avgy + m(int(line[0])-avgx)
        t.down()
        t.setpos(0,0)
        # NOTE(review): line[0] is a str here as well.
        t.setpos(line[0],y)

# NOTE(review): kj is not defined anywhere in this snippet.
plotregression(kj)

感谢你的帮助。

Thnx


Tags: 数据txtcountlinepointsintsetpossumy
3条回答

实际上,我自己解决了这个问题,程序终于按我的意图运行了。但我很想知道是否可以删去一些不必要的代码行。我觉得它有点太长了,可能错过了一些能让它更简单的写法。

import turtle

# Create the drawing window and the turtle that plotregression draws with.
wn= turtle.Screen()
kj = turtle.Turtle()
# A small blue circle is used as the turtle's shape.
kj.shape('circle')
kj.turtlesize(0.2)
kj.color('blue')
kj.speed(1)



def plotregression(t):
    """Plot the points in ``labdata.txt`` and their least-squares line.

    Each non-blank line of ``labdata.txt`` must start with an integer x and
    an integer y separated by whitespace; blank lines are skipped.  Every
    point is stamped with the turtle, the fitted y for each x is written to
    ``bestfit.txt`` as ``x<TAB>y`` lines, and the fitted line is then traced
    through those values in file order and labelled.

    The line is ``y = avgy + m * (x - avgx)`` with
    ``m = (sum(x*y) - n*avgx*avgy) / (sum(x*x) - n*avgx**2)``.

    Args:
        t: turtle-like object providing ``up``, ``down``, ``setpos``,
            ``goto``, ``stamp`` and ``write``.

    Raises:
        ZeroDivisionError: if the file contains no points (and normally also
            when every x is identical, since the denominator is then zero).
        ValueError: if an x or y field is not an integer.
    """
    # Read the input once.  Keeping the raw fields lets bestfit.txt reuse the
    # x values exactly as they were written in the input.
    with open('labdata.txt', 'r') as labfile:
        rows = [fields for fields in (raw.split() for raw in labfile) if fields]

    xs = []
    sumx = sumy = sumprod = sumsqrx = 0
    for points in rows:
        print(points)
        x, y = int(points[0]), int(points[1])
        t.up()
        t.setpos(x, y)
        t.stamp()
        xs.append(x)
        sumx += x
        sumy += y
        sumprod += x * y
        sumsqrx += x ** 2

    count = len(rows)
    avgx = sumx / count
    avgy = sumy / count

    m = (sumprod - count * (avgx * avgy)) / (sumsqrx - count * (avgx ** 2))
    print('M is: ', m)

    # Write the fitted values and remember them, so the line can be drawn
    # without reading bestfit.txt back in.
    fitted = []
    with open('bestfit.txt', 'w') as bestfit:
        for points, x in zip(rows, xs):
            y = avgy + m * (x - avgx)
            print('y is:', y)
            bestfit.write(points[0] + '\t' + str(y) + '\n')
            fitted.append((x, y))

    for x, y in fitted:
        t.goto(x, y)
        # The pen is lowered only after the first move, so no segment is
        # drawn from the last stamped point to the start of the line.
        t.down()

    t.write('Best fit line')


# Use data coordinates from (-10, -10) to (120, 120) for the window.
wn.setworldcoordinates(-10,-10,120,120)
# plotregression has no return statement, so figure is always None.
figure = plotregression(kj)


# Leave the window open until it is clicked.
wn.exitonclick()

如果有任何可以删减的地方,请告诉我。

我用交互式 Python 解决了同样的问题。这是我的做法。

import turtle

def plotRegression(data):
    """Plot (x, y) points and their least-squares line in a turtle window.

    The line is ``y = y_bar + m * (x - x_bar)`` with
    ``m = (sum(x*y) - n*x_bar*y_bar) / (sum(x*x) - n*x_bar**2)``.
    The call returns only after the window has been clicked.

    Args:
        data: sequence of rows where ``row[0]`` is x and ``row[1]`` is y,
            each a value accepted by ``float()`` (for example the strings
            produced by ``str.split``).

    Raises:
        ZeroDivisionError: if ``data`` is empty.
    """
    win = turtle.Screen()
    win.bgcolor('pink')

    t = turtle.Turtle()
    t.shape('circle')
    t.turtlesize(0.2)

    # Read from the argument; the function must not depend on a module-level
    # variable that happens to hold the same rows.
    x_list = [float(row[0]) for row in data]
    y_list = [float(row[1]) for row in data]
    n = len(x_list)
    x_bar, y_bar = sum(x_list) / n, sum(y_list) / n
    x_list_square_sum = sum(x ** 2 for x in x_list)
    xy_list_sum = sum(x * y for x, y in zip(x_list, y_list))

    m = (xy_list_sum - n * x_bar * y_bar) / (x_list_square_sum - n * x_bar ** 2)
    # Fitted y value for every x, in input order.
    y_best = [y_bar + m * (x - x_bar) for x in x_list]

    # Scale the window to the data, then stamp each point with the pen up.
    win.setworldcoordinates(0, 0, max(x_list), max(y_list))
    t.penup()
    for x, y in zip(x_list, y_list):
        t.setposition(x, y)
        t.stamp()

    # Trace the fitted line; the pen goes down only after reaching the first
    # fitted point, so nothing is drawn from the origin.
    t.penup()
    t.setposition(0, 0)
    t.color('blue')
    for x, y in zip(x_list, y_best):
        t.setposition(x, y)
        t.pendown()

    win.exitonclick()

# Split every line of the data file into its whitespace-separated fields,
# then hand the rows to plotRegression.
with open('files/labdata.txt', 'r') as source:
    plot_data = list(map(str.split, source))

plotRegression(plot_data)

我晚了 5 年左右,但这是我的一点看法。

问题可能出在这一行:t.setpos(points[0], points[1])。这是让海龟移动到 points[0] 和 points[1] 的字符串值。例如,如果 points[0] 存储的值是 "50",而 points[1] 保存的值是 "60",那么 "50" + "60" 将返回字符串 "5060"。

这一行也可能有问题:prod = points[0]*int(points[1])。这是把 points[0] 中的字符串值与 points[1] 的整数值相乘。沿用前面的值,points[0] 是 "50",int(points[1]) 是 60(是整数 60,而不是字符串 "60")。字符串 "50" 乘以整数 60 得到的不是数值 3000,而是把 "50" 重复 60 次的字符串;随后 sumprod + prod 把整数与字符串相加,就会引发 TypeError。

我是如何解决这个问题的:

import turtle
import math
import statistics as stats


def get_line(means, slope, xlist):
    """Return the best-fit y value for every x in *xlist*.

    Evaluates ``y = ymean + slope * (x - xmean)`` for each x, in order.

    Args:
        means: pair ``(xmean, ymean)``.
        slope: slope ``m`` of the regression line.
        xlist: x coordinates to evaluate.

    Returns:
        A list of y values, one per element of *xlist*.
    """
    xmean, ymean = means[0], means[1]
    # The x offset is measured from the mean, so it is a subtraction.
    return [ymean + slope * (x - xmean) for x in xlist]


def get_mtop(xlist, ylist, n, means):
    """Return the numerator of the slope: sum(x*y) - n * xmean * ymean."""
    # Index ylist by position so it is read once per element of xlist.
    cross_total = sum([x_val * ylist[idx] for idx, x_val in enumerate(xlist)])
    return cross_total - n * (means[0] * means[1])


def get_mbot(xlist, n, means):
    """Return the denominator of the slope: sum(x**2) - n * xmean**2."""
    squares_total = 0
    for value in xlist:
        squares_total += value ** 2
    return squares_total - n * means[0] ** 2


def get_mean(xlist, ylist):
    """Return the means of both lists as the tuple ``(xmean, ymean)``."""
    return tuple(stats.mean(values) for values in (xlist, ylist))


def plotRegression(input_file, input_turtle):
    """Plot the points in *input_file* and draw their regression line.

    Each non-blank line of the file must start with an integer x and an
    integer y separated by whitespace; blank lines are skipped.  The points
    are stamped as circles, then the line computed by ``get_mean``,
    ``get_mtop``, ``get_mbot`` and ``get_line`` is drawn in green through
    the x values in file order.  The means, the slope and every point of
    the line are printed.

    Args:
        input_file: path of the data file.
        input_turtle: turtle used for all drawing; it is hidden at the end.
    """
    # Get a list of xcoor and a list of ycoor.  The with-block closes the
    # file even if a line fails to parse.
    xcoor = []
    ycoor = []
    with open(input_file, 'r') as infile:
        for line in infile:
            coor = line.split()
            if not coor:
                continue
            xcoor.append(int(coor[0]))
            ycoor.append(int(coor[1]))

    input_turtle.shape("circle")
    input_turtle.penup()

    # Plot and count the points
    for x, y in zip(xcoor, ycoor):
        input_turtle.goto(x, y)
        input_turtle.stamp()
    num_points = len(xcoor)

    # Get the mean values of the xcoor and ycoor lists
    means_tup = get_mean(xcoor, ycoor)
    print(means_tup)

    # Get the value for M
    mtop = get_mtop(xcoor, ycoor, num_points, means_tup)
    mbot = get_mbot(xcoor, num_points, means_tup)
    m = mtop / mbot
    print(m)

    # Draw the line: move to its first point with the pen up, then trace it.
    yline = get_line(means_tup, m, xcoor)
    input_turtle.color("green")
    input_turtle.goto(xcoor[0], yline[0])
    input_turtle.pendown()
    for x, y in zip(xcoor, yline):
        print(x, y)
        input_turtle.goto(x, y)

    input_turtle.hideturtle()


def main():
    """Set up a black 100x100 canvas and a purple turtle, then plot the data."""
    # Screen with world coordinates running from (0, 0) to (100, 100).
    screen = turtle.Screen()
    screen.setworldcoordinates(0, 0, 100, 100)
    screen.bgcolor("black")

    # Turtle that plotRegression draws with.
    pen = turtle.Turtle()
    pen.color("purple")

    # Plot the data file, then wait for a click before closing the window.
    data_path = "C:\\Users\\user\\pathtofile\\labdata.txt"
    plotRegression(data_path, pen)

    screen.exitonclick()


if __name__ == "__main__":
    main()

我不知道这是不是正确的斜率,但看起来方向是对的。希望这能帮助那些有同样问题的人。

相关问题 更多 >