使用 NumPy 计算文本文件中数据的加权平均值

drive-------------------------------------------------------------------------k='6' seq nelsa s length: 4044778 # 51 1 # 64 1 # 65 1 # 67 2 # 70 1 # 72 1 # 73 1 # 77 1 # 79 1 # 86 1 # 88 2 # 89 1 # 92 1 # 94 1 # 95 1 # 96 2 # 100 1 # 103 1 # 105 1 # 108 1 # 112 1 # 119 1 # 123 1 # 126 1 # 127 1 # 129 1 # 130 1 # 133 3 # 134 1 # 135 1 # 138 2 # 139 1 # 140 1 # 141 1 # 142 1 # 143 1 # 144 2 # 145 1 # 148 2 # 150 3 # 151 1 # 152 1 # 153 1 # 154 1 # 155 1 # 156 1 # 157 1 # 159 3 # 160 1 # 161 1 # 162 1 # 163 1 # 164 1 # 165 2 # 167 2 # 168 1 # 169 1 # 170 1 # 172 2 # 173 1 # 174 1 # 175 1 -------------------------------------------------------------------------k='7' seq nelsa s length: 4044778 # 4 1 # 5 1 # 8 1 # 9 1 # 10 3 # 11 3 # 12 4 # 13 7 # 14 6 # 15 5 # 16 11 # 17 7 # 18 14 # 19 8 # 20 15 # 21 13 # 22 10 # 23 6 # 24 22 # 25 14 # 26 19 # 27 17 # 28 20 # 29 25 # 30 15 # 31 22 # 32 18 # 33 23 # 34 30 # 35 24 # 36 35 # 37 39 # 38 27 # 39 33 # 40 36 # 41 34 # 42 40 # 43 43 # 44 44 # 45 44 # 46 43 # 47 50 # 48 51 # 49 54 # 50 55 # 51 44 # 52 49 # 53 56 # 54 35 # 55 52 # 56 47 # 57 48 # 58 65 # 59 56 # 60 53 # 61 54 # 62 66 # 63 47 # 64 61 # 65 50 # 66 46 # 67 69 # 68 65 # 69 66 # 70 59 # 71 59 # 72 55 # 73 73 # 74 91 # 75 73 # 76 56 # 77 66 # 78 63 # 79 67 # 80 78 # 81 51 # 82 69 # 83 60 # 84 64 # 85 73 # 86 58 # 87 60 # 88 64 # 89 73 # 90 63 # 91 65 # 92 59 # 93 69 # 94 67 # 95 73 # 96 50 # 97 53 # 98 68 # 99 65 # 100 63 # 101 55 # 102 73 # 103 76 # 104 66 # 105 70 # 106 75 # 107 66 # 108 56 # 109 49 # 110 68 # 111 52 # 112 66 # 113 67 # 114 66 # 115 52 # 116 61 # 117 59 # 118 65 # 119 67 # 120 56 # 121 60 # 122 64 # 123 53 # 124 59 # 125 66 # 126 58 # 127 77 # 128 51 # 129 67 # 130 53 # 131 56 # 132 62 # 133 64 # 134 56 # 135 42 # 136 71 # 137 57 # 138 53 # 139 52 # 140 65 # 141 59 # 142 61 # 143 60 # 144 64

#! /usr/bin/python
# Asker's first attempt: load the whole .mdistr file as space-separated rows
# and print the third field of the eleventh row.
import csv

import numpy

# csv.reader needs a text-mode handle on Python 3 (newline='' per the csv
# docs); the `with` block closes the file, and the handle no longer shadows
# the builtin name `file`.
with open('Acetobacterium_woodii_DSM_1030.mdistr', newline='') as handle:
    table = list(csv.reader(handle, delimiter=' '))

a = table[10]
print(a[2])

2条回答

网友

1楼 · 编辑于 2024-09-30 18:29:27

我相信有一个更优雅的答案，但这是可行的：

#parse data
import re

# A header line carries the k value as k='6'; the quote is optional and the
# number may have any count of digits.
_K_HEADER = re.compile(r"k=['\"]?(\d+)")


def parse_blocks(lines):
    """Group '# x y' data rows under the k value of the preceding header.

    Args:
        lines: iterable of text lines (an open file works), with or without
            trailing newlines.

    Returns:
        A list of ``[kval, rows]`` entries in file order, where ``rows`` is a
        list of ``[x, y]`` integer pairs.

    Lines that are neither a ``k=`` header nor a ``# x y`` row (blank lines,
    dash separators, other header text) are ignored, as are data rows that
    appear before the first header.
    """
    blocks = []
    for line in lines:
        fields = line.split()
        if fields and fields[0] == '#':
            # split() discards the newline itself, so the last digit is never
            # chopped off when the final line has no trailing newline.
            if blocks and len(fields) >= 3:
                blocks[-1][1].append([int(fields[1]), int(fields[2])])
            continue
        header = _K_HEADER.search(line)
        if header:
            blocks.append([int(header.group(1)), []])
    return blocks


def weighted_mean(rows):
    """Return sum(x*y) / sum(y) for ``[x, y]`` pairs, as a float.

    Returns NaN when the total weight is zero (for example an empty block)
    instead of raising ZeroDivisionError.
    """
    total_weight = sum(y for _, y in rows)
    if total_weight == 0:
        return float('nan')
    # float() keeps the division exact on Python 2 as well.
    return sum(x * y for x, y in rows) / float(total_weight)


def main(path='data.txt'):
    """Read ``path`` and print the weighted mean of every k block."""
    with open(path) as f:
        alldata = parse_blocks(f)

    #analyse data
    for kval, rows in alldata:
        mean = weighted_mean(rows)
        print('The mean of k-{0} is :{1}'.format(kval, mean))


if __name__ == '__main__':
    main()

NumPy 本应是我的首选，但如果各数据块的大小不固定，用它处理起来会更麻烦。希望这对你有帮助。

网友

2楼 · 编辑于 2024-09-30 18:29:27

简单的解决方案如下：

#! /usr/bin/python
import numpy as np


def iter_blocks(lines):
    """Yield ``(values, weights)`` for each dash-delimited block of rows.

    A line starting with "-" closes the current block; a line starting with
    "#" is split on whitespace and contributes its second field as a value
    and its third as a weight. Every other line is ignored.

    Blocks with no rows are skipped. The input starts with a separator, so
    reporting on every separator would call min() on an empty list and
    raise ValueError.
    """
    values, weights = [], []
    for line in lines:
        if line.startswith("-"):
            if values:
                yield values, weights
            # Rebind to fresh lists so the block just yielded is not mutated.
            values, weights = [], []
        elif line.startswith("#"):
            row = line.split()
            values.append(int(row[1]))
            weights.append(int(row[2]))
    # The last block is terminated by end of input, not by a separator.
    if values:
        yield values, weights


def summarize(values, weights):
    """Return ``(min, max, weighted average)`` for one non-empty block."""
    return (min(values), max(values),
            float(np.average(values, weights=weights)))


def main(path='Acetobacterium_woodii_DSM_1030.mdistr'):
    """Print min, max and weighted average for every block in ``path``."""
    with open(path, 'r') as f:
        # read file
        lines = f.read().splitlines()

    for values, weights in iter_blocks(lines):
        # %.2f rather than %d: the weighted average is generally fractional
        # and %d would silently truncate it.
        print("Min: %d, Max: %d, Avg: %.2f" % summarize(values, weights))


if __name__ == '__main__':
    main()

相关问题更多 >

编程相关推荐

热门问题

热门文章