对数据进行子集化，并对每个文件中的行进行计数

import re


def file_len(filename):
    """Return the zero-based index of the last line in the file at *filename*.

    The value is the line count minus one, because ``enumerate`` starts at 0.
    If the file has no lines the loop body never runs, ``i`` is never bound,
    and the ``return`` raises ``UnboundLocalError``.

    NOTE(review): ``filename`` is handed straight to ``open()``, so it must be
    a path string. The two calls at the bottom of this script pass already-open
    file objects instead, which is what raises the reported
    ``TypeError: coercing to Unicode: need string or buffer, file found``.
    """
    with open(filename, mode = 'r', buffering = 1) as f:
        for i, line in enumerate(f):
            pass
    return i


inputfile = open("test.txt", 'r')
outputfile_f1 = open("f1.txt", 'w')
outputfile_f2 = open("f2.txt", 'w')

matchlines = inputfile.readlines()
outputfile_f1.write(matchlines[0])  # add the header to "f1.txt"

# matchlines[0] (the header) is also visited by this loop, so it is written a
# second time: to f2.txt unless it happens to match one of the patterns.
for line in matchlines:
    # re.match anchors at the start of the line; "sire*" means "sir" followed
    # by zero or more "e", and "dam*" means "da" followed by zero or more "m".
    if re.match("sire*", line):
        outputfile_f1.write(line)
    elif re.match("dam*", line):
        outputfile_f1.write(line)
    else:
        outputfile_f2.write(line)

# NOTE(review): both arguments below are file objects opened for writing, not
# file names, and neither file has been closed yet at this point.
print 'the number of individuals in f1 is:', file_len(outputfile_f1)
print 'the number of individuals in f2 is:', file_len(outputfile_f2)

inputfile.close()
outputfile_f1.close()
outputfile_f2.close()

Traceback (most recent call last):
  File "./subset_individuals_based_on_ID.py", line 28, in <module>
    print 'the number of individuals in f1 is:', file_len(outputfile_f1)
  File "./subset_individuals_based_on_ID.py", line 7, in file_len
    with open(filename, mode = 'r', buffering = 1) as f:
TypeError: coercing to Unicode: need string or buffer, file found

2条回答

网友

1楼 · 编辑于 2024-10-03 06:18:47

您还可以使用itertools.tee将输入拆分为多个流并分别处理它们。

import itertools

def write_file(match, source, out_file):
    """Write the header plus every matching line of *source* to *out_file*.

    The first item of *source* is treated as a header and is always written,
    without being tested against *match*. Each later line is written only if
    ``match(line)`` is truthy.

    Args:
        match: Callable taking one line and returning a truthy value for
            lines that should be kept.
        source: Iterable of lines (each expected to carry its own newline).
        out_file: Path of the file to create or overwrite.

    Returns:
        The number of matching lines written, not counting the header.
        This is 0 for an empty or header-only *source*.
    """
    lines = iter(source)
    count = 0
    with open(out_file, 'w') as output:
        # Handle the header explicitly instead of encoding "header not yet
        # seen" as a negative counter; an empty source then reports 0, not -1.
        header = next(lines, None)
        if header is not None:
            output.write(header)

        for line in lines:
            if match(line):
                output.write(line)
                count += 1

    print('Wrote {0} lines to {1}'.format(count, out_file))
    return count


# Read test.txt once, duplicate the line stream with itertools.tee, and let
# each copy feed one output file: f1.txt receives the lines that do not start
# with 'IND', f2.txt receives the lines that do.
with open('test.txt', 'r') as handle:
    non_ind_lines, ind_lines = itertools.tee(handle.readlines())

    jobs = (
        (lambda row: not row.startswith('IND'), non_ind_lines, 'f1.txt'),
        (lambda row: row.startswith('IND'), ind_lines, 'f2.txt'),
    )
    for keep, stream, target in jobs:
        write_file(keep, stream, target)

编辑-删除多余的elif

网友

2楼 · 编辑于 2024-10-03 06:18:47

我可能误解了你的意思，但我想你只是想这样做：

>>> with open('test', 'r') as infile:
...   with open('test_out1', 'w') as out1, open('test_out2', 'w') as out2:
...     header, *lines = infile.readlines()
...     out1.write(header)
...     out2.write(header)
...     for line in lines:
...       if line.startswith('sir') or line.startswith('dam'):
...         out1.write(line)
...       else:
...         out2.write(line)

test之前的内容：

^{pr2}$

test_out1之后的内容：

ID,MARK1,MARK2
sire1,AA,BB
dam2,AB,AA
sire3,AB,-
dam1,AA,BB

test_out2之后的内容：

ID,MARK1,MARK2
IND4,BB,AB
IND5,BB,AA

相关问题更多 >

编程相关推荐

热门问题

热门文章