<p>这是一个可以实现你可能需要的部分功能的类。</p>
<pre><code>"""
Read in the genome of E. coli (or whatever) from the given input file and
process it in segments of 100 basepairs at a time.

Usage: 100pairs [-n <pairs>] [-p] <file>

Arguments:
  <file>  Input file.

Options:
  -n, --numpairs <pairs>  Use <pairs> per iteration. [default: 100]
  -p, --partial           Allow partial sequences at end of genome.
"""
import docopt
class GeneBuffer:
    """Sliding window over the sequence data of a text file.

    The first line of the file is kept as a header.  Every following line
    is stripped of surrounding whitespace and appended to one continuous
    buffer.  Each iteration step returns the first ``bases`` characters of
    that buffer and then advances the window by a single character.

    Use the object as a context manager so the file is opened and closed
    deterministically, then iterate over it.

    Args:
        path: Path of the file to read.
        bases: Window length.  Converted with ``int()``; must be >= 1.
        partial: When true, windows shorter than ``bases`` are still
            returned once the file is exhausted; when false, iteration
            stops at the first short window.

    Raises:
        ValueError: If ``bases`` is smaller than 1 (a non-positive window
            would make the iterator return empty strings forever).
    """

    def __init__(self, path, bases=100, partial=True):
        self._buf = None
        # Defined up front so __exit__/__next__ never hit AttributeError
        # on an instance whose __enter__ was not called.
        self._file = None
        self._header = None
        self.bases = int(bases)
        if self.bases < 1:
            raise ValueError("bases must be at least 1, got %r" % (bases,))
        self.partial = partial
        self.path = path

    def __enter__(self):
        """Open the file and consume its first line as the header.

        Raises:
            StopIteration: If the file has no first line at all.
        """
        self._file = open(self.path, 'r')
        try:
            self._header = next(self._file)
        except BaseException:
            # Do not leak the handle when the header cannot be read.
            self._close()
            raise
        return self

    def __exit__(self, *args):
        """Close the file if it is still open."""
        self._close()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next window and advance the buffer by one character.

        Raises:
            StopIteration: When the buffer is empty, or shorter than
                ``bases`` while ``partial`` is false.
            RuntimeError: If iteration starts while no file is open.
        """
        if self._buf is None:
            if self._file is None:
                raise RuntimeError(
                    "GeneBuffer is not open; use it inside a 'with' block")
            self._buf = ''
        self._fill()
        if len(self._buf) < self.bases and (not self._buf or not self.partial):
            raise StopIteration
        window = self._buf[:self.bases]
        # Slide by one character, not by a whole window.
        self._buf = self._buf[1:]
        return window

    def _fill(self):
        """Read lines until the buffer holds a full window or the file ends."""
        while self._file is not None and len(self._buf) < self.bases:
            line = next(self._file, None)
            if line is None:
                # End of file: release the handle right away.
                self._close()
                break
            self._buf += line.strip()

    def _close(self):
        """Close the underlying file once and forget the handle."""
        if self._file is not None:
            self._file.close()
            self._file = None
def analyze(basepairs):
    """Placeholder analysis step: print the given segment on its own line.

    Args:
        basepairs: The segment to report; it is passed to ``print`` as-is.
    """
    print(basepairs, end="\n")
def _first_present(args, keys, default):
    """Return the first non-None value found in *args* under any of *keys*."""
    for key in keys:
        value = args.get(key)
        if value is not None:
            return value
    return default


def main(args):
    """Print the file header, then every window with a running count.

    Args:
        args: Mapping of parsed command-line arguments.  ``'<file>'`` is
            the input path.  The window size is taken from ``'--numpairs'``
            or ``'-n'`` and the partial flag from ``'--partial'`` or
            ``'-p'``.  The space-prefixed spellings ``' numpairs'`` and
            ``' partial'`` are still honoured for existing callers.
            Missing values fall back to 100 and ``False``.
    """
    # docopt names an option by its long form when one is declared and by
    # its short form otherwise; it never produces a space-prefixed key.
    numpairs = _first_present(args, ('--numpairs', '-n', ' numpairs'), 100)
    partial = _first_present(args, ('--partial', '-p', ' partial'), False)
    with GeneBuffer(args['<file>'], bases=numpairs, partial=partial) as genome:
        print("Header: ", genome._header)
        for count, basepairs in enumerate(genome, start=1):
            # The counter stays on the same line as the analysis output.
            print(count, end=' ')
            analyze(basepairs)
# Script entry point: parse the command line against the module docstring
# with docopt and pass the resulting mapping to main().
if __name__ == "__main__":
    args = docopt.docopt(__doc__)
    main(args)
</code></pre>