<p>如果有人感兴趣,下面是我借助收到的评论写出的初学者解决方案:</p>
<pre><code>import sys, re
def _split_location(location):
    """Return the (start, end) strings of a simple ``start..end`` location.

    A surrounding ``complement(...)`` wrapper is removed first. A location
    that contains no ``..`` yields the same text for both start and end.
    """
    if location.startswith("complement(") and location.endswith(")"):
        location = location[len("complement("):-1]
    bounds = location.split("..")
    start = bounds[0]
    end = bounds[1] if bounds[1:] else start
    return start, end


def _quoted_value(line):
    """Return the text following the first double quote in *line*.

    The value stops at the next double quote, or at the end of the line when
    the quote is not closed on that line. Returns None if *line* has no
    double quote at all.
    """
    pieces = line.split('"')
    return pieces[1] if pieces[1:] else None


def parse_gene_annotation(lines):
    """Build one record per ``FT gene`` line found in *lines*.

    *lines* is any iterable of text lines (an open file works). A line opens
    a new record when its first two whitespace-separated tokens are ``FT``
    and ``gene`` and a location follows. Matching on tokens instead of fixed
    column offsets makes the parser independent of how many spaces separate
    the columns.

    Every following line, up to the next gene line, may fill in the record:
    a line containing ``/locus_tag`` sets "locus" and a line containing
    ``/product`` sets "product". Both start as None for each record, so a
    missing qualifier never inherits the value of the previous gene.

    Returns a list of dicts with the keys "locus", "genestart", "geneend"
    and "product"; coordinates are kept as strings.
    """
    records = []
    current = None
    for line in lines:
        fields = line.split(None, 2)
        if fields[:2] == ["FT", "gene"] and fields[2:]:
            start, end = _split_location(fields[2].strip())
            current = {
                "locus": None,
                "genestart": start,
                "geneend": end,
                "product": None,
            }
            records.append(current)
        elif current is not None:
            # Qualifiers of the features that follow a gene (for example its
            # CDS) belong to that gene until the next gene line appears.
            if "/locus_tag" in line:
                current["locus"] = _quoted_value(line)
            if "/product" in line:
                current["product"] = _quoted_value(line)
    return records


if __name__ == "__main__":
    # open() inside a with-block runs on Python 2 and 3 and always closes
    # the file; the file is streamed line by line instead of being
    # concatenated into one big string first.
    with open("example.embl", "r") as annot:
        annotation = parse_gene_annotation(annot)
    print("Finished!")
</code></pre>