我运行了下面的代码并比较了write2gff
和run
方法中的mrnas
。结果是run
方法中的mrnas
包含1个list元素。另一方面,write2gff
方法中的mrnas
包含0个list元素。请参阅以下代码的打印输出:
NbV1Ch08 g63 1
Prepare to write!!!
NbV1Ch08 g63 0
如何修复write2gff
中的mrnas
包含与run
方法相同的1元素?你知道吗
#!/usr/bin/python3
import click
from collections import defaultdict
from collections import OrderedDict
from Bio.Blast import NCBIXML
from BCBio import GFF
from Bio.Seq import Seq
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation
class Gene():
def __init__(self, type, start, end, strand, qualifiers):
self.type = type
self.start = start
self.end = end
self.strand = strand
self.qualifiers = qualifiers
def write2gff(genes, mrnas, ref_info):
print("Prepare to write!!!")
with open("data/test.gff3", "w") as out_keep:
for seqID in genes.keys():
rec = SeqRecord(Seq(ref_info[seqID] * 'A'), seqID)
for gene_id in genes[seqID].keys():
gene = genes[seqID][gene_id]
top_feature = SeqFeature(FeatureLocation(gene.start, gene.end), type="gene",
strand=gene.strand, qualifiers=gene.qualifiers)
print(seqID, gene_id, len(mrnas[seqID][gene_id]))
top_feature.sub_features = mrnas[seqID][gene_id]
rec.features.append([top_feature])
GFF.write([rec], out_keep)
@click.command()
@click.option('--gff3', help="Provide GFF3 file", required=True)
@click.option('--keep', help="Keep GFF3 file", required=True)
@click.option('--reject', help="Reject GFF3 file", required=True)
@click.option('--xml', help="Blast XML file", required=True)
@click.option('--fasta', help="Blast XML file", required=True)
def run(gff3, keep, reject, xml, fasta):
ref_info = {i.id: len(i) for i in SeqIO.parse(fasta, "fasta")}
#blast_hits = retrieve_hits_data(xml)
test={}
test["g63.t3"] = "B3 domain-containing protein Os03g0120900"
genes = OrderedDict()
mrnas = defaultdict()
for rec in GFF.parse(gff3):
for gene in rec.features:
mrnas[rec.id] = defaultdict(list)
for mrna in gene.sub_features:
if mrna.id in test:
mrna.qualifiers["Note"] = test[mrna.id] #blast_hit.hit_def
mrnas[rec.id][gene.id].append(mrna)
print(rec.id, gene.id, len(mrnas[rec.id][gene.id]))
if len(mrnas[rec.id][gene.id]) > 0:
genes[rec.id] = defaultdict()
genes[rec.id][gene.id] = Gene(gene.type, gene.location.start, gene.location.end,
gene.location.strand, gene.qualifiers)
write2gff(genes, mrnas, ref_info)
if __name__ == '__main__':
run()
目前没有回答
相关问题 更多 >
编程相关推荐