我正在尝试用第二个字典(由另一个文件创建)中的信息,来更新从一个 txt 文件创建的字典。我的问题是:每次尝试更新时,结果都会被缩减成单个字典输出 {my updated output},而不是预期的 {my updated output},{my updated output}。


import re
import os
import glob
asps = []

# Directory that holds both the GenBank (*.gb) and the FASTA (*.txt) inputs.
DATA_DIR = '/Users/schneider/Downloads/Reilly'

# The gene ID is the run of word characters that directly follows the first
# occurrence of "gene" in a FASTA header line.
GENE_ID_PATTERN = re.compile(r'(?<=gene)\w+')


def parse_genbank_lines(lines, records):
    """Collect the host/isolate qualifiers of each accession.

    Args:
        lines: iterable of text lines (an open file works).
        records: dict updated in place; maps accession ->
            {'host': str, 'isolate': str}.

    Returns:
        The same ``records`` dict, for convenience.
    """
    # Start with no current record so a file never writes into the record
    # that was left over from the previously parsed file.
    accn = None
    for line in lines:
        if 'ACCESSION' in line:
            # Whitespace split: tolerates tabs and trailing blanks, which
            # split(' ') turned into a wrong or empty accession.
            accn = line.split()[-1]
            records[accn] = {'host': '', 'isolate': ''}
            continue
        if accn is None:
            # Qualifier seen before any ACCESSION line: nothing to attach to.
            continue
        for field in ('host', 'isolate'):
            if field + '=' in line:
                parts = line.split('"')
                # Ignore qualifier lines that carry no quoted value instead
                # of failing with IndexError.
                if len(parts) > 1:
                    records[accn][field] += parts[1]
                break
    return records


def parse_fasta_lines(lines, records):
    """Collect accession and sequence of each gene from FASTA text.

    A line containing '>' is a header; every other line is sequence data
    belonging to the most recent header.

    Args:
        lines: iterable of text lines (an open file works).
        records: dict updated in place; maps gene ID ->
            {'accn': str, 'seq': str}.

    Returns:
        The same ``records`` dict, for convenience.

    Raises:
        ValueError: if a header has no gene ID or no '|'-separated
            accession field.
    """
    gene_id = None
    for line in lines:
        if '>' in line:
            match = GENE_ID_PATTERN.search(line)
            fields = line.split('|')
            if match is None or len(fields) < 2:
                raise ValueError('cannot parse FASTA header: %r' % line)
            gene_id = match.group(0)
            # NOTE(review): a gene ID that repeats (within or across files)
            # replaces the earlier record -- confirm gene IDs are unique.
            records[gene_id] = {
                # Second '|' field, with the ".version" suffix dropped.
                'accn': fields[1].split('.')[0].rstrip(),
                'seq': '',
            }
        elif gene_id is not None:
            # Sequence line: append it without the newline/whitespace.
            records[gene_id]['seq'] += line.strip()
    return records


def merge_annotations(fasta_records, gb_records):
    """Return gene ID -> {'accn', 'seq', 'host', 'isolate'}.

    Each FASTA record is copied and extended with the host/isolate of the
    GenBank record that has the same accession; genes whose accession is
    not in ``gb_records`` get empty strings. Neither input is modified.
    """
    merged = {}
    for gene_id, record in fasta_records.items():
        annotation = gb_records.get(record['accn'], {})
        entry = dict(record)
        entry['host'] = annotation.get('host', '')
        entry['isolate'] = annotation.get('isolate', '')
        merged[gene_id] = entry
    return merged


gbFileNames = glob.glob(os.path.join(DATA_DIR, '*.gb'))

gbDict = {}

for myfile in gbFileNames:
    # 'with' closes each file even if parsing raises.
    with open(myfile, 'r') as currentfile:
        parse_genbank_lines(currentfile, gbDict)

seqFileNames = glob.glob(os.path.join(DATA_DIR, '*.txt'))

fastaDict = {}

for myfile in seqFileNames:
    with open(myfile, 'r') as currentfile:
        parse_fasta_lines(currentfile, fastaDict)

updatedDict = merge_annotations(fastaDict, gbDict)

# Print every merged record, not just the last gene parsed. The
# single-argument parenthesised form behaves the same on Python 2 and 3.
print(updatedDict)

fastaDict 的输出结构:GeneID -> {accn, seq}
gbDict 的输出结构:accn -> {host, isolate}


期望得到的 updatedDict 输出结构:GeneID -> {accn, seq, host, isolate}



