如何翻译序列（字符串）并以 dict 形式返回结果，从而得知每种氨基酸出现的频率？

# Codon table: maps every three-letter codon (written with U) to the name of
# the amino acid it encodes. The three stop codons are mapped to '---'.
DNA_Codons = {
    # U
    'UUU': 'Phenylalanin', 'UCU': 'Serin', 'UAU': 'Tyrosin', 'UGU': 'Cystein',  # UxU
    'UUC': 'Phenylalanin', 'UCC': 'Serin', 'UAC': 'Tyrosin', 'UGC': 'Cystein',  # UxC
    'UUA': 'Leucin', 'UCA': 'Serin', 'UAA': '---', 'UGA': '---',  # UxA
    'UUG': 'Leucin', 'UCG': 'Serin', 'UAG': '---', 'UGG': 'Tryptophan',  # UxG
    # C
    'CUU': 'Leucin', 'CCU': 'Prolin', 'CAU': 'Histidin', 'CGU': 'Arginin',  # CxU
    'CUC': 'Leucin', 'CCC': 'Prolin', 'CAC': 'Histidin', 'CGC': 'Arginin',  # CxC
    'CUA': 'Leucin', 'CCA': 'Prolin', 'CAA': 'Glutamin', 'CGA': 'Arginin',  # CxA
    'CUG': 'Leucin', 'CCG': 'Prolin', 'CAG': 'Glutamin', 'CGG': 'Arginin',  # CxG
    # A
    'AUU': 'Isoleucin', 'ACU': 'Threonin', 'AAU': 'Asparagin', 'AGU': 'Serin',  # AxU
    'AUC': 'Isoleucin', 'ACC': 'Threonin', 'AAC': 'Asparagin', 'AGC': 'Serin',  # AxC
    'AUA': 'Isoleucin', 'ACA': 'Threonin', 'AAA': 'Lysin', 'AGA': 'Arginin',  # AxA
    'AUG': 'Met', 'ACG': 'Threonin', 'AAG': 'Lysin', 'AGG': 'Arginin',  # AxG
    # G
    'GUU': 'Valin', 'GCU': 'Alanin', 'GAU': 'Asparaginsäure', 'GGU': 'Glycin',  # GxU
    'GUC': 'Valin', 'GCC': 'Alanin', 'GAC': 'Asparaginsäure', 'GGC': 'Glycin',  # GxC
    'GUA': 'Valin', 'GCA': 'Alanin', 'GAA': 'Glutaminsäure', 'GGA': 'Glycin',  # GxA
    'GUG': 'Valin', 'GCG': 'Alanin', 'GAG': 'Glutaminsäure', 'GGG': 'Glycin',  # GxG
}


def translate_code(seq, init_pos=0):
    """Return the set of distinct amino acids encoded by ``seq``.

    The sequence is read in non-overlapping three-letter codons starting at
    ``init_pos``; an incomplete codon at the end is ignored.

    Because the result is a set, an amino acid that occurs several times
    appears only once, so no frequency information is kept.

    Args:
        seq: Codon string using the letters found in ``DNA_Codons``.
        init_pos: Index of the first letter of the first codon.

    Returns:
        A set of amino acid names ('---' is included if a stop codon occurs).

    Raises:
        KeyError: If a codon is not a key of ``DNA_Codons``.
    """
    return {
        DNA_Codons[seq[pos:pos + 3]]
        # Stopping at len(seq) - 2 skips a trailing partial codon.
        for pos in range(init_pos, len(seq) - 2, 3)
    }


print(translate_code("ACAAUUGACACAUAUCGUCGAGGGUGGCCA"))

3条回答

网友

1楼 · 编辑于 2024-06-17 16:16:32

您可以使用collections模块中的Counter类：

为了排除你不想要的组合（例如终止密码子），我建议干脆不要把它们放进密码子字典里。我还把你的字典反转成了“氨基酸 → 密码子列表”的形式，以减少重复，使它更易于维护。

设置：

# Amino acid name -> list of every codon that encodes it.
# Codons that should be ignored when counting are simply not listed here.
acids = {
    'Alanin': ['GCA', 'GCC', 'GCG', 'GCU'],
    'Arginin': ['AGA', 'AGG', 'CGA', 'CGC', 'CGG', 'CGU'],
    'Asparagin': ['AAC', 'AAU'],
    'Asparaginsäure': ['GAC', 'GAU'],
    'Cystein': ['UGC', 'UGU'],
    'Glutamin': ['CAA', 'CAG'],
    'Glutaminsäure': ['GAA', 'GAG'],
    'Glycin': ['GGA', 'GGC', 'GGG', 'GGU'],
    'Histidin': ['CAC', 'CAU'],
    'Isoleucin': ['AUA', 'AUC', 'AUU'],
    'Leucin': ['CUA', 'CUC', 'CUG', 'CUU', 'UUA', 'UUG'],
    'Lysin': ['AAA', 'AAG'],
    'Met': ['AUG'],
    'Phenylalanin': ['UUC', 'UUU'],
    'Prolin': ['CCA', 'CCC', 'CCG', 'CCU'],
    'Serin': ['AGC', 'AGU', 'UCA', 'UCC', 'UCG', 'UCU'],
    'Threonin': ['ACA', 'ACC', 'ACG', 'ACU'],
    'Tryptophan': ['UGG'],
    'Tyrosin': ['UAC', 'UAU'],
    'Valin': ['GUA', 'GUC', 'GUG', 'GUU'],
}

# Inverted lookup table: codon -> amino acid name.
codons = dict(
    (triplet, amino_acid)
    for amino_acid, triplets in acids.items()
    for triplet in triplets
)

计数：

from collections import Counter
def translate_code(seq, init_pos=0):
    """Count how often each amino acid is encoded in ``seq``.

    The sequence is read in three-letter steps starting at ``init_pos``.
    Any slice that is not a key of ``codons`` (including a short slice at
    the end of the sequence) is skipped without error.

    Returns:
        A ``Counter`` mapping amino acid name to number of occurrences.
    """
    tally = Counter()
    for start in range(init_pos, len(seq), 3):
        triplet = seq[start:start + 3]
        if triplet not in codons:
            continue  # unknown or incomplete codon: ignore it
        tally[codons[triplet]] += 1
    return tally

输出：

# Tally the amino acids of the sample sequence and print the result.
print(translate_code("ACAAUUGACACAUAUCGUCGAGGGUGGCCA"))

Counter({'Threonin': 2, 'Arginin': 2, 'Isoleucin': 1, 'Asparaginsäure': 1, 'Tyrosin': 1, 'Glycin': 1, 'Tryptophan': 1, 'Prolin': 1})

注意，Counter类实际上是dict的子类。如果需要，您可以用 dict(...) 将其转换回普通字典。

网友

2楼 · 编辑于 2024-06-17 16:16:32

def translate_code(seq, init_pos=0):
    """Count how often each amino acid is encoded in ``seq``.

    The sequence is read in non-overlapping three-letter codons starting at
    ``init_pos``; an incomplete codon at the end is ignored. Stop codons get
    no special handling: every codon is counted under whatever label
    ``DNA_Codons`` gives it.

    Returns:
        A plain dict mapping amino acid name to number of occurrences.

    Raises:
        KeyError: If a codon is not a key of ``DNA_Codons``.
    """
    frequencies = {}
    for start in range(init_pos, len(seq) - 2, 3):
        amino_acid = DNA_Codons[seq[start:start + 3]]
        frequencies[amino_acid] = frequencies.get(amino_acid, 0) + 1
    return frequencies

这应该完全按照您指定的方式工作

网友

3楼 · 编辑于 2024-06-17 16:16:32

import numpy as np    



def count_amino_acids(seq, init_pos=0, codon_table=None, stop_marker='---'):
    """Count the amino acids encoded by ``seq`` up to the first stop codon.

    The sequence is read in non-overlapping three-letter codons starting at
    ``init_pos``. Counting ends at the first codon whose table entry equals
    ``stop_marker``; an incomplete codon at the end of ``seq`` is ignored.

    Args:
        seq: Codon string.
        init_pos: Index of the first letter of the first codon.
        codon_table: Mapping of codon -> amino acid name. Defaults to the
            module-level ``DNA_Codons`` table.
        stop_marker: Table value that marks a stop codon ('---' is the
            value ``DNA_Codons`` uses).

    Returns:
        A dict mapping amino acid name to number of occurrences.

    Raises:
        KeyError: If a codon is not a key of the codon table.
    """
    if codon_table is None:
        codon_table = DNA_Codons

    count_dict = {}

    # Stopping at len(seq) - 2 avoids looking up a trailing partial codon.
    for i in range(init_pos, len(seq) - 2, 3):
        aa = codon_table[seq[i:i + 3]]  # look up the amino acid

        if aa == stop_marker:  # stop at stop codons
            break

        # .get() supplies 0 the first time an amino acid is seen, so the
        # increment no longer fails on a missing key.
        count_dict[aa] = count_dict.get(aa, 0) + 1

    return count_dict
    
# Example call on the sample sequence; the result is neither printed nor stored.
count_amino_acids("ACAAUUGACACAUAUCGUCGAGGGUGGCCA")

相关问题更多 >

编程相关推荐

热门问题

热门文章