
2024-05-19 17:07:13 发布

您现在位置:Python中文网/ 问答频道 /正文



#!/usr/bin/env python

import sys
import re

def main():
    translation = {'gca':'A', 'gcc':'A', 'gcg':'A', 'gct':'A', 'tgc':'C', 'tgt':'C', 'gac':'D', 'gat':'D', 'gaa':'E', 'gag':'E', 'ttc':'F', 'ttt':'F', 'gga':'G', 'ggg':'G', 'ggc':'G', 'ggt':'G', 'cac':'H', 'cat':'H', 'ata':'I', 'atc':'I', 'att':'I', 'aaa':'K', 'aag':'K', 'tta':'L', 'ttg':'L', 'cta':'L', 'ctc':'L', 'ctg':'L', 'ctt':'L', 'atg':'M', 'tgg':'W', 'tac':'Y', 'tat':'Y'}
    translation.update(dict.fromkeys(['aac', 'aat'], 'N'))
    translation.update(dict.fromkeys(['cca', 'ccc', 'ccg', 'cct'], 'P'))
    translation.update(dict.fromkeys(['caa', 'cag'], 'Q'))
    translation.update(dict.fromkeys(['aac', 'aat'], 'N'))
    translation.update(dict.fromkeys(['aga', 'agg', 'cga', 'cgc', 'cgg', 'cgt'], 'R'))
    translation.update(dict.fromkeys(['agc', 'agt', 'tca', 'tcc', 'tcg', 'tct'], 'S'))
    translation.update(dict.fromkeys(['aca', 'acc', 'acg', 'act'], 'T'))
    translation.update(dict.fromkeys(['gta', 'gtc', 'gtg', 'gtt'], 'V'))
    translation.update(dict.fromkeys(['taa', 'tga', 'tag'], 'STOP'))
    rna = ""
    f = open(sys.argv[1], 'rU') #gets the code from a file
    for line in f:
        trimmedline = re.sub(r'[^atcgu]','', line)
        rna = rna + trimmedline
    #This part of the code iterates through the rna string one letter at a time
    #At each letter it grabs the next two letters in the string and joins the three letters together -> codon
    #If the codon pattern matches one of two strings it grabs the corresponding value from the translation dict and adds this to the 'primary' string
    #It then triggers a while loop that moves through the rna string, pulling back triplets and looking them up in the translation dict
    #These values are also added to primary
    #When the while loop returns 'STOP', the while loop exits and the for loop begins the process again from the next letter in 'rna'
    #i.e. if the first 'a' in atgcaaca... triggered the while loop, the next letter would be t
    for base in range(len(rna) - 2):
        codon = rna[base] + rna[base + 1] + rna[base + 2]
        if (codon == 'aug' or codon == "atg"):
            print 'Start codon found at position ' + str(base)
            primary = translation[codon]
            reset = 0
            l = 1
            while reset == 0:
                newcodon = rna[base + (3 * l)] + rna[base + (3 * l) + 1] + rna[base + (3 * l) + 2]
                if translation[newcodon] == 'STOP':
                    reset = 1
                    print primary
                    print '------------'
                    primary = primary + translation[newcodon]
                    #print primary
                    l = l + 1            
if __name__ == '__main__':


            for triplet in rna[base + 3:((len(rna)-base)-((len(rna)- base) % 3)): 3]:
                newcodon = rna[triplet] + rna[triplet + 1] + rna[triplet + 2]
#                newcodon = rna[base + (3 * l)] + rna[base + (3 * l) + 1] + rna[base + (3 * l) + 2]
                if translation[newcodon] == 'STOP':
                    reset = 1
                    print primary
                    print '------------'
                    primary = primary + translation[newcodon]
                    l = l + 1  



29581 ttttccgttt acgatatata gtctactctt gtgcagaatg aattctcgta actacatagc
29641 acaagtagat gtagttaact ttaatctcac atagcaatct ttaatcagtg tgtaacatta
29701 gggaggactt gaaagagcca ccacattttc accgaggcca cgcggagtac gatcgagtgt
29761 acagtgaaca atgctaggga gagctgccta tatggaagag ccctaatgtg taaaattaat
29821 tttagtagtg ctatccccat gtgattttaa tagcttctta ggagaatgac aaaaaaaaaa
29881 aaaaaaaaaa aaaaaaaaaa aaa


Tags: theinforbaseupdate序列translationdict
1楼 · 发布于 2024-05-19 17:07:13


while reset == 0 and len(rna) > (base + (3 * l) + 2): 

将防止脚本超出字符串的长度。(base + (3 * l) + 2是您试图从rna字符串添加的最大索引,因此使用它作为退出while循环的测试)

相关问题 更多 >