如何用所需字符的特定百分比分隔字符串的各个部分？

import re


def converttostr(input_seq, seperator):
    """Join the strings in *input_seq* into one string.

    Args:
        input_seq: Iterable of strings to join.
        seperator: String placed between consecutive items.

    Returns:
        The joined string.
    """
    return seperator.join(input_seq)


# Read the sequence from standard input (empty prompt).
DNA_seq = input("")

# Replace every "T" with "U", then swap each base for its partner
# (A <-> U, G <-> C) in a single translate pass.
RNA_seq = DNA_seq.replace("T", "U")
iRNA = RNA_seq.translate(str.maketrans({"A": "U", "G": "C", "U": "A", "C": "G"}))

# Window length of the substrings to enumerate.
K = 21

# Every contiguous substring of length K, left to right. The range is empty
# when the sequence is shorter than K, so the result is then an empty list.
iRNA_comb = [iRNA[i:i + K] for i in range(len(iRNA) - K + 1)]
print("All iRNA combinations", iRNA_comb)

seperator = ', '
LtS = converttostr(iRNA_comb, seperator)
print("List converted to string: ", LtS)

# Split on a literal " CG " or on any run of characters that are neither
# ASCII letters nor spaces. With the ", " separator above, the comma is
# consumed and the following space stays attached to the next item.
CG = re.split(" CG |[^a-zA-Z ]+", LtS)
print("siRNA with CG founded", CG)

3条回答

网友

1楼 · 编辑于 2024-10-03 09:07:35

此代码统计字符串中 GC 或 CG 组合出现的次数，并把占比在 30%–50% 之间的序列筛选到输出数组中

我还打印了为不同测试用例计算的百分比，供您参考

代码：

import regex as re

# Sample sequences used to exercise the percentage calculation below.
siRNAs = [
    'GUUUCCCTTTG',
    'GCTTTUGCTUT',
    'GCTUGCUTGCU',
    'CGTUCGUTCGU',
    'GCTUCGUTCGU',
    'CGCGTUUTCGU',
    'GCGCTUUTGCU',
    'GCGCGCGCTUUTGCU',
    'GCGCGCGCCGCGCGTUUTGCU',
]

def get_count(mstring, sub1, sub2):
    """Count matches of *sub1* plus the *sub2* matches that start outside them.

    Both patterns are searched with ``re.finditer``. Every match of *sub1*
    is counted. A match of *sub2* is counted only when its start index does
    not lie inside the span of any *sub1* match.

    Args:
        mstring: The string to search.
        sub1: Pattern whose matches are always counted.
        sub2: Pattern whose matches are counted conditionally.

    Returns:
        The combined count as an int.
    """
    primary_spans = [found.span() for found in re.finditer(sub1, mstring)]
    total = len(primary_spans)
    for found in re.finditer(sub2, mstring):
        begin = found.start()
        starts_inside_primary = False
        for lo, hi in primary_spans:
            if lo <= begin < hi:
                starts_inside_primary = True
                break
        if not starts_inside_primary:
            total += 1
    return total


# Report the paired-base percentage of every sequence, remembering each value
# so the filter below can reuse it.
percentages = []
for seq in siRNAs:
    pct = (get_count(seq, "GC", "CG") * 2) / len(seq) * 100
    percentages.append(pct)
    print('siRNA: ', seq, ' percentage: ', pct, '%')


# Keep only the sequences whose percentage lies in the inclusive 30-50 range.
output = []
for seq, pct in zip(siRNAs, percentages):
    if 30 <= pct <= 50:
        output.append(seq)
print('output: ', output)

输入：

['GUUUCCCTTTG', 'GCTTTUGCTUT', 'GCTUGCUTGCU', 'CGTUCGUTCGU', 'GCTUCGUTCGU', 'CGCGTUUTCGU', 'GCGCTUUTGCU', 'GCGCGCGCTUUTGCU', 'GCGCGCGCCGCGCGTUUTGCU']

输出：

siRNA:  GUUUCCCTTTG  percentage:  0.0 %
siRNA:  GCTTTUGCTUT  percentage:  36.36363636363637 %
siRNA:  GCTUGCUTGCU  percentage:  54.54545454545454 %
siRNA:  CGTUCGUTCGU  percentage:  54.54545454545454 %
siRNA:  GCTUCGUTCGU  percentage:  54.54545454545454 %
siRNA:  CGCGTUUTCGU  percentage:  54.54545454545454 %
siRNA:  GCGCTUUTGCU  percentage:  54.54545454545454 %
siRNA:  GCGCGCGCTUUTGCU  percentage:  66.66666666666666 %
siRNA:  GCGCGCGCCGCGCGTUUTGCU  percentage:  76.19047619047619 %

output:  ['GCTTTUGCTUT']

网友

2楼 · 编辑于 2024-10-03 09:07:35

# Read the sequence from standard input (empty prompt).
DNA_seq = input("")

# Replace every "T" with "U".
RNA_seq = DNA_seq.replace("T", "U")

# Swap each base for its partner (A <-> U, G <-> C) in one translate pass.
iRNA = RNA_seq.translate(str.maketrans({"A": "U", "G": "C", "U": "A", "C": "G"}))

# Window length of the substrings to enumerate.
K = 4

# Every contiguous substring of length K, left to right. The range is empty
# when the sequence is shorter than K, so the result is then an empty list.
iRNA_comb = [iRNA[i:i + K] for i in range(len(iRNA) - K + 1)]
print("All iRNA combinations", iRNA_comb)

siRNAs = iRNA_comb

# Percentage of characters in each window that are "C" or "G"; computed once
# and reused for both the report and the filter.
gc_percentages = [(x.count("C") + x.count("G")) / len(x) * 100 for x in siRNAs]

for x, pct in zip(siRNAs, gc_percentages):
    print('siRNA: ', x, ' percentage: ', pct, '%')

# Keep the windows whose percentage lies in the inclusive 30-50 range.
output = [x for x, pct in zip(siRNAs, gc_percentages) if 30 <= pct <= 50]

print('output: ', output)

网友

3楼 · 编辑于 2024-10-03 09:07:35

我试图弄明白这段代码的作用，但没能完全确定。你以前是不是用过其他语言？其实只需说明函数要接收的输入数据和期望的输出即可

如果符合条件（30%-50%），则返回true。然后你可以把它添加到列表或其他任何地方

def foo(seq: str) -> bool:
    """Return True when "GC" pairs cover strictly between 30% and 50% of *seq*.

    The percentage is the number of non-overlapping "GC" occurrences, times
    two (each occurrence spans two characters), divided by the length of
    *seq*. Only "GC" is counted; "CG" occurrences are not counted separately.
    Both bounds are exclusive.

    Args:
        seq: The sequence to check (e.g. a 21-character candidate).

    Returns:
        True if the percentage is greater than 30 and less than 50, else
        False. An empty sequence returns False.
    """
    # An empty sequence has no content to measure; avoid dividing by zero.
    if not seq:
        return False
    return 30 < (seq.count("GC") * 2) / len(seq) * 100 < 50

相关问题更多 >

编程相关推荐

热门问题

热门文章