擅长:python、mysql、java
<p>下面是一个使用正则表达式(regex)、应该可行的方法。在这个解决方案中,我先找出序列中所有出现的酶识别串,再根据每个酶对应的切割索引对序列进行拆分。</p>
def digestfragmentwithenzyme(seqs, enzymes):
    """Split each sequence at the cut sites of the given enzymes.

    Every non-overlapping occurrence of an enzyme's recognition string is
    located from left to right, and the sequence is cut at
    ``match start + cut index`` for the enzyme that matched.

    Args:
        seqs: Iterable of sequence strings to digest.
        enzymes: Iterable of ``(recognition_string, cut_index)`` pairs (or
            anything else ``dict()`` accepts). ``cut_index`` is the offset,
            counted from the start of the recognition string, at which the
            sequence is cut.

    Returns:
        A list with one entry per input sequence; each entry is the list of
        fragments of that sequence, in order. A sequence containing no
        recognition site yields a single fragment equal to the sequence.

    Example:
        >>> digestfragmentwithenzyme(
        ...     ['AATTCCGGTCGGGGCTCGGGGG', 'AAAGCAAAATCAAAAAAGCAAAAAATC'],
        ...     [('TC', 1), ('GC', 1)])
        [['AATT', 'CCGGT', 'CGGGG', 'CT', 'CGGGGG'], ['AAAG', 'CAAAAT', 'CAAAAAAG', 'CAAAAAAT', 'C']]
    """
    import re  # local import keeps this snippet self-contained

    # Recognition string -> cut offset. Built once so that ``enzymes`` may be
    # a one-shot iterable; the pattern below is derived from the dict keys.
    cut_offsets = dict(enzymes)
    if not cut_offsets:
        # An empty alternation would match the empty string at every
        # position; with no enzymes nothing is cut.
        return [[seq] for seq in seqs]

    # Escape each recognition string so it is matched literally: the matched
    # text must be a key of ``cut_offsets``. E.g. "TC|GC" for TC and GC.
    # Compiled once because the pattern does not depend on the sequence.
    site_pattern = re.compile(
        '|'.join(re.escape(site) for site in cut_offsets)
    )

    out = []
    for seq in seqs:
        fragments = []
        start = 0
        for match in site_pattern.finditer(seq):
            cut = match.start() + cut_offsets[match.group(0)]
            fragments.append(seq[start:cut])
            start = cut
        # Whatever follows the last cut is the final fragment.
        fragments.append(seq[start:])
        out.append(fragments)
    return out
</code></pre>