我目前正在尝试CS50 pset6。在我的代码中,我试图读取文本并计算文本中某个DNA或STR链的最长运行时间,以便在CSV文件中找到匹配的DNA。谁能告诉我为什么当我在CS50 IDE上运行程序时,它只返回一个闪烁的光标?我认为我的计算功能有问题,但我不确定为什么。如有任何建议,将不胜感激
from sys import argv, exit
import csv
def getstring(argv):
with open(argv[2]) as f:
str = f.read()
return str
def compute(tmp):
aatg = gata = tatc = gaaa = tctg = agatc = tctag = ttttttct = 0
#Check if DNA Sequence is AATG,GATA,TATC,GAAA,TCTG
i = 0
j = 5
length = len(tmp)
while (j <= (length - 5)):
AAT = GAT = TAT = GAA = TCT = 0
if tmp[i:j] == "AATG":
AAT += 1
i = i + 5
j = i + 5
elif tmp[i:j] != "AATG":
i = i - 5
j = i + 5
if AAT > aatg:
aatg = AAT
if tmp[i:j] == "GATA":
GAT += 1
i = i + 5
j = i + 5
if tmp[i:j] != "GATA":
i = i - 5
j = i + 5
if GAT > gata:
gata = GAT
if tmp[i:j] == "TATC":
TAT += 1
i = i + 5
j = i + 5
if tmp[i:j] != "TATC":
i = i - 5
j = i + 5
if TAT > tatc:
tatc = TAT
if tmp[i:j] == "GAAA":
GAA += 1
i = i + 5
j = i + 5
if tmp[i:j] != "GAAA":
i = i - 5
j = i + 5
if GAA > gaaa:
gaaa = GAA
if tmp[i:j] == "TCTG":
TCT += 1
i = i + 5
j = i + 5
if tmp[i:j] != "TCTG":
i = i - 4
j = i + 5
if TCT > tctg:
tctg = TCT
#Check if DNA Sequence is AGATC or TCTAG
b = 6
while (b <= (length - 6)):
AGA = TCTA = 0
if tmp[i:b] == "AGATC":
AGA += 1
i = i + 6
b = i + 6
elif tmp[i:b] != "AGATC":
i = i - 6
b = i + 6
if AGA > agatc:
agatc = AGA
if tmp[i:b] == "TCTAG":
TCTA += 1
i = i + 6
b = i + 6
if tmp[i:b] != "TCTAG":
i = i - 5
b = i + 6
if TCTA > tctag:
tctag = TCTA
#Check if DNA Sequence is TTTTTTCT
d = 9
while (d <= (length - 9)):
TTT = 0
if tmp[i:d] == "TTTTTTCT":
TTT += 1
i = i + 9
d = i + 9
elif tmp[i:d] != "TTTTTTCT":
i = i - 8
d = i + 9
if TTT > ttttttct:
ttttttct = TTT
return aatg, gata, tatc, gaaa, tctg, agatc, tctag, ttttttct
def main():
if len(argv) != 3:
print("Usage: python dna.py data.csv sequence.txt")
exit(1)
tmp = getstring(argv)
AATG, GATA, TATC, GAAA, TCTG, AGATC, TCTAG, TTTTTTCT = compute(tmp)
print(f"{AATG}, {GATA}, {TATC}") #willdeletelater
#Store STRCOUNT in dictionary
STRCOUNT = {
'AGATC' : AGATC,
'TTTTTTCT' : TTTTTTCT,
'AATG' : AATG,
'TCTAG' : TCTAG,
'GATA' : GATA,
'TATC' : TATC,
'GAAA' : GAAA,
'TCTG' : TCTG
}
#Only large csv has all the STR dna sequence, small csv only has AGATC,AATG,TATC
with open(argv[1], "r") as CSVfile:
read = csv.reader(CSVfile)
c=0
field={}
for row in read:
field[c] = row
c += 1
ncolumns = len (field[0])
CSVfile.seek(0)
reader = csv.DictReader(CSVfile)
for row in reader:
if ncolumns == 4:
if row['AGATC'] == STRCOUNT['AGATC'] and row['AATG'] == STRCOUNT['AATG'] and row['TATC'] == STRCOUNT['TATC']:
print(f"{row['name']}")
exit (0)
elif ncolumns != 4:
if row['AGATC'] == STRCOUNT['AGATC'] and row['TTTTTTCT'] == STRCOUNT['TTTTTTCT'] and row['AATG'] == STRCOUNT['AATG'] and row['TCTAG'] == STRCOUNT['TCTAG'] and row['GATA'] == STRCOUNT['GATA'] and row['TATC'] == STRCOUNT['TATC'] and row['GAAA'] == STRCOUNT['GAAA'] and row['TCTG'] == STRCOUNT['TCTG']:
print(f"{row['name']}")
exit (0)
print("No match")
exit (1)
main ()
以下是tmp的DNA序列:
AAGGTAAGTTTAGAATATAAAAGGTGAGTTAAATAGAATAGGTTAAAATTAAAGGAGATCAGATCAGATCAGATCTATCTATCTATCTATCTATCAGAAAAGAGTAAATAGTTAAAGAGTAAGATATTGAATTAATGGAAAATATTGTTGGGGAAAGGAGGGATAGAAGG
下面是10行大型CSV文件:
name,AGATC,TTTTTTCT,AATG,TCTAG,GATA,TATC,GAAA,TCTG
Albus,15,49,38,5,14,44,14,12
Cedric,31,21,41,28,30,9,36,44
Draco,9,13,8,26,15,25,41,39
Fred,37,40,10,6,5,10,28,8
Ginny,37,47,10,23,5,48,28,23
Hagrid,25,38,45,49,39,18,42,30
Harry,46,49,48,29,15,5,28,40
Hermione,43,31,18,25,26,47,31,36
James,46,41,38,29,15,5,48,22
以下是指向PSET6 DNA规范的链接: https://cs50.harvard.edu/x/2020/psets/6/dna/#:~:text=python%20dna.py%20databases/small.csv%20sequences/1.txt
目前没有回答
相关问题 更多 >
编程相关推荐