Python:我的“匹配”函数的输入变量

2024-10-01 07:30:58 发布

您现在位置:Python中文网/ 问答频道 /正文

我有很多列表,例如:

# Sample input data: each name is bound to a list of string identifiers.
# These four lists are among those passed to sort_by_matches for comparison.
ABCC8 = ['TRIM29', 'IGL@', 'DOCK6', 'SVEP1', 'S100A11', 'EPHA2', 'KLHL7', 'ANXA3', 'NAB1', 'CELF2', 'EDNRB', 'PLAGL1', 'IL6ST', 'S100A8', 'CKLF', 'TIPARP', 'CDH3', 'MAP3K8', 'LYST', 'LEPR', 'FHL2', 'ARL4C', 'IL1RN', 'ESR1', 'CD93', 'ATP2B4', 'KAT2B', 'ELOVL5', 'SCD', 'SPTBN1', 'AKAP13', 'LDLR', 'ADRB2', 'LTBP4', 'TGM2', 'TIMP3', 'RAN', 'LAMA3', 'ASPH', 'ID4', 'STX11', 'CNN2', 'EGR1']

# NOTE(review): the entry '2017-09-10' below looks like an identifier that was
# converted into a date (e.g. by spreadsheet auto-formatting) - confirm against
# the original data before relying on match counts that involve this list.
APP = ['GULP1', 'PREPL', 'FHL1', 'METTL7A', 'TRIM13', 'YPEL5', 'PTEN', 'FAM190B', 'GSN', 'UBL3', 'PTGER3', 'COBLL1', 'EPB41L3', 'KLF4', 'BCL2L2', 'CYLD', 'SLK', 'ENSA', 'SKAP2', 'FBXO3', 'PDCD4', 'ATP2A2', 'AKAP11', 'PAFAH1B1', 'RALGAPA1', 'YWHAZ', 'BNIP3L', 'ATP8A1', 'TNXB', 'DICER1', 'C17orf91', 'BEX4', 'PPM1A', '2017-09-10', 'NDRG2', 'NCOA1', 'NAB1', 'STX7', 'ZFAND5', 'CD47', 'SFRS5', 'CLASP2', 'PBX1', 'NR3C1', 'ABCA8', 'ETFDH', 'RBPMS', 'FOXO1', 'KLF6', 'ADH1B', 'RAB22A', 'CCNG2', 'NFIB', 'IDS', 'NR3C2', 'MAF', 'NDEL1', 'EZR', 'PCDH9', 'KIAA0494', 'CITED2', 'MGEA5', 'RUFY3', 'ALDH3A2', 'N4BP2L2', 'EPS15', 'TSPAN5', 'SNRPN', 'SSBP2', 'ELOVL5', 'C5orf4', 'FOXN3', 'ABCA5', 'SEC62', 'PELI1', 'MYCBP2', 'USP15', 'TACC1', 'SHMT1', 'RNF103', 'CDC14B', 'SYNE1', 'NDN', 'PHKB', 'EIF1', 'TROVE2', 'MBD4', 'GAB1']

BECN1 = ['LMNA', 'NHP2L1', 'IDS', 'ATP6V0B', 'ENSA', 'TBCB', 'NDUFA13', 'TOLLIP', 'PLEKHB2', 'MBOAT7', 'C16orf13', 'PGAM1', 'MIF', 'ACTR1A', 'OAZ1', 'GNAS', 'ARF1', 'MAPKAPK3', 'LCMT1', 'ATP6V1D', 'FLOT1', 'PRR13', 'COX5B', 'PGP', 'CYB561', 'CNIH4', 'COX6B1', 'ARPC5L', 'NCKIPSD', 'C9orf16', 'LSM4', 'ATP5L', 'C14orf2', 'AURKAIP1', 'MRPL41', 'PDPK1', 'NOP10', 'CANT1', 'CALM3', 'PSEN2', 'C9orf86', 'ATP6V0E1', 'PIN1', 'LARP1', 'HTATIP2', 'PPP1R7', 'HCFC1R1', 'UQCR10', 'FAM134A', 'GPAA1', 'THY1', 'PPM1A', 'NAPA', 'NDUFC2', 'EPS8L1', 'PSME2', 'UBE2M', 'ORMDL2', 'TCEB2', 'RMND5B', 'ATPIF1', 'RNF19B', 'PEBP1', 'PCBP2', 'GHITM', 'AP3S2', 'TSPAN5', 'AP2S1', 'C20orf24', 'RABIF', 'NDUFB2', 'PFDN2', 'GPR172A', 'RTN4', 'GAPDH', 'MAPK13', 'FKBP8', 'PTGER3', 'BSCL2', 'TUBG1', 'FAM162A', 'GDI1', 'SPTLC2', 'YWHAZ', 'BCAP31', 'OSBPL1A', 'ATP6AP1', 'CALM1', 'PEX16', 'MYCBP2']

ARNTL = ['NCAM1', 'SLC11A2', 'RPL35A', 'PDLIM5', 'RPL31', 'NFIB', 'GYG2', 'IGHG1', 'NAAA', 'DLC1', 'EPOR', 'DIO2', 'ESR1', 'KLK10', 'CYP2C9', 'SPN', 'RPS9', 'PRELP', 'CYP3A43', 'PLAGL1', 'COBLL1', 'ADCK2', 'RPL13', 'NRP2', 'SCEL', 'DOCK6', 'NENF', 'MLLT4', 'SERPINB13', 'PALMD', 'TMEM132A', 'ASAP3', 'MTAP', 'NOVA1', 'ALOX12', 'SPINK5', 'LDB3', 'ATP5S', 'LMNA', 'BAIAP2', 'FZD4', 'GNAS', 'OBSL1', 'TCL6', 'ICOSLG', 'MACROD2', 'MAST4', 'EDA', 'ADAM22', 'CSHL1', 'SYNGR1', 'THBS1', 'PEX16', 'NOS1', 'SLCO1A2', 'CYP2A7', 'PRDM2', 'DTNA', 'HSD17B4', 'RPL29', 'PDCD4', 'IL1RN', 'CASZ1', 'C9orf16', 'RGS12', 'TRD@', 'ATP1A2', 'MPRIP', 'PDE4C', 'SPTLC2', 'TNXB', 'DDAH2', 'AOX1', 'PAIP2B', 'HNF4A', 'GLS', 'EMP1', 'ARHGEF4', 'FUT6', 'ACACB', 'NR5A2', 'N4BP2L1', 'APAF1', 'DSC2', 'EDNRB', 'RPL27A', 'CYP2C18']

我有一个函数,用来得到其他各个字符串列表与我的参考列表(`ref`)相比,相同字符串的匹配数量。

    def sort_by_matches(ref, lists):
        """Print how many items each named list has in common with ``ref``.

        ``ref`` is an iterable of hashable items. ``lists`` is an iterable of
        ``(name, items)`` pairs. One line is printed per pair, ordered by the
        number of shared items (highest first); pairs with the same count are
        ordered shortest list first, and remaining ties keep their input order.
        Nothing is returned.
        """
        wanted = set(ref)
        scored = []
        for name, items in lists:
            shared = wanted & set(items)
            scored.append((len(shared), len(items), name))
        # An ascending sort on the negated count puts the best match first,
        # while the plain length breaks ties in favour of shorter lists.
        scored.sort(key=lambda row: (-row[0], row[1]))
        for count, _size, name in scored:
            print("Matches {} in {}".format(count, name))

    # Compare APP (the reference) against every named list, APP itself included.
    # NOTE(review): most of the names passed below (BMI1, CASP8, ...) are not
    # defined in this snippet - presumably they are lists defined elsewhere.
    sort_by_matches(APP, [("ABCC8", ABCC8), ("APP", APP), ("BECN1", BECN1), ("ARNTL", ARNTL), ("BMI1", BMI1), ("CASP8", CASP8), ("CASP9", CASP9), ("CLOCK", CLOCK), ("CRAT", CRAT), ("CRY2", CRY2), ("CSF1", CSF1), ("CTCF", CTCF), ("DNMT1", DNMT1), ("EP300", EP300), ("FBXW7", FBXW7), ("FOXA1", FOXA1), ("FOXO1", FOXO1), ("FOXO3", FOXO3), ("GADD34", GADD34), ("GATA3", GATA3), ("GCK", GCK), ("GLI1", GLI1), ("GLP1", GLP1), ("GLP1R", GLP1R), ("GLUT1", GLUT1),("GLUT2", GLUT2),("HES1", HES1),("HEY1", HEY1),("HIF1A", HIF1A),("HNF1A", HNF1A),("HNF4A", HNF4A),("ICMT", ICMT),("ID1", ID1),("IDH1", IDH1),("IL4", IL4),("IL6", IL6),("LC3A", LC3A),("LYL1", LYL1),("MFSD2A", MFSD2A),("MOAP1", MOAP1),("MTNR1B", MTNR1B),("MTOR", MTOR),("MYF5", MYF5),("MYOD1", MYOD1),("MSTN", MSTN),("NANOG", NANOG),("NOTCH1", NOTCH1),("NR1D1", NR1D1),("POU5F1", POU5F1),("PAX7", PAX7),("PDK1", PDK1),("PER2", PER2),("PHF6", PHF6),("PRMT5", PRMT5),("PSEN1", PSEN1),("PSEN2", PSEN2),("PTCH1", PTCH1),("RMST", RMST),("RUNX1", RUNX1),("SETD2", SETD2),("SIN3A", SIN3A),("SOCS1", SOCS1),("SOX2", SOX2),("STAT3", STAT3),("STK11", STK11),("TAF1", TAF1),("TCF3", TCF3),("TEAD1", TEAD1), ("TERT", TERT),("RANKL", RANKL),("TOP2A", TOP2A), ("TOX3", TOX3), ("TRIM28", TRIM28), ("TSHZ2", TSHZ2), ("TSHZ3", TSHZ3), ("TSP1", TSP1), ("TWIST1", TWIST1), ("FN1", FN1), ("VHL", VHL), ("WLS", WLS), ("WNT3", WNT3), ("WNT3A", WNT3A), ("WNT5A", WNT5A), ("WT1", WT1), ("YAP1", YAP1), ('MYBPC3', MYBPC3),  ("PPARG", PPARG), ("NKD1", NKD1), ("LRP5", LRP5), ("SMO", SMO), ("CSNK1E", CSNK1E), ("DKK1", DKK1), ("MYH7", MYH7), ("AXIN2", AXIN2), ("TCF7", TCF7), ("NEUROD1", NEUROD1), ("FZD5", FZD5), ("FZD8", FZD8), ("CREB1", CREB1), ("TCF7L2", TCF7L2), ("SOX17", SOX17), ("TP53", TP53), ("PTGER3", PTGER3), ("FERMT2", FERMT2), ("WNT1", WNT1), ("WNT7B", WNT7B), ("MDM4", MDM4), ("IL10", IL10 ),  ("DVL1", DVL1 ), ("PGR", PGR), ("TSC1", TSC1), ("ASCL2", ASCL2)])

如何让函数sort_by_matches(ref, lists)的ref参数使用一个可变的输入,而不必每换一个参考列表就复制粘贴一次函数调用?

我有数百个列表,每次都复制粘贴上面这么长的函数调用太繁琐了。我该如何解决这个问题?


Tags: of name ref app sort lists reference matches
1条回答
网友
1楼 · 发布于 2024-10-01 07:30:58

我是Python新手,所以可能还有更简洁的做法,但我做了以下修改:

把已定义的列表放入一个元组列表中:每个元组的第一项是列表的名称,第二项是对该列表的引用(与你调用函数时传入的形式类似):

# (name, list) pairs for every list that takes part in the comparison.
myLists = [
    ("ABCC8", ABCC8),
    ("APP", APP),
    ("BECN1", BECN1),
    ("ARNTL", ARNTL),
]

我修改了ref参数:现在函数sort_by_matches要求该参数是一个(名称, 列表)元组(这样在需要时就可以引用列表的名称):

def sort_by_matches(ref, lists):
    """Print how many items each named list shares with the reference list.

    ``ref`` is indexed as ``ref[0]`` (the reference's name, used in the
    output) and ``ref[1]`` (its items). ``lists`` is an iterable of
    ``(name, items)`` pairs. Lines are printed with the highest match count
    first; equal counts are ordered shortest list first, and remaining ties
    keep their input order. Nothing is returned.
    """
    reference_items = set(ref[1])
    ranked = sorted(
        ((len(reference_items & set(items)), len(items), label) for label, items in lists),
        # Negated count => most matches first; length => shorter list wins ties.
        key=lambda entry: (-entry[0], entry[1]),
    )
    for count, _length, label in ranked:
        print("{} Matches {} in {}".format(ref[0], count, label))

接下来,在myLists上循环,调用sort_by_matches以获取引用列表及其后面的所有列表:

# Use each entry in turn as the reference and compare it only with the
# entries that come after it, so every pair of lists is handled once.
i = 0
while i + 1 < len(myLists):
    sort_by_matches(myLists[i], myLists[i + 1:])
    i += 1

这样,每个列表都会与排在它后面的所有列表进行比较。

输出如下所示:

ABCC8 Matches 5 in ARNTL
ABCC8 Matches 2 in APP
ABCC8 Matches 0 in BECN1
APP Matches 7 in BECN1
APP Matches 4 in ARNTL
BECN1 Matches 5 in ARNTL


编辑

您需要一种方法以某种方式引用所有列表,因为它们包含在代码中。下面我将对列表的引用存储在变量MY_LISTS中。每次在程序中定义另一个列表时,只需向该变量添加一个条目。(不管您决定如何实现,因为列表是在代码中显式定义的,所以需要有一个引用列表的集合,以避免每次调用函数时都显式引用所有列表。)

这次没有对sort_by_matches函数做任何修改,而是在while循环中打印当前参考列表的名称,这样就能知道其他列表是在与哪个列表进行比较。

循环从MY_LISTS中的第一个列表开始,将其与MY_LISTS中的所有后续列表进行比较。在循环的每次迭代中,参考列表仅与MY_LISTS中排在它之后的列表进行比较,因此每一对列表只会比较一次。

# Sample input data: each name is bound to a list of string identifiers.
ABCC8 = ['TRIM29', 'IGL@', 'DOCK6', 'SVEP1', 'S100A11', 'EPHA2', 'KLHL7', 'ANXA3', 'NAB1', 'CELF2', 'EDNRB', 'PLAGL1', 'IL6ST', 'S100A8', 'CKLF', 'TIPARP', 'CDH3', 'MAP3K8', 'LYST', 'LEPR', 'FHL2', 'ARL4C', 'IL1RN', 'ESR1', 'CD93', 'ATP2B4', 'KAT2B', 'ELOVL5', 'SCD', 'SPTBN1', 'AKAP13', 'LDLR', 'ADRB2', 'LTBP4', 'TGM2', 'TIMP3', 'RAN', 'LAMA3', 'ASPH', 'ID4', 'STX11', 'CNN2', 'EGR1']

# NOTE(review): the entry '2017-09-10' below looks like an identifier that was
# converted into a date (e.g. by spreadsheet auto-formatting) - confirm against
# the original data before relying on match counts that involve this list.
APP = ['GULP1', 'PREPL', 'FHL1', 'METTL7A', 'TRIM13', 'YPEL5', 'PTEN', 'FAM190B', 'GSN', 'UBL3', 'PTGER3', 'COBLL1', 'EPB41L3', 'KLF4', 'BCL2L2', 'CYLD', 'SLK', 'ENSA', 'SKAP2', 'FBXO3', 'PDCD4', 'ATP2A2', 'AKAP11', 'PAFAH1B1', 'RALGAPA1', 'YWHAZ', 'BNIP3L', 'ATP8A1', 'TNXB', 'DICER1', 'C17orf91', 'BEX4', 'PPM1A', '2017-09-10', 'NDRG2', 'NCOA1', 'NAB1', 'STX7', 'ZFAND5', 'CD47', 'SFRS5', 'CLASP2', 'PBX1', 'NR3C1', 'ABCA8', 'ETFDH', 'RBPMS', 'FOXO1', 'KLF6', 'ADH1B', 'RAB22A', 'CCNG2', 'NFIB', 'IDS', 'NR3C2', 'MAF', 'NDEL1', 'EZR', 'PCDH9', 'KIAA0494', 'CITED2', 'MGEA5', 'RUFY3', 'ALDH3A2', 'N4BP2L2', 'EPS15', 'TSPAN5', 'SNRPN', 'SSBP2', 'ELOVL5', 'C5orf4', 'FOXN3', 'ABCA5', 'SEC62', 'PELI1', 'MYCBP2', 'USP15', 'TACC1', 'SHMT1', 'RNF103', 'CDC14B', 'SYNE1', 'NDN', 'PHKB', 'EIF1', 'TROVE2', 'MBD4', 'GAB1']

BECN1 = ['LMNA', 'NHP2L1', 'IDS', 'ATP6V0B', 'ENSA', 'TBCB', 'NDUFA13', 'TOLLIP', 'PLEKHB2', 'MBOAT7', 'C16orf13', 'PGAM1', 'MIF', 'ACTR1A', 'OAZ1', 'GNAS', 'ARF1', 'MAPKAPK3', 'LCMT1', 'ATP6V1D', 'FLOT1', 'PRR13', 'COX5B', 'PGP', 'CYB561', 'CNIH4', 'COX6B1', 'ARPC5L', 'NCKIPSD', 'C9orf16', 'LSM4', 'ATP5L', 'C14orf2', 'AURKAIP1', 'MRPL41', 'PDPK1', 'NOP10', 'CANT1', 'CALM3', 'PSEN2', 'C9orf86', 'ATP6V0E1', 'PIN1', 'LARP1', 'HTATIP2', 'PPP1R7', 'HCFC1R1', 'UQCR10', 'FAM134A', 'GPAA1', 'THY1', 'PPM1A', 'NAPA', 'NDUFC2', 'EPS8L1', 'PSME2', 'UBE2M', 'ORMDL2', 'TCEB2', 'RMND5B', 'ATPIF1', 'RNF19B', 'PEBP1', 'PCBP2', 'GHITM', 'AP3S2', 'TSPAN5', 'AP2S1', 'C20orf24', 'RABIF', 'NDUFB2', 'PFDN2', 'GPR172A', 'RTN4', 'GAPDH', 'MAPK13', 'FKBP8', 'PTGER3', 'BSCL2', 'TUBG1', 'FAM162A', 'GDI1', 'SPTLC2', 'YWHAZ', 'BCAP31', 'OSBPL1A', 'ATP6AP1', 'CALM1', 'PEX16', 'MYCBP2']

ARNTL = ['NCAM1', 'SLC11A2', 'RPL35A', 'PDLIM5', 'RPL31', 'NFIB', 'GYG2', 'IGHG1', 'NAAA', 'DLC1', 'EPOR', 'DIO2', 'ESR1', 'KLK10', 'CYP2C9', 'SPN', 'RPS9', 'PRELP', 'CYP3A43', 'PLAGL1', 'COBLL1', 'ADCK2', 'RPL13', 'NRP2', 'SCEL', 'DOCK6', 'NENF', 'MLLT4', 'SERPINB13', 'PALMD', 'TMEM132A', 'ASAP3', 'MTAP', 'NOVA1', 'ALOX12', 'SPINK5', 'LDB3', 'ATP5S', 'LMNA', 'BAIAP2', 'FZD4', 'GNAS', 'OBSL1', 'TCL6', 'ICOSLG', 'MACROD2', 'MAST4', 'EDA', 'ADAM22', 'CSHL1', 'SYNGR1', 'THBS1', 'PEX16', 'NOS1', 'SLCO1A2', 'CYP2A7', 'PRDM2', 'DTNA', 'HSD17B4', 'RPL29', 'PDCD4', 'IL1RN', 'CASZ1', 'C9orf16', 'RGS12', 'TRD@', 'ATP1A2', 'MPRIP', 'PDE4C', 'SPTLC2', 'TNXB', 'DDAH2', 'AOX1', 'PAIP2B', 'HNF4A', 'GLS', 'EMP1', 'ARHGEF4', 'FUT6', 'ACACB', 'NR5A2', 'N4BP2L1', 'APAF1', 'DSC2', 'EDNRB', 'RPL27A', 'CYP2C18']

# Registry of every list to compare, stored as (name, list) pairs.
MY_LISTS = [
    ("ABCC8", ABCC8),
    ("APP", APP),
    ("BECN1", BECN1),
    ("ARNTL", ARNTL),
]

def sort_by_matches(ref, lists):
    """Print the overlap between ``ref`` and each named list in ``lists``.

    ``ref`` is an iterable of hashable items and ``lists`` an iterable of
    ``(name, items)`` pairs. For every pair one line is printed giving the
    number of items shared with ``ref``. Output is ordered by that number,
    highest first; on equal counts the shorter list comes first, and
    remaining ties keep their input order. Nothing is returned.
    """
    reference = set(ref)

    def overlap(items):
        # Number of distinct items that also occur in the reference.
        return len(reference.intersection(set(items)))

    rows = [(overlap(items), len(items), name) for name, items in lists]
    # With reverse=True, the negated length ranks shorter lists ahead on ties.
    rows.sort(key=lambda row: (row[0], -row[1]), reverse=True)
    for matches, _, name in rows:
        print("Matches {} in {}".format(matches, name))

# Use each entry in turn as the reference: announce its name, then compare
# it only with the entries that follow it, so every pair is handled once.
i = 0
while i + 1 < len(MY_LISTS):
    print("Comparing Lists to " + MY_LISTS[i][0])
    sort_by_matches(MY_LISTS[i][1], MY_LISTS[i + 1:])
    i += 1

输出如下所示:

Comparing Lists to ABCC8
Matches 5 in ARNTL
Matches 2 in APP
Matches 0 in BECN1
Comparing Lists to APP
Matches 7 in BECN1
Matches 4 in ARNTL
Comparing Lists to BECN1
Matches 5 in ARNTL

相关问题 更多 >