在层次字典中优化python键搜索

class Species:
    '''Hold every gene recorded for one species.

    One species has several genes and one gene has several proteins.
    Genes are stored in ``self.genes`` keyed by the gene identifier.
    ``self.ngenes`` is written by ``updateNgenes``, which ``addProtein``
    calls only when a new gene is inserted, so the attribute does not
    exist until the first gene has been added.
    '''

    def __init__(self, name):
        self.name = name  # name given to this Species instance
        self.genes = {}

    def addProtein(self, gene, protname, len):
        '''Store protein ``protname`` with length ``len`` under ``gene``.

        Turns one record of the input file into a Protein; the Gene entry
        is created first when ``gene`` has not been seen before.
        '''
        if gene in self.genes:
            # Gene already in the structure: attach the protein to it.
            self.genes[gene].proteins[protname] = Protein(protname, len)
            self.genes[gene].updateProts()
        else:
            # First protein of this gene: create the gene, then attach.
            self.genes[gene] = Gene(gene)
            self.updateNgenes()
            self.genes[gene].proteins[protname] = Protein(protname, len)
            self.genes[gene].updateProts()

    def updateNgenes(self):
        '''Refresh ``self.ngenes`` with the current number of genes.'''
        self.ngenes = len(self.genes.keys())

class Protein:
    '''Record for one protein: its name and its length.'''

    def __init__(self, name, len):
        self.name = name
        self.len = len


class Gene:
    '''Record for one gene: its name and a dict with its proteins.

    ``self.nproteins`` caches the size of ``self.proteins`` and is only
    refreshed when ``updateProts`` is called.
    '''

    def __init__(self, name):
        self.name = name
        self.proteins = {}
        # Initialise nproteins so it exists before any protein is added.
        self.updateProts()

    def updateProts(self):
        '''Update the stored number of proteins.'''
        self.nproteins = len(self.proteins)

1条回答

网友

1楼 · 发布于 2024-09-30 20:24:21

这里不能直接使用defaultdict，因为Gene的__init__方法需要一个参数（name），而defaultdict调用工厂函数时不会传入任何参数。

这可能是您的瓶颈之一：

def updateNgenes(self):
    # Store the current number of genes on the instance.
    # Under Python 2, .keys() builds a full list of the keys before len() runs.
    self.ngenes = len(self.genes.keys())

在Python 2中，len(self.genes.keys())会在计算长度之前先创建一个包含所有键的list。这意味着每添加一个新基因，就要创建一个列表然后立刻丢弃；基因越多，这个列表的开销就越大。要避免创建中间列表，直接使用len(self.genes)即可。（在Python 3中，keys()返回的是视图而不是列表，但len(self.genes)仍然是更简洁的写法。）

更好的办法是将ngenes设为property，这样它只在需要时才计算出来。

@property
def ngenes(self):
    """Return the number of genes, computed from the dict on each access."""
    return len(self.genes)

对于Gene类中的nproteins也可以这样做。

以下是重构后的代码：

class Species:
    """Container for all the genes recorded for one species.

    A species has several genes and each gene has several proteins.
    Genes are kept in ``self.genes``, keyed by the gene identifier that
    is passed to ``addProtein``.
    """

    def __init__(self, name):
        self.genes = {}
        self.name = name  # name given to this Species instance

    def addProtein(self, gene, protname, len):
        """Store protein ``protname`` with length ``len`` under ``gene``.

        The Gene entry is created on first use; an existing protein with
        the same name is replaced.
        """
        try:
            entry = self.genes[gene]
        except KeyError:
            # First protein seen for this gene: create its record.
            entry = self.genes[gene] = Gene(gene)
        entry.proteins[protname] = Protein(protname, len)

    @property
    def ngenes(self):
        """Number of genes currently stored, computed on each access."""
        gene_map = self.genes
        return len(gene_map)

class Protein:
    """Record for one protein: its name and its length.

    Only ``name`` and ``len`` are stored on the instance.
    """

    def __init__(self, name, len):
        # ``len`` shadows the builtin only inside this method; the parameter
        # name is part of the constructor's interface and is kept as is.
        self.name, self.len = name, len

class Gene:
    """Record for one gene: its name and a dict holding its proteins.

    ``self.proteins`` maps a protein name to whatever object the caller
    stores for it.
    """

    def __init__(self, name):
        self.proteins = {}
        self.name = name

    @property
    def nproteins(self):
        """Number of proteins currently stored, computed on each access."""
        protein_map = self.proteins
        return len(protein_map)

相关问题更多 >

编程相关推荐

热门问题

热门文章