使用Python从列表和字典构建数组

import numpy as np

liste = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
dico = {"a/b": 4, "c/d": 2, "f/g": 5, "g/h": 2}


# Goal: build a square array (liste x liste) and fill it with the values
# of the dict.
def make_array(liste, dico):
    """Build a square array indexed by ``liste`` from pair values in ``dico``.

    Cell ``(i, j)`` holds ``dico["<i>/<j>"]`` when that key exists, otherwise
    ``dico["<j>/<i>"]`` when that key exists, otherwise ``0``.

    Args:
        liste: Sequence of string labels; used for both rows and columns.
        dico: Mapping from ``"label1/label2"`` keys to cell values.

    Returns:
        A NumPy array of shape ``(len(liste), len(liste))``.
    """
    array1 = []
    for i in liste:
        liste_i = []  # one row of the array
        for j in liste:
            # Membership tests (not truthiness) so a stored 0 is honoured.
            if i + "/" + j in dico:
                liste_i.append(dico[i + "/" + j])
            elif j + "/" + i in dico:
                liste_i.append(dico[j + "/" + i])
            else:
                liste_i.append(0)
        array1.append(liste_i)
    print(array1)
    # Reshape so that an empty ``liste`` still yields a square (0, 0) array.
    matrix = np.array(array1).reshape(len(liste), len(liste))
    # ``shape`` is an attribute (a tuple), not a method.
    print(matrix.shape)
    print(matrix)
    return matrix


make_array(liste, dico)

def clustering(matrix, liste_globale_occurences, output2):
    """Cluster the rows of ``matrix`` and write the members of the largest groups.

    Pairwise distances between rows are computed with ``pdist``, linked with
    average linkage, drawn as a dendrogram, and cut into flat clusters at a
    distance threshold of 15. The members of the (up to) three most populated
    clusters are then written to ``output2``, one line per member.

    Args:
        matrix: 2-D array-like of observations, passed to ``pdist``.
        liste_globale_occurences: Sequence indexed like the rows of ``matrix``;
            element ``i`` is written as the label of observation ``i``.
        output2: Path of the text file to (over)write with the results.
    """
    Y = scipy.spatial.distance.pdist(matrix)
    Z = scipy.cluster.hierarchy.linkage(Y, 'average', 'euclidean')
    scipy.cluster.hierarchy.dendrogram(Z)
    clust_h = scipy.cluster.hierarchy.fcluster(Z, t=15, criterion='distance')
    print(clust_h)
    print(len(clust_h))

    # most_common(3) returns fewer than three entries when fewer clusters
    # exist, so take whatever is available instead of indexing [0], [1], [2].
    most_common_groups = [
        group for group, _count in collections.Counter(clust_h).most_common(3)
    ]

    mise_en_page = "{0:36s} groupe co-occurences = {1:5s} \n"
    with open(output2, 'w') as results:
        # NOTE: the original author flagged this loop as where their problem begins.
        for group in most_common_groups:
            for i, val in enumerate(clust_h):
                if group == val:
                    results.write(
                        mise_en_page.format(str(liste_globale_occurences[i]), str(val))
                    )

2条回答

网友

1楼 · 编辑于 2024-10-03 11:17:44

你可以先创建一个 len(liste) × len(liste) 的全零矩阵 mat，然后遍历 dico，用“/”拆分每个键：“/”之前的部分对应行，“/”之后的部分对应列。这样就不需要使用 has_key 进行查找了。

网友

2楼 · 编辑于 2024-10-03 11:17:44

您的问题看起来是 O(n²) 的，因为需要遍历 liste 中所有的两两组合，所以内层循环是不可避免的。

您可以尝试将每一行写入一个文件，然后在以后的新进程中，从该文件创建矩阵。新进程将使用更少的内存，因为它不必存储liste和dico的大量输入。比如说：

def make_array(liste, dico, path='/temp/matrix.txt'):
    """Write the liste x liste matrix to a text file, one row per line.

    Each cell is ``dico["<i>/<j>"]``, falling back to ``dico["<j>/<i>"]`` and
    then to ``0``. Every value is followed by a single space and every row
    ends with a newline.

    Args:
        liste: Sequence of string labels; used for both rows and columns.
        dico: Mapping from ``"label1/label2"`` keys to cell values.
        path: Destination file; defaults to the originally hard-coded location.
    """
    # The context manager closes the file even if a write raises.
    with open(path, 'w') as f:
        for i in liste:
            # Short-circuit ``or``: take the first lookup that is not falsy.
            # A stored 0 or a missing key both fall through to the next option.
            row = ''.join(
                '%s ' % (dico.get(i + "/" + j) or dico.get(j + "/" + i) or 0)
                for j in liste
            )
            f.write(row + '\n')

一旦执行完毕，您就可以调用

# Parenthesised single-argument print works on both Python 2 and Python 3.
print(np.loadtxt('/temp/matrix.txt', dtype=int))

我使用了短路求值来减少if语句的代码行。实际上，如果您使用list comprehensions，您可以将make_array函数简化为：

def make_array(liste, dico):
    """Return the liste x liste array of pair values looked up in ``dico``.

    For each (row, column) label pair the value is the first non-falsy result
    of ``dico["<row>/<col>"]`` and ``dico["<col>/<row>"]``, or ``0`` when
    neither lookup yields one.
    """
    rows = []
    for row_label in liste:
        current_row = []
        for col_label in liste:
            value = dico.get(row_label + "/" + col_label)
            if not value:
                # Try the key with the two labels swapped.
                value = dico.get(col_label + "/" + row_label)
            if not value:
                value = 0
            current_row.append(value)
        rows.append(current_row)
    return np.array(rows)

相关问题更多 >

编程相关推荐

热门问题

热门文章