查找从叶到林中每个节点的所有路径

[[ANALYTICAL_BALANCE,BFG_DEPOSIT], [CUSTOMER_DETAIL,BALANCE], [BFG_2056, FFD_15], [BALANCE,BFG_16], [BFG_16,STAT_HIST], [ANALYTICAL_BALANCE,BFG_2056], [CUSTOM_DATA,AND_11], [AND_11,DICT_DEAL], [DICT_DEAL,BFG_2056]]

def create_adj(edges): adj = {} # or use defaultdict(list) to avoid `if` in the loop below for a, b in edges: if not a in adj: adj[a] = [] if not b in adj: adj[b] = [] adj[a].append(b) return adj

def all_paths(adj): def recur(path): node = path[-1] neighbors = [neighbor for neighbor in adj[node] if not neighbor in path] if not neighbors: yield path for neighbor in neighbors: yield from recur(path + [neighbor]) for node in adj: yield from recur([node])

data = [ ["ANALYTICAL_BALANCE","BFG_DEPOSIT"], ["CUSTOMER_DETAIL","BALANCE"], ["BFG_2056", "FFD_15"], ["BALANCE","BFG_16"], ["BFG_16","STAT_HIST"], ["ANALYTICAL_BALANCE","BFG_2056"], ["CUSTOM_DATA","AND_11"], ["AND_11","DICT_DEAL"], ["DICT_DEAL","BFG_2056"] ] adj = create_adj(data) print([path for path in all_paths(adj) if len(path) > 1]) [ANALYTICAL_BALANCE,BFG_DEPOSIT] [CUSTOMER_DETAIL,BALANCE,BFG_16,STAT_HIST] [BFG_2056,FFD_15] [BALANCE,BFG_16,STAT_HIST] [ANALYTICAL_BALANCE,BFG_2056,FFD_15] [CUSTOM_DATA,AND_11,DICT_DEAL,BFG_2056,FFD_15] [AND_11,DICT_DEAL,BFG_2056,FFD_15] [DICT_DEAL,BFG_2056,FFD_15]

Tree1: ANALYTICAL_BALANCE BFG_DEPOSIT Tree2: ANALYTICAL_BALANCE BFG_2056 ANALYTICAL_BALANCE FFD_15 CUSTOM_DATA AND_11 CUSTOM_DATA DICT_DEAL CUSTOM_DATA BFG_2056 CUSTOM_DATA FFD_15 Tree3: CUSTOMER_DETAIL BALANCE CUSTOMER_DETAIL BFG_16 CUSTOMER_DETAIL STAT_HIST

# create adjacencies def create_adj(edges): adj = {} for a, b in edges: if not a in adj: adj[a] = [] if not b in adj: adj[b] = [] adj[a].append(b) return adj # find all paths def all_paths(adj): def recur(path): node = path[-1] neighbors = [neighbor for neighbor in adj[node] if not neighbor in path] if not neighbors: yield path for neighbor in neighbors: yield from recur(path + [neighbor]) for node in adj: yield from recur([node]) # delete the connections from list def conn_deletion(list, list_deletion): after_del = [x for x in list if x[0] not in list_deletion] return after_del # get paths where len of path is > 2 and save them as a leaf to node. Also save connections to deletion. def unpack_paths(my_list): list_of_more_succ = [] to_deletion = [] for item in my_list: if len(item) == 1: print("len 1",item) to_deletion.append(item[0]) elif len(item) > 2: for i in range(1, len(item) - 1): to_deletion.append(item[i]) print("len > 2", item[i]) if [item[0], item[i]] in list_of_more_succ: pass else: list_of_more_succ.append([item[0], item[i]]) list_concat = my_list + list_of_more_succ sorted_list = list(k for k, _ in itertools.groupby(list_concat)) final = conn_deletion(sorted_list, list(dict.fromkeys(to_deletion))) return final data = [line.split() for line in open('data.txt', encoding='utf-8')] adj = create_adj(data) print(adj) workbook = Workbook() sheet = workbook.active sheet["A1"] = "Source" sheet["B1"] = "Child" loaded = list(all_paths(adj)) final_edited = unpack_paths(loaded) # Save data to excel file. We don't want paths with len == 1 or > 2. for row, item in enumerate(final_edited, start=2): if len(item) > 2: pass elif len(item) == 1: pass else: sheet[f"A{row}"] = item[0] sheet[f"B{row}"] = item[1] workbook.save("DataMap.xlsx")

1条回答

网友
1楼 · 发布于 2024-05-06 11:48:36

我建议将all_paths更改为leaf_paths，这意味着它只会生成从叶子开始的路径
然后使用这些路径：
标识它所指向的根（路径中的最后一个元素）
标识叶（路径中的第一个元素）
迭代该路径中的所有非叶，并将它们与叶成对组合
将这些对存储在由根键入的字典中
下面是如何在两个标有注释的地方更改all_paths：
def leaf_paths(adj): def recur(path): node = path[-1] neighbors = [neighbor for neighbor in adj[node] if not neighbor in path] if not neighbors: yield path for neighbor in neighbors: yield from recur(path + [neighbor]) # identify the internal nodes (not leaves) internals = set(parent for parents in adj.values() for parent in parents) for node in adj: if not node in internals: # require that the starting node is a leaf yield from recur([node])
然后添加此函数：
def all_leaf_pairs(paths): trees = {} for path in paths: if len(path) > 1: root = path[-1] if not root in trees: trees[root] = [] it = iter(path) leaf = next(it) trees[root].extend((leaf, node) for node in it) return trees
你的主程序可以：
data = [ ["ANALYTICAL_BALANCE","BFG_DEPOSIT"], ["CUSTOMER_DETAIL","BALANCE"], ["BFG_2056", "FFD_15"], ["BALANCE","BFG_16"], ["BFG_16","STAT_HIST"], ["ANALYTICAL_BALANCE","BFG_2056"], ["CUSTOM_DATA","AND_11"], ["AND_11","DICT_DEAL"], ["DICT_DEAL","BFG_2056"] ] adj = create_adj(data) paths = leaf_paths(adj) import pprint pprint.pprint(all_leaf_pairs(paths))
这将输出：
{'BFG_DEPOSIT': [('ANALYTICAL_BALANCE', 'BFG_DEPOSIT')], 'FFD_15': [('ANALYTICAL_BALANCE', 'BFG_2056'), ('ANALYTICAL_BALANCE', 'FFD_15'), ('CUSTOM_DATA', 'AND_11'), ('CUSTOM_DATA', 'DICT_DEAL'), ('CUSTOM_DATA', 'BFG_2056'), ('CUSTOM_DATA', 'FFD_15')], 'STAT_HIST': [('CUSTOMER_DETAIL', 'BALANCE'), ('CUSTOMER_DETAIL', 'BFG_16'), ('CUSTOMER_DETAIL', 'STAT_HIST')]}
解释leaf_paths
此函数使用递归。它在其作用域中定义了recur
但主代码从识别内部节点（即至少有一个子节点）开始。由于adj为给定节点提供父节点，因此我们只需收集所有这些父节点
我们使用这组内部节点来确保只在叶节点上启动递归，就像在输出中我们希望路径总是以叶节点开始一样
recur函数将从给定的叶向它能找到的任何根移动。它使用它可以找到的下一个父级（neighbor）扩展路径，并执行递归，直到没有更多的父级（即，它是根）。当这种情况发生时，累积路径（以叶子开始，以根结束）将产生
leaf_paths本身产生recur产生的任何路径

解释`leaf_paths`

相关问题更多 >

编程相关推荐

热门问题

热门文章