<p>下面的程序解决了原来的问题。也许有一种更有效的算法,但我认为这个算法相当快。在</p>
<p>修改此代码以处理更新版本中更复杂的dict应该不太困难。在</p>
<p>(我使用的是Python2.6,所以没有dict理解,这就是为什么我使用生成器表达式构建dict)。在</p>
<p><strong>合并_列表.py</strong></p>
<pre><code>#! /usr/bin/env python
''' Merge lists in a dict
Lists are merged if they have any element in common,
so that in the resulting dict no list element will be
associated with more than one key.
Written by PM 2Ring 2014.11.18
From http://stackoverflow.com/q/26972204/4014959
'''
#Some test data
groups = {
'g01': ['a', 'b', 'c', 'd'],
'g02': ['a', 'b', 'c', 'd', 'e'],
'g03': ['f', 'g', 'h'],
'g04': ['g', 'j'],
#'g05': ['g', 'a'],
'g06': ['k', 'l'],
'g07': ['l', 'm'],
'g08': ['m', 'n'],
'g09': ['o', 'p'],
'g10': ['p', 'q'],
'g11': ['q', 'o'],
#'g12': ['l', 'q'],
}
def merge_lists(d):
src = dict((k, set(v)) for k, v in d.iteritems())
while True:
dest = {}
count = 0
while src:
k1, temp = src.popitem()
if temp is None:
continue
for k2, v in src.iteritems():
if v is None:
continue
if temp & v:
temp |= v
src[k2] = None
count += 1
k1 = min(k1, k2)
dest[k1] = temp
if count > 0:
#print count
#print_dict(dest)
src = dest
else:
dest = dict((k, sorted(list(v))) for k, v in dest.iteritems())
return dest
def print_dict(d):
for k in sorted(d.keys()):
print "%s: %s" % (k, d[k])
print
def main():
print_dict(groups)
print 20*'-'
dest = merge_lists(groups)
print_dict(dest)
if __name__ == '__main__':
main()
</code></pre>
<p><strong>输出</strong></p>
^{pr2}$
<hr/>
<p>这是一个在更新的dict结构上工作的版本。在</p>
<pre><code>#! /usr/bin/env python
''' Merge lists in a dict
Lists are merged if they have any element in common,
so that in the resulting dict no list element will be
associated with more than one key.
The key of the merged item is selected from the sub-dict with the lowest
value of oldest_node.
Written by PM 2Ring 2014.11.21
From http://stackoverflow.com/q/26972204/4014959
'''
#Some test data
groups = {
'group1': {'IDs': ['a','b','c','d'], 'oldest_node': 'node_30'},
'group2': {'IDs': ['c','d','e'], 'oldest_node': 'node_40'},
'group3': {'IDs': ['h','k'], 'oldest_node': 'node_2'},
'group4': {'IDs': ['z','w','x','j'], 'oldest_node': 'node_6'},
'group5': {'IDs': ['h','z'], 'oldest_node': 'node_9'},
}
def merge_lists(d):
#Convert IDs to a set and oldest_node to an int
src = {}
for k, v in d.iteritems():
src[k] = {
'IDs': set(v['IDs']),
'oldest_node': int(v['oldest_node'][5:])
}
#print_dict(src)
while True:
dest = {}
count = 0
while src:
k1, temp = src.popitem()
if temp is None:
continue
for k2, v in src.iteritems():
if v is None:
continue
if temp['IDs'] & v['IDs']:
#Merge IDs
temp['IDs'] |= v['IDs']
#Determine key of merge from oldest_node
if v['oldest_node'] < temp['oldest_node']:
k1 = k2
temp['oldest_node'] = v['oldest_node']
src[k2] = None
count += 1
dest[k1] = temp
src = dest
#Exit loop if no changes occured
if count == 0:
break
else:
#print count
#print_dict(src)
pass
#Convert dict back to original form
dest = {}
for k, v in src.iteritems():
dest[k] = {
'IDs': sorted(list(v['IDs'])),
'oldest_node': 'node_%d' % v['oldest_node']
}
return dest
def print_dict(d):
for k in sorted(d.keys()):
print "%s: %s" % (k, d[k])
print
def main():
print_dict(groups)
print 20*'-'
dest = merge_lists(groups)
print_dict(dest)
if __name__ == '__main__':
main()
</code></pre>
<p><strong>输出</strong></p>
<pre><code>group1: {'IDs': ['a', 'b', 'c', 'd'], 'oldest_node': 'node_30'}
group2: {'IDs': ['c', 'd', 'e'], 'oldest_node': 'node_40'}
group3: {'IDs': ['h', 'k'], 'oldest_node': 'node_2'}
group4: {'IDs': ['z', 'w', 'x', 'j'], 'oldest_node': 'node_6'}
group5: {'IDs': ['h', 'z'], 'oldest_node': 'node_9'}
group1: {'IDs': ['a', 'b', 'c', 'd', 'e'], 'oldest_node': 'node_30'}
group3: {'IDs': ['h', 'j', 'k', 'w', 'x', 'z'], 'oldest_node': 'node_2'}
</code></pre>