将相似的dict条目分组为一个键元组

def tupelize_dict(data):
    """Merge entries of ``data`` that hold equal values under one tuple key.

    Pairs of keys are compared repeatedly; whenever two keys map to equal
    values they are replaced by a single key: the sorted tuple of both keys,
    where keys that are already tuples (or lists) are flattened into it.
    The process restarts after every merge and stops once no two remaining
    entries compare equal.

    Args:
        data: Dictionary to regroup. It is modified in place.

    Returns:
        The same ``data`` object. Keys whose value is unique are left as
        they were (they are not wrapped in a tuple).

    Raises:
        TypeError: If the keys being merged cannot be sorted together.
    """
    from itertools import chain, combinations

    merged_any = True
    while merged_any:
        merged_any = False
        # Iterate over a snapshot of the keys: the dict is mutated on a
        # match, after which the scan is restarted from scratch.
        for pair in combinations(tuple(data), 2):
            first, second = pair
            value = data[first]
            if value == data[second]:
                parts = [
                    key if isinstance(key, (tuple, list)) else [key]
                    for key in pair
                ]
                merged_key = tuple(sorted(chain.from_iterable(parts)))
                # Drop the old keys before inserting the merged one, so the
                # new entry survives even if merged_key equals an old key.
                for old_key in pair:
                    del data[old_key]
                data[merged_key] = value
                merged_any = True
                break
    return data

3条回答

网友
1楼 · 编辑于 2024-10-02 06:29:20

你可以像下面这样做：
def tupelize_dict(ds):
    """Group the keys of ``ds`` by their value.

    Args:
        ds: Dictionary whose values are hashable.

    Returns:
        A new dictionary mapping a tuple of all keys that shared a value
        (in the iteration order of ``ds``) to that value.
    """
    cache = {}
    for key, value in ds.items():
        # The value itself is the grouping key, so it must be hashable.
        cache.setdefault(value, []).append(key)
    return {tuple(v): k for k, v in cache.items()}


ds = {1: 'foo', 2: 'bar', 3: 'foo', 4: 'bar', 5: 'foo'}
print(tupelize_dict(ds))

网友
2楼 · 编辑于 2024-10-02 06:29:20

像这样的事情应该可以做到：
from collections import defaultdict

ds = {1: 'foo',
      2: 'bar',
      3: 'foo',
      4: 'bar',
      5: 'foo'}

# Collect every key under the value it maps to.
d = defaultdict(list)
for k, v in ds.items():
    d[v].append(k)

# Invert the grouping: tuple of keys -> shared value.
res = {tuple(v): k for k, v in d.items()}
print(res)  # {(1, 3, 5): 'foo', (2, 4): 'bar'}

网友
3楼 · 编辑于 2024-10-02 06:29:20

按照acushner的答案，只要我能为数据集中每个元素的内容计算出一个哈希值，就可以让它正常工作。

import pickle
from collections import defaultdict

def tupelize_dict(ds):
    """Group the keys of ``ds`` whose values serialize to the same pickle.

    Values do not need to be hashable: each value is pickled and the
    resulting bytes are used as the grouping signature.

    Args:
        ds: Dictionary whose values can be pickled.

    Returns:
        A new dictionary mapping a tuple of the grouped keys to one of the
        values in that group (the last one seen).

    NOTE(review): equal values are not guaranteed to produce identical
    pickles (e.g. dicts built in a different key order), so such values may
    end up in separate groups.
    """
    t = {}
    d = defaultdict(list)
    for k, v in ds.items():
        # Sign the individual value, not the whole input dictionary.
        h = pickle.dumps(v)
        t[h] = v
        d[h].append(k)

    return {tuple(v): t[k] for k, v in d.items()}

这个解决方案比我最初的方案快得多。

为了测试它，我制作了一组大的随机嵌套字典，并在两个实现上运行cProfile：

original: 204.9 seconds
new:        6.4 seconds

编辑：

我意识到dumps并不适用于某些字典，因为键的顺序可能由于某些不明原因在内部发生变化（参见question）

一个解决方法是对所有的dict按键排序：

import copy
import collections

def faithfulrepr(od):
    """Return a ``repr``-based signature of ``od`` that ignores dict key order.

    Mappings are rebuilt as an ``OrderedDict`` with their items sorted, and
    lists are rebuilt element by element; in both cases every contained
    value is first replaced by its own signature. Any other object is
    passed straight to ``repr``.

    Args:
        od: Object to describe. It is not modified.

    Returns:
        A string that is identical for two inputs differing only in the
        insertion order of (possibly nested) mappings.

    Raises:
        TypeError: If the keys of a mapping cannot be sorted together.
    """
    # The ``collections.Mapping`` alias was removed in Python 3.10; the ABC
    # lives in ``collections.abc``.
    from collections.abc import Mapping

    if isinstance(od, Mapping):
        res = collections.OrderedDict()
        for k, v in sorted(od.items()):
            res[k] = faithfulrepr(v)
        return repr(res)
    if isinstance(od, list):
        # Build a new list instead of writing into ``od``, so the caller's
        # data is left untouched without needing a deep copy.
        return repr([faithfulrepr(v) for v in od])
    return repr(od)

def tupelize_dict(ds):
    """Group the keys of ``ds`` whose values share a ``faithfulrepr`` signature.

    Args:
        ds: Dictionary to regroup.

    Returns:
        A new dictionary. Keys that shared a signature are combined into a
        sorted tuple; a key whose signature is unique is kept as it is. Each
        group maps to the last value seen with that signature.
    """
    value_for = {}
    keys_for = collections.defaultdict(list)
    for key, value in ds.items():
        sig = faithfulrepr(value)
        keys_for[sig].append(key)
        value_for[sig] = value

    grouped = {}
    for sig, keys in keys_for.items():
        if len(keys) > 1:
            new_key = tuple(sorted(keys))
        else:
            # A lone key is not wrapped in a tuple.
            new_key = keys[0]
        grouped[new_key] = value_for[sig]
    return grouped

相关问题更多 >

编程相关推荐

热门问题

热门文章