# Create the lookup table: for every group, list that group's items in
# ascending order of the index value stored for them in ``indices``.
# NOTE(review): ``indices`` is expected to map group -> {item: index}; it is
# defined outside this snippet - confirm against the caller.
lookup_dict = {}
for group in indices:
    group_indices = indices[group]
    # Sort with a key function instead of a positional cmp function: the
    # resulting order is the same, but Python 3's sorted() no longer accepts
    # a comparison function, so this form runs on both Python 2 and 3.
    lookup_dict[group] = sorted(group_indices, key=group_indices.get)
import collections
def create_lookup_list(messages, labels):
    """Encode each message value as an index into a per-label value list.

    Args:
        messages: Iterable of sequences. The values of each sequence are
            paired positionally with ``labels`` (extra items on either side
            are ignored, as with ``zip``). Values must be hashable.
        labels: Sequence of hashable labels, one per message position.

    Returns:
        A ``(lookup_list, lookup)`` pair. ``lookup`` maps each label to the
        list of distinct values seen for it, in first-seen order.
        ``lookup_list`` holds one list per message in which every value is
        replaced by its position in ``lookup[label]``, so that
        ``lookup[label][index]`` recovers the original value.
    """
    lookup = collections.defaultdict(list)
    # value -> index per label; avoids the linear list.index() scan per value.
    positions = collections.defaultdict(dict)
    lookup_list = []
    # Single pass, so ``messages`` may be a one-shot iterator.
    for msg in messages:
        encoded = []
        for label, value in zip(labels, msg):
            known = positions[label]
            if value not in known:
                # First sighting: the next free index is the current length.
                known[value] = len(lookup[label])
                lookup[label].append(value)
            encoded.append(known[value])
        lookup_list.append(encoded)
    return lookup_list, lookup
from itertools import count
from collections import defaultdict
def create_lookup_list(data, domains):
    """Replace every value in ``data`` by an integer id unique in its domain.

    Args:
        data: Iterable of rows; each row's values are paired positionally
            with ``domains``. Values must be hashable.
        domains: Sequence of hashable domain names, one per row position.
            Positions that share a domain name share one id space.

    Returns:
        An ``(out, lookup_table)`` pair. ``out`` is a list with one tuple of
        ids per row. ``lookup_table`` is a dict mapping each domain to the
        list of its values ordered by id, so ``lookup_table[dom][id]``
        recovers the original value.
    """
    def new_domain():
        # Each domain gets its own counter; a missing value is assigned the
        # next id on first access. The builtin next() is used instead of
        # count().next, which exists only on Python 2.
        counter = count()
        return defaultdict(lambda: next(counter))

    domain_keys = defaultdict(new_domain)
    out = [
        tuple(domain_keys[dom][val] for val, dom in zip(row, domains))
        for row in data
    ]
    # Sorting the values by their assigned id inverts the value -> id mapping.
    lookup_table = {k: sorted(d, key=d.get) for k, d in domain_keys.items()}
    return out, lookup_table
在 Otto 的答案(或其他任何使用 string->id 字典的答案)中,如果您非常在意速度,我会将:
替换为:
^{pr2}$这样做更好,因为直接给逆向数组中的每一项赋值比排序更快。
我的方案在长度和复杂度上都差不多:
将 defaultdict 与 itertools.count().next 方法相结合,是为唯一项分配标识符的好方法。下面是一个如何应用于您的情况的示例。
编辑:注意在 Python 3 中,count().next 变成了 count().__next__。
相关问题 更多 >
编程相关推荐