高效计算频繁项对的出现次数

def get_sup(x):
    """Count the broadcast baskets that contain itemset ``x``.

    Args:
        x: A frozenset of items (a candidate itemset).

    Returns:
        ``(x, count)`` when ``count`` reaches the minimum support ``sup``;
        otherwise the empty tuple ``()``, which is falsy so that the
        ``.filter(lambda x: x)`` calls below drop infrequent candidates.

    NOTE(review): ``sup`` (the support threshold) is not defined anywhere in
    this snippet -- it must exist as a global before this function runs.
    """
    x_sup = sum(1 for t in shared_itemset.value if x.issubset(t))
    return (x, x_sup) if x_sup >= sup else ()


# NOTE(review): ``sc`` is presumably an existing SparkContext; it is not
# created in this snippet -- confirm against the surrounding session.
data = sc.textFile('browsing.txt')
itemset = data.map(lambda line: ([item for item in line.strip().split(' ')]))
# Collect every basket as a frozenset and broadcast the full list so that
# get_sup can scan it from inside RDD transformations.
shared_itemset = sc.broadcast(itemset.map(lambda x: frozenset(x)).collect())

# Candidate 1-itemsets: every distinct item wrapped in a frozenset.
c1 = data.flatMap(lambda line: line.strip().split(' ')).distinct()
c1 = c1.map(lambda x: frozenset({x}))
print(c1.count())
# Output reported in the post: 12592

# Frequent 1-itemsets as (itemset, support) tuples.
f1 = c1.map(get_sup).filter(lambda x: x)
print(f1.count())
# Output reported in the post: 735
f1.persist()

# Candidate pairs: cartesian product of the frequent items, keeping only
# sets that really hold two different items; distinct() removes duplicates
# such as (a, b) / (b, a), which map to the same frozenset.
f1m = f1.map(lambda x: tuple(x[0]))
f1c = (
    f1m.cartesian(f1m)
    .map(lambda x: frozenset(x[0] + x[1]))
    .filter(lambda x: len(x) > 1)
    .distinct()
)
# f1c has 269745 elements and has data structure frozenset({'FRO11987', 'GRO73461'})
f2 = f1c.map(get_sup).filter(lambda x: x)
# NOTE(review): the original paste ends with the stray token "pairs" --
# likely a page tag rather than code, so it is kept here only as a comment.

1条回答

网友

1楼 · 发布于 2024-09-27 21:26:06

这个解决方案可能有效：对于每个篮子，它先列出篮中元素所有可能的两两组合（对），然后检查给定的 pair 是否在这些组合之中。最后输出包含该对的篮子所占的比例（0 到 1 之间的小数，而不是百分数）。

from itertools import permutations
# Sample data: two baskets of item ids, and the item pair to look for.
b1, b2 = [1, 2, 3, 4], [4, 5, 6]
baskets = [b1, b2]
pair = [1, 2]

def finder(baskets, pair):
    """Return the fraction of baskets that contain every item of ``pair``.

    Items are compared as whole values, never as concatenated strings, so a
    basket holding ``12`` is not mistaken for one holding ``1`` and ``2``.
    The order of the items in ``pair`` does not matter. If ``pair`` repeats
    an item, the basket must hold at least that many copies of it.

    Args:
        baskets: A sequence of baskets, each an iterable of hashable items.
        pair: The items to look for (typically two).

    Returns:
        A float in ``[0.0, 1.0]``: matching baskets divided by all baskets.
        ``0.0`` when ``baskets`` is empty, instead of dividing by zero.
    """
    # Local import keeps this snippet self-contained.
    from collections import Counter

    if not baskets:
        return 0.0

    wanted = Counter(pair)
    # Counter subtraction keeps only positive counts, so an empty result
    # means the basket covers every wanted item (with multiplicity).
    num = sum(1 for basket in baskets if not wanted - Counter(basket))
    return num / len(baskets)

# Example call with the sample data above: 1 of the 2 baskets holds both
# 1 and 2, so the expected result is 0.5.
finder(baskets, pair)

相关问题更多 >

编程相关推荐

热门问题

热门文章