向量化lis

def tfc(slova): import math as m meshokslov1=[{}for i in range(len(slova))] meshokslov2=[{}for i in range(len(slova))] SummaKvadratov=0 for i in range(len(slova)): for j in range(len(slova[i])): n=0 for q in slova: if q.count(slova[i][j])!=0: n+=1 if slova[i][j] in meshokslov1: continue else: meshokslov1[i][slova[i][j]]=slova[i].count(slova[i][j])*m.log10(len(slova)/n) SummaKvadratov+=(slova[i].count(slova[i][j])*m.log10(len(slova)/n))**2 for i in range(len(slova)): for j in range(len(slova[i])): if slova[i][j] in meshokslov2: continue else: meshokslov2[i][slova[i][j]]=meshokslov1[i][slova[i][j]]/(SummaKvadratov**0.5) return meshokslov2

1条回答

网友

1楼 · 发布于 2024-09-27 04:19:56

以下是针对您的问题的自上而下设计的解决方案：

import math

def frequency(term, document):
    return document.count(term) / len(document)

def totalNumOfDocuments(inList):
    return len(inList)

def numOfDocumentsForTerm(inList, term):
    return len([doc for doc in inList if term in doc])

def TFCWeighting(inList):

    tfc = []

    N = totalNumOfDocuments(inList)

    for document in inList:

        temp = []
        for term in document:

            #numerator
            freq = frequency(term, document)
            n = numOfDocumentsForTerm(inList, term)
            logarithm = math.log(N/n)
            numerator = freq * logarithm

            #denominator
            summation = sum([(frequency(t, document) * math.log(N / numOfDocumentsForTerm(inList, term))) ** 2 for t in document])
            denominator = math.sqrt(summation)

            temp.append(round(numerator / denominator,3))

        tfc.append(temp)

    return tfc

l1=[['can','help','you'],['thank','you'],['help','help','help','help','help','help']]
l2=[['I','help'],['will','you']]

print(TFCWeighting(l1))
print(TFCWeighting(l2))

输出：

[[0.577, 0.577, 0.577], [0.707, 0.707], [0.408, 0.408, 0.408, 0.408, 0.408, 0.408]]
[[0.707, 0.707], [0.707, 0.707]]

相关问题更多 >

编程相关推荐

热门问题

热门文章

向量化lis

相关问题 更多 >

编程相关推荐

热门问题

热门文章

相关问题更多 >