import math, functools
def splitPairs(word):
return [(word[:i+1], word[i+1:]) for i in range(len(word))]
def segment(word):
if not word: return []
allSegmentations = [[first] + segment(rest)
for (first, rest) in splitPairs(word)]
return max(allSegmentations, key = wordSegFitness)
class OneGramDist(dict):
def __init__(self):
self.gramCount = 0
for line in open('Norvig Word Library.txt'):
(word, count) = line[:-1].split('\t')
self[word] = int(count)
self.gramCount += self[word]
def __call__(self, word):
if word in self:
return float(self[word]) / self.gramCount
else:
return 1.0/ self.gramCount
singleWordProb = OneGramDist()
def wordSegFitness(words):
return functools.reduce(lambda x,y: x+y),
(math.log10(singleWordProblem(w)) for w in words)
我正在尝试改进一些文本文件的分词。这些文件中的一些单词被连接起来(例如'howmuchdoesthecarcost
或{
但是,当我运行类似segment("helloworld")
的代码时,我得到了以下错误:TypeError: reduce expected at least 2 arguments, got 1
。如何更改reduce中的参数而不丢失wordSegFitness()
的有效性?在
目前没有回答
相关问题 更多 >
编程相关推荐