from collections import Counter
# Path of the text file to analyse.
INPUT_PATH = 'file.txt'
# Words shorter than this many characters are ignored.
MIN_WORD_LENGTH = 6
# Number of most frequent words to report.
N = 50

# Deletes commas, apostrophes and full stops in a single pass.
_PUNCTUATION = str.maketrans('', '', ",'.")


def count_long_words(lines, min_length=MIN_WORD_LENGTH):
    """Count how often each sufficiently long word occurs in *lines*.

    Each line has commas, apostrophes and full stops removed, is
    lower-cased and then split on whitespace. Words with fewer than
    *min_length* characters are skipped.

    Args:
        lines: Iterable of strings (for example an open text file).
        min_length: Minimum number of characters a word must have to
            be counted.

    Returns:
        A ``Counter`` mapping each kept word to its number of occurrences.
    """
    counts = Counter()
    for line in lines:
        cleaned = line.translate(_PUNCTUATION).lower()
        counts.update(
            word for word in cleaned.split() if len(word) >= min_length
        )
    return counts


def main():
    """Print the N most frequent long words of INPUT_PATH as 'word count'."""
    # 'utf-8-sig' transparently drops a leading byte-order mark, if any.
    with open(INPUT_PATH, 'r', encoding='utf-8-sig') as fh:
        counts = count_long_words(fh)
    for word, frequency in counts.most_common(N):
        print(f"{word} {frequency}")


if __name__ == "__main__":
    main()
目前,我可以选出两个出现最频繁、长度超过 X 个字符的字串。
程序应筛选文本并统计词组的出现次数,例如:
“气候金融” “市场失灵” “巴黎2015”
仍应保留每个字符串的最小字符数限制,以防止出现诸如“I and”之类的结果。
您只需使用
your_file_content.count(your_string)
其中 pairwise 函数取自:Iterate a list as pair (current, next) in Python
相关问题 更多 >
编程相关推荐