计算文本中特定字符串（如“climate finance”）出现次数的程序

# Print the N most frequent words of at least six characters in a text file.
from collections import Counter

input = 'file.txt'  # path of the text file to analyse
CounterWords = {}   # word -> number of occurrences
words = {}

with open(input, 'r', encoding='utf-8-sig') as fh:
    for line in fh:
        # Drop commas, apostrophes and periods, lowercase, split on whitespace.
        word_list = line.replace(',', '').replace('\'', '').replace('.', '').lower().split()
        for word in word_list:
            if len(word) < 6:
                # Words shorter than six characters are not counted.
                continue
            elif word not in CounterWords:
                CounterWords[word] = 1
            else:
                CounterWords[word] = CounterWords[word] + 1

N = 50  # how many of the most frequent words to report
top_words = Counter(CounterWords).most_common(N)
for word, frequency in top_words:
    print("%s %d" % (word, frequency))

1条回答

网友

1楼 · 发布于 2024-04-24 06:36:28

您只需使用your_file_content.count(your_string)：

from collections import Counter
input = 'D:\\file.txt'  # path of the text file opened below; NOTE: this name shadows the built-in input()

import itertools
def pairwise(iterable):
    """Return an iterator over overlapping neighbouring pairs of *iterable*.

    For items s0, s1, s2, ... the result yields (s0, s1), (s1, s2), ...
    An input with fewer than two items yields no pairs.
    """
    current_items, following_items = itertools.tee(iterable, 2)
    # Discard the first item of the second copy so it runs one step ahead.
    for _ in following_items:
        break
    return zip(current_items, following_items)

# Count word and adjacent word-pair frequencies in the input file, then
# report the N most common of each plus one specific pair.

N = 50  # how many of the most frequent entries to report

words = {}  # not used below; kept so the module-level name still exists

with open(input, 'r', encoding='utf-8-sig', errors='ignore') as fh:
    file_content = fh.read().replace('\n', ' ')

# Normalise once: drop commas, apostrophes and periods, lowercase, and split
# on whitespace. Counting is done on these tokens rather than with
# str.count() on the raw text, because str.count() is case-sensitive and
# also matches substrings inside longer words.
word_list = file_content.translate(str.maketrans('', '', ",'.")).lower().split()

# Single words: only words of at least six characters are counted.
CounterWords = Counter(word for word in word_list if len(word) >= 6)

# Pairs: built from the full token sequence (duplicates NOT removed) so that
# every pair really is two words that appear next to each other in the text.
word_pairs_list = pairwise(word_list)
CounterPairs = Counter(word_pairs_list)

# for all single words :
top_words = CounterWords.most_common(N)
for word, frequency in top_words:
    print("%s %d" % (word, frequency))

# for all pairs :
top_pairs = CounterPairs.most_common(N)
for pair, frequency in top_pairs:
    print("%s %d" % (pair, frequency))

# for specific pairs :
# A Counter returns 0 for a missing key, so an absent pair no longer raises
# KeyError.
print("\n%s %d" % ('climate finance', CounterPairs[('climate', 'finance')]))

pairwise函数取自：Iterate a list as pair (current, next) in Python

相关问题更多 >

编程相关推荐

热门问题

热门文章