得到我认为是好的结果，但还不够

def calculate_frequencies(file_contents):
    """Build a word cloud image from the interesting words in a text.

    The text is split on whitespace; every token is lower-cased and has the
    characters listed in ``punctuations`` removed.  Tokens that are then
    purely alphabetic and are not in ``uninteresting_words`` are counted, and
    the resulting frequency table is handed to ``wordcloud.WordCloud``.

    Args:
        file_contents: The text to analyse, as a single string.

    Returns:
        The rendered word cloud, as returned by ``WordCloud.to_array()``.
    """
    # Here is a list of punctuations and uninteresting words you can use to
    # process your text.  The raw-string prefix keeps the backslash literal
    # without relying on an invalid escape sequence.
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''
    uninteresting_words = {
        "the", "a", "to", "if", "is", "it", "of", "and", "or", "an", "as",
        "i", "me", "my", "we", "our", "ours", "you", "your", "yours", "he",
        "she", "him", "his", "her", "hers", "its", "they", "them", "their",
        "what", "which", "who", "whom", "this", "that", "am", "are", "was",
        "were", "be", "been", "being", "have", "has", "had", "do", "does",
        "did", "but", "at", "by", "with", "from", "here", "when", "where",
        "how", "all", "any", "both", "each", "few", "more", "some", "such",
        "no", "nor", "too", "very", "can", "will", "just", "in", "for", "so",
        "on", "says", "not", "into", "because", "could", "out", "up", "back",
        "about",
    }

    # LEARNER CODE START HERE
    # One translation table removes every punctuation character in a single
    # pass per token (a list has no .replace(), so it cannot be used here).
    strip_punctuation = str.maketrans("", "", punctuations)

    frequencies = {}
    for token in file_contents.split():
        word = token.lower().translate(strip_punctuation)
        # isalpha() is False for the empty string, so tokens that consisted
        # only of punctuation are skipped here as well.
        if word.isalpha() and word not in uninteresting_words:
            # The first occurrence must count as 1, not 0.
            frequencies[word] = frequencies.get(word, 0) + 1

    # wordcloud
    # NOTE(review): generate_from_frequencies presumably rejects an empty
    # table -- confirm how callers want a text with no usable words handled.
    cloud = wordcloud.WordCloud()
    cloud.generate_from_frequencies(frequencies)
    return cloud.to_array()

1条回答

网友

1楼 · 发布于 2024-09-19 20:28:07

如前所述，我认为您的代码无法运行：words 是一个列表，而 .replace 不是有效的 list 方法。

要简单地获取计数，请参阅以下代码

有关去除标点符号的方法，请参阅：Best way to strip punctuation from a string

计数时，使用Counter

import string
from collections import Counter

# Common English stop words that are excluded from the frequency count.
uninteresting_words = {
    "the", "a", "to", "if", "is", "it", "of", "and", "or", "an", "as", "i",
    "me", "my", "we", "our", "ours", "you", "your", "yours", "he", "she",
    "him", "his", "her", "hers", "its", "they", "them", "their", "what",
    "which", "who", "whom", "this", "that", "am", "are", "was", "were", "be",
    "been", "being", "have", "has", "had", "do", "does", "did", "but", "at",
    "by", "with", "from", "here", "when", "where", "how", "all", "any",
    "both", "each", "few", "more", "some", "such", "no", "nor", "too", "very",
    "can", "will", "just", "in", "for", "so", "on", "says", "not", "into",
    "because", "could", "out", "up", "back", "about",
}


def calculate_frequencies(s):
    """Count the interesting words in a string.

    The string is split on whitespace; each token is lower-cased and has all
    ``string.punctuation`` characters removed.  Tokens that end up empty
    (they consisted only of punctuation) and tokens listed in
    ``uninteresting_words`` are not counted.

    Args:
        s: The text to analyse.

    Returns:
        A ``collections.Counter`` mapping each remaining word to its number
        of occurrences.
    """
    # Build the translation table once per call instead of once per token.
    strip_punctuation = str.maketrans("", "", string.punctuation)
    cleaned = (token.lower().translate(strip_punctuation) for token in s.split())
    # ``if word`` drops the empty strings left behind by tokens such as "-"
    # or "...", which would otherwise be counted as a word.
    return Counter(
        word for word in cleaned if word and word not in uninteresting_words
    )

# Example usage: the stop words ("this", "is", "a", "very") and the
# punctuation marks are removed before counting.
print(calculate_frequencies('this is a string! A very fancy string?'))
# Expected output: Counter({'string': 2, 'fancy': 1})

对于 WordCloud，您无需自己统计词频，因为它会替您完成。注意它有一个 stopwords 参数，还有一个 process_text 函数；该函数默认使用的正则表达式模式会忽略标点符号。参见：https://amueller.github.io/word_cloud/generated/wordcloud.WordCloud.html

相关问题更多 >

编程相关推荐

热门问题

热门文章