我对 Python 相当陌生。我正在尝试让这段代码打开并读取 txt 文件,去掉文件中的标点符号,创建单词列表,然后统计每个单词的出现次数。但它目前统计的是字母的出现次数,而不是单词的出现次数。另外,如何在一个函数中正确调用另一个函数?
import os
# Module-level accumulator: maps each word to its occurrence count.
# Freq() reads and updates it on every call.
dictionary = {}
# Module-level list of [count, word] pairs; Freq() appends to it and
# sorts it (descending) when printing the results.
dictionarylist = []
def make_a_listh():
    """Read every file in ``data/training/Health/`` and strip punctuation.

    For each regular file in the directory, the file name, a separator
    line and that file's punctuation-free text are printed.

    Returns:
        str: The punctuation-free text of all files, joined with newlines
        so that words from neighbouring files cannot run together.

    Raises:
        OSError: If the directory or one of its files cannot be read.
    """
    path = 'data/training/'
    Heal = path + 'Health/'
    # The backslash is escaped explicitly; a bare "\," is an invalid
    # escape sequence that newer Python versions warn about.
    punctuations = '''!()-—[]{};:'"\\,<>.|/?@#$%^&*_~'''
    # One translation table removes all punctuation in a single pass
    # instead of concatenating the result character by character.
    strip_table = str.maketrans('', '', punctuations)
    line = (
        "---------------------------------------------------"
        "--------------------------"
    )
    cleaned_texts = []
    # Sorted so the output order does not depend on the file system.
    for j in sorted(os.listdir(Heal)):
        full_path = Heal + j
        # Sub-directories cannot be opened as text files; skip them.
        if not os.path.isfile(full_path):
            continue
        # The context manager closes the handle even if reading fails.
        with open(full_path, 'r') as file2:
            no_puncth = file2.read().translate(strip_table)
        # Print only this file's text, not the text of earlier files.
        print(j + line, "\n", no_puncth)
        cleaned_texts.append(no_puncth)
    return "\n".join(cleaned_texts)
def make_a_listm():
    """Read every file in ``data/training/Minnesota/`` and strip punctuation.

    Returns:
        str: The punctuation-free text of all regular files in the
        directory, joined with newlines so that the last word of one file
        and the first word of the next stay separate.

    Raises:
        OSError: If the directory or one of its files cannot be read.
    """
    path = 'data/training/'
    Minn = path + 'Minnesota/'
    # The backslash is escaped explicitly; a bare "\," is an invalid
    # escape sequence that newer Python versions warn about.
    punctuations = '''!()—-—[]{};:’'"\\,<>.|/?@“#$%^&*_~'''
    # Deleting characters via a translation table works on the file's
    # *characters*. Iterating over the open file object instead yields
    # whole lines, so no punctuation would ever be removed.
    strip_table = str.maketrans('', '', punctuations)
    cleaned_texts = []
    # Sorted so the result does not depend on file-system ordering.
    for i in sorted(os.listdir(Minn)):
        full_path = Minn + i
        # Sub-directories cannot be opened as text files; skip them.
        if not os.path.isfile(full_path):
            continue
        # The context manager closes the handle even if reading fails.
        with open(full_path, 'r') as file1:
            cleaned_texts.append(file1.read().translate(strip_table))
    no_punctm = "\n".join(cleaned_texts)
    return no_punctm
def Freq(file1):
    """Count word occurrences in *file1* and print them, most frequent first.

    The counts are accumulated in the module-level ``dictionary`` and the
    module-level ``dictionarylist`` is rebuilt as ``[count, word]`` pairs.

    Args:
        file1: Either a single string of text or an iterable of text
            lines (for example an open file object or a list of strings).

    Returns:
        None. The sorted ``[count, word]`` pairs are printed one per line.
    """
    # Iterating over a plain string yields single characters, which makes
    # the loop count letters instead of words. Break a string into lines
    # first so both kinds of input are processed line by line.
    if isinstance(file1, str):
        lines = file1.splitlines()
    else:
        lines = file1

    for line in lines:
        for eachword in line.split():
            # Start at 0 for a new word, otherwise add to the old count.
            dictionary[eachword] = dictionary.get(eachword, 0) + 1

    # Rebuild the list in place rather than appending, so that calling
    # Freq() more than once does not leave duplicate, stale entries.
    dictionarylist[:] = [[count, word] for word, count in dictionary.items()]

    # Pairs start with the count, so a descending sort puts the most
    # frequent words first.
    print("\n".join(map(str, sorted(dictionarylist, reverse=True))))
# Script entry point: make_a_listm() is called first and its return value
# is handed to Freq() through the file1 keyword argument. Using one
# function's result in another works the same way: call it and pass or
# assign what it returns.
Freq(file1=make_a_listm())
下面演示如何使用 collections 模块中的 Counter() 类来统计单词出现次数,以及如何使用 re.sub() 更高效地去除标点,例如:Counter(re.sub(r"[^\w\s]", "", text).split())
相关问题 更多 >
编程相关推荐