为了修复一堆全大写的文本文件,我编写了一个脚本:
def lowit(line):
line = line.lower()
sentences = line.split('. ')
sentences2 = [sentence[0].capitalize() + sentence[1:] for sentence in sentences]
string2 = '. '.join(sentences2)
return string2
def capcico(line, allKeywords):
allWords = line.split(' ')
original = line.split(' ')
for i,words in enumerate(allWords):
words = words.replace(',', '')
words = words.replace('.', '')
words = words.replace(';', '')
if words in allKeywords:
original[i] = original[i].capitalize()
return ' '.join(original)
def main():
dfile = open('fixed.txt', 'w')
f = open('allist.txt', 'r')
allKeywords = f.read().split('\n')
with open('ulm.txt', 'r') as fileinput:
for line in fileinput:
low_line = lowit(line)
dfile.write('\n' + capcico(low_line, allKeywords))
dfile.close()
if __name__ == '__main__':
main()
它是可行的,但问题是,如果同一行中有多个城市/国家,它就不能将其资本化:
TOWN IN WUERTTEMBERG, GERMANY.
更改为:
Town in Wuerttemberg, germany.
有什么问题吗?
TNX公司
因为“德国”实际上是“德国\n”。 去掉单词的下线。。。你知道吗
相关问题 更多 >
编程相关推荐