from docx import Document
# Characters to strip from every paragraph: the lowercase ASCII letters
# followed by a fixed set of punctuation marks. Uppercase variants are
# derived later, where the translation table is built.
alphaDic = list("abcdefghijklmnopqrstuvwxyz" "!?.~,()$-:;'/")

# Document to clean up, and the index of the paragraph currently processed.
doc = Document('realexample.docx')
docIndex = 0
def delete_paragraph(paragraph):
    """Remove *paragraph* from the document it belongs to.

    The paragraph's underlying XML element is detached from its parent
    element, and the wrapper's references to that element are cleared so
    the now-stale wrapper cannot be used to touch the removed element.

    Args:
        paragraph: An object exposing the XML element as ``_element``
            (and ``_p``); the element must provide ``getparent()``.
    """
    element = paragraph._element
    element.getparent().remove(element)
    # Clear the references on the wrapper itself, not on the detached
    # element, so the wrapper no longer points at removed content.
    paragraph._p = paragraph._element = None
# Translation table that maps every character in alphaDic (plus its
# uppercase form) to None, i.e. deletes it. It does not depend on the
# paragraph being processed, so it is built once, before the loop.
rep_dic = {ord(ch): None for ch in alphaDic + [c.upper() for c in alphaDic]}

# Iterate over a snapshot of the paragraphs: paragraphs are removed from
# the document inside the loop, and a snapshot avoids manual index
# bookkeeping after each deletion.
for paragraph in list(doc.paragraphs):
    stripped = paragraph.text.translate(rep_dic)
    # Collapse runs of whitespace left behind by the removed characters.
    removeExcessSpaces = " ".join(stripped.split())
    if removeExcessSpaces:
        paragraph.text = removeExcessSpaces
    else:
        # Nothing is left after stripping: drop the paragraph entirely.
        delete_paragraph(paragraph)
所以测试文档看起来像这样
(原文此处的示例文档内容缺失。)我正在努力实现下面的结果:
你好
朋友们
晚上好
现在代码删除了所有的空段落和多余的空格,所以我卡在这里了。我只想删除因去掉英语单词而产生的空行。
你好
朋友们
晚上好
您可以先查找英语单词,一旦找到某个英语单词(例如“word”),就在其后附加“\n”,然后从文档中删除这个新得到的字符串“word\n”。在 Python 中拼接字符串使用 + 运算符,只需执行 "word" + "\n" 即可。
相关问题 更多 >
编程相关推荐