# Regex character class matching one or more consecutive punctuation
# characters. re.escape keeps the characters that are special inside
# [...] (such as ']', '\', '^' and '-') literal, so every punctuation
# character is really part of the set.
pattern = '[' + re.escape(string.punctuation) + ']+'

# NOTE(review): `file` is not defined in this snippet -- presumably an open
# text file (or any iterable of lines) created earlier; confirm.
# NOTE: `parse_token` must already be defined when this loop executes.
for line in file:
    # Splitting on a single space keeps the trailing newline on the last
    # token and yields empty strings for runs of consecutive spaces.
    tokens = line.split(' ')
    for token in tokens:
        # re.sub takes (pattern, repl, string); replacing with '' strips
        # the punctuation from the token before it is parsed.
        parsed = parse_token(re.sub(pattern, '', token))
        # Now do whatever else you might need to do with token and parsed.
        # Remember, you still have access to the `line` string and `tokens` list!
def parse_token(token):
    """Process a single "clean" token.

    This is a placeholder: it performs no work and implicitly returns
    ``None``. Replace the body with the real per-token logic.

    Args:
        token: The token string to process. The calling loop in this
            snippet passes tokens whose punctuation has been stripped.
    """
    pass  # Do whatever you need to do with your "clean" token here.
保留原始字符串会更容易些,不是吗?你把标点符号放回去的最终目的是什么?如果你最终要重建整行文本,为什么不一开始就保留它呢?
我会使用正则表达式来实现:
您可以遍历结果列表,修改其中的单词,然后用 `''.join()` 将其还原为一个在相同位置带有相同标点符号的句子。
相关问题 更多 >
编程相关推荐