<p>出处:<a href="https://gist.github.com/BroHui/aca2b8e6e6bdf3cb4af4b246c9837fa3" rel="nofollow noreferrer">https://gist.github.com/BroHui/aca2b8e6e6bdf3cb4af4b246c9837fa3</a></p>
<p>下面的代码可以解决这个问题。它使用 Python 标准库的 tokenize 模块对源码做词法分析,因此字符串中的 # 不会被误当作注释。您可以根据自己的需要修改此代码。</p>
<pre><code>""" Strip comments and docstrings from a file.
"""
import sys, token, tokenize
def do_file(fname):
    """Strip comments and docstrings from one Python source file.

    The file is split into tokens with ``tokenize`` instead of being
    matched with a regular expression, so a ``#`` inside a string literal
    is never mistaken for the start of a comment.  The result is written
    to a new file named ``fname + ",strip"``.

    Rewriting rules:
      * A string token that is the first token of the file, or that
        directly follows an INDENT token, is treated as a docstring and
        replaced by ``"# "``.
      * A comment token is replaced by a newline.  The line's own
        NEWLINE/NL token is still copied afterwards, so every removed
        comment leaves an extra blank line in the output.
      * Every other token is copied verbatim, padded with spaces so it
        starts in its original column.

    Args:
        fname: Path of the source file to read.
    """
    # Context managers make sure both files are flushed and closed, even
    # when tokenizing raises on malformed input.
    with open(fname) as source, open(fname + ",strip", "w") as mod:
        # Starting out as INDENT lets a string that is the very first
        # token of the file be recognised as the module docstring.
        prev_toktype = token.INDENT
        last_lineno = -1
        last_col = 0

        tokgen = tokenize.generate_tokens(source.readline)
        for toktype, ttext, (slineno, scol), (elineno, ecol), _ in tokgen:
            # A token on a new line is padded from column 0.
            if slineno > last_lineno:
                last_col = 0
            # Re-create the gap between the previous token and this one.
            if scol > last_col:
                mod.write(" " * (scol - last_col))

            if toktype == token.STRING and prev_toktype == token.INDENT:
                # Docstring: keep a placeholder so the block is not empty.
                mod.write("# ")
            elif toktype == tokenize.COMMENT:
                # Comment: drop the text, emit a line break instead.
                mod.write("\n")
            else:
                mod.write(ttext)

            prev_toktype = toktype
            last_col = ecol
            last_lineno = elineno
if __name__ == '__main__':
    # Strip the file named on the command line; with no argument, fall
    # back to the sample file used in this example.
    do_file(sys.argv[1] if len(sys.argv) > 1 else "text.txt")
</code></pre>
<hr/>
<p><strong>text.txt:</strong></p>
<pre><code># this is comment line.
age = 18 # comment in line
msg1 = "I'm #1." # comment. there's a # in code.
msg2 = 'you are #2. ' + 'He is #3' # strange sign ' # ' in comment.
print('Waiting your answer')
</code></pre>
<hr/>
<p><strong>输出:</strong></p>
<pre><code>age = 18
msg1 = "I'm #1."
msg2 = 'you are #2. ' + 'He is #3'
print('Waiting your answer')
</code></pre>
<p><strong>输入:</strong></p>
<pre><code>msg1 = "I'm #1." # comment. there's a # in code. the regex#.*$ will match #1." # comment. there's a # in code. . Right match shoud be # comment. there's a # in code.
</code></pre>
<p><strong>输出:</strong></p>
<pre><code>msg1 = "I'm #1."
</code></pre>