擅长:Python、MySQL、Java
<p>根据我们在聊天中的讨论,现将解决方案发布如下:</p>
<pre><code>import codecs
import re
# Pattern for the "sid" value inside a log field: the literal text "id:", an
# optional literal <<" prefix (unnamed group 1), then the named group "sid"
# made of ASCII letters, digits, '.', '_' and '+'. Because of the '*'
# quantifier, "sid" may match as an empty string.
# NOTE: the ur'' literal prefix is Python 2 syntax; Python 3 does not accept it.
RE_SID = re.compile(ur'id:(<<")?(?P<sid>[A-Za-z\d._+]*)', re.U) # with re.U, \d matches non-ASCII (Unicode) digits, too
# cfg is not defined in this excerpt; presumably a config object created
# elsewhere -- verify against the full script.
# codecs.open() with an encoding decodes the file while reading, so each line
# yielded below is a unicode string rather than raw bytes.
# NOTE(review): no close() call is visible in this excerpt; confirm the file is
# closed later on.
input_file = codecs.open(cfg.log_file, encoding='utf-8') # Read the file with UTF8 encoding
# NOTE(review): the indentation of the loop body appears to have been lost in
# this excerpt; the statements below presumably nest inside one another.
for line in input_file:
# Trim surrounding whitespace, then split on single spaces; consecutive spaces
# therefore produce empty fields.
fields = line.strip().split(u' ') # u prefix is important!
# Only lines with at least 11 fields are processed; the reason for 11 is not
# visible here -- presumably the expected column count of the log format.
if len(fields) >= 11:
# The except/finally clause belonging to this try is not shown in the excerpt.
try:
# ......
# search() returns None when the eighth field (index 7) contains no "id:";
# calling .group() on None raises AttributeError.
sid = RE_SID.search(fields[7]).group('sid') # Or check if there is a match first
</code></pre>