擅长:python、mysql、java
<p>也许可以将单词/分数文件作为嵌套字典(dict的dict)加载到内存中,然后循环遍历每个句子中的每个单词,检查该单词是否是从单词文件加载的字典中的键。</p>
<p>像这样的东西能起作用吗:</p>
<pre><code>word_lookup = load_words(file)
# Process each sentence in turn; the loop body must be indented to be valid.
for s in sentences:
    run_sentence(s)
def load_words(file):
    """Build a term -> record lookup from a tab-separated word/score file.

    Each non-blank line must hold exactly four tab-separated fields, in this
    order: ``term``, ``score``, ``numPos``, ``numNeg``.

    Args:
        file: Iterable of text lines, e.g. an open file object.

    Returns:
        A dict mapping each term to a dict with the keys ``'score'``,
        ``'numPos'`` and ``'numNeg'``. The values are kept as the strings
        read from the file. If a term occurs more than once, the first
        occurrence wins.

    Raises:
        ValueError: If a non-blank line does not split into four fields.
    """
    word_lookup = {}
    for line in file:
        stripped = line.strip()
        if not stripped:
            # Tolerate blank lines (such as a trailing empty line) instead of
            # failing on the four-way unpack below.
            continue
        term, score, numPos, numNeg = stripped.split("\t")
        # Store into the dict that is actually returned; `in` replaces
        # dict.has_key(), which does not exist in Python 3.
        if term not in word_lookup:
            word_lookup[term] = {'score': score, 'numPos': numPos, 'numNeg': numNeg}
    return word_lookup
def run_sentence(s):
    """Look up every word of a sentence in the word/score table.

    The sentence is passed through ``standardize_sentence`` and then split on
    single spaces; each resulting token is handed to ``check_word``.

    Args:
        s: The raw sentence text.

    Returns:
        A list of ``(word, word_info)`` pairs, in sentence order, one for
        each token for which ``check_word`` returned a truthy record.
    """
    s = standardize_sentence(s)  # Assuming you want to strip punctuation, symbols, convert to lowercase, etc
    matches = []
    for word in s.split(' '):
        word_info = check_word(word)
        if word_info:
            # Matched word, use your scores somehow (word_info['score'], etc).
            # Collecting the match gives the `if` a real body and lets the
            # caller decide how to combine the scores.
            matches.append((word, word_info))
    return matches
def check_word(word):
    """Return the stored record for ``word``, or ``None`` if it is unknown.

    Reads the module-level ``word_lookup`` dict.

    Args:
        word: The token to look up.

    Returns:
        The value stored under ``word`` in ``word_lookup``, or ``None`` when
        the key is absent.
    """
    # dict.get() yields None for a missing key, so it replaces the explicit
    # has_key()/else branch; has_key() does not exist in Python 3.
    return word_lookup.get(word)
</code></pre>