错误“类型为'module'的参数不可迭代”（TypeError: argument of type 'module' is not iterable）

def clean_text(text, remove_stopwords=True):
    """Normalize raw text and return its lemmatized tokens.

    The pipeline lower-cases the text, expands contractions, strips markup
    and punctuation, optionally drops English stop words, tokenizes, and
    lemmatizes each token.

    Args:
        text: Raw input string.
        remove_stopwords: When true, drop NLTK's English stop words. The
            original body read this name without declaring it; it is now an
            explicit keyword. NOTE(review): confirm ``True`` is the wanted
            default.

    Returns:
        A list of lemmatized token strings.

    Note:
        ``contractions`` must be a mapping from contraction to expansion
        (it is used both with ``in`` and with ``[word]``). If the name is
        bound to an imported module instead, the membership test raises
        ``TypeError: argument of type 'module' is not iterable``.
    """
    # Convert words to lower case.
    text = text.lower()

    # Expand contractions word by word. Re-join with a space so neighbouring
    # words are not glued together (the original joined with "").
    text = " ".join(
        contractions[word] if word in contractions else word
        for word in text.split()
    )

    # Format words and remove unwanted characters.
    # NOTE(review): this URL pattern only matches a scheme that is directly
    # followed by a line break, a comma and a quoted line break; it looks
    # garbled. Kept byte-for-byte until the intended pattern is confirmed.
    text = re.sub(r'https?:\/\/[\r\n],"[\r\n]"', '', text, flags=re.MULTILINE)
    text = re.sub(r'\<a href', ' ', text)
    text = re.sub(r'&', '', text)
    # The original wrote `text- re.sub(...)`, a subtraction instead of an
    # assignment, so this substitution was never applied.
    text = re.sub(r'[_"\-;%()|+&=*%.,!?:#$@\[\]/]', ' ', text)
    text = re.sub(r'<br />', ' ', text)
    text = re.sub(r'\'', ' ', text)

    # Remove stop words, again re-joining with a space.
    if remove_stopwords:
        stops = set(stopwords.words("english"))
        text = " ".join(word for word in text.split() if word not in stops)

    # Tokenize once. The original passed the resulting token list into two
    # further tokenizers, which operate on a string rather than a list.
    tokens = nltk.WordPunctTokenizer().tokenize(text)

    # Lemmatize whole tokens; the original mapped the lemmatizer over the
    # individual characters of every token.
    lemm = nltk.stem.WordNetLemmatizer()
    return [lemm.lemmatize(token) for token in tokens]

2条回答

网友

1楼 · 编辑于 2024-09-29 01:27:03

该错误似乎是由contractions引起的。我不知道您是如何创建contractions的，但是请记住，只有当contractions是一个类似于 contractions = ["abc", "xyz", "123"] 的可迭代对象时，`word in contractions` 这样的成员检查才能正常工作。

网友

2楼 · 编辑于 2024-09-29 01:27:03

您导入的contractions与您实际想要用来检查word是否在其中的可迭代内容似乎不匹配（没有完整示例很难确定）。下面的示例会产生相同的错误。解决办法是使用contractions.[该模块中某个可迭代的对象]


import os

# Items that are mapped through return_str below.
sent = ['1', '2', '3']
# A real iterable; the commented-out line in return_str tests membership
# against this list.
something_it = ['2']

def return_str(s):
    """Return 'x' when ``s`` is found in ``os``, otherwise ``s`` itself.

    This reproduces the error under discussion: ``os`` is a module, so the
    ``in`` test below raises ``TypeError`` instead of yielding a result.
    """
    # A working variant tests against an iterable: `s in something_it`.
    return 'x' if s in os else s

# Apply return_str to every item. As written this raises TypeError on the
# first item, because return_str tests membership in the `os` module.
another_list = list(map(return_str, sent))
print(another_list)

相关问题更多 >

编程相关推荐

热门问题

热门文章