如何忽略在字典中找不到的单词

# Load the training corpus so the vocabulary and label mappings can be rebuilt.
df = pd.read_csv('C:/Users/User/Desktop/Coding/lstm emotion recognition/emotion.data/emotion.data')

# Preparing data for model training.
# Tokenization: the data is already tokenized and lowercased, so splitting on
# single spaces is enough.
input_sentences = [text.split(" ") for text in df["text"].values.tolist()]
labels = df["emotions"].values.tolist()

# Build the vocabulary (word -> integer id) and track the longest sentence,
# which is later used as the padding length.
word2id = dict()
max_words = 0
for sentence in input_sentences:
    for word in sentence:
        # Each unseen word receives the next free id.
        if word not in word2id:
            word2id[word] = len(word2id)
    max_words = max(max_words, len(sentence))

# Label <-> id mappings.
# NOTE(review): iteration order of a set of strings can differ between
# interpreter runs; confirm this mapping matches the one used when the model
# was trained.
label2id = {l: i for i, l in enumerate(set(labels))}
id2label = {v: k for k, v in label2id.items()}

import keras
from keras.models import load_model

model = load_model('modelsave2.py')
print(model)

# Second model sharing the same weights that also exposes the attention layer.
# `outputs=` is the documented keyword; `output=` is only a legacy alias.
model_with_attentions = keras.Model(
    inputs=model.input,
    outputs=[model.output, model.get_layer('attention_vec').output],
)

# Read the scraped review data from the JSON file.
with open('C:/Users/User/Desktop/Coding/parsehubjsonfileeg/all.json', encoding='utf8') as file_object:
    data = json.load(file_object)

# Keep only the entries that actually carry a 'reviews' field.
new_data = {'selection1': []}
print(new_data)
for item in data['selection1']:
    if 'reviews' in item:
        new_data['selection1'].append(item)
        print(item['reviews'])
        print('--')

# Save the filtered entries.
with open('output.json', 'w') as f:
    json.dump(new_data, f)

# NOTE(review): this loop walks the unfiltered list, so an entry without
# 'reviews' would raise KeyError below; confirm whether new_data was intended.
selection1 = data['selection1']
for item in selection1:
    name = item['name']
    print('>>>>>>>>>>>>>>>>>> ', name)
    CommentID = item['reviews']
    for com in CommentID:
        comment = com['review'].lower()  # lowercase everything
        result = re.sub(r'\d+', '', comment)  # remove numbers
        # Remove punctuation, then surrounding white space.
        results = result.translate(str.maketrans('', '', string.punctuation)).strip()
        comments = remove_stopwords(results)
        print('>>>>>>', comments)

        # NOTE: this lookup raises KeyError for any word that is absent from
        # word2id; that is the failure shown in the traceback that follows.
        encoded_samples = [[word2id[word] for word in comments]]
        # Padding
        encoded_samples = keras.preprocessing.sequence.pad_sequences(encoded_samples, maxlen=max_words)

        # Make predictions
        label_probs, attentions = model_with_attentions.predict(encoded_samples)
        # label2id was built with enumerate(), so its key order matches the
        # order of the model's output probabilities.
        label_probs = {label: prob for label, prob in zip(label2id, label_probs[0])}
        print(label_probs)
        # Pick the most probable label; a bare max() would compare label
        # names alphabetically instead of comparing probabilities.
        print(max(label_probs, key=label_probs.get))

>>>>>> ['amazing', 'stay', 'nights', 'cleanliness', 'room', 'faultless'] {'fear': 0.26750156, 'love': 0.0044763167, 'joy': 0.06064613, 'surprise': 0.32365623, 'sadness': 0.03203068, 'anger': 0.31168908} surprise >>>>>> ['good', 'time', 'food', 'good'] Traceback (most recent call last): File "C:/Users/User/PycharmProjects/Dissertation/loadandresult.py", line 96, in <module> encoded_samples = [[word2id[word] for word in comments]] File "C:/Users/User/PycharmProjects/Dissertation/loadandresult.py", line 96, in <listcomp> encoded_samples = [[word2id[word] for word in comments]] KeyError: 'everydaythe'

1条回答

网友

1楼 · 发布于 2024-07-03 08:22:12

您可以在列表推导式中添加以下条件：

# Encode only the words that are present in the vocabulary; membership tests
# on a dict already check its keys, so .keys() is unnecessary.
encoded_samples = [[word2id[word] for word in comments if word in word2id]]

这样只会保留 comments 中已经存在于字典键里的单词，字典中没有的单词会被跳过

编辑：

当您处理字典时，如果要访问的键不一定存在于每个字典中，可以使用 get()。此方法用于查询字典中的键；如果该键不存在，它会返回您指定的默认值（未指定时返回 None），如下面的代码所示：

my_dict = {'id': 0, 'reviews': 4.5}
your_dict = {'id': 1}

# If I just specify the key, the default return value is None
your_dict.get('reviews')

# However, I can specify the return value. The fallback must be passed
# positionally: dict.get() accepts no keyword arguments, so writing
# default=4.0 would raise TypeError.
your_dict.get('reviews', 4.0)

相关问题更多 >

编程相关推荐

热门问题

热门文章