Huggingface Transformers returns "ValueError: too many values to unpack (expected 2)" when training a BERT binary classification model



I am learning how to use the Huggingface Transformers library to build a binary classification model on the Kaggle Twitter disaster dataset.

After entering the training loop, the following error is raised during execution of the forward() function:

Epoch 1/50
----------
Aici incepe train_epoch

/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:477: UserWarning: This DataLoader will create 4 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
  cpuset_checked))

----Checkpoint train_epoch 2----
----Checkpoint train_epoch 2----
----forward checkpoint 1----

---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-175-fd9f98819b6f> in <module>()
     23     device,
     24     scheduler,
---> 25     df_train.shape[0]
     26   )
     27     print(f'Train loss {train_loss} Accuracy:{train_acc}')

4 frames

<ipython-input-173-bfbecd87c5ec> in train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples)
     21         targets = d['targets'].to(device)
     22         print('----Checkpoint train_epoch 2----')
---> 23         outputs = model(input_ids=input_ids,attention_mask=attention_mask)
     24         print('----Checkpoint train_epoch 3----')
     25         _,preds = torch.max(outputs,dim=1)

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

<ipython-input-171-e754ea3edc36> in forward(self, input_ids, attention_mask)
     16                 input_ids=input_ids,
     17                 attention_mask=attention_mask,
---> 18                 return_dict=False)
     19 
     20         print('----forward checkpoint 2-----')

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

/usr/local/lib/python3.7/dist-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
    923         elif input_ids is not None:
    924             input_shape = input_ids.size()
--> 925             batch_size, seq_length = input_shape
    926         elif inputs_embeds is not None:
    927             input_shape = inputs_embeds.size()[:-1]

ValueError: too many values to unpack (expected 2)

At first I thought it had to do with the return_dict=False change that was added, but I was wrong. The code for the classifier and the training loop is shown below.

The classifier:

class DisasterClassifier(nn.Module):
    def __init__(self, n_classes):
        super(DisasterClassifier,self).__init__()
        self.bert=BertModel.from_pretrained(PRE_TRAINED_MODEL,return_dict=False)
        self.drop = nn.Dropout(p=0.3)  # during training, random activations are replaced with 0 with probability p -> regularization and preventing the co-adaptation of neurons
        self.out = nn.Linear(self.bert.config.hidden_size,n_classes)
        
    def forward(self,input_ids,attention_mask):
        print('----forward checkpoint 1----')
        bertOutput = self.bert(
                input_ids=input_ids,
                attention_mask=attention_mask,
                return_dict=False)
        
        print('----forward checkpoint 2-----')
        output = self.drop(bertOutput['pooler_output'])
        return self.out(output)

The training setup:

optimizer = AdamW(model.parameters(),lr = 2e-5,correct_bias=False)
total_steps = len(train_data_loader)*EPOCHS
scheduler = get_linear_schedule_with_warmup(
                                            optimizer,
                                            num_warmup_steps=0,
                                            num_training_steps=total_steps)
loss_fn = nn.CrossEntropyLoss().to(device)

def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):
    print('Aici incepe train_epoch')
    model = model.train()
    losses = []
    correct_predictions = 0

    for d in data_loader:
        print('----Checkpoint train_epoch 2----')
        input_ids = d['input_ids'].to(device)
        attention_mask = d['attention_mask'].to(device)
        targets = d['targets'].to(device)
        print('----Checkpoint train_epoch 2----')
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        print('----Checkpoint train_epoch 3----')
        _, preds = torch.max(outputs, dim=1)
        loss = loss_fn(outputs, targets)

        correct_predictions += torch.sum(preds == targets)
        losses.append(loss.item())

        # backpropagation steps
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    return (correct_predictions.double() / n_examples), np.mean(losses)

And the training loop:

history = defaultdict(list)
best_accuracy = 0

for epoch in range(EPOCHS):
    print(f'Epoch {epoch + 1}/{EPOCHS}')
    print('-' * 10)
    
    # train_acc,train_loss = train_epoch(model,
    #                                    train_data_loader,
    #                                    loss_fn,
    #                                    optimizer,
    #                                    device,
    #                                    scheduler,
    #                                    len(df_train))
    
    train_acc, train_loss = train_epoch(
        model,
        train_data_loader,
        loss_fn,
        optimizer,
        device,
        scheduler,
        df_train.shape[0]
    )
    print(f'Train loss {train_loss} Accuracy:{train_acc}')
    
    val_acc, val_loss = eval_model(model,val_data_loader,loss_fn,device,len(df_val))
    print(f'Validation loss {val_loss} Accuracy:{val_acc}')
    print()
        
    history['train_acc'].append(train_acc)
    history['train_loss'].append(train_loss)
    history['val_acc'].append(val_acc)
    history['val_loss'].append(val_loss)

    if val_acc > best_accuracy:
        torch.save(model.state_dict(), 'best_model_state.bin')
        best_accuracy = val_acc

Has anyone run into something similar?


1 answer

I had the same problem. You need to check the shape of input_ids; it should be (batch_size, seq_length). In your case I suspect it is (1, batch_size, seq_length) or something similar. Do something like this:

input_ids = input_ids.squeeze(0)
outputs = model(input_ids=input_ids,attention_mask=attention_mask)
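
The squeeze above works, but if the extra dimension comes from the Dataset itself it is usually cleaner to remove it there: a tokenizer called with return_tensors='pt' returns tensors of shape (1, max_len) for a single example, and those need to be flattened before the DataLoader stacks them. Below is a minimal sketch of such a Dataset; the class name TweetDataset and its fields are placeholders for whatever the original dataset class looks like, not the asker's actual code:

from torch.utils.data import Dataset
import torch

class TweetDataset(Dataset):
    def __init__(self, texts, targets, tokenizer, max_len):
        self.texts = texts
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # encode_plus with return_tensors='pt' yields tensors of shape (1, max_len)
        encoding = self.tokenizer.encode_plus(
            self.texts[idx],
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt')
        return {
            'input_ids': encoding['input_ids'].flatten(),        # -> (max_len,)
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(self.targets[idx], dtype=torch.long)
        }

With flatten() applied in __getitem__, the default collate function produces (batch_size, max_len) batches, so the line batch_size, seq_length = input_shape in modeling_bert.py unpacks correctly and no extra squeeze is needed in the training loop.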
