我试着运行一个神经网络来学习更多关于分类嵌入的知识(神经网络代码的示例在这里https://yashuseth.blog/2018/07/22/pytorch-neural-network-for-tabular-data-with-categorical-embeddings/),但是Spyder在最后尝试运行循环后给出了AttributeError
Traceback (most recent call last): File "", line 1, in File "C:\Workspace\Python_Runtime\Python\lib\multiprocessing\spawn.py", line 116, in spawn_main exitcode = _main(fd, parent_sentinel) File "C:\Workspace\Python_Runtime\Python\lib\multiprocessing\spawn.py", line 126, in _main self = reduction.pickle.load(from_parent) AttributeError: Can't get attribute 'TabularDataset' on <module 'main' (built-in)>
我的理解是,这是因为Spyder在多处理功能方面存在问题
正如一些答案所暗示的那样,我已经尝试过将所有不在类或def中的内容包装起来
if __name__ == '__main__':
但这似乎没有帮助,错误仍然会出现
我还尝试导入多进程包,而不是多进程,但没有帮助。我想我需要去更改spawn.py文件中的行,但不确定具体如何更改
问题是,在我目前的电脑上,我只有Spyder。我试着用Pycharm在家里的个人电脑上的另一个数据集上运行相同的代码,它工作正常,没有任何错误
有人知道我如何解决Spyder中的问题吗
我使用的神经网络代码如下:
from torch.utils.data import Dataset, DataLoader
class TabularDataset(Dataset):
def __init__(self, data, cat_cols=None, output_col=None):
"""
Characterizes a Dataset for PyTorch
Parameters
----------
data: pandas data frame
The data frame object for the input data. It must
contain all the continuous, categorical and the
output columns to be used.
cat_cols: List of strings
The names of the categorical columns in the data.
These columns will be passed through the embedding
layers in the model. These columns must be
label encoded beforehand.
output_col: string
The name of the output variable column in the data
provided.
"""
self.n = data.shape[0]
if output_col:
self.y = data[output_col].astype(np.float32).values.reshape(-1, 1)
else:
self.y = np.zeros((self.n, 1))
self.cat_cols = cat_cols if cat_cols else []
self.cont_cols = [col for col in data.columns
if col not in self.cat_cols + [output_col]]
if self.cont_cols:
self.cont_X = data[self.cont_cols].astype(np.float32).values
else:
self.cont_X = np.zeros((self.n, 1))
if self.cat_cols:
self.cat_X = data[cat_cols].astype(np.int64).values
else:
self.cat_X = np.zeros((self.n, 1))
def __len__(self):
"""
Denotes the total number of samples.
"""
return self.n
def __getitem__(self, idx):
"""
Generates one sample of data.
"""
return [self.y[idx], self.cont_X[idx], self.cat_X[idx]]
import torch
import torch.nn as nn
import torch.nn.functional as F
class FeedForwardNN(nn.Module):
def __init__(self, emb_dims, no_of_cont, lin_layer_sizes,
output_size, emb_dropout, lin_layer_dropouts):
"""
Parameters
----------
emb_dims: List of two element tuples
This list will contain a two element tuple for each
categorical feature. The first element of a tuple will
denote the number of unique values of the categorical
feature. The second element will denote the embedding
dimension to be used for that feature.
no_of_cont: Integer
The number of continuous features in the data.
lin_layer_sizes: List of integers.
The size of each linear layer. The length will be equal
to the total number
of linear layers in the network.
output_size: Integer
The size of the final output.
emb_dropout: Float
The dropout to be used after the embedding layers.
lin_layer_dropouts: List of floats
The dropouts to be used after each linear layer.
"""
super().__init__()
# Embedding layers
self.emb_layers = nn.ModuleList([nn.Embedding(x, y)
for x, y in emb_dims])
no_of_embs = sum([y for x, y in emb_dims])
self.no_of_embs = no_of_embs
self.no_of_cont = no_of_cont
# Linear Layers
first_lin_layer = nn.Linear(self.no_of_embs + self.no_of_cont,
lin_layer_sizes[0])
self.lin_layers = nn.ModuleList([first_lin_layer] + [nn.Linear(lin_layer_sizes[i], lin_layer_sizes[i + 1]) for i in range(len(lin_layer_sizes) - 1)])
for lin_layer in self.lin_layers:
nn.init.kaiming_normal_(lin_layer.weight.data)
# Output Layer
self.output_layer = nn.Linear(lin_layer_sizes[-1],
output_size)
nn.init.kaiming_normal_(self.output_layer.weight.data)
# Batch Norm Layers
self.first_bn_layer = nn.BatchNorm1d(self.no_of_cont)
self.bn_layers = nn.ModuleList([nn.BatchNorm1d(size)
for size in lin_layer_sizes])
# Dropout Layers
self.emb_dropout_layer = nn.Dropout(emb_dropout)
self.droput_layers = nn.ModuleList([nn.Dropout(size)
for size in lin_layer_dropouts])
def forward(self, cont_data, cat_data):
if self.no_of_embs != 0:
x = [emb_layer(cat_data[:, i])
for i,emb_layer in enumerate(self.emb_layers)]
x = torch.cat(x, 1)
x = self.emb_dropout_layer(x)
if self.no_of_cont != 0:
normalized_cont_data = self.first_bn_layer(cont_data)
if self.no_of_embs != 0:
x = torch.cat([x, normalized_cont_data], 1)
else:
x = normalized_cont_data
for lin_layer, dropout_layer, bn_layer in\
zip(self.lin_layers, self.droput_layers, self.bn_layers):
x = F.relu(lin_layer(x))
x = bn_layer(x)
x = dropout_layer(x)
x = self.output_layer(x)
return x
categorical_features = ["cat1", "cat2", "cat3"]
output_feature = ["output"]
data = data[output_feature + categorical_features + ["cont1", "cont2"]].copy().dropna()
from sklearn.preprocessing import LabelEncoder
label_encoders = {}
for cat_col in categorical_features:
label_encoders[cat_col] = LabelEncoder()
data[cat_col] = label_encoders[cat_col].fit_transform(data[cat_col])
dataset = TabularDataset(data=data, cat_cols=categorical_features,output_col=output_feature)
batchsize = 256
dataloader = DataLoader(dataset, batchsize, shuffle=True, num_workers=1)
cat_dims = [int(data[col].nunique()) for col in categorical_features]
emb_dims = [(x, min(50, (x + 1) // 2)) for x in cat_dims]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = FeedForwardNN(emb_dims, no_of_cont=2, lin_layer_sizes=[50, 100],
output_size=1, emb_dropout=0.04,
lin_layer_dropouts=[0.001,0.01]).to(device)
import tqdm
no_of_epochs = 5
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
for epoch in tqdm.tqdm(range(no_of_epochs)):
for y, cont_x, cat_x in dataloader:
cat_x = cat_x.to(device)
cont_x = cont_x.to(device)
y = y.to(device)
# Forward Pass
preds = model(cont_x, cat_x)
loss = criterion(preds, y)
# Backward Pass and Optimization
optimizer.zero_grad()
loss.backward()
optimizer.step()
您可以尝试使用控制台名称空间而不是空名称空间来运行代码(尝试保留
TabularDataset
定义)。为此,您需要选中首选项对话框中的选项Run in Console's namespace instead of an empty one
:菜单Tools > Preferences
(或🔧 按钮显示对话框)和Run > General settings > Run in Console's namespace instead of an empty one
相关问题 更多 >
编程相关推荐