英文:
Convert Tensoflow model to PyTorch model - model isn't learning
问题
以下是您要翻译的代码部分:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D
from tensorflow.keras.datasets import imdb
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=5000)
x_train = sequence.pad_sequences(x_train, maxlen=400, padding="post")
x_test = sequence.pad_sequences(x_test, maxlen=400, padding="post")
model = Sequential()
model.add(Embedding(5000, 50, input_length=400))
model.add(Dropout(0.2))
model.add(Conv1D(250, 3, padding='valid', activation='relu', strides=1))
model add(GlobalMaxPooling1D())
model.add(Dense(250))
model.add(Dropout(0.2))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
h2 = model.fit(x_train, y_train, batch_size=32, epochs=10, validation_data=(x_test, y_test))
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch
from tqdm import tqdm
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
class CustomDataset(Dataset):
def __init__(self, x, y):
self.x = x
self.y = y
def __len__(self):
return len(self.y)
def __getitem__(self, idx):
return self.x[idx], self.y[idx]
train_dataloader = DataLoader(CustomDataset(torch.Tensor(x_train), torch.Tensor(y_train)), batch_size=32, shuffle=True)
test_dataloader = DataLoader(CustomDataset(torch.Tensor(x_test), torch.Tensor(y_test)), batch_size=32, shuffle=True)
class MyModel(torch.nn.Module):
def __init__(self, vocab_size=5000, input_len=400, embedding_dims=50, kernel_size=3, filters=250, hidden_dims=250):
super(MyModel, self).__init__()
self.embedding_dims = embedding_dims
self.input_len = input_len
self.embedding = torch.nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dims)
self.dropout1 = torch.nn.Dropout(p=0.2)
self.conv1d = torch.nn.Conv1d(in_channels=embedding_dims, out_channels=filters, kernel_size=kernel_size, padding=(0,), stride=1)
self.pool = torch.nn.AdaptiveMaxPool1d(1)
self.linear1 = torch.nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
self.dropout2 = torch.nn.Dropout(p=0.2)
self.activation = torch.nn.ReLU()
self.output = torch.nn.Linear(in_features=hidden_dims, out_features=1)
self.activation2 = torch.nn.Sigmoid()
def forward(self, x):
x = self.dropout1(self.embedding(x.type(torch.LongTensor)))
x = self.conv1d(x.view(-1, self.embedding_dims, self.input_len))
x = self.pool(x)
x = self.activation(self.dropout2(self.linear1(x.view(-1,x.size()[1])))
x = self.activation2(self.output(x))
return x
class FitTorchModel():
def __init__(self, model, num_epochs=10, steps_per_epoch=782):
self.model = model
self.epochs = num_epochs
self.steps_per_epoch = steps_per_epoch
def fit(self, train_dataloader, test_dataloader):
opt = torch.optim.Adam(self.model.parameters(), lr=0.001)
crit = torch.nn.BCELoss(reduction = "mean")
history_df = pd.DataFrame(columns = ["Loss", "Accuracy", "Val_Loss", "Val_Acc"])
for epoch in range(self.epochs):
self.model.train()
print(f"Epoch {epoch}")
epoch_loss = 0
epoch_acc = 0
it = iter(train_dataloader)
for step in tqdm(range(self.steps_per_epoch)):
opt.zero_grad()
x, y = next(it)
y_pred = self.model(x).view(-1)
loss = crit(y_pred, y)
epoch_loss += loss.item()
epoch_acc += accuracy_score(y==1, y_pred > 0.5)
loss.backward()
opt.step()
val_loss, val_acc = self.predict_proba(test_dataloader, crit)
df = pd.DataFrame({"Loss": epoch_loss/(step+1),
"Accuracy": epoch_acc/(step+1),
"Val_Loss": val_loss, "Val_Acc": val_acc}, index=[0])
history_df = pd.concat((history_df, df), ignore_index=True)
return history_df
def predict_proba(self, test_dataloader, crit):
self.model.eval()
val_loss = 0
val_acc = 0
it = iter(test_dataloader)
with torch.no_grad():
for step in tqdm(range(self.steps_per_epoch)):
x,y = next(it)
y_pred = self.model(x).view(-1)
batch_loss = crit(y_pred, y)
val_loss += batch_loss.item()
val_acc += accuracy_score(y==1, y_pred > 0.5)
return val_loss/(step+1), val_acc/(step+1)
ftm = FitTorchModel(model=MyModel(), num_epochs=10, steps_per_epoch=782)
history_df = ftm.fit(train_dataloader, test_dataloader)
英文:
I'm trying to port a tensorflow neural network to pytorch, as an exercise to familiarize myself with both / their nuances. This is the tensorflow network I'm porting to pytorch:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D
from tensorflow.keras.datasets import imdb
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=5000)
x_train = sequence.pad_sequences(x_train, maxlen=400, padding="post")
x_test = sequence.pad_sequences(x_test, maxlen=400, padding="post")
model = Sequential()
model.add(Embedding(5000, 50, input_length=400))
model.add(Dropout(0.2))
model.add(Conv1D(250, 3, padding='valid',activation='relu',strides=1))
model.add(GlobalMaxPooling1D())
model.add(Dense(250))
model.add(Dropout(0.2))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
h2 = model.fit(x_train, y_train, batch_size=32, epochs=10, validation_data=(x_test, y_test))
The shapes of each layer is shown below:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding (Embedding) (None, 400, 50) 250000
dropout (Dropout) (None, 400, 50) 0
conv1d (Conv1D) (None, 398, 250) 37750
global_max_pooling1d (Globa (None, 250) 0
lMaxPooling1D)
dense (Dense) (None, 250) 62750
dropout_1 (Dropout) (None, 250) 0
activation (Activation) (None, 250) 0
dense_1 (Dense) (None, 1) 251
activation_1 (Activation) (None, 1) 0
=================================================================
Total params: 350,751
Trainable params: 350,751
Non-trainable params: 0
And the output of the tensorflow model is:
Epoch 1/10
loss: 0.4043 - accuracy: 0.8021 - val_loss: 0.2764 - val_accuracy: 0.8854
Epoch 2/10
loss: 0.2332 - accuracy: 0.9052 - val_loss: 0.2690 - val_accuracy: 0.8888
Epoch 3/10
loss: 0.1598 - accuracy: 0.9389 - val_loss: 0.2948 - val_accuracy: 0.8832
Epoch 4/10
loss: 0.1112 - accuracy: 0.9600 - val_loss: 0.3015 - val_accuracy: 0.8906
Epoch 5/10
loss: 0.0810 - accuracy: 0.9700 - val_loss: 0.3057 - val_accuracy: 0.8868
Epoch 6/10
loss: 0.0537 - accuracy: 0.9811 - val_loss: 0.4055 - val_accuracy: 0.8868
Epoch 7/10
loss: 0.0408 - accuracy: 0.9860 - val_loss: 0.4083 - val_accuracy: 0.8852
Epoch 8/10
loss: 0.0411 - accuracy: 0.9845 - val_loss: 0.4789 - val_accuracy: 0.8789
Epoch 9/10
loss: 0.0380 - accuracy: 0.9862 - val_loss: 0.4828 - val_accuracy: 0.8827
Epoch 10/10
loss: 0.0329 - accuracy: 0.9879 - val_loss: 0.4999 - val_accuracy: 0.8825
Here's what I have in my PyTorch port over:
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch
from tqdm import tqdm
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
class CustomDataset(Dataset):
def __init__(self, x, y):
self.x = x
self.y = y
def __len__(self):
return len(self.y)
def __getitem__(self, idx):
return self.x[idx], self.y[idx]
train_dataloader = DataLoader(CustomDataset(torch.Tensor(x_train), torch.Tensor(y_train)), batch_size=32, shuffle=True)
test_dataloader = DataLoader(CustomDataset(torch.Tensor(x_test), torch.Tensor(y_test)), batch_size=32, shuffle=True)
class MyModel(torch.nn.Module):
def __init__(self, vocab_size=5000, input_len=400, embedding_dims=50, kernel_size=3, filters=250, hidden_dims=250):
super(MyModel, self).__init__()
self.embedding_dims = embedding_dims
self.input_len = input_len
self.embedding = torch.nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dims)
self.dropout1 = torch.nn.Dropout(p=0.2)
self.conv1d = torch.nn.Conv1d(in_channels=embedding_dims, out_channels=filters, kernel_size=kernel_size, padding=(0,), stride=1)
self.pool = torch.nn.AdaptiveMaxPool1d(1)
self.linear1 = torch.nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
self.dropout2 = torch.nn.Dropout(p=0.2)
self.activation = torch.nn.ReLU()
self.output = torch.nn.Linear(in_features=hidden_dims, out_features=1)
self.activation2 = torch.nn.Sigmoid()
def forward(self, x):
x = self.dropout1(self.embedding(x.type(torch.LongTensor)))
x = self.conv1d(x.view(-1, self.embedding_dims, self.input_len))
x = self.pool(x)
x = self.activation(self.dropout2(self.linear1(x.view(-1,x.size()[1]))))
x = self.activation2(self.output(x))
return x
class FitTorchModel():
def __init__(self, model, num_epochs=10, steps_per_epoch=782):
self.model = model
self.epochs = num_epochs
self.steps_per_epoch = steps_per_epoch
def fit(self, train_dataloader, test_dataloader):
opt = torch.optim.Adam(self.model.parameters(), lr=0.001)
crit = torch.nn.BCELoss(reduction = "mean")
history_df = pd.DataFrame(columns = ["Loss", "Accuracy", "Val_Loss", "Val_Acc"])
for epoch in range(self.epochs):
self.model.train()
print(f"Epoch {epoch}")
epoch_loss = 0
epoch_acc = 0
it = iter(train_dataloader)
for step in tqdm(range(self.steps_per_epoch)):
opt.zero_grad()
x, y = next(it)
y_pred = self.model(x).view(-1)
loss = crit(y_pred, y)
epoch_loss += loss.item()
epoch_acc += accuracy_score(y==1, y_pred > 0.5)
loss.backward()
opt.step()
val_loss, val_acc = self.predict_proba(test_dataloader, crit)
df = pd.DataFrame({"Loss": epoch_loss/(step+1),
"Accuracy": epoch_acc/(step+1),
"Val_Loss": val_loss, "Val_Acc": val_acc}, index=[0])
history_df = pd.concat((history_df, df), ignore_index=True)
return history_df
def predict_proba(self, test_dataloader, crit):
self.model.eval()
val_loss = 0
val_acc = 0
it = iter(test_dataloader)
with torch.no_grad():
for step in tqdm(range(self.steps_per_epoch)):
x,y = next(it)
y_pred = self.model(x).view(-1)
batch_loss = crit(y_pred, y)
val_loss += batch_loss.item()
val_acc += accuracy_score(y==1, y_pred > 0.5)
return val_loss/(step+1), val_acc/(step+1)
ftm = FitTorchModel(model=MyModel(), num_epochs=10, steps_per_epoch=782)
history_df = ftm.fit(train_dataloader, test_dataloader)
The shape of each layer is:
After embedding layer: torch.Size([32, 400, 50])
After dropout1 layer: torch.Size([32, 400, 50])
After convolution1d layer: torch.Size([32, 250, 398])
After maxpooling layer: torch.Size([32, 250, 1])
After linear1 layer: torch.Size([32, 250])
After dropout2 layer: torch.Size([32, 250])
After activation layer: torch.Size([32, 250])
After output layer: torch.Size([32, 1])
After activation2 layer: torch.Size([32, 1])
The output of the pytorch model training is:
Loss Accuracy Val_Loss Val_Acc
0 0.697899 0.505874 0.692495 0.511629
1 0.693063 0.503477 0.693186 0.503637
2 0.693190 0.496044 0.693149 0.499201
3 0.693181 0.501359 0.693082 0.502038
4 0.693169 0.503237 0.693234 0.495964
5 0.693177 0.500240 0.693154 0.500679
6 0.693069 0.507473 0.693258 0.498881
7 0.693948 0.500320 0.693145 0.501598
8 0.693196 0.499640 0.693164 0.496324
9 0.693170 0.500759 0.693140 0.501918
Couple things: the accuracy hovers around guessing (this is a binary classification task), no matter how many epochs have passed. Secondly, the training loss barely improves. I set the learning rate to the default learning rate described by tensorflow's Adam Optimizer docs. What else am I missing here? I had some trouble with the input / output dimensions for the various layers - did I mess those up at all?
答案1
得分: 2
以下是代码部分的翻译:
class MyModel(torch.nn.Module):
def __init__(self, vocab_size=5000, input_len=400, embedding_dims=50, kernel_size=3, filters=250, hidden_dims=250):
super(MyModel, self).__init__()
self.embedding_dims = embedding_dims
self.input_len = input_len
self.embedding = torch.nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dims)
self.dropout1 = torch.nn.Dropout(p=0.2)
self.conv1d = torch.nn.Conv1d(in_channels=embedding_dims, out_channels=filters, kernel_size=kernel_size, padding=(0,), stride=1)
self.pool = torch.nn.AdaptiveMaxPool1d(1)
self.linear1 = torch.nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
self.dropout2 = torch.nn.Dropout(p=0.2)
self.activation = torch.nn.ReLU()
self.output = torch.nn.Linear(in_features=hidden_dims, out_features=1)
self.activation2 = torch.nn.Sigmoid()
def forward(self, x):
x = self.dropout1(self.embedding(x.type(torch.LongTensor)))
x = self.activation(self.conv1d(x.moveaxis(-1,-2)))
x = self.pool(x).squeeze(-1)
x = self.activation(self.dropout2(self.linear1(x)))
x = self.activation2(self.output(x))
return x
请注意,这是代码的翻译部分,没有包括其他内容。
英文:
Some observations:
- Use BCEWithLogitsLoss as loss on the output of the last linear layer, before the sigmoid. This includes the sigmoid activation in a more numerically stable fashion.
- The TensorFlow model has a ReLU after the Convolution, the pytorch implementations does not.
In general, for debugging, one might want to look at weight.grad
of some of your weights after the loss.backward()
and see if gradients calculated. Also printing out the value of one of the weights in each iteration to see if your optimizer actually changes the weights can help...
Also, it can depend on the input data:
(Are you sure that x_test is scaled correctly?)
If you are transforming your inputs to Long
before embedding them and all x_test, for example, are floats between 0 and 1, they will all be converted to 0! And the network will have a hard time predicting the labels from all zeros as constant input!
But now to the actual issue in this particular case:
Be careful with .view! It might not do what you expect. It just reshapes the tensor but does not move the data around.
What you really want is .moveaxes(-1,2)
instead!!
Loss Accuracy Val_Loss Val_Acc
0 0.573489 0.671715 0.402601 0.819413
1 0.376908 0.830163 0.33786 0.850783
2 0.308343 0.868646 0.296171 0.872323
3 0.258806 0.893342 0.319121 0.865849
4 0.227044 0.907649 0.3172 0.868326
5 0.202789 0.918478 0.281184 0.886549
6 0.179744 0.928549 0.291027 0.886589
7 0.161205 0.93702 0.329196 0.879156
8 0.145447 0.944094 0.294914 0.889746
9 0.133034 0.949568 0.291476 0.889826
After adding the relu after the convolution and, more importantly, fixing the view!
class MyModel(torch.nn.Module):
def __init__(self, vocab_size=5000, input_len=400, embedding_dims=50, kernel_size=3, filters=250, hidden_dims=250):
super(MyModel, self).__init__()
self.embedding_dims = embedding_dims
self.input_len = input_len
self.embedding = torch.nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dims)
self.dropout1 = torch.nn.Dropout(p=0.2)
self.conv1d = torch.nn.Conv1d(in_channels=embedding_dims, out_channels=filters, kernel_size=kernel_size, padding=(0,), stride=1)
self.pool = torch.nn.AdaptiveMaxPool1d(1)
self.linear1 = torch.nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
self.dropout2 = torch.nn.Dropout(p=0.2)
self.activation = torch.nn.ReLU()
self.output = torch.nn.Linear(in_features=hidden_dims, out_features=1)
self.activation2 = torch.nn.Sigmoid()
def forward(self, x):
x = self.dropout1(self.embedding(x.type(torch.LongTensor)))
x = self.activation(self.conv1d(x.moveaxis(-1,-2)))
x = self.pool(x).squeeze(-1)
x = self.activation(self.dropout2(self.linear1(x)))
x = self.activation2(self.output(x))
return x
答案2
得分: 1
fit
函数中你用 tinymodel
初始化了 opt
,如下所示:
opt = torch.optim.Adam(tinymodel.parameters(), lr=0.001)
看起来你的优化器没有作用在正确的模型上(参考这个回答,了解优化器和模型参数之间的关系)。
你需要在 fit
函数中替换以下这行代码:
def fit(self, train_dataloader, test_dataloader):
opt = torch.optim.Adam(self.model.parameters(), lr=0.001)
# ...
另外,你使用了在训练和测试时行为不同的 Dropout
层。你应该在 fit
和 predict_proba
函数的开头分别添加 self.model.train()
和 self.model.eval()
。
英文:
What is tinymodel
you init opt
with in fit
function:
opt = torch.optim.Adam(tinymodel.parameters(), lr=0.001)
It seems like your optimizer is not working on the right model (see this answer on the relation between the optimizer and the parameters of the model).
You need to replace this line in fit
function:
def fit(self, train_dataloader, test_dataloader):
opt = torch.optim.Adam(self.model.parameters(), lr=0.001)
# ...
Additionally, you are using Dropout
layer that has different behavior in train and test.
You should add self.model.train()
and self.model.eval()
at the beginning of your fit
and predict_proba
functions respectively.
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论