Convert TensorFlow model to PyTorch model – model isn't learning

Question


I'm trying to port a TensorFlow neural network to PyTorch, as an exercise to familiarize myself with both frameworks and their nuances. This is the TensorFlow network I'm porting:

import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D
from tensorflow.keras.datasets import imdb
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=5000)
x_train = sequence.pad_sequences(x_train, maxlen=400, padding="post")
x_test = sequence.pad_sequences(x_test, maxlen=400, padding="post")
model = Sequential()
model.add(Embedding(5000, 50, input_length=400))
model.add(Dropout(0.2))
model.add(Conv1D(250, 3, padding='valid', activation='relu', strides=1))
model.add(GlobalMaxPooling1D())
model.add(Dense(250))
model.add(Dropout(0.2))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
h2 = model.fit(x_train, y_train, batch_size=32, epochs=10, validation_data=(x_test, y_test))

The shape of each layer is shown below:

Model: "sequential"
_________________________________________________________________
Layer (type)                Output Shape              Param #   
=================================================================
embedding (Embedding)       (None, 400, 50)           250000    
dropout (Dropout)           (None, 400, 50)           0         
conv1d (Conv1D)             (None, 398, 250)          37750     
global_max_pooling1d (Globa  (None, 250)              0         
lMaxPooling1D)                                                  
dense (Dense)               (None, 250)               62750     
dropout_1 (Dropout)         (None, 250)               0         
activation (Activation)     (None, 250)               0         
dense_1 (Dense)             (None, 1)                 251       
activation_1 (Activation)   (None, 1)                 0         
=================================================================
Total params: 350,751
Trainable params: 350,751
Non-trainable params: 0
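
For reference, these parameter counts follow directly from the shapes: the embedding holds 5000 × 50 = 250,000 weights; the Conv1D has 50 × 3 × 250 kernel weights plus 250 biases = 37,750; the first Dense has 250 × 250 + 250 = 62,750; and the output Dense has 250 + 1 = 251. In total: 350,751, matching the summary.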

And the output of the tensorflow model is:

Epoch 1/10
loss: 0.4043 - accuracy: 0.8021 - val_loss: 0.2764 - val_accuracy: 0.8854
Epoch 2/10
loss: 0.2332 - accuracy: 0.9052 - val_loss: 0.2690 - val_accuracy: 0.8888
Epoch 3/10
loss: 0.1598 - accuracy: 0.9389 - val_loss: 0.2948 - val_accuracy: 0.8832
Epoch 4/10
loss: 0.1112 - accuracy: 0.9600 - val_loss: 0.3015 - val_accuracy: 0.8906
Epoch 5/10
loss: 0.0810 - accuracy: 0.9700 - val_loss: 0.3057 - val_accuracy: 0.8868
Epoch 6/10
loss: 0.0537 - accuracy: 0.9811 - val_loss: 0.4055 - val_accuracy: 0.8868
Epoch 7/10
loss: 0.0408 - accuracy: 0.9860 - val_loss: 0.4083 - val_accuracy: 0.8852
Epoch 8/10
loss: 0.0411 - accuracy: 0.9845 - val_loss: 0.4789 - val_accuracy: 0.8789
Epoch 9/10
loss: 0.0380 - accuracy: 0.9862 - val_loss: 0.4828 - val_accuracy: 0.8827
Epoch 10/10
loss: 0.0329 - accuracy: 0.9879 - val_loss: 0.4999 - val_accuracy: 0.8825

Here's what I have in my PyTorch port:

from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch
from tqdm import tqdm
import torch.nn.functional as F
from sklearn.metrics import accuracy_score

class CustomDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

train_dataloader = DataLoader(CustomDataset(torch.Tensor(x_train), torch.Tensor(y_train)), batch_size=32, shuffle=True)
test_dataloader = DataLoader(CustomDataset(torch.Tensor(x_test), torch.Tensor(y_test)), batch_size=32, shuffle=True)

class MyModel(torch.nn.Module):
    def __init__(self, vocab_size=5000, input_len=400, embedding_dims=50, kernel_size=3, filters=250, hidden_dims=250):
        super(MyModel, self).__init__()
        self.embedding_dims = embedding_dims
        self.input_len = input_len
        self.embedding = torch.nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dims)
        self.dropout1 = torch.nn.Dropout(p=0.2)
        self.conv1d = torch.nn.Conv1d(in_channels=embedding_dims, out_channels=filters, kernel_size=kernel_size, padding=(0,), stride=1)
        self.pool = torch.nn.AdaptiveMaxPool1d(1)
        self.linear1 = torch.nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
        self.dropout2 = torch.nn.Dropout(p=0.2)
        self.activation = torch.nn.ReLU()
        self.output = torch.nn.Linear(in_features=hidden_dims, out_features=1)
        self.activation2 = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.dropout1(self.embedding(x.type(torch.LongTensor)))
        x = self.conv1d(x.view(-1, self.embedding_dims, self.input_len))
        x = self.pool(x)
        x = self.activation(self.dropout2(self.linear1(x.view(-1, x.size()[1]))))
        x = self.activation2(self.output(x))
        return x

class FitTorchModel():
    def __init__(self, model, num_epochs=10, steps_per_epoch=782):
        self.model = model
        self.epochs = num_epochs
        self.steps_per_epoch = steps_per_epoch

    def fit(self, train_dataloader, test_dataloader):
        opt = torch.optim.Adam(self.model.parameters(), lr=0.001)
        crit = torch.nn.BCELoss(reduction="mean")
        history_df = pd.DataFrame(columns=["Loss", "Accuracy", "Val_Loss", "Val_Acc"])
        for epoch in range(self.epochs):
            self.model.train()
            print(f"Epoch {epoch}")
            epoch_loss = 0
            epoch_acc = 0
            it = iter(train_dataloader)
            for step in tqdm(range(self.steps_per_epoch)):
                opt.zero_grad()
                x, y = next(it)
                y_pred = self.model(x).view(-1)
                loss = crit(y_pred, y)
                epoch_loss += loss.item()
                epoch_acc += accuracy_score(y == 1, y_pred > 0.5)
                loss.backward()
                opt.step()
            val_loss, val_acc = self.predict_proba(test_dataloader, crit)
            df = pd.DataFrame({"Loss": epoch_loss/(step+1),
                               "Accuracy": epoch_acc/(step+1),
                               "Val_Loss": val_loss, "Val_Acc": val_acc}, index=[0])
            history_df = pd.concat((history_df, df), ignore_index=True)
        return history_df

    def predict_proba(self, test_dataloader, crit):
        self.model.eval()
        val_loss = 0
        val_acc = 0
        it = iter(test_dataloader)
        with torch.no_grad():
            for step in tqdm(range(self.steps_per_epoch)):
                x, y = next(it)
                y_pred = self.model(x).view(-1)
                batch_loss = crit(y_pred, y)
                val_loss += batch_loss.item()
                val_acc += accuracy_score(y == 1, y_pred > 0.5)
        return val_loss/(step+1), val_acc/(step+1)

ftm = FitTorchModel(model=MyModel(), num_epochs=10, steps_per_epoch=782)
history_df = ftm.fit(train_dataloader, test_dataloader)

The output shape after each layer is:

After embedding layer: torch.Size([32, 400, 50])
After dropout1 layer: torch.Size([32, 400, 50])
After convolution1d layer: torch.Size([32, 250, 398])
After maxpooling layer: torch.Size([32, 250, 1])
After linear1 layer: torch.Size([32, 250])
After dropout2 layer: torch.Size([32, 250])
After activation layer: torch.Size([32, 250])
After output layer: torch.Size([32, 1])
After activation2 layer: torch.Size([32, 1])
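
(For reference, a trace like this can be produced with forward hooks; this is just a sketch against the MyModel and train_dataloader defined above, not part of the training code.)

m = MyModel()
for name, layer in m.named_children():
    # print the output shape of each submodule as one batch flows through
    layer.register_forward_hook(
        lambda mod, inp, out, name=name: print(f"After {name} layer: {out.shape}")
    )
x, _ = next(iter(train_dataloader))
_ = m(x)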

The output of the pytorch model training is:

       Loss  Accuracy  Val_Loss   Val_Acc
0  0.697899  0.505874  0.692495  0.511629
1  0.693063  0.503477  0.693186  0.503637
2  0.693190  0.496044  0.693149  0.499201
3  0.693181  0.501359  0.693082  0.502038
4  0.693169  0.503237  0.693234  0.495964
5  0.693177  0.500240  0.693154  0.500679
6  0.693069  0.507473  0.693258  0.498881
7  0.693948  0.500320  0.693145  0.501598
8  0.693196  0.499640  0.693164  0.496324
9  0.693170  0.500759  0.693140  0.501918

A couple of things: first, the accuracy hovers around guessing (this is a binary classification task), no matter how many epochs have passed. Secondly, the training loss barely improves. I set the learning rate to the default described in TensorFlow's Adam optimizer docs. What else am I missing here? I had some trouble with the input/output dimensions for the various layers - did I mess those up at all?

Answer 1 (Score: 2)

Some observations:

  • Use BCEWithLogitsLoss as the loss on the output of the last linear layer, before the sigmoid; it includes the sigmoid activation in a more numerically stable fashion (see the sketch after this list).
  • The TensorFlow model has a ReLU after the convolution; the PyTorch implementation does not.
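
A minimal sketch of the first point, using random stand-in tensors rather than the actual model:

import torch

crit = torch.nn.BCEWithLogitsLoss(reduction="mean")
logits = torch.randn(32)                      # stand-in for the last linear layer's raw output (no Sigmoid)
targets = torch.randint(0, 2, (32,)).float()
loss = crit(logits, targets)                  # the sigmoid is folded into the loss, numerically stably
probs = torch.sigmoid(logits)                 # apply the sigmoid only when probabilities are needed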

In general, for debugging, one might want to look at the weight.grad of some of your weights after loss.backward() and see whether gradients were calculated. Printing out the value of one of the weights in each iteration, to see whether your optimizer actually changes the weights, can also help.
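
A sketch of such a check (assuming the question's MyModel and train_dataloader are in scope):

model = MyModel()
opt = torch.optim.Adam(model.parameters(), lr=0.001)
crit = torch.nn.BCELoss(reduction="mean")
x, y = next(iter(train_dataloader))
loss = crit(model(x).view(-1), y)
loss.backward()
print(model.conv1d.weight.grad.abs().mean())        # non-zero => gradients reach this layer
before = model.conv1d.weight.detach().clone()
opt.step()
print((model.conv1d.weight - before).abs().max())   # non-zero => the optimizer updates the weights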

Also, it can depend on the input data (are you sure that x_test is scaled correctly?). If you are converting your inputs to Long before embedding them and all of x_test, for example, consists of floats between 0 and 1, they will all be converted to 0! The network will then have a hard time predicting the labels from a constant all-zeros input.
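
For example, a quick check of the truncation pitfall:

import torch

x = torch.tensor([0.1, 0.5, 0.99])
print(x.type(torch.LongTensor))   # tensor([0, 0, 0]): the cast truncates toward zero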

But now to the actual issue in this particular case: be careful with .view! It might not do what you expect. It just reshapes the tensor in memory order but does not move the data around. What you really want here is .moveaxis(-1, -2) instead!
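
A tiny demonstration of the difference:

import torch

t = torch.arange(6).reshape(1, 2, 3)   # (batch=1, seq_len=2, embedding=3)
print(t.view(1, 3, 2))
# tensor([[[0, 1], [2, 3], [4, 5]]])   -- same memory order, tokens and channels get interleaved
print(t.moveaxis(-1, -2))
# tensor([[[0, 3], [1, 4], [2, 5]]])   -- a true transpose of the last two axes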

       Loss  Accuracy  Val_Loss   Val_Acc
0  0.573489  0.671715  0.402601  0.819413
1  0.376908  0.830163  0.337860  0.850783
2  0.308343  0.868646  0.296171  0.872323
3  0.258806  0.893342  0.319121  0.865849
4  0.227044  0.907649  0.317200  0.868326
5  0.202789  0.918478  0.281184  0.886549
6  0.179744  0.928549  0.291027  0.886589
7  0.161205  0.937020  0.329196  0.879156
8  0.145447  0.944094  0.294914  0.889746
9  0.133034  0.949568  0.291476  0.889826

This is after adding the ReLU after the convolution and, more importantly, fixing the view:

class MyModel(torch.nn.Module):
    def __init__(self, vocab_size=5000, input_len=400, embedding_dims=50, kernel_size=3, filters=250, hidden_dims=250):
        super(MyModel, self).__init__()
        self.embedding_dims = embedding_dims
        self.input_len = input_len
        self.embedding = torch.nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dims)
        self.dropout1 = torch.nn.Dropout(p=0.2)
        self.conv1d = torch.nn.Conv1d(in_channels=embedding_dims, out_channels=filters, kernel_size=kernel_size, padding=(0,), stride=1)
        self.pool = torch.nn.AdaptiveMaxPool1d(1)
        self.linear1 = torch.nn.Linear(in_features=hidden_dims, out_features=hidden_dims)
        self.dropout2 = torch.nn.Dropout(p=0.2)
        self.activation = torch.nn.ReLU()
        self.output = torch.nn.Linear(in_features=hidden_dims, out_features=1)
        self.activation2 = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.dropout1(self.embedding(x.type(torch.LongTensor)))
        x = self.activation(self.conv1d(x.moveaxis(-1, -2)))  # moveaxis (not view!) puts channels first; ReLU added after the conv
        x = self.pool(x).squeeze(-1)
        x = self.activation(self.dropout2(self.linear1(x)))
        x = self.activation2(self.output(x))
        return x
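
Re-running the question's training loop unchanged with this model should reproduce results like the table above:

ftm = FitTorchModel(model=MyModel(), num_epochs=10, steps_per_epoch=782)
history_df = ftm.fit(train_dataloader, test_dataloader)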

Answer 2 (Score: 1)

What is the tinymodel you initialize opt with in the fit function:

opt = torch.optim.Adam(tinymodel.parameters(), lr=0.001)

It seems like your optimizer is not working on the right model (see this answer on the relation between the optimizer and the parameters of the model).

You need to replace this line in the fit function:

    def fit(self, train_dataloader, test_dataloader):
        opt = torch.optim.Adam(self.model.parameters(), lr=0.001)
        # ...

Additionally, you are using Dropout layers, which behave differently in train and test mode.
You should add self.model.train() and self.model.eval() at the beginning of your fit and predict_proba functions, respectively.
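
A small illustration of why this matters (Dropout is only active in train mode):

import torch

drop = torch.nn.Dropout(p=0.5)
x = torch.ones(6)
drop.train()
print(drop(x))   # about half the entries zeroed, the survivors scaled up to 2.0
drop.eval()
print(drop(x))   # tensor([1., 1., 1., 1., 1., 1.]): identity at evaluation time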
