Pytorch: nn.Sequential gives NaN and Cholesky decomposition gives another error.

Question

While trying to convert TensorFlow code to PyTorch I ran into some problems. Below is the translated code, followed by notes on the individual issues:

import torch
import torch.nn as nn
import torch.nn.init as init

class Generator(nn.Module):
    def __init__(self, hilbert_size, num_points, noise=None):
        super(Generator, self).__init__()

        self.initializer = nn.init.normal_

        ops = nn.Parameter(torch.empty(1, hilbert_size, hilbert_size, num_points * 2))
        inputs = nn.Parameter(torch.empty(1, num_points))

        layer = nn.Linear(num_points, 16 * 16 * 2, bias=False)
        init.normal_(layer.weight, mean=0.0, std=0.02)

        self.x = nn.Sequential(
            layer,
            nn.LeakyReLU(0.01),
            nn.Unflatten(1, (2, 16, 16))
        )

        self.conv_transpose_1 = nn.Sequential(
            nn.ConvTranspose2d(2, 64, kernel_size=4, stride=1, padding=1, bias=False),
            nn.InstanceNorm2d(64),
            nn.LeakyReLU(0.01),
        )

        self.conv_transpose_2 = nn.Sequential(
            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=1, padding=2, bias=False),
            nn.InstanceNorm2d(64),
            nn.LeakyReLU(0.01),
        )

        self.conv_transpose_3 = nn.Sequential(
            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=1, padding=1, bias=False),
        )

        self.conv_transpose_4 = nn.Sequential(
            nn.ConvTranspose2d(32, 2, kernel_size=4, stride=1, padding=2, bias=False),
        )

        self.density_matrix = DensityMatrix()
        self.expectation = Expectation()
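        # NOTE: torch.nn has no GaussianNoise layer (Keras does); for this
        # line to run, GaussianNoise must be a custom nn.Module defined elsewhere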
        self.noise = nn.GaussianNoise(noise)

    def forward(self, ops, inputs):
        x = self.x(inputs)
        x = self.conv_transpose_1(x)
        x = self.conv_transpose_2(x)
        x = self.conv_transpose_3(x)
        x = self.conv_transpose_4(x)
        x = self.density_matrix(x)
        complex_ops = convert_to_complex_ops(ops)
        prefactor = 1.0
        x = self.expectation(complex_ops, x, prefactor)
        x = self.noise(x)

        return x

class DensityMatrix(nn.Module):
    def __init__(self):
        super(DensityMatrix, self).__init__()

    def forward(self, inputs):
        T = clean_cholesky(inputs)
        return density_matrix_from_T(T)

def clean_cholesky(img):
    real = img[:, 0, :, :]
    imag = img[:, 1, :, :]

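    # NOTE: torch.diag only accepts 1D/2D tensors, and torch.linalg.diag /
    # torch.linalg.band_part do not exist in PyTorch -- the calls below
    # transcribe tf.linalg.diag / tf.linalg.band_part and are what fails here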
    diag_all = torch.diag(imag)
    diags = torch.linalg.diag(diag_all)

    imag = imag - diags
    imag = torch.linalg.band_part(imag, -1, 0)
    real = torch.linalg.band_part(real, -1, 0)
    T = torch.complex(real, imag)
    return T

As for your questions:

  1. For x = self.conv_transpose_1(x) returning all zeros: this is likely an initialization problem. torch.empty returns uninitialized memory (the denormal values such as 1.9286e-39 printed for inputs are a telltale sign), so fill inputs with real values, and check the ConvTranspose2d weight initialization to make sure the weights are not zero, or try a different activation function. See the first sketch after this list.

  2. The torch.linalg.cholesky error says that the input matrix is not positive-definite. You need to make sure the input satisfies the requirements of a Cholesky decomposition; the second sketch below shows a standard construction.

  3. The error in clean_cholesky arises because torch.diag expects a vector or a matrix, while your imag slice is a 3D batched tensor of shape (batch, 16, 16). Use the batched equivalents torch.diagonal and torch.diag_embed instead, as in the third sketch below.
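
A minimal sketch of point 1. The nn.Sequential here is only a placeholder stand-in for the generator's layers, and 1296 stands in for the num_points value from the question:

import torch
import torch.nn as nn
import torch.nn.init as init

# Placeholder stand-in for the generator's transposed-convolution stack
net = nn.Sequential(
    nn.ConvTranspose2d(2, 64, kernel_size=4, stride=1, padding=1, bias=False),
    nn.InstanceNorm2d(64),
    nn.LeakyReLU(0.01),
)

# Re-initialize every (transposed) convolution with a DCGAN-style
# N(0, 0.02) init so the weights are guaranteed to be nonzero
for m in net.modules():
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        init.normal_(m.weight, mean=0.0, std=0.02)

# torch.empty returns uninitialized memory (hence the 1.9286e-39 denormals
# printed for inputs); sample real values instead
inputs = torch.randn(1, 1296)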
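
For point 2, a common construction (a sketch, not code from the question): A @ A^H is positive semi-definite for any matrix A, and adding a small multiple of the identity makes it strictly positive-definite, so torch.linalg.cholesky succeeds:

import torch

h = 16  # placeholder Hilbert-space dimension
A = torch.randn(h, h, dtype=torch.cfloat)

# A @ A^H is positive semi-definite; the small ridge on the diagonal
# makes it strictly positive-definite
pd = A @ A.conj().transpose(-2, -1) + 1e-6 * torch.eye(h)

L = torch.linalg.cholesky(pd)  # no _LinAlgError here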
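
For point 3, a sketch of clean_cholesky rewritten with the batched PyTorch equivalents of the TensorFlow calls: torch.diagonal for tf.linalg.diag_part, torch.diag_embed for tf.linalg.diag, and torch.tril for tf.linalg.band_part(x, -1, 0):

import torch

def clean_cholesky(img):
    # img: (batch, 2, hilbert_size, hilbert_size); channel 0 holds the
    # real part, channel 1 the imaginary part
    real = img[:, 0, :, :]
    imag = img[:, 1, :, :]

    # Per-matrix diagonals of the batch: shape (batch, hilbert_size)
    diag_all = torch.diagonal(imag, dim1=-2, dim2=-1)
    # Rebuild batched diagonal matrices: (batch, hilbert_size, hilbert_size)
    diags = torch.diag_embed(diag_all)

    # Zero the imaginary diagonal, then keep only the lower triangles
    # (torch.tril(x) matches tf.linalg.band_part(x, -1, 0))
    imag = torch.tril(imag - diags)
    real = torch.tril(real)
    return torch.complex(real, imag)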

I hope this helps; if you have further questions or need more help, feel free to ask.

English (original):

In my quest to build a cGAN, translating code from TensorFlow to PyTorch, I got stuck on the following:

class Generator(nn.Module):
    def __init__(self, hilbert_size, num_points, noise=None):
        super(Generator, self).__init__()
        self.initializer = nn.init.normal_
        ops = nn.Parameter(torch.empty(1, hilbert_size, hilbert_size, num_points * 2))
        #inputs = torch.empty((1, 1296), requires_grad=True)
        #inputs = torch.empty((1, num_points), requires_grad=True)
        inputs = nn.Parameter(torch.empty((1, num_points)))
        layer = nn.Linear(num_points, 16 * 16 * 2, bias=False)
        #init.kaiming_uniform_(layer.weight, a=math.sqrt(5))
        init.normal_(layer.weight, mean=0.0, std=0.02)
        self.x = nn.Sequential(
            layer,
            nn.LeakyReLU(),
            nn.Unflatten(1, (2, 16, 16))
            #view((-1, 16, 16, 2))
        )
        '''
        self.x = nn.Sequential(
            nn.Linear(num_points, 16 * 16 * 2, bias=False),
            nn.LeakyReLU(),
            #nn.Reshape((16, 16, 2)),
            nn.Unflatten(1, (2, 16, 16))
            #nn.Unflatten(1, (16, 16, 2))
        )
        '''
        self.conv_transpose_1 = nn.Sequential(
            nn.ConvTranspose2d(2, 64, kernel_size=4, stride=1, padding=1, bias=False),
            nn.InstanceNorm2d(64),
            #nn.InstanceNorm2d(x.shape[1])
            nn.LeakyReLU(),
        )
        self.conv_transpose_2 = nn.Sequential(
            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=1, padding=2, bias=False),
            nn.InstanceNorm2d(64),
            #nn.InstanceNorm2d(x.shape[1])
            nn.LeakyReLU(),
        )
        self.conv_transpose_3 = nn.Sequential(
            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=1, padding=1, bias=False),
        )
        self.conv_transpose_4 = nn.Sequential(
            nn.ConvTranspose2d(32, 2, kernel_size=4, stride=1, padding=2, bias=False),
        )
        self.density_matrix = DensityMatrix()
        self.expectation = Expectation()
        self.noise = nn.GaussianNoise(noise)

    def forward(self, ops, inputs):
        x = self.x(inputs)
        x = self.conv_transpose_1(x)
        x = self.conv_transpose_2(x)
        x = self.conv_transpose_3(x)
        x = self.conv_transpose_4(x)
        x = self.density_matrix(x)
        complex_ops = convert_to_complex_ops(ops)
        prefactor = 1.0
        x = self.expectation(complex_ops, x, prefactor)
        x = self.noise(x)
        return x

where:

class DensityMatrix(nn.Module):
    """
    Density matrix layer that cleans the input matrix into a Cholesky matrix
    and then constructs the density matrix for the state
    """
    def __init__(self):
        super(DensityMatrix, self).__init__()

    def forward(self, inputs):
        """
        The forward function which applies the Cholesky decomposition

        Args:
            inputs (`torch.Tensor`): a 4D real valued tensor (batch_size, hilbert_size, hilbert_size, 2)
                representing batch_size random outputs from a neural network.
                The last dimension is for separating the real and imaginary part

        Returns:
            dm (`torch.Tensor`): A 3D complex valued tensor (batch_size, hilbert_size, hilbert_size)
                representing valid density matrices from a Cholesky decomposition of the
                cleaned input
        """
        T = clean_cholesky(inputs)
        return density_matrix_from_T(T)

and

def clean_cholesky(img):
    real = img[:, 0, :, :]
    imag = img[:, 1, :, :]
    #diag_all = torch.linalg.diag_part(imag, k=0, padding_value=0)
    #diag_all = torch.diag(imag, k=0, padding_value=0)
    diag_all = torch.diag(imag)
    diags = torch.linalg.diag(diag_all)
    imag = imag - diags
    imag = torch.linalg.band_part(imag, -1, 0)
    real = torch.linalg.band_part(real, -1, 0)
    T = torch.complex(real, imag)
    return T

The inputs are:

In [154]: inputs
Out[154]: 
tensor([[1.9286e-39, 2.1122e-39, 2.2959e-39,  ..., 4.5779e-41, 2.7162e+20,
         4.5779e-41]], requires_grad=True)

And:

x = nn.Sequential(
    layer,
    nn.LeakyReLU(),
    nn.Unflatten(1, (2, 16, 16))
    #view((-1, 16, 16, 2))
)

Gives:

In [160]: x
Out[160]: 
Sequential(
  (0): Linear(in_features=1296, out_features=512, bias=False)
  (1): LeakyReLU(negative_slope=0.01)
  (2): Unflatten(dim=1, unflattened_size=(2, 16, 16))
)

But x = self.conv_transpose_1(x) gives:

Out[250]: 
tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],
         ...,
         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]]], grad_fn=<LeakyReluBackward0>)

A bunch of zeros, and I have no idea why!!!
And when I use torch.linalg.cholesky it gives the error:

_LinAlgError: linalg.cholesky: (Batch element 0): The factorization could not be completed because
the input is not positive-definite (the leading minor of order 1 is not positive-definite).

And if I use my defined clean_cholesky function it gives another error:

Cell In[421], line 24, in DensityMatrix.forward(self, inputs)
     10 def forward(self, inputs):
---> 24     T = clean_cholesky(inputs)
     25     return density_matrix_from_T(T)

Cell In[426], line 7, in clean_cholesky(img)
      3 imag = img[:, :, :, 1]
----> 7 diag_all = torch.diag(imag)
      8 diags = torch.linalg.diag(diag_all)
     10 imag = imag - diags

RuntimeError: matrix or a vector expected

imag is of the form:

In [541]: imag
Out[541]: 
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<SliceBackward0>)

In TensorFlow everything works fine and imag is:

In [58]: imag
Out[58]: <KerasTensor: shape=(None, 16, 16) dtype=float64 (created by layer 'tf.__operators__.getitem_3')>

But something is missing in my translation of the code from TensorFlow to PyTorch, and I really do not know what or where the error is.

This is really important for me and any help would be much appreciated!

Answer 1

Score: 1

First, try the following:

  • Fix the indentation in clean_cholesky
  • Add __init__ and forward methods to DensityMatrix

[Edit] What I mean is that the class needs a forward method:

class DensityMatrix(nn.Module):

    def forward(self, inputs):
        T = clean_cholesky(inputs)
        return density_matrix_from_T(T)
English (original):

Try first to:

  • Fix the indentation in clean_cholesky
  • Add __init__ and forward methods to DensityMatrix

[Edit] I mean that the class needs a forward:

class DensityMatrix(nn.Module):
    def forward(self, inputs):
        T = clean_cholesky(inputs)
        return density_matrix_from_T(T)
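
A quick usage sketch of the fixed class, assuming a batched clean_cholesky (for example the torch.diagonal/torch.diag_embed version sketched earlier) and the question's density_matrix_from_T helper (not shown there) are both defined:

import torch

dm_layer = DensityMatrix()

# Random stand-in for the generator output: (batch, 2, 16, 16), with
# channel 0 the real part and channel 1 the imaginary part
x = torch.randn(4, 2, 16, 16)

rho = dm_layer(x)   # forward() now runs: clean, then build the density matrix
print(rho.shape)    # per the docstring: torch.Size([4, 16, 16]), complex dtype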
