Pytorch: nn.Sequential gives NaN and Cholesky decomposition gives another error
Question
Some problems came up while trying to convert TensorFlow code to PyTorch. Below is the code in question, followed by suggested solutions to the problems:
import torch
import torch.nn as nn
import torch.nn.init as init

class Generator(nn.Module):
    def __init__(self, hilbert_size, num_points, noise=None):
        super(Generator, self).__init__()
        self.initializer = nn.init.normal_
        ops = nn.Parameter(torch.empty(1, hilbert_size, hilbert_size, num_points * 2))
        inputs = nn.Parameter(torch.empty(1, num_points))
        layer = nn.Linear(num_points, 16 * 16 * 2, bias=False)
        init.normal_(layer.weight, mean=0.0, std=0.02)
        self.x = nn.Sequential(
            layer,
            nn.LeakyReLU(0.01),
            nn.Unflatten(1, (2, 16, 16))
        )
        self.conv_transpose_1 = nn.Sequential(
            nn.ConvTranspose2d(2, 64, kernel_size=4, stride=1, padding=1, bias=False),
            nn.InstanceNorm2d(64),
            nn.LeakyReLU(0.01),
        )
        self.conv_transpose_2 = nn.Sequential(
            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=1, padding=2, bias=False),
            nn.InstanceNorm2d(64),
            nn.LeakyReLU(0.01),
        )
        self.conv_transpose_3 = nn.Sequential(
            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=1, padding=1, bias=False),
        )
        self.conv_transpose_4 = nn.Sequential(
            nn.ConvTranspose2d(32, 2, kernel_size=4, stride=1, padding=2, bias=False),
        )
        self.density_matrix = DensityMatrix()
        self.expectation = Expectation()
        self.noise = nn.GaussianNoise(noise)

    def forward(self, ops, inputs):
        x = self.x(inputs)
        x = self.conv_transpose_1(x)
        x = self.conv_transpose_2(x)
        x = self.conv_transpose_3(x)
        x = self.conv_transpose_4(x)
        x = self.density_matrix(x)
        complex_ops = convert_to_complex_ops(ops)
        prefactor = 1.0
        x = self.expectation(complex_ops, x, prefactor)
        x = self.noise(x)
        return x

class DensityMatrix(nn.Module):
    def __init__(self):
        super(DensityMatrix, self).__init__()

    def forward(self, inputs):
        T = clean_cholesky(inputs)
        return density_matrix_from_T(T)

def clean_cholesky(img):
    real = img[:, 0, :, :]
    imag = img[:, 1, :, :]
    diag_all = torch.diag(imag)
    diags = torch.linalg.diag(diag_all)
    imag = imag - diags
    imag = torch.linalg.band_part(imag, -1, 0)
    real = torch.linalg.band_part(real, -1, 0)
    T = torch.complex(real, imag)
    return T
As for your questions:

- For x = self.conv_transpose_1(x) returning a bunch of zeros, the likely culprits are the initial weights or the activation function. Check the weight initialization of the ConvTranspose2d layers to make sure the weights are not all zero, or try a different activation function.
- The torch.linalg.cholesky error means the input matrix is not positive-definite. You need to check that the input matrix meets the requirements of a Cholesky decomposition.
- The error inside clean_cholesky occurs because torch.diag expects a 2D matrix, while your img appears to be a 3D (batched) tensor. Make sure img has the right dimensions before calling torch.diag; a batched rewrite is sketched below.

I hope this information helps; if you have further questions or need more help, feel free to ask.
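To make the third point concrete, here is a minimal sketch of a batched clean_cholesky, using what I believe are the PyTorch counterparts of the TensorFlow ops: torch.diagonal for tf.linalg.diag_part, torch.diag_embed for tf.linalg.diag, and torch.tril for tf.linalg.band_part(x, -1, 0). It assumes channel 0 holds the real part and channel 1 the imaginary part, as in the code above:

import torch

def clean_cholesky(img):
    # img: (batch, 2, N, N); channel 0 = real part, channel 1 = imaginary part
    real = img[:, 0, :, :]
    imag = img[:, 1, :, :]
    # torch.diagonal works on the last two dims of a batched tensor,
    # unlike torch.diag, which only accepts 1D or 2D inputs.
    diag_all = torch.diagonal(imag, dim1=-2, dim2=-1)
    # torch.diag_embed is the batched counterpart of tf.linalg.diag.
    imag = imag - torch.diag_embed(diag_all)  # zero the diagonal of the imaginary part
    # torch.tril keeps the lower triangle, like tf.linalg.band_part(x, -1, 0).
    imag = torch.tril(imag)
    real = torch.tril(real)
    return torch.complex(real, imag)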
English:
In my quest to build a cGAN, transforming code from TensorFlow to PyTorch, I got stuck on the following:
class Generator(nn.Module):
    def __init__(self, hilbert_size, num_points, noise=None):
        super(Generator, self).__init__()
        self.initializer = nn.init.normal_
        ops = nn.Parameter(torch.empty(1, hilbert_size, hilbert_size, num_points * 2))
        # inputs = torch.empty((1, 1296), requires_grad=True)
        # inputs = torch.empty((1, num_points), requires_grad=True)
        inputs = nn.Parameter(torch.empty((1, num_points)))
        layer = nn.Linear(num_points, 16 * 16 * 2, bias=False)
        # init.kaiming_uniform_(layer.weight, a=math.sqrt(5))
        init.normal_(layer.weight, mean=0.0, std=0.02)
        self.x = nn.Sequential(
            layer,
            nn.LeakyReLU(),
            nn.Unflatten(1, (2, 16, 16))
            # view((-1, 16, 16, 2))
        )
        '''
        self.x = nn.Sequential(
            nn.Linear(num_points, 16 * 16 * 2, bias=False),
            nn.LeakyReLU(),
            # nn.Reshape((16, 16, 2)),
            nn.Unflatten(1, (2, 16, 16))
            # nn.Unflatten(1, (16, 16, 2))
        )
        '''
        self.conv_transpose_1 = nn.Sequential(
            nn.ConvTranspose2d(2, 64, kernel_size=4, stride=1, padding=1, bias=False),
            nn.InstanceNorm2d(64),
            # nn.InstanceNorm2d(x.shape[1])
            nn.LeakyReLU(),
        )
        self.conv_transpose_2 = nn.Sequential(
            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=1, padding=2, bias=False),
            nn.InstanceNorm2d(64),
            # nn.InstanceNorm2d(x.shape[1])
            nn.LeakyReLU(),
        )
        self.conv_transpose_3 = nn.Sequential(
            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=1, padding=1, bias=False),
        )
        self.conv_transpose_4 = nn.Sequential(
            nn.ConvTranspose2d(32, 2, kernel_size=4, stride=1, padding=2, bias=False),
        )
        self.density_matrix = DensityMatrix()
        self.expectation = Expectation()
        self.noise = nn.GaussianNoise(noise)

    def forward(self, ops, inputs):
        x = self.x(inputs)
        x = self.conv_transpose_1(x)
        x = self.conv_transpose_2(x)
        x = self.conv_transpose_3(x)
        x = self.conv_transpose_4(x)
        x = self.density_matrix(x)
        complex_ops = convert_to_complex_ops(ops)
        prefactor = 1.0
        x = self.expectation(complex_ops, x, prefactor)
        x = self.noise(x)
        return x
where:
class DensityMatrix(nn.Module):
    """
    Density matrix layer that cleans the input matrix into a Cholesky matrix
    and then constructs the density matrix for the state
    """
    def __init__(self):
        super(DensityMatrix, self).__init__()

    def forward(self, inputs):
        """
        The forward function which applies the Cholesky decomposition

        Args:
            inputs (`torch.Tensor`): a 4D real valued tensor (batch_size, hilbert_size, hilbert_size, 2)
                representing batch_size random outputs from a neural network.
                The last dimension is for separating the real and imaginary part

        Returns:
            dm (`torch.Tensor`): A 3D complex valued tensor (batch_size, hilbert_size, hilbert_size)
                representing valid density matrices from a Cholesky decomposition of the
                cleaned input
        """
        T = clean_cholesky(inputs)
        return density_matrix_from_T(T)
and
def clean_cholesky(img):
    real = img[:, 0, :, :]
    imag = img[:, 1, :, :]
    # diag_all = torch.linalg.diag_part(imag, k=0, padding_value=0)
    # diag_all = torch.diag(imag, k=0, padding_value=0)
    diag_all = torch.diag(imag)
    diags = torch.linalg.diag(diag_all)
    imag = imag - diags
    imag = torch.linalg.band_part(imag, -1, 0)
    real = torch.linalg.band_part(real, -1, 0)
    T = torch.complex(real, imag)
    return T
The inputs are:
In [154]: inputs
Out[154]:
tensor([[1.9286e-39, 2.1122e-39, 2.2959e-39, ..., 4.5779e-41, 2.7162e+20,
4.5779e-41]], requires_grad=True)
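Note that torch.empty returns uninitialized memory, which is consistent with values like 1.9286e-39 sitting next to 2.7162e+20 above. If random test inputs are intended, a sketch that avoids feeding garbage into the network (the 0.02 scale is an assumption, mirroring the std used for the Linear weights):

inputs = nn.Parameter(torch.randn(1, num_points) * 0.02)  # randn instead of empty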
And:
x = nn.Sequential(
    layer,
    nn.LeakyReLU(),
    nn.Unflatten(1, (2, 16, 16))
    # view((-1, 16, 16, 2))
)
Gives:
In [160]: x
Out[160]:
Sequential(
  (0): Linear(in_features=1296, out_features=512, bias=False)
  (1): LeakyReLU(negative_slope=0.01)
  (2): Unflatten(dim=1, unflattened_size=(2, 16, 16))
)
But x = self.conv_transpose_1(x)
gives:
Out[250]:
tensor([[[[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]],
[[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]],
[[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]],
...,
[[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]],
[[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]],
[[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]]], grad_fn=<LeakyReluBackward0>)
A bunch of zeros, and I have no idea why!!!
And when I use torch.linalg.cholesky
it gives the error:
_LinAlgError: linalg.cholesky: (Batch element 0): The factorization could not be completed because
the input is not positive-definite (the leading minor of order 1 is not positive-definite).
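That error is expected here: torch.linalg.cholesky requires a strictly positive-definite input, and an all-zeros (or otherwise arbitrary) network output will not satisfy that. In this kind of code the decomposition usually runs the other way around: the network output is cleaned into a lower-triangular factor T, and the density matrix is built as rho = T T^H / tr(T T^H), which is Hermitian and positive semi-definite by construction. A sketch of what density_matrix_from_T is presumably meant to do (the actual function is not shown in the post):

def density_matrix_from_T(T):
    # T: (batch, N, N) complex lower-triangular factor
    TT = torch.matmul(T, T.conj().transpose(-2, -1))    # T T^H, positive semi-definite
    trace = torch.diagonal(TT, dim1=-2, dim2=-1).sum(-1).real
    return TT / trace.reshape(-1, 1, 1)                 # normalize to unit trace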
And if I use my defined clean_cholesky
function it gives another error:
ll_impl(self, *input, **kwargs)
Cell In[421], line 24, in DensityMatrix.forward(self, inputs)
10 def forward(self, inputs):
---> 24 T = clean_cholesky(inputs)
25 return density_matrix_from_T(T)
Cell In[426], line 7, in clean_cholesky(img)
3 imag = img[:, :, :, 1]
----> 7 diag_all = torch.diag(imag)
8 diags = torch.linalg.diag(diag_all)
10 imag = imag - diags
RuntimeError: matrix or a vector expected
imag is of the form:
In [541]: imag
Out[541]:
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
grad_fn=<SliceBackward0>)
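The RuntimeError itself is independent of the zeros: torch.diag only accepts 1D or 2D inputs, while imag here has a leading batch dimension, i.e. shape (batch, 16, 16). The batched equivalents are torch.diagonal and torch.diag_embed, for example:

diag_all = torch.diagonal(imag, dim1=-2, dim2=-1)  # (batch, 16): per-matrix diagonals
diags = torch.diag_embed(diag_all)                 # (batch, 16, 16): back to diagonal matrices
imag = imag - diags                                # zero each matrix's diagonal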
In tensorflow everything works fine and imag is:
In [58]: imag
Out[58]: <KerasTensor: shape=(None, 16, 16) dtype=float64 (created by layer 'tf.__operators__.getitem_3')>
Something goes missing when I translate the code from TensorFlow to PyTorch, but I really do not know what or where the error is.
This is really important to me, and any help would be much appreciated!
Answer 1
Score: 1
Try first to:

- Fix the indentation in clean_cholesky
- Add __init__ and forward methods to DensityMatrix

[Edit] I mean that the class needs a forward:

class DensityMatrix(nn.Module):
    def forward(self, inputs):
        T = clean_cholesky(inputs)
        return density_matrix_from_T(T)
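One more porting detail worth flagging: torch.nn has no GaussianNoise layer (that is a Keras layer), so self.noise = nn.GaussianNoise(noise) will fail once execution reaches it. A minimal stand-in, assuming additive noise with standard deviation noise that is only active during training:

import torch
import torch.nn as nn

class GaussianNoise(nn.Module):
    # Stand-in for Keras's GaussianNoise; torch.nn has no such layer.
    def __init__(self, stddev):
        super().__init__()
        self.stddev = stddev

    def forward(self, x):
        if self.training and self.stddev:
            return x + torch.randn_like(x) * self.stddev
        return x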