Keras custom loss function with three network inputs and model.add_loss
Question
Hi, I need some help with a custom loss function in Keras. I am basically building a UNET with a second input that takes the weight maps, as in the original UNET paper. However, I am using this UNET for image synthesis, and my loss function is a combination of perceptual loss and pixel loss computed from three inputs (the input image, the reconstructed image, and the weight maps). The model itself is a standard UNET with encoder, decoder, and skip connections.
Below is my code for the network and for the loss function:
def synthesis_unet_weights(pretrained_weights=None, input_shape=(SIZE_s, SIZE_s, 3), num_classes=1, is_training=True):
    ip = Input(shape=input_shape)
    # Weight-map input, used only inside the loss during training
    weight_ip = Input(shape=input_shape[:2] + (num_classes,))
    # UNET encoder with the first Conv2D layer taking input ip
    # ...
    #---------------------------------------------------------------------------------------------------------------------------
    center = Conv2D(1024, (3,3), padding='same', activation='relu', kernel_initializer=initializer)(pool4)
    center = Conv2D(1024, (3,3), padding='same', activation='relu', kernel_initializer=initializer)(center)
    #---------------------------------------------------------------------------------------------------------------------------
    # UNET decoder with the last layer up1
    # ...
    classify = Conv2D(num_classes, (1,1), activation='sigmoid')(up1)
    if is_training:
        model = Model(inputs=[ip, weight_ip], outputs=[classify])
        model.add_loss(perceptual_loss_weight(ip, classify, weight_ip))
        return model
    else:
        model = Model(inputs=[ip], outputs=[classify])
        weight_ip = ip  # no weight map at inference, so the input fills its slot
        model.add_loss(perceptual_loss_weight(ip, classify, weight_ip))
        opt2 = tf.keras.optimizers.Adam(learning_rate=1e-3, clipnorm=1.0)
        model.compile(optimizer=opt2)
        return model
def perceptual_loss_weight(input_image, reconstruct_image, weights):
    input_image = clip_0_1(input_image)
    # Replicate the single-channel reconstruction and weight map to 3 channels
    # so they match the 3-channel input expected by LossModel
    reconstruct_image = tf.concat((reconstruct_image, reconstruct_image, reconstruct_image), axis=-1)
    reconstruct_image = clip_0_1(reconstruct_image)
    weights = tf.concat((weights, weights, weights), axis=-1)
    weights = clip_0_1(weights)
    # Perceptual part: compare feature maps of the input and the reconstruction
    h1_list = LossModel(input_image)
    h2_list = LossModel(reconstruct_image)
    rc_loss = 0.0
    for h1, h2, weight in zip(h1_list, h2_list, selected_layer_weights):
        h1 = K.batch_flatten(h1)
        h2 = K.batch_flatten(h2)
        rc_loss = rc_loss + weight * K.sum(K.square(h1 - h2), axis=-1)
    # Pixel part: weighted squared error between input and reconstruction
    pixel_loss = K.sum(K.square(K.batch_flatten(weights)*K.batch_flatten(input_image) - K.batch_flatten(weights)*K.batch_flatten(reconstruct_image)), axis=1)
    return rc_loss + pixel_loss
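(clip_0_1, LossModel, and selected_layer_weights are not shown in the post. For context, here is a minimal sketch of what they presumably look like, assuming a standard frozen-VGG16 perceptual loss; the layer names and weights below are illustrative, not from the original post:)
import tensorflow as tf

# Assumed helper (hypothetical): clamp pixel values to [0, 1]
def clip_0_1(x):
    return tf.clip_by_value(x, clip_value_min=0.0, clip_value_max=1.0)

# Assumed feature extractor (hypothetical): a frozen VGG16 that returns the
# activations of a few selected layers, with one weight per layer
vgg = tf.keras.applications.VGG16(include_top=False, weights='imagenet')
vgg.trainable = False
selected_layers = ['block1_conv2', 'block2_conv2', 'block3_conv3']
LossModel = tf.keras.Model(vgg.input, [vgg.get_layer(name).output for name in selected_layers])
selected_layer_weights = [1.0, 1.0, 1.0]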
The weight input is used only by the loss function during training. I managed to train the model (compiling with loss=None), but it is not predicting what it should. It looks like the input is passed through the network unmodified, straight to the output: the reconstructed image looks exactly the same as the input image.
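For reference, training such an add_loss model looks roughly like this (a sketch; the variable names are illustrative and not from the original post):
# The objective comes entirely from model.add_loss, so compile() gets no loss
model = synthesis_unet_weights(is_training=True)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3, clipnorm=1.0))
# No targets are passed: both arrays feed the model inputs, the loss needs no y
model.fit([input_images, weight_maps], epochs=10, batch_size=8)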
Answer 1
Score: 1
Okay, I have found the conceptual mistake: I was feeding the input images to the input_image argument of the loss function, but those should actually be the y_true labels. One possible solution is to feed the network an additional input, ip_labels, carrying the y_true labels required by the perceptual-loss @tf.function. Below is the working solution, written as a custom loss function with a dummy loss layer:
def MyLoss2(input_image, reconstruct_image, weight_ip):
    # weight_ip is captured by the closure; the traced function itself only
    # takes the labels and the reconstruction
    @tf.function
    def perceptual_loss(input_image, reconstruct_image):
        input_image = clip_0_1(input_image)
        reconstruct_image = clip_0_1(reconstruct_image)
        weights = clip_0_1(weight_ip)
        h1_list = LossModel(input_image)
        h2_list = LossModel(reconstruct_image)
        rc_loss = 0.0
        for h1, h2, weight in zip(h1_list, h2_list, selected_layer_weights):
            h1 = K.batch_flatten(h1)
            h2 = K.batch_flatten(h2)
            rc_loss = rc_loss + weight * K.sum(K.square(h1 - h2), axis=-1)
        pixel_loss = K.sum(K.square(K.batch_flatten(weights)*K.batch_flatten(input_image) - K.batch_flatten(weights)*K.batch_flatten(reconstruct_image)), axis=1)
        return rc_loss + pixel_loss
    return perceptual_loss(input_image, reconstruct_image)
The loss layer is implemented as follows:
class DummyLayer(Layer):
    def __init__(self, is_training, **kwargs):
        super().__init__(**kwargs)
        self.is_training = is_training

    def get_config(self):
        config = super().get_config()
        config.update({
            "is_training": self.is_training,
        })
        return config

    def call(self, inputs, is_training):
        # Unstack the three tensors: y_true labels, reconstruction, weight maps
        ip, classify, weight_ip = tf.unstack(inputs, axis=-1)
        self.add_loss(MyLoss2(ip, classify, weight_ip))
        return inputs
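Since the layer implements get_config, a saved model containing it can be reloaded by registering the class, roughly like this (a sketch; the file name is illustrative):
# Hypothetical reload of a saved model that contains the custom layer
reloaded = tf.keras.models.load_model('synthesis_unet.h5',
                                      custom_objects={'DummyLayer': DummyLayer})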
The synthesis UNET has an additional input ip_labels:
def synthesis_unet_weights(pretrained_weights=None, input_shape=(SIZE_s, SIZE_s, 3), num_classes=1, is_training=True):
    ip = Input(shape=input_shape)
    # Extra input carrying the y_true labels needed by the perceptual loss
    ip_labels = Input(shape=input_shape)
    weight_ip = Input(shape=input_shape[:2] + (num_classes,))
    down1 = Conv2D(64, (3,3), padding='same', activation=LeakyReLU(alpha=0.3), kernel_initializer=initializer)(ip)
    # UNET encoder with skips
    # ...
    center = Conv2D(1024, (3,3), padding='same', activation='relu', kernel_initializer=initializer)(pool4)
    center = Conv2D(1024, (3,3), padding='same', activation='relu', kernel_initializer=initializer)(center)
    # UNET decoder with the last layer up1
    # ...
    classify = Conv2D(num_classes, (1,1), activation='sigmoid')(up1)
    if is_training:
        # Stack labels, reconstruction, and weight maps into one tensor so the
        # dummy layer can receive all three and attach the loss
        data = tf.stack([ip_labels, (tf.concat((classify, classify, classify), axis=3)),
                         (tf.concat((weight_ip, weight_ip, weight_ip), axis=3))], axis=-1)
        classify = DummyLayer(is_training=True)(data, is_training=True)
        inp, classify, weight_inp = tf.unstack(classify, axis=-1)
        model = Model(inputs=[ip, ip_labels, weight_ip], outputs=[classify])
        opt = tf.keras.optimizers.Adam(learning_rate=1e-3, clipnorm=1.0)
        model.compile(optimizer=opt, metrics=['mse','mae'])
        return model
    else:
        # No labels or weight maps at inference; ip fills both slots
        data = tf.stack([ip, (tf.concat((classify, classify, classify), axis=3)), ip], axis=-1)
        classify = DummyLayer(is_training=False)(data, is_training=False)
        inp, classify, weight_inp = tf.unstack(classify, axis=-1)
        model = Model(inputs=[ip], outputs=[classify])
        opt = tf.keras.optimizers.Adam(learning_rate=1e-3, clipnorm=1.0)
        model.compile(optimizer=opt)
        return model
To train:
model = synthesis_unet_weights()
model.fit([input_images, labels, weight_maps], labels)
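At inference time, the single-input variant would presumably be used like this (a sketch, not from the original answer; the weight transfer assumes both variants build their layers in the same order):
# Hypothetical inference usage
model.save_weights('synthesis_unet_weights.h5')
infer_model = synthesis_unet_weights(is_training=False)
infer_model.load_weights('synthesis_unet_weights.h5')
predictions = infer_model.predict(test_images)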
I know the code that stacks and unstacks tensors is not particularly elegant, but it works.
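A somewhat cleaner variant (my sketch, not the author's code) is to let the loss layer take a list of tensors instead of one stacked tensor, since Keras layers accept nested inputs; this avoids broadcasting all three tensors to a common shape. The channel replication from the original code would stay at the call site:
class AddPerceptualLoss(Layer):
    # Hypothetical alternative to DummyLayer: receives the three tensors as a
    # list, attaches the loss, and passes the reconstruction through unchanged
    def call(self, inputs):
        y_true, y_pred, weight_map = inputs
        self.add_loss(MyLoss2(y_true, y_pred, weight_map))
        return y_pred

# classify = AddPerceptualLoss()([ip_labels, classify_rgb, weight_rgb])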