英文:
Difference between "BinaryCrossentropy" and "binary_crossentropy" in tf.keras.losses?
问题
我正在使用TensorFlow 2.0和tf.GradientTape()训练模型,但我发现如果我使用tf.keras.losses.BinaryCrossentropy,模型的准确度为95%,但如果我使用tf.keras.losses.binary_crossentropy,准确度下降到75%。所以我对这两者之间的差异感到困惑。
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
def read_data():
    """Download the UCI red and white wine-quality tables and stack them.

    Each table is read from its semicolon-separated CSV and tagged with a
    "type" column (1 for red, 0 for white) before the two are combined.

    Returns:
        A single DataFrame holding the red-wine rows followed by the
        white-wine rows. Each source table keeps its own row index.
    """
    red_wine = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv", sep=";")
    white_wine = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv", sep=";")
    red_wine["type"] = 1
    white_wine["type"] = 0
    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows
    # the same way and likewise keeps each frame's original index.
    wines = pd.concat([red_wine, white_wine])
    return wines
def get_x_y(df):
    """Split a frame into a feature matrix and a label vector.

    Args:
        df: DataFrame whose last column holds the label and whose remaining
            columns hold the features.

    Returns:
        A ``(features, labels)`` tuple: ``features`` is a 2-D float32 array
        built from every column but the last, and ``labels`` is a 1-D int32
        array built from the last column.
    """
    feature_frame = df.iloc[:, :-1]
    label_column = df.iloc[:, -1]
    features = feature_frame.values.astype(np.float32)
    labels = label_column.values.astype(np.int32)
    return features, labels
def build_model():
    """Build the 12-feature binary classifier used in the experiment.

    The network is two ReLU dense layers (12 and 9 units) followed by a
    single sigmoid unit.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    inputs = layers.Input(shape=(12,))
    hidden = inputs
    # Stack the hidden layers in order, keeping their original layer names.
    for units, layer_name in ((12, "dense1"), (9, "dense2")):
        hidden = layers.Dense(units, activation="relu", name=layer_name)(hidden)
    outputs = layers.Dense(1, activation="sigmoid", name="outputs")(hidden)
    return tf.keras.Model(inputs=inputs, outputs=outputs)
def generate_dataset(df, batch_size=32, shuffle=True, train_or_test="train"):
    """Wrap a DataFrame in a batched ``tf.data.Dataset`` of (features, labels).

    Args:
        df: Frame passed to ``get_x_y`` to obtain features and labels.
        batch_size: Batch size used when ``train_or_test`` is ``"train"``.
        shuffle: Whether to shuffle with a 10000-element buffer first.
        train_or_test: ``"train"`` batches by ``batch_size``; any other value
            puts the whole frame into a single batch.

    Returns:
        The batched dataset.
    """
    features, labels = get_x_y(df)
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    if shuffle:
        dataset = dataset.shuffle(10000)
    # Non-training data is served as one batch covering every row.
    rows_per_batch = batch_size if train_or_test == "train" else len(df)
    return dataset.batch(rows_per_batch)
# Class-based binary cross-entropy loss; train_step calls it as loss_object(y, pred).
loss_object = tf.keras.losses.BinaryCrossentropy()
# Adam optimizer instance that train_model passes to every train_step call.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
def train_step(model, optimizer, x, y):
    """Run one gradient-descent update on a single batch.

    Args:
        model: Model to call in training mode and whose trainable variables
            are updated.
        optimizer: Optimizer used to apply the gradients.
        x: Batch of input features.
        y: Batch of labels passed to the module-level ``loss_object``.
    """
    weights = model.trainable_variables
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        predictions = model(x, training=True)
        batch_loss = loss_object(y, predictions)
    gradients = tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(gradients, weights))
def train_model(model, train_ds, epochs=10):
    """Train ``model`` with the custom loop for a number of epochs.

    Args:
        model: Model to train.
        train_ds: Iterable yielding ``(features, labels)`` batches.
        epochs: Number of full passes over ``train_ds``.

    The epoch index is printed at the start of each pass, and every batch is
    handed to ``train_step`` together with the module-level ``optimizer``.
    """
    for epoch_index in range(epochs):
        print(epoch_index)
        for batch in train_ds:
            features, labels = batch
            train_step(model, optimizer, features, labels)
def main():
    """Train the classifier on an 80/20 split and evaluate it on the test part."""
    wines = read_data()
    train_frame, test_frame = train_test_split(wines, test_size=0.2, random_state=23)
    train_ds = generate_dataset(train_frame, batch_size=32, shuffle=True, train_or_test="train")
    test_ds = generate_dataset(test_frame, batch_size=32, shuffle=False, train_or_test="test")

    model = build_model()
    train_model(model, train_ds, epochs=10)

    # The model is compiled after the custom training loop; this configures
    # the loss and accuracy metric used by evaluate().
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.evaluate(test_ds)


main()
英文:
I'm training a model with TensorFlow 2.0 using tf.GradientTape(), but I find that the model's accuracy is 95% if I use tf.keras.losses.BinaryCrossentropy, and degrades to 75% if I use tf.keras.losses.binary_crossentropy. So I'm confused: what is the difference between these two forms of the same loss?
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
def read_data():
    """Download the UCI red and white wine-quality tables and stack them.

    Each table is read from its semicolon-separated CSV and tagged with a
    "type" column (1 for red, 0 for white) before the two are combined.

    Returns:
        A single DataFrame holding the red-wine rows followed by the
        white-wine rows. Each source table keeps its own row index.
    """
    red_wine = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv", sep=";")
    white_wine = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv", sep=";")
    red_wine["type"] = 1
    white_wine["type"] = 0
    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows
    # the same way and likewise keeps each frame's original index.
    wines = pd.concat([red_wine, white_wine])
    return wines
def get_x_y(df):
    """Split a frame into a feature matrix and a label vector.

    Args:
        df: DataFrame whose last column holds the label and whose remaining
            columns hold the features.

    Returns:
        A ``(features, labels)`` tuple: ``features`` is a 2-D float32 array
        built from every column but the last, and ``labels`` is a 1-D int32
        array built from the last column.
    """
    feature_frame = df.iloc[:, :-1]
    label_column = df.iloc[:, -1]
    features = feature_frame.values.astype(np.float32)
    labels = label_column.values.astype(np.int32)
    return features, labels
def build_model():
    """Build the 12-feature binary classifier used in the experiment.

    The network is two ReLU dense layers (12 and 9 units) followed by a
    single sigmoid unit.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    inputs = layers.Input(shape=(12,))
    hidden = inputs
    # Stack the hidden layers in order, keeping their original layer names.
    for units, layer_name in ((12, "dense1"), (9, "dense2")):
        hidden = layers.Dense(units, activation="relu", name=layer_name)(hidden)
    outputs = layers.Dense(1, activation="sigmoid", name="outputs")(hidden)
    return tf.keras.Model(inputs=inputs, outputs=outputs)
def generate_dataset(df, batch_size=32, shuffle=True, train_or_test="train"):
    """Wrap a DataFrame in a batched ``tf.data.Dataset`` of (features, labels).

    Args:
        df: Frame passed to ``get_x_y`` to obtain features and labels.
        batch_size: Batch size used when ``train_or_test`` is ``"train"``.
        shuffle: Whether to shuffle with a 10000-element buffer first.
        train_or_test: ``"train"`` batches by ``batch_size``; any other value
            puts the whole frame into a single batch.

    Returns:
        The batched dataset.
    """
    features, labels = get_x_y(df)
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    if shuffle:
        dataset = dataset.shuffle(10000)
    # Non-training data is served as one batch covering every row.
    rows_per_batch = batch_size if train_or_test == "train" else len(df)
    return dataset.batch(rows_per_batch)
# Functional alternative that the question compares against (reported ~75% accuracy):
# loss_object = tf.keras.losses.binary_crossentropy
# Class-based loss currently in use; train_step calls it as loss_object(y, pred).
loss_object = tf.keras.losses.BinaryCrossentropy()
# Adam optimizer instance that train_model passes to every train_step call.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
def train_step(model, optimizer, x, y):
    """Run one gradient-descent update on a single batch.

    Args:
        model: Model to call in training mode and whose trainable variables
            are updated.
        optimizer: Optimizer used to apply the gradients.
        x: Batch of input features.
        y: Batch of labels passed to the module-level ``loss_object``.
    """
    weights = model.trainable_variables
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        predictions = model(x, training=True)
        batch_loss = loss_object(y, predictions)
    gradients = tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(gradients, weights))
def train_model(model, train_ds, epochs=10):
    """Train ``model`` with the custom loop for a number of epochs.

    Args:
        model: Model to train.
        train_ds: Iterable yielding ``(features, labels)`` batches.
        epochs: Number of full passes over ``train_ds``.

    The epoch index is printed at the start of each pass, and every batch is
    handed to ``train_step`` together with the module-level ``optimizer``.
    """
    for epoch_index in range(epochs):
        print(epoch_index)
        for batch in train_ds:
            features, labels = batch
            train_step(model, optimizer, features, labels)
def main():
    """Train the classifier on an 80/20 split and evaluate it on the test part."""
    wines = read_data()
    train_frame, test_frame = train_test_split(wines, test_size=0.2, random_state=23)
    train_ds = generate_dataset(train_frame, batch_size=32, shuffle=True, train_or_test="train")
    test_ds = generate_dataset(test_frame, batch_size=32, shuffle=False, train_or_test="test")

    model = build_model()
    train_model(model, train_ds, epochs=10)

    # The model is compiled after the custom training loop; this configures
    # the loss and accuracy metric used by evaluate().
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.evaluate(test_ds)


main()
答案1
得分: 3
以下是要翻译的内容:
They should indeed work the same: BinaryCrossentropy uses binary_crossentropy internally. The difference shows up in the docstring descriptions: the former is intended for two class labels, whereas the latter supports an arbitrary class count. However, if the targets are passed in the expected format, both apply the same preprocessing before calling the backend's binary_crossentropy, which does the actual computation.
The difference you observe is likely a reproducibility issue; make sure you set the random seed (see the function below). For a more complete answer on reproducibility, see here.
Function
def reset_seeds(reset_graph_with_backend=None):
    """Reseed NumPy, Python's ``random`` and TensorFlow with fixed seeds.

    Args:
        reset_graph_with_backend: Optional Keras backend module (for example
            ``tensorflow.keras.backend``). When given, its session is cleared
            and the TensorFlow default graph is reset before reseeding.

    Expects ``np`` (numpy) and ``tf`` (tensorflow) to be imported at module
    level.
    """
    # Imported locally because none of the accompanying snippets import
    # `random`, so the call to random.seed() would raise NameError.
    import random

    if reset_graph_with_backend is not None:
        reset_graph_with_backend.clear_session()
        tf.compat.v1.reset_default_graph()
        print("KERAS AND TENSORFLOW GRAPHS RESET")  # optional

    np.random.seed(1)
    random.seed(2)
    tf.compat.v1.set_random_seed(3)
    print("RANDOM SEEDS RESET")  # optional
Usage:
import tensorflow as tf
import tensorflow.keras.backend as K
# Passing the Keras backend makes reset_seeds clear the session and reset the
# default graph before it reseeds.
reset_seeds(K)
英文:
They should indeed work the same: BinaryCrossentropy uses binary_crossentropy internally. The difference shows up in the docstring descriptions: the former is intended for two class labels, whereas the latter supports an arbitrary class count. However, if the targets are passed in the expected format, both apply the same preprocessing before calling the backend's binary_crossentropy, which does the actual computation.
The difference you observe is likely a reproducibility issue; make sure you set the random seed (see the function below). For a more complete answer on reproducibility, see here.
<hr>
Function
def reset_seeds(reset_graph_with_backend=None):
    """Reseed NumPy, Python's ``random`` and TensorFlow with fixed seeds.

    Args:
        reset_graph_with_backend: Optional Keras backend module (for example
            ``tensorflow.keras.backend``). When given, its session is cleared
            and the TensorFlow default graph is reset before reseeding.

    Expects ``np`` (numpy) and ``tf`` (tensorflow) to be imported at module
    level.
    """
    # Imported locally because none of the accompanying snippets import
    # `random`, so the call to random.seed() would raise NameError.
    import random

    if reset_graph_with_backend is not None:
        reset_graph_with_backend.clear_session()
        tf.compat.v1.reset_default_graph()
        print("KERAS AND TENSORFLOW GRAPHS RESET")  # optional

    np.random.seed(1)
    random.seed(2)
    tf.compat.v1.set_random_seed(3)
    print("RANDOM SEEDS RESET")  # optional
<hr>
Usage:
import tensorflow as tf
import tensorflow.keras.backend as K
# Passing the Keras backend makes reset_seeds clear the session and reset the
# default graph before it reseeds.
reset_seeds(K)
答案2
得分: 1
- 模型中 outputs 的形状是 (None, 1),但提供的标签形状是 (None,),两者在广播机制下会得到与预期含义不符的结果。
- 在 tf.keras.losses.BinaryCrossentropy() 的源代码中,计算损失时 y_pred 和 y_true 都会先经过名为 squeeze_or_expand_dimensions 的函数处理,而 tf.keras.losses.binary_crossentropy 中缺少这一步。
- 注意:确保输入数据和模型输出之间的形状一致。
英文:
Thanks, I found the reasons for the inconsistent accuracy:
- The shape of outputs in the model is (None, 1), but the labels fed in have shape (None,), so broadcasting between them produces an unintended result.
- In the source code of tf.keras.losses.BinaryCrossentropy(), both y_pred and y_true are processed by a function called squeeze_or_expand_dimensions while the loss is calculated; this step is missing from tf.keras.losses.binary_crossentropy.
- Note: make sure that the shapes of the input data and the model outputs are consistent.
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论