What is the difference between “BinaryCrossentropy” and “binary_crossentropy” in tf.keras.losses?


Question

I'm training a model with TensorFlow 2.0 using tf.GradientTape(), and I find that the model's accuracy is 95% if I use tf.keras.losses.BinaryCrossentropy, but drops to 75% if I use tf.keras.losses.binary_crossentropy. So I'm confused about the difference between the two.

```python
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

def read_data():
    red_wine = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv", sep=";")
    white_wine = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv", sep=";")
    red_wine["type"] = 1
    white_wine["type"] = 0
    wines = red_wine.append(white_wine)
    return wines

def get_x_y(df):
    x = df.iloc[:, :-1].values.astype(np.float32)
    y = df.iloc[:, -1].values.astype(np.int32)
    return x, y

def build_model():
    inputs = layers.Input(shape=(12,))
    dense1 = layers.Dense(12, activation="relu", name="dense1")(inputs)
    dense2 = layers.Dense(9, activation="relu", name="dense2")(dense1)
    outputs = layers.Dense(1, activation="sigmoid", name="outputs")(dense2)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

def generate_dataset(df, batch_size=32, shuffle=True, train_or_test="train"):
    x, y = get_x_y(df)
    ds = tf.data.Dataset.from_tensor_slices((x, y))
    if shuffle:
        ds = ds.shuffle(10000)
    if train_or_test == "train":
        ds = ds.batch(batch_size)
    else:
        ds = ds.batch(len(df))
    return ds

# loss_object = tf.keras.losses.binary_crossentropy
loss_object = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

def train_step(model, optimizer, x, y):
    with tf.GradientTape() as tape:
        pred = model(x, training=True)
        loss = loss_object(y, pred)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

def train_model(model, train_ds, epochs=10):
    for epoch in range(epochs):
        print(epoch)
        for x, y in train_ds:
            train_step(model, optimizer, x, y)

def main():
    data = read_data()
    train, test = train_test_split(data, test_size=0.2, random_state=23)
    train_ds = generate_dataset(train, 32, True, "train")
    test_ds = generate_dataset(test, 32, False, "test")
    model = build_model()
    train_model(model, train_ds, 10)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.evaluate(test_ds)

main()
```

Answer 1

Score: 3

They should indeed work the same; BinaryCrossentropy uses binary_crossentropy, with the difference apparent in the docstring descriptions: the former is intended for two class labels, whereas the latter supports an arbitrary class count. However, if targets are passed in the expected format, both apply the same preprocessing before calling the backend's binary_crossentropy, which does the actual computing.

The difference you observe is likely a reproducibility issue; make sure you set the random seeds - see the function below. For a more complete answer on reproducibility, see here.

Function

```python
import random

import numpy as np
import tensorflow as tf

def reset_seeds(reset_graph_with_backend=None):
    if reset_graph_with_backend is not None:
        K = reset_graph_with_backend
        K.clear_session()
        tf.compat.v1.reset_default_graph()
        print("KERAS AND TENSORFLOW GRAPHS RESET")  # optional

    np.random.seed(1)
    random.seed(2)
    tf.compat.v1.set_random_seed(3)
    print("RANDOM SEEDS RESET")  # optional
```

Usage:

```python
import tensorflow as tf
import tensorflow.keras.backend as K

reset_seeds(K)
```
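
To confirm that the two entry points compute the same thing when the shapes line up, here is a minimal sketch (the label and probability values are made up for illustration). Note that the class reduces to a scalar mean while the function returns one loss per sample, so the function's output is averaged before comparing:

```python
import tensorflow as tf

# Illustrative labels and sigmoid outputs, both shaped (4, 1).
y_true = tf.constant([[1.0], [0.0], [1.0], [0.0]])
y_pred = tf.constant([[0.9], [0.2], [0.7], [0.4]])

# The class applies a mean reduction over the batch.
loss_class = tf.keras.losses.BinaryCrossentropy()(y_true, y_pred)

# The function returns a per-sample loss; average it to compare.
loss_fn = tf.reduce_mean(tf.keras.losses.binary_crossentropy(y_true, y_pred))

print(float(loss_class), float(loss_fn))  # both print the same value
```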

Answer 2

Score: 1

Thanks, I found the reasons for the inconsistent accuracy:

  1. The shape of the model's outputs is (None, 1), but the labels fed in have shape (None,), which gives the loss a wrong meaning through Python's broadcasting mechanism (see the sketch after this list).
  2. In the source code of tf.keras.losses.BinaryCrossentropy(), both y_pred and y_true are run through a function called squeeze_or_expand_dimensions before the loss is computed; this step is missing from tf.keras.losses.binary_crossentropy.
  3. Note: take care that the shapes of the labels and the model outputs are consistent.
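
A minimal sketch of the mismatch (the label and prediction values are illustrative):

```python
import tensorflow as tf

y_true = tf.constant([1.0, 0.0, 1.0, 0.0])          # labels, shape (4,)
y_pred = tf.constant([[0.9], [0.2], [0.7], [0.4]])  # sigmoid outputs, shape (4, 1)

# The class squeezes y_pred to (4,) first, so each label stays paired
# with its own prediction.
print(float(tf.keras.losses.BinaryCrossentropy()(y_true, y_pred)))

# The function broadcasts (4,) against (4, 1) into a (4, 4) grid,
# pairing every label with every prediction before reducing over the
# last axis, which silently corrupts the loss.
print(tf.keras.losses.binary_crossentropy(y_true, y_pred).shape)  # (4,)

# Fix: reshape the labels to (4, 1) so both entry points agree.
y_true_2d = tf.reshape(y_true, (-1, 1))
print(float(tf.reduce_mean(tf.keras.losses.binary_crossentropy(y_true_2d, y_pred))))
```

In the question's code, the same fix amounts to reshaping y in get_x_y (for example, y = df.iloc[:, -1].values.astype(np.float32).reshape(-1, 1)) so the labels match the (None, 1) model output.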
