My CNN does not correctly predict images that are not from the dataset
I want my CNN to predict numbers that I draw with the mouse. The CNN works fine with the dataset and test images but fails on drawn numbers. I think the problem may be in how I rescale the images I draw, but I can't find a solution.
The CNN:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from keras import backend as K

# Load MNIST and scale the pixel values to [0, 1]
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape((60000, 28, 28, 1))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((10000, 28, 28, 1))
test_images = test_images.astype("float32") / 255
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

# LeNet-style architecture
model = models.Sequential()
model.add(layers.Conv2D(6, (5, 5), activation="tanh", input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D(2, 2))
model.add(layers.Conv2D(16, (5, 5), activation="tanh"))
model.add(layers.MaxPooling2D(2, 2))
model.add(layers.Conv2D(120, (4, 4), activation="tanh"))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation="tanh"))
model.add(layers.Dense(10, activation="softmax"))

# An Adam optimizer that was tried out; compile() below uses "rmsprop" instead
optimizer = keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"])
# The learning rate can only be adjusted once the model has been compiled
K.set_value(model.optimizer.learning_rate, 0.001)

model.fit(train_images, train_labels, epochs=5, batch_size=64)
test_loss, test_acc = model.evaluate(test_images, test_labels)

# Serialize the architecture to JSON and the weights to HDF5
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("model.h5")
print("Modelo Guardado!")
The script where I draw the number and try to predict it:
import sys
from tkinter import *
from PIL import Image, ImageDraw
from keras.models import model_from_json
import keras.utils as image
import numpy as np

drawing_area = ""
w = 50
h = 50
x, y = None, None
count = 0
image_count = 0
image_name = "numero"
pil_image = Image.new("1", (w, h), "white")
draw = ImageDraw.Draw(pil_image)

# Load the saved model (architecture + weights) from disk
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("model.h5")
print("Cargado modelo desde disco.")
loaded_model.compile(loss='mean_squared_error', optimizer='adam', metrics=['binary_accuracy'])

def graficar(event):
    # Draw on the Tk canvas and mirror the stroke onto the PIL image
    global drawing_area, x, y, count, draw
    newx, newy = event.x, event.y
    if x is None:
        x, y = newx, newy
        return
    count += 1
    sys.stdout.write("revent count %d" % count)
    drawing_area.create_line((x, y, newx, newy), width=5, smooth=True)
    draw.line((x, y, newx, newy), width=10)
    x, y = newx, newy

def graficar_finalizar(event):
    global x, y
    x, y = None, None

def salir(event):
    sys.exit()

def predecir(event):
    # Resize the drawing to 28x28, save it, reload it and feed it to the model
    global pil_image, image_name, image_count
    image_count += 1
    file_name = image_name + str(image_count) + ".jpg"
    pil_image = pil_image.resize((28, 28), Image.Resampling.LANCZOS)
    pil_image.save(file_name)
    img = image.load_img(file_name, color_mode="grayscale")
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = np.vstack([img])
    classes = loaded_model.predict(img)
    print(np.argmax(classes))

def limpiar(event):
    # Clear both the canvas and the backing PIL image
    global drawing_area, pil_image, draw
    drawing_area.delete("all")
    pil_image = Image.new("1", (w, h), "white")
    draw = ImageDraw.Draw(pil_image)

def main():
    global drawing_area
    win = Tk()
    win.title("Lienzo")
    drawing_area = Canvas(win, width=w, height=h, bg="white")
    drawing_area.bind("<B1-Motion>", graficar)
    drawing_area.bind("<ButtonRelease-1>", graficar_finalizar)
    drawing_area.pack()
    b1 = Button(win, text="Predecir", bg="white")
    b1.pack()
    b1.bind("<Button-1>", predecir)
    b2 = Button(win, text="Limpiar", bg="white")
    b2.pack()
    b2.bind("<Button-1>", limpiar)
    b3 = Button(win, text="Cerrar", bg="white")
    b3.pack()
    b3.bind("<Button-1>", salir)
    win.mainloop()

if __name__ == "__main__":
    main()
I have tried changing the canvas dimensions and the CNN learning rate, but neither helped.
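One quick way to check the rescaling suspicion is to put the resized drawing next to a real MNIST digit and compare them. A minimal diagnostic sketch, assuming matplotlib is installed and that the script above has already saved a numero1.jpg (the file name it produces on the first prediction):

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow.keras.datasets import mnist

# One real MNIST digit for reference: black background, white digit, values 0-255
(train_images, _), _ = mnist.load_data()
mnist_digit = train_images[0]

# The 28x28 drawing saved by the script ("numero1.jpg" is an assumed file name)
drawn = np.array(Image.open("numero1.jpg").convert("L"))

print("MNIST min/max:", mnist_digit.min(), mnist_digit.max())
print("Drawn min/max:", drawn.min(), drawn.max())

# Side by side: the drawn digit is black-on-white, i.e. inverted relative to MNIST
fig, axes = plt.subplots(1, 2)
axes[0].imshow(mnist_digit, cmap="gray")
axes[0].set_title("MNIST sample")
axes[1].imshow(drawn, cmap="gray")
axes[1].set_title("Drawn digit")
plt.show()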
Answer 1
Score: 1
You need to make two minor changes in your code.
Firstly, have a look at the images the model was trained on and try to recreate images that resemble them. The images in the mnist dataset have a black background with the digit drawn in white.
The image drawn in your applet has a white background with the digit in black, so you need to invert it by adding the line img = 255-img inside your predecir function. Here's an example:
def predecir(event):
    global pil_image, image_name, image_count
    image_count += 1
    file_name = image_name + str(image_count) + ".jpg"
    pil_image = pil_image.resize((28, 28), Image.Resampling.LANCZOS)
    pil_image.save(file_name)
    img = image.load_img(file_name, color_mode="grayscale")
    img = image.img_to_array(img)
    img = 255 - img  # invert: white background -> black background, like MNIST
    img = np.expand_dims(img, axis=0)
    img = np.vstack([img])
    classes = loaded_model.predict(img)
    print(np.argmax(classes))
Secondly, you may also have noticed that the digits drawn in your applet look somewhat thicker than the ones in the dataset. You can try reducing the width of the drawing_area.create_line call inside the graficar function from 5 to, say, 1 or 2. Here:
def graficar(event):
    global drawing_area, x, y, count, draw
    newx, newy = event.x, event.y
    if x is None:
        x, y = newx, newy
        return
    count += 1
    sys.stdout.write("revent count %d" % count)
    drawing_area.create_line((x, y, newx, newy), width=1, smooth=True)
    draw.line((x, y, newx, newy), width=10)
    x, y = newx, newy
After incorporating these changes, your code works as expected. As an example, below I've shown it for the number 7 (please pardon my drawing skills).
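As an additional sanity check, the reloaded model should still classify a real MNIST test digit correctly when that digit is preprocessed the same way as the training data. A minimal sketch, assuming loaded_model from the drawing script is already in scope:

import numpy as np
from tensorflow.keras.datasets import mnist

# Take one real MNIST test digit and scale it to [0, 1] like the training data
(_, _), (test_images, test_labels) = mnist.load_data()
sample = test_images[0].reshape(1, 28, 28, 1).astype("float32") / 255

# The reloaded model should predict the known label
pred = loaded_model.predict(sample)
print("predicted:", np.argmax(pred), "expected:", test_labels[0])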