My CNN does not correctly predict images that are not from the dataset
I want my CNN to predict numbers that I draw with the mouse. The CNN works fine with the dataset and test images but fails on drawn numbers. I think the problem may be in how I rescale the images I draw, but I can't find a solution.
The CNN:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from keras import backend as K

# Load MNIST and scale the pixel values to [0, 1]
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape((60000, 28, 28, 1))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((10000, 28, 28, 1))
test_images = test_images.astype("float32") / 255
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

# LeNet-style architecture
model = models.Sequential()
model.add(layers.Conv2D(6, (5, 5), activation="tanh", input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D(2, 2))
model.add(layers.Conv2D(16, (5, 5), activation="tanh"))
model.add(layers.MaxPooling2D(2, 2))
model.add(layers.Conv2D(120, (4, 4), activation="tanh"))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation="tanh"))
model.add(layers.Dense(10, activation="softmax"))

# An Adam optimizer that was tried out; compile() below uses "rmsprop" instead
optimizer = keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"])
# The learning rate can only be adjusted once the model has been compiled
K.set_value(model.optimizer.learning_rate, 0.001)

model.fit(train_images, train_labels, epochs=5, batch_size=64)
test_loss, test_acc = model.evaluate(test_images, test_labels)

# Serialize the architecture to JSON and the weights to HDF5
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("model.h5")
print("Modelo Guardado!")
The script where I draw the number and try to predict it:
import sys
from tkinter import *
from PIL import Image, ImageDraw
from keras.models import model_from_json
import keras.utils as image
import numpy as np

drawing_area = ""
w = 50
h = 50
x, y = None, None
count = 0
image_count = 0
image_name = "numero"
pil_image = Image.new("1", (w, h), "white")
draw = ImageDraw.Draw(pil_image)

# Load the saved model (architecture + weights) from disk
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("model.h5")
print("Cargado modelo desde disco.")
loaded_model.compile(loss='mean_squared_error', optimizer='adam', metrics=['binary_accuracy'])

def graficar(event):
    # Draw on the Tk canvas and mirror the stroke onto the PIL image
    global drawing_area, x, y, count, draw
    newx, newy = event.x, event.y
    if x is None:
        x, y = newx, newy
        return
    count += 1
    sys.stdout.write("revent count %d" % count)
    drawing_area.create_line((x, y, newx, newy), width=5, smooth=True)
    draw.line((x, y, newx, newy), width=10)
    x, y = newx, newy

def graficar_finalizar(event):
    global x, y
    x, y = None, None

def salir(event):
    sys.exit()

def predecir(event):
    # Resize the drawing to 28x28, save it, reload it and feed it to the model
    global pil_image, image_name, image_count
    image_count += 1
    file_name = image_name + str(image_count) + ".jpg"
    pil_image = pil_image.resize((28, 28), Image.Resampling.LANCZOS)
    pil_image.save(file_name)
    img = image.load_img(file_name, color_mode="grayscale")
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = np.vstack([img])
    classes = loaded_model.predict(img)
    print(np.argmax(classes))

def limpiar(event):
    # Clear both the canvas and the backing PIL image
    global drawing_area, pil_image, draw
    drawing_area.delete("all")
    pil_image = Image.new("1", (w, h), "white")
    draw = ImageDraw.Draw(pil_image)

def main():
    global drawing_area
    win = Tk()
    win.title("Lienzo")
    drawing_area = Canvas(win, width=w, height=h, bg="white")
    drawing_area.bind("<B1-Motion>", graficar)
    drawing_area.bind("<ButtonRelease-1>", graficar_finalizar)
    drawing_area.pack()
    b1 = Button(win, text="Predecir", bg="white")
    b1.pack()
    b1.bind("<Button-1>", predecir)
    b2 = Button(win, text="Limpiar", bg="white")
    b2.pack()
    b2.bind("<Button-1>", limpiar)
    b3 = Button(win, text="Cerrar", bg="white")
    b3.pack()
    b3.bind("<Button-1>", salir)
    win.mainloop()

if __name__ == "__main__":
    main()
I have tried changing the canvas dimensions and the CNN learning rate, but neither helped.
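One quick way to check the rescaling suspicion is to put the resized drawing next to a real MNIST digit and compare them. A minimal diagnostic sketch, assuming matplotlib is installed and that the script above has already saved a numero1.jpg (the file name it produces on the first prediction):

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow.keras.datasets import mnist

# One real MNIST digit for reference: black background, white digit, values 0-255
(train_images, _), _ = mnist.load_data()
mnist_digit = train_images[0]

# The 28x28 drawing saved by the script ("numero1.jpg" is an assumed file name)
drawn = np.array(Image.open("numero1.jpg").convert("L"))

print("MNIST min/max:", mnist_digit.min(), mnist_digit.max())
print("Drawn min/max:", drawn.min(), drawn.max())

# Side by side: the drawn digit is black-on-white, i.e. inverted relative to MNIST
fig, axes = plt.subplots(1, 2)
axes[0].imshow(mnist_digit, cmap="gray")
axes[0].set_title("MNIST sample")
axes[1].imshow(drawn, cmap="gray")
axes[1].set_title("Drawn digit")
plt.show()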
Answer 1
Score: 1
You need to make two minor changes in your code.
Firstly, have a look at the images the model was trained on and try to recreate images that resemble them. The images in the mnist dataset have a black background with the digit drawn in white.
The image drawn in your applet has a white background with the digit in black, so you need to invert it by adding the line img = 255-img inside your predecir function. Here's an example:
def predecir(event):
    global pil_image, image_name, image_count
    image_count += 1
    file_name = image_name + str(image_count) + ".jpg"
    pil_image = pil_image.resize((28, 28), Image.Resampling.LANCZOS)
    pil_image.save(file_name)
    img = image.load_img(file_name, color_mode="grayscale")
    img = image.img_to_array(img)
    img = 255 - img  # invert: white background -> black background, like MNIST
    img = np.expand_dims(img, axis=0)
    img = np.vstack([img])
    classes = loaded_model.predict(img)
    print(np.argmax(classes))
Secondly, you may also have noticed that the digits drawn in your applet look somewhat thicker than the ones in the dataset. You can try reducing the width of the drawing_area.create_line call inside the graficar function from 5 to, say, 1 or 2. Here:
def graficar(event):
    global drawing_area, x, y, count, draw
    newx, newy = event.x, event.y
    if x is None:
        x, y = newx, newy
        return
    count += 1
    sys.stdout.write("revent count %d" % count)
    drawing_area.create_line((x, y, newx, newy), width=1, smooth=True)
    draw.line((x, y, newx, newy), width=10)
    x, y = newx, newy
After incorporating these changes, your code works as expected. As an example, below I've shown it for the number 7 (please pardon my drawing skills).
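As an additional sanity check, the reloaded model should still classify a real MNIST test digit correctly when that digit is preprocessed the same way as the training data. A minimal sketch, assuming loaded_model from the drawing script is already in scope:

import numpy as np
from tensorflow.keras.datasets import mnist

# Take one real MNIST test digit and scale it to [0, 1] like the training data
(_, _), (test_images, test_labels) = mnist.load_data()
sample = test_images[0].reshape(1, 28, 28, 1).astype("float32") / 255

# The reloaded model should predict the known label
pred = loaded_model.predict(sample)
print("predicted:", np.argmax(pred), "expected:", test_labels[0])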