YOLO模型未产生预期结果。

huangapple go评论96阅读模式
英文:

YOLO model isn't giving expected result

问题

I used a trained model that I found on GitHub.

model : SVHN

As given in readme.md,

my main.py:

  1. import torch
  2. import PIL.Image as Image
  3. import PIL.ImageEnhance as ImageEnhance
  4. import cv2
  5. import matplotlib.pyplot as plt
  6. import matplotlib.image as img
  7. import numpy as np
  8. from math import *
  9. model = torch.hub.load('icns-distributed-cloud/yolov5-svhn', 'svhn').fuse().eval()
  10. model = model.autoshape()
  11. img1 = Image.open(r"C:\Users\user1\Documents\third.jpg")
  12. # Prediction
  13. h, w = img1.size
  14. pred = []
  15. filter1 = ImageEnhance.Color(img1)
  16. img1 = filter1.enhance(0.0)
  17. img1 = img1.resize((h*10, w*10))
  18. img1.show()
  19. def predict(i, j, h, w):
  20. cropped_img = img1.crop((i, j, i+h, j+w))
  21. f = model(cropped_img)
  22. if f[0] != None and f[0][0][4] <= 0.4:
  23. predict(i, j-w, h, w)
  24. predict(i, j, h, w+w)
  25. predict(i-h, j, h, w)
  26. predict(i, j, h*2, w)
  27. if f[0] != None and f[0][0][4] >= 0.4:
  28. img2 = np.array(cropped_img)
  29. cv2.rectangle(img2, (round(f[0][0][0].item()), round(f[0][0][1].item())),
  30. (round(f[0][0][2].item()), round(f[0][0][3].item())), (255, 100, 0), 6)
  31. plt.imshow(img2)
  32. plt.title('class : '+str(f[0][0][5].item())+" \n confidence : "+str(f[0][0][4].item()))
  33. plt.show()
  34. return f
  35. for i in range(0, h*10, h):
  36. for j in range(0, w*10, w):
  37. f = predict(i, j, h, w)
  38. if f != None:
  39. pred.append(f)
  40. for i in pred:
  41. if i[0] != None:
  42. for x1, y1, x2, y2, conf, clas in i[0]:
  43. if conf > 0.3:
  44. print('box: ({}, {}), ({}, {})'.format(x1, y1, x2, y2))
  45. print('confidence : {}'.format(conf))
  46. print('class: {}'.format(int(clas)))
  47. '''
  48. print((int(x1/10), int(y1/10)), (int(x2/10), int(y2/10)))
  49. cv2.rectangle(img2, (int(x1/10), int(y1/10)), (int(x2/10), int(y2/10)), (255, 0, 0), 3)
  50. plt.imshow(img2)
  51. plt.show()
  52. '''

The image which I used is :

YOLO模型未产生预期结果。

Hence, the output I expect is 4, 8, 4, 6.

Initially it detected only the 4, because the image isn't clear. To work around this, I converted the RGB image to grayscale, resized the image, cropped it, and then resized it again.

but now, my output is :

  1. Using cache found in C:\Users\Sanmitha/.cache\torch\hub\icns-distributed-cloud_yolov5-svhn_master
  2. from n params module arguments
  3. 0 -1 1 3520 models.common.Focus [3, 32, 3]
  4. 1 -1 1 18560 models.common.Conv [32, 64, 3, 2]
  5. 2 -1 1 19904 models.common.BottleneckCSP [64, 64, 1]
  6. 3 -1 1 73984 models.common.Conv [64, 128, 3, 2]
  7. 4 -1 1 161152 models.common.BottleneckCSP [128, 128, 3]
  8. 5 -1 1 295424 models.common.Conv [128, 256, 3, 2]
  9. 6 -1 1 641792 models.common.BottleneckCSP [256, 256, 3]
  10. 7 -1 1 1180672 models.common.Conv [256, 512, 3, 2]
  11. 8 -1 1 656896 models.common.SPP [512, 512, [5, 9, 13]]
  12. 9 -1 1 1248768 models.common.BottleneckCSP [512, 512, 1, False]
  13. 10 -1 1 131584 models.common.Conv [512, 256, 1, 1]
  14. 11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
  15. 12 [-1, 6] 1 0 models.common.Concat [1]
  16. 13 -1 1 378624 models.common.BottleneckCSP [512, 256, 1, False]
  17. 14 -1 1 33024 models.common.Conv [256, 128, 1, 1]
  18. 15 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
  19. 16 [-1, 4] 1 0 models.common.Concat [1]
  20. 17 -1 1 95104 models.common.BottleneckCSP [256, 128, 1, False]
  21. 18 -1 1 147712 models.common.Conv [128, 128, 3, 2]
  22. 19 [-1, 14] 1 0 models.common.Concat [1]
  23. 20 -1 1 313088 models.common.BottleneckCSP [256, 256, 1, False]
  24. 21 -1 1 590336 models.common.Conv [256, 256, 3, 2]
  25. 22 [-1, 10] 1 0 models.common.Concat [1]
  26. 23 -1 1 1248768 models.common.BottleneckCSP [512, 512, 1, False]
  27. 24 [17
  28. <details>
  29. <summary>英文:</summary>
  30. I used a trained model that I found on GitHub.
  31. model : [SVHN](https://github.com/icns-distributed-cloud/YOLOv5-SVHN)
  32. As given in readme.md,
  33. my main.py:

import torch
import PIL.Image as Image
import PIL.ImageEnhance as ImageEnhance
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as img
import numpy as np
from math import *

model = torch.hub.load('icns-distributed-cloud/yolov5-svhn', 'svhn').fuse().eval()
model = model.autoshape()
img1= Image.open(r"C:\Users\user1\Documents\third.jpg")

# Prediction

h,w=img1.size
pred=[]
filter1=ImageEnhance.Color(img1)
img1=filter1.enhance(0.0)
img1=img1.resize((h*10,w*10))
img1.show()

def predict(i,j,h,w):
cropped_img = img1.crop((i, j, i+h, j+w))
f=model(cropped_img)
if f[0]!=None and f[0][0][4]<=0.4:
predict(i,j-w,h,w)
predict(i,j,h,w+w)
predict(i-h,j,h,w)
predict(i,j,h*2,w)
if f[0]!=None and f[0][0][4]>=0.4:
img2=np.array(cropped_img)
cv2.rectangle(img2,(round(f[0][0][0].item()),round(f[0][0][1].item())),(round(f[0][0][2].item()),round(f[0][0][3].item())),(255,100,0),6)
plt.imshow(img2)
plt.title('class : '+str(f[0][0][5].item())+" \n confidence : "+str(f[0][0][4].item()))
plt.show()
return f

for i in range(0,h*10,h):
for j in range(0,w*10,w):
f=predict(i,j,h,w)
if f!=None:
pred.append(f)

for i in pred:
if i[0]!=None:
for x1, y1, x2, y2, conf, clas in i[0]:
if conf>0.3:
print('box: ({}, {}), ({}, {})'.format(x1, y1, x2, y2))
print('confidence : {}'.format(conf))
print('class: {}'.format(int(clas)))
'''
print((int(x1/10),int(y1/10)),(int(x2/10),int(y2/10)))
cv2.rectangle(img2,(int(x1/10),int(y1/10)),(int(x2/10),int(y2/10)),(255,0,0),3)
plt.imshow(img2)
plt.show()
'''

  1. The image which I used is :
  2. [![third.jpg](https://i.stack.imgur.com/IDbCb.jpg)](https://i.stack.imgur.com/IDbCb.jpg)
  3. hence, obviously my expectation is 4,8,4,6.
  4. Initially it detected only the 4, because the image isn't clear. To work around this, I converted the RGB image to grayscale, resized the image, cropped it, and then resized it again.
  5. but now, my output is :

Using cache found in C:\Users\Sanmitha/.cache\torch\hub\icns-distributed-cloud_yolov5-svhn_master

  1. from n params module arguments

0 -1 1 3520 models.common.Focus [3, 32, 3]
1 -1 1 18560 models.common.Conv [32, 64, 3, 2]
2 -1 1 19904 models.common.BottleneckCSP [64, 64, 1]
3 -1 1 73984 models.common.Conv [64, 128, 3, 2]
4 -1 1 161152 models.common.BottleneckCSP [128, 128, 3]
5 -1 1 295424 models.common.Conv [128, 256, 3, 2]
6 -1 1 641792 models.common.BottleneckCSP [256, 256, 3]
7 -1 1 1180672 models.common.Conv [256, 512, 3, 2]
8 -1 1 656896 models.common.SPP [512, 512, [5, 9, 13]]
9 -1 1 1248768 models.common.BottleneckCSP [512, 512, 1, False]
10 -1 1 131584 models.common.Conv [512, 256, 1, 1]
11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
12 [-1, 6] 1 0 models.common.Concat [1]
13 -1 1 378624 models.common.BottleneckCSP [512, 256, 1, False]
14 -1 1 33024 models.common.Conv [256, 128, 1, 1]
15 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
16 [-1, 4] 1 0 models.common.Concat [1]
17 -1 1 95104 models.common.BottleneckCSP [256, 128, 1, False]
18 -1 1 147712 models.common.Conv [128, 128, 3, 2]
19 [-1, 14] 1 0 models.common.Concat [1]
20 -1 1 313088 models.common.BottleneckCSP [256, 256, 1, False]
21 -1 1 590336 models.common.Conv [256, 256, 3, 2]
22 [-1, 10] 1 0 models.common.Concat [1]
23 -1 1 1248768 models.common.BottleneckCSP [512, 512, 1, False]
24 [17, 20, 23] 1 43152 models.yolo.Detect [11, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], [128, 256, 512]]
Model Summary: 191 layers, 7.28206e+06 parameters, 7.28206e+06 gradients

Fusing layers...
Model Summary: 140 layers, 7.27349e+06 parameters, 7.27349e+06 gradients
Adding autoShape...
C:\Users\user1\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\functional.py:504: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ..\aten\src\ATen\native\TensorShape.cpp:3484.)
return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
box: (0.0, 0.19499744474887848), (137.242431640625, 128.22288513183594)
confidence : 0.30603668093681335
class: 1
box: (52.23253631591797, 0.0), (190.0, 70.11492156982422)
confidence : 0.41145217418670654
class: 2
box: (54.806610107421875, 3.3701179027557373), (190.0, 278.511474609375)
confidence : 0.4895976781845093
class: 7
box: (55.85855484008789, 0.3548659086227417), (190.0, 197.8501739501953)
confidence : 0.365851491689682
class: 8
box: (0.0, 0.0), (140.3123779296875, 216.81472778320312)
confidence : 0.3351562023162842
class: 4
box: (53.156036376953125, 0.0), (190.0, 72.2713851928711)
confidence : 0.40216246247291565
class: 7
box: (0.0, 0.0), (130.40765380859375, 213.34080505371094)
confidence : 0.3830447793006897
class: 6
box: (51.61797332763672, 0.0), (190.0, 70.91419982910156)
confidence : 0.31078973412513733
class: 7

  1. The detected images are
  2. [![figure1](https://i.stack.imgur.com/qr5AQ.png)](https://i.stack.imgur.com/qr5AQ.png)
  3. [![figure2](https://i.stack.imgur.com/pJlpd.png)](https://i.stack.imgur.com/pJlpd.png)
  4. [![figure3](https://i.stack.imgur.com/jGiY0.png)](https://i.stack.imgur.com/jGiY0.png)
  5. [![figure4](https://i.stack.imgur.com/oEOhy.png)](https://i.stack.imgur.com/oEOhy.png)
  6. [![figure5](https://i.stack.imgur.com/asRk8.png)](https://i.stack.imgur.com/asRk8.png)
  7. [![figure6](https://i.stack.imgur.com/nmi6H.png)](https://i.stack.imgur.com/nmi6H.png)
  8. [![figure7](https://i.stack.imgur.com/rhrbO.png)](https://i.stack.imgur.com/rhrbO.png)
  9. [![figure8](https://i.stack.imgur.com/gBEjo.png)](https://i.stack.imgur.com/gBEjo.png)
  10. [![figure9](https://i.stack.imgur.com/cAlWp.png)](https://i.stack.imgur.com/cAlWp.png)
  11. [![figure10](https://i.stack.imgur.com/iDhES.png)](https://i.stack.imgur.com/iDhES.png)
  12. [![figure11](https://i.stack.imgur.com/Axuqa.png)](https://i.stack.imgur.com/Axuqa.png)
  13. [![figure12](https://i.stack.imgur.com/dz5IN.png)](https://i.stack.imgur.com/dz5IN.png)
  14. [![figure13](https://i.stack.imgur.com/QDEad.png)](https://i.stack.imgur.com/QDEad.png)
  15. I don't know why this detects other things as digits with good confidence but fails to detect the actual digits with good confidence...
  16. Kindly help me
  17. </details>
  18. # 答案1
  19. **得分**: 2
  20. 你可以尝试文本识别,类似于无需训练的OCR。只需运行您的图像并获取输出,然后可能添加一个简单的模糊匹配过滤器。
  21. <details>
  22. <summary>英文:</summary>
  23. You can try out text recognition, something like OCR, which needs no training.
  24. Just run it on your images, get the output, and maybe add a simple fuzzy-match filter.
  25. [![enter image description here][1]][1]
  26. [1]: https://i.stack.imgur.com/p9FXZ.jpg
  27. </details>
  28. # 答案2
  29. **得分**: 0
  30. 首先使用CRAFT:字符区域感知文本检测来识别图像中的文本。然后将裁剪的文本图像传递给OCR,比如Tesseract,以从图像中提取文本。
  31. <details>
  32. <summary>英文:</summary>
  33. First use CRAFT: Character-Region Awareness For Text detection to identify the texts in your image. Then pass the cropped text image to OCR like Tesseract to extract the texts from image.
  34. [![enter image description here][1]][1]
  35. [![enter image description here][2]][2]
  36. [![enter image description here][3]][3]
  37. [1]: https://i.stack.imgur.com/FLGtM.jpg
  38. [2]: https://i.stack.imgur.com/XAUz7.jpg
  39. [3]: https://i.stack.imgur.com/QN7wk.jpg
  40. </details>
  41. # 答案3
  42. **得分**: 0
  43. 你可以更新这段代码,它应该适用于你:
  44. ```python
  45. import torch
  46. import numpy as np
  47. from PIL import Image, ImageDraw
  48. # 下载并加载YOLOv5 SVHN模型
  49. model = torch.hub.load('icns-distributed-cloud/yolov5-svhn', 'svhn').fuse().eval()
  50. model = model.autoshape()
  51. # 预处理输入图像
  52. def preprocess_image(image_path):
  53. image = Image.open(image_path)
  54. image = image.resize((640, 640)) # 调整大小以适应模型的输入尺寸
  55. image = torch.from_numpy(np.array(image)).float() / 255.0 # 将像素值归一化为0到1之间
  56. image = image.permute(2, 0, 1).unsqueeze(0) # 添加批处理维度并重新排列通道
  57. return image
  58. # 执行推理
  59. def recognize_numbers(image_path):
  60. image = preprocess_image(image_path)
  61. with torch.no_grad():
  62. prediction = model(image, size=640)
  63. return prediction
  64. # 在图像副本上绘制边界框
  65. def draw_box(image_path, box, output_path):
  66. image = Image.open(image_path)
  67. original_width, original_height = image.size
  68. print(f"图像大小: {original_width}x{original_height}")
  69. draw = ImageDraw.Draw(image)
  70. x1, y1, x2, y2 = box
  71. x1 = int(x1 * original_width / 640)
  72. y1 = int(y1 * original_height / 640)
  73. x2 = int(x2 * original_width / 640)
  74. y2 = int(y2 * original_height / 640)
  75. print(f"新的可绘制边界框 ({x1},{y1},{x2},{y2})")
  76. draw.rectangle((x1, y1, x2, y2), outline='green', width=3)
  77. image.save(output_path)
  78. # 提供图像路径
  79. image_path = '/tmp/IDbCb.jpg'
  80. # 调用函数识别图像中的数字
  81. prediction = recognize_numbers(image_path)
  82. # 找到置信度最高的边界框
  83. highest_confidence = 0
  84. best_box = None
  85. best_class = 0
  86. for pred in prediction[0][0]:
  87. x1, y1, x2, y2, conf, cls = pred[:6].tolist()
  88. if conf > highest_confidence:
  89. highest_confidence = conf
  90. best_box = (int(x1), int(y1), int(x2), int(y2))
  91. best_class = cls
  92. if best_box:
  93. x1, y1, x2, y2 = best_box
  94. print('边界框: ({}, {}), ({}, {})'.format(x1, y1, x2, y2))
  95. print('类别: {}'.format(int(best_class)))
  96. print('最高置信度: {}'.format(highest_confidence))
  97. else:
  98. print('未找到置信度超过阈值的边界框。')
  99. output_path = '/tmp/image-1.png' # 根据需要更改输出路径和文件名
  100. draw_box(image_path, best_box, output_path)
英文:

You can update your code like this; it should work for you:

  1. import torch
  2. import numpy as np
  3. from PIL import Image, ImageDraw
  4. # Download and load the YOLOv5 SVHN model
  5. model = torch.hub.load('icns-distributed-cloud/yolov5-svhn', 'svhn').fuse().eval()
  6. model = model.autoshape()
  7. # Preprocess the input image
  8. def preprocess_image(image_path):
  9. image = Image.open(image_path)
  10. image = image.resize((640, 640)) # Resize to the model's input size
  11. image = torch.from_numpy(np.array(image)).float() / 255.0 # Normalize pixel values between 0 and 1
  12. image = image.permute(2, 0, 1).unsqueeze(0) # Add batch dimension and rearrange channels
  13. return image
  14. # Perform inference
  15. def recognize_numbers(image_path):
  16. image = preprocess_image(image_path)
  17. with torch.no_grad():
  18. prediction = model(image, size=640)
  19. return prediction
  20. # Draw bounding box on image copy
  21. def draw_box(image_path, box, output_path):
  22. image = Image.open(image_path)
  23. original_width, original_height = image.size
  24. print(f"image size: {original_width}x{original_height}")
  25. draw = ImageDraw.Draw(image)
  26. x1, y1, x2, y2 = box
  27. x1 = int(x1 * original_width / 640)
  28. y1 = int(y1 * original_height / 640)
  29. x2 = int(x2 * original_width / 640)
  30. y2 = int(y2 * original_height / 640)
  31. print(f"new drawable box ({x1},{y1},{x2},{y2})")
  32. draw.rectangle((x1, y1, x2, y2), outline='green', width=3)
  33. image.save(output_path)
  34. # Provide the path to your image
  35. image_path = '/tmp/IDbCb.jpg'
  36. # Call the function to recognize the numbers in the image
  37. prediction = recognize_numbers(image_path)
  38. # Find the box with the highest confidence
  39. highest_confidence = 0
  40. best_box = None
  41. best_class = 0
  42. for pred in prediction[0][0]:
  43. x1, y1, x2, y2, conf, cls = pred[:6].tolist()
  44. if conf > highest_confidence:
  45. highest_confidence = conf
  46. best_box = (int(x1), int(y1), int(x2), int(y2))
  47. best_class = cls
  48. if best_box:
  49. x1, y1, x2, y2 = best_box
  50. print('Box: ({}, {}), ({}, {})'.format(x1, y1, x2, y2))
  51. print('Class: {}'.format(int(best_class)))
  52. print('Highest Confidence: {}'.format(highest_confidence))
  53. else:
  54. print('No box found with confidence above the threshold.')
  55. output_path = '/tmp/image-1.png' # Change the output path and file name as per your requirement
  56. draw_box(image_path, best_box, output_path)

huangapple
  • 本文由 发表于 2023年7月10日 19:26:06
  • 转载请务必保留本文链接:https://go.coder-hub.com/76653267.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定