
huangapple go评论89阅读模式

How to extract text from image after applying contour in python?



"So I have applied contouring on a big image and reached the following cropped part of the image:



But now, without using any machine learning model, how do I actually get the text in the image into a string variable? I came across template matching, but I do not understand how to proceed from here. I do have images of letters and numbers (each named after the character it shows) stored in a directory, but how do I match each of them and get the text as a string? I don't want to use any ML model or library like pyTesseract.

I would appreciate any help.


The code I have tried for template matching.

def templateMatch(image):
    """Return the name of the stored character template that best matches *image*.

    Every file in ``location/characters-images`` is loaded, converted to
    grayscale, resized to the shape of *image* and compared with
    ``cv2.TM_SQDIFF_NORMED``.  The file name (without extension) of the
    template with the lowest score is returned.

    Args:
        image: Crop of a single character.  Expected to be single-channel,
            because the templates are converted to grayscale before the
            comparison.

    Returns:
        The best-matching template's file name without its extension
        (``"A"`` for ``A.bmp``), or ``None`` when the crop is empty or no
        template could be read.
    """
    path = "location"
    template_dir = os.path.join(path, "characters-images")

    image = image.astype(np.uint8)
    if image.size == 0:
        return None
    height, width = image.shape[:2]

    best_name = None
    best_score = float("inf")

    for image_path in os.listdir(template_dir):
        template = cv2.imread(os.path.join(template_dir, image_path))
        if template is None:
            # cv2.imread returns None for anything it cannot decode.
            continue
        template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

        # Bring the template to the crop's size so the comparison yields one
        # score per template and the template is never larger than the image.
        template = cv2.resize(template, (width, height))

        # matchTemplate takes the searched image first and the template second.
        res = cv2.matchTemplate(image, template, cv2.TM_SQDIFF_NORMED)
        # For the squared-difference methods the minimum is the best match.
        score = cv2.minMaxLoc(res)[0]

        if score < best_score:
            best_score = score
            best_name = image_path

    if best_name is None:
        return None
    # NOTE(review): the templates must use the same polarity (white glyph on
    # black or the reverse) as the crops passed in -- confirm against the data.
    return os.path.splitext(best_name)[0]

def match(image):
    """Read the characters found in *image* from left to right.

    The image is converted to grayscale and binarised with an inverted Otsu
    threshold.  Contours are sorted left to right, and every contour whose
    area lies strictly between 200 and 800 is cropped from the thresholded
    image and passed to ``templateMatch``.

    Args:
        image: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        The concatenation of all recognised characters; an empty string when
        nothing was recognised.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 values in some OpenCV versions and 3 in others.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    if len(cnts) == 0:
        # Nothing to sort or match.
        return ""
    # `contours` is presumably imutils.contours -- confirm against the imports.
    (cnts, _) = contours.sort_contours(cnts, method="left-to-right")

    characters = []
    for con in cnts:
        area = cv2.contourArea(con)
        if not 200 < area < 800:
            continue

        x, y, w, h = cv2.boundingRect(con)
        character = templateMatch(thresh[y:y+h, x:x+w])

        if character is not None:
            characters.append(character)

    return "".join(characters)



So I have applied contouring on a big image and reached the following cropped part of the image:



But now, without using any machine learning model, how do I actually get the text in the image into a string variable? I came across template matching, but I do not understand how to proceed from here. I do have images of letters and numbers (each named after the character it shows) stored in a directory, but how do I match each of them and get the text as a string? I don't want to use any ML model or library like pyTesseract.

I would appreciate any help.


The code I have tried for template matching.

def templateMatch(image):
    """Return the name of the stored character template that best matches *image*.

    Every file in ``location/characters-images`` is loaded, converted to
    grayscale, resized to the shape of *image* and compared with
    ``cv2.TM_SQDIFF_NORMED``.  The file name (without extension) of the
    template with the lowest score is returned.

    Args:
        image: Crop of a single character.  Expected to be single-channel,
            because the templates are converted to grayscale before the
            comparison.

    Returns:
        The best-matching template's file name without its extension
        (``"A"`` for ``A.bmp``), or ``None`` when the crop is empty or no
        template could be read.
    """
    path = "location"
    template_dir = os.path.join(path, "characters-images")

    image = image.astype(np.uint8)
    if image.size == 0:
        return None
    height, width = image.shape[:2]

    best_name = None
    best_score = float("inf")

    for image_path in os.listdir(template_dir):
        template = cv2.imread(os.path.join(template_dir, image_path))
        if template is None:
            # cv2.imread returns None for anything it cannot decode.
            continue
        template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

        # Bring the template to the crop's size so the comparison yields one
        # score per template and the template is never larger than the image.
        template = cv2.resize(template, (width, height))

        # matchTemplate takes the searched image first and the template second.
        res = cv2.matchTemplate(image, template, cv2.TM_SQDIFF_NORMED)
        # For the squared-difference methods the minimum is the best match.
        score = cv2.minMaxLoc(res)[0]

        if score < best_score:
            best_score = score
            best_name = image_path

    if best_name is None:
        return None
    # NOTE(review): the templates must use the same polarity (white glyph on
    # black or the reverse) as the crops passed in -- confirm against the data.
    return os.path.splitext(best_name)[0]

def match(image):
    """Read the characters found in *image* from left to right.

    The image is converted to grayscale and binarised with an inverted Otsu
    threshold.  Contours are sorted left to right, and every contour whose
    area lies strictly between 200 and 800 is cropped from the thresholded
    image and passed to ``templateMatch``.

    Args:
        image: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        The concatenation of all recognised characters; an empty string when
        nothing was recognised.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 values in some OpenCV versions and 3 in others.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    if len(cnts) == 0:
        # Nothing to sort or match.
        return ""
    # `contours` is presumably imutils.contours -- confirm against the imports.
    (cnts, _) = contours.sort_contours(cnts, method="left-to-right")

    characters = []
    for con in cnts:
        area = cv2.contourArea(con)
        if not 200 < area < 800:
            continue

        x, y, w, h = cv2.boundingRect(con)
        character = templateMatch(thresh[y:y+h, x:x+w])

        if character is not None:
            characters.append(character)

    return "".join(characters)


得分: 3


模板匹配用于在给定模板的情况下定位图像中的对象,而不是从图像中提取文本。将模板与图像中对象的位置进行匹配将无法获取文本作为字符串。有关如何应用动态比例变化模板匹配的示例,请参考如何隔离轮廓内的所有内容、缩放它并测试与图像的相似性?Python OpenCV线检测以检测图像中的X符号。我不明白为什么不想使用OCR库。如果要将文本提取为字符串变量,您应该使用某种深度/机器学习技术。PyTesseract可能是最简单的方法。这里是使用PyTesseract的解决方案:





我们使用--psm 6配置选项告诉Pytesseract假设一块统一的文本。查看这里以获取更多配置选项。Pytesseract的结果:

XS NB 23


import cv2
import numpy as np
import pytesseract

# Point pytesseract at the Tesseract executable (Windows install path).
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, create mask, grayscale, Otsu's threshold
image = cv2.imread('1.png')
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("could not read image '1.png'")
mask = np.zeros(image.shape, dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# Filter for ROI using contour area and aspect ratio
cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 values in some OpenCV versions and 3 in others.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.05 * peri, True)
    x, y, w, h = cv2.boundingRect(approx)
    aspect_ratio = w / float(h)
    if area > 2000 and aspect_ratio > .5:
        # Copy the ROI from the source image onto the blank mask.
        mask[y:y+h, x:x+w] = image[y:y+h, x:x+w]

# Perform OCR with Pytesseract
data = pytesseract.image_to_string(mask, lang='eng', config='--psm 6')
print(data)

cv2.imshow('thresh', thresh)
cv2.imshow('mask', mask)
# Without waitKey the HighGUI windows are never drawn.
cv2.waitKey()


> How do I actually get the image to a text variable? I came to know about template matching but I do not understand how do I proceed from here.

Template matching is used to locate an object in an image given a template, ***not*** to extract text from an image. Matching a template with the position of the object in the image will not help to get the text as a string. For examples on how to apply dynamic scale variant template matching, take a look at [how to isolate everything inside of a contour, scale it, and test the similarity to an image?](https://stackoverflow.com/questions/59401389/how-to-isolate-everything-inside-of-a-contour-scale-it-and-test-the-similarity/59402625) and [Python OpenCV line detection to detect X symbol in image](https://stackoverflow.com/questions/58837175/python-opencv-line-detection-to-detect-x-symbol-in-image). I don't understand why you wouldn't want to use an OCR library. If you want to extract text from the image as a string variable, you should use some type of deep/machine learning. PyTesseract is probably the easiest. Here's a solution using PyTesseract:


The idea is to obtain a binary image using Otsu's threshold, then perform contour area and aspect ratio filtering to extract the letter/number ROIs. From here we use Numpy slicing to crop each ROI onto a blank mask, then apply OCR using Pytesseract. Here's a visualization of each step:

Binary image

<img src="https://i.stack.imgur.com/aZ2l3.png" width="450">

Detected ROIs highlighted in green

<img src="https://i.stack.imgur.com/8BQbA.png" width="450">

Isolated ROIs on a blank mask ready for OCR

<img src="https://i.stack.imgur.com/0DXOS.png" width="450">

We use the `--psm 6` configuration option to tell Pytesseract to assume a uniform block of text. Look [here for more configuration options](https://stackoverflow.com/questions/44619077/pytesseract-ocr-multiple-config-options). Result from Pytesseract:

> XS NB 23


import cv2
import numpy as np
import pytesseract

# Point pytesseract at the Tesseract executable (Windows install path).
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, create mask, grayscale, Otsu's threshold
image = cv2.imread('1.png')
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("could not read image '1.png'")
mask = np.zeros(image.shape, dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# Filter for ROI using contour area and aspect ratio
cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 values in some OpenCV versions and 3 in others.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.05 * peri, True)
    x, y, w, h = cv2.boundingRect(approx)
    aspect_ratio = w / float(h)
    if area > 2000 and aspect_ratio > .5:
        # Copy the ROI from the source image onto the blank mask.
        mask[y:y+h, x:x+w] = image[y:y+h, x:x+w]

# Perform OCR with Pytesseract
data = pytesseract.image_to_string(mask, lang='eng', config='--psm 6')
print(data)

cv2.imshow('thresh', thresh)
cv2.imshow('mask', mask)
# Without waitKey the HighGUI windows are never drawn.
cv2.waitKey()

  [1]: https://i.stack.imgur.com/aZ2l3.png
  [2]: https://i.stack.imgur.com/8BQbA.png
  [3]: https://i.stack.imgur.com/0DXOS.png


# 答案2
**得分**: 0



An option is to consider the bounding box around each character and to compute a similarity score between the character at hand and each of those in the training set, keeping the best-scoring match. You can use SAD, SSD, normalized grayscale correlation, or just the Hamming distance if you work on a binary image; the best score is the largest for a correlation and the smallest for a distance such as SAD, SSD or Hamming.

You will need to develop a suitable strategy to ensure that the tested characters and the learnt characters have compatible sizes and are properly overlaid.


  • 本文由 发表于 2020年1月6日 19:58:08
  • 转载请务必保留本文链接:https://go.coder-hub.com/59611711.html



:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:
