HOG手写数字识别不起作用

如何解决HOG手写数字识别不起作用

我正在阅读一本 OpenCV 书中关于手写数字识别的一章。尽管我读得很仔细，也认为所有步骤都做对了，但运行时还是出现了错误 Expected 2D array, got 1D array instead。我尝试用 Google 搜索答案，似乎很多人都遇到过非常相似的问题，但都没有得到真正的答案。

有谁能解释一下为什么这个 feature.hog() 方法没有返回二维数组吗？我读了一些文档，显然它默认返回一个展平的一维数组，所以我不明白为什么 model.predict() 方法会抱怨说它期望的是二维数组。另外，我读的这本书应该是 2015 年出版的，所以也许是后来有什么变化？

这是我要运行的文件：

classify.py

model.predict()

这是为此编写的自定义 HOG 模块：

hog.py

# -*- coding: utf-8 -*-
"""
Created on Tue Nov  3 13:01:39 2020

@author: User
"""


from __future__ import print_function
from sklearn.externals import joblib
from pyimagesearch.hog import HOG
from pyimagesearch import dataset
import argparse
import mahotas
import cv2

# Digit classification script: loads a trained model, finds digit-sized
# contours in an input image, extracts a HOG descriptor for each one and asks
# the model to predict the digit.
ap = argparse.ArgumentParser()
ap.add_argument('-m','--model',required=True,help='Path to model')
ap.add_argument('-i','--image',help='Path to image')
args=vars(ap.parse_args())

# Load the previously trained classifier from the path given on the command line.
model = joblib.load(args['model'])

# NOTE(review): the descriptor settings (and the image size fed to describe())
# must match the ones used when the model was trained, otherwise the feature
# vector length will not match the model -- confirm against the training script.
hog = HOG(orientations=18,pixelsPerCell=(10,10),cellsPerBlock=(1,1),normalize=True)

image = cv2.imread(args["image"])
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)

# Blur, detect edges, then find the external contours of the edge map.
blurred = cv2.GaussianBlur(gray,(5,5),0)
edged = cv2.Canny(blurred,30,150)
# NOTE(review): three-value unpacking presumably targets OpenCV 3.x; other
# OpenCV versions return two values from findContours -- confirm the version.
(_,cnts,_) = cv2.findContours(edged.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
# Pair each contour with the x coordinate of its bounding box and sort by it,
# so contours are processed left to right.
cnts = sorted([(c,cv2.boundingRect(c)[0]) for c in cnts],key=lambda x: x[1])

for (c,_) in cnts:
    (x,y,w,h) = cv2.boundingRect(c)
    
    # Only process regions at least 7 pixels wide and 20 pixels tall.
    if w >= 7 and h>= 20:
        roi = gray[y:y+h,x:x+w]
        thresh = roi.copy()
        # Otsu threshold: pixels above T are set to 255 and the result is
        # inverted, so those pixels end up as 0.
        T = mahotas.thresholding.otsu(roi)
        thresh[thresh > T] = 255
        thresh = cv2.bitwise_not(thresh)
        
        # Deskew the region and place it on a 72x72 canvas.
        thresh = dataset.deskew(thresh,72)
        thresh = dataset.center_extent(thresh,(72,72))
        
        cv2.imshow("thresh",thresh)
        
        hist = hog.describe(thresh)
        # This is where it errors ("Expected 2D array, got 1D array instead"):
        # hist is passed as a single 1-D feature vector, while predict()
        # expects a 2-D array; the answer further down this page resolves it
        # with hist.reshape(1,-1).
        digit = model.predict(hist)[0] #this is where it errors
        print("I think that number is: {}".format(digit))
        
        cv2.rectangle(image,(x,y),(x+w,y+h),(0,255,0),1)
        # NOTE(review): the last two positional arguments are the font scale
        # (1.2) and the colour (2); a colour tuple may have been intended --
        # confirm.
        cv2.putText(image,str(digit),(x-10,y-10),cv2.FONT_HERSHEY_SIMPLEX,1.2,2)
        cv2.imshow("image",image)
        # Block until a key is pressed before moving on to the next contour.
        cv2.waitKey(0)

这是用来生成“训练模型”的脚本：

train.py

# -*- coding: utf-8 -*-
"""
Created on Tue Nov  3 11:22:38 2020

@author: User
"""

from skimage import feature

class HOG:
    """Store Histogram of Oriented Gradients settings and apply them to images.

    The constructor only records the configuration; ``describe`` forwards it
    to ``skimage.feature.hog``.
    """

    def __init__(self,orientations=9,pixelsPerCell=(14,14),normalize=False,cellsPerBlock=(1,1)):
        """Record the descriptor configuration.

        Args:
            orientations: Number of gradient orientation bins.
            pixelsPerCell: ``(width, height)`` of one cell, in pixels.
            normalize: Forwarded to ``feature.hog`` as ``transform_sqrt``.
            cellsPerBlock: Number of cells per normalisation block. It is
                appended as the last parameter so the positional order of the
                existing parameters is unchanged; callers pass it by keyword.
        """
        self.orientations = orientations
        self.pixelsPerCell = pixelsPerCell
        # Previously this read an undefined name, so every construction of
        # HOG raised NameError.
        self.cellsPerBlock = cellsPerBlock
        self.normalize = normalize

    def describe(self,image):
        """Compute the HOG descriptor of ``image`` with the stored settings.

        Notes on scikit-image versions (kept from the original docstring):
        in scikit-image 0.12 the ``normalise`` parameter was renamed to
        ``transform_sqrt`` (same operation, different name); on older
        versions change ``transform_sqrt`` back to ``normalise``. In
        scikit-image 0.15 the default ``block_norm`` changed from ``"L1"``
        to ``"L2-Hys"``, so ``block_norm="L1"`` is passed explicitly to keep
        the features stable across version updates.

        Args:
            image: Image to describe.

        Returns:
            Whatever ``feature.hog`` returns for these settings. Per the
            scikit-image documentation this is a flat 1-D feature vector by
            default, so a single descriptor needs ``hist.reshape(1,-1)``
            before being passed to a scikit-learn ``predict`` call.
        """
        hist = feature.hog(
            image,
            orientations=self.orientations,
            pixels_per_cell=self.pixelsPerCell,
            cells_per_block=self.cellsPerBlock,
            transform_sqrt=self.normalize,
            block_norm="L1",
        )
        return hist

dataset.py

# -*- coding: utf-8 -*-
"""
Created on Tue Nov  3 11:57:26 2020

@author: User
"""

from sklearn.externals import joblib
from sklearn.svm import LinearSVC
from pyimagesearch.hog import HOG
from pyimagesearch import dataset
import argparse

# Training script: loads the digit dataset, extracts a HOG descriptor for
# every image, fits a linear SVM on the descriptors and stores the model.
ap = argparse.ArgumentParser()
ap.add_argument('-d','--dataset',required=True,help='Path to dataset')
# The long option name is needed: with only '-m', argparse stores the value
# under the key 'm' and args['model'] below raises KeyError.
ap.add_argument('-m','--model',required=True,help='path to where model will be stored')
args=vars(ap.parse_args())

(digits,target) = dataset.load_digits(args['dataset'])

# NOTE(review): whatever script later calls model.predict() must extract its
# features with the same HOG settings and image size used here, otherwise the
# feature-vector length will not match the trained model.
hog = HOG(orientations=9,normalize=True)

data = []
for image in digits:
    # Deskew each digit and place it on a 20x20 canvas before describing it.
    image = dataset.deskew(image,20)
    image = dataset.center_extent(image,(20,20))

    data.append(hog.describe(image))

# Fixed random_state keeps the fitted model reproducible between runs.
model = LinearSVC(random_state=42)
model.fit(data,target)

joblib.dump(model,args['model'])

# If needed, this is the custom dataset module:
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 11:35:04 2020

@author: User
"""
from . import imutils
import numpy as np
import mahotas
import cv2


def load_digits(datasetPath):
    """Load a comma-separated digit dataset.

    The first column of every row is the label; the remaining columns are
    reshaped into one 28x28 image per row.

    Returns:
        A ``(data, target)`` tuple: the image array of shape ``(rows, 28, 28)``
        and the label column.
    """
    data = np.genfromtxt(datasetPath,delimiter=',',dtype='uint8')
    target = data[:,0]
    data = data[:,1:].reshape(data.shape[0],28,28)
    return (data,target)


def deskew(image,width):
    """Shear ``image`` to remove its skew, then resize it to ``width``.

    The skew is estimated from the image moments as ``mu11 / mu02`` and undone
    with an affine warp.
    """
    (h,w) = image.shape[:2]
    moments = cv2.moments(image)

    # With (almost) no second-order vertical moment the skew is undefined and
    # the division below would raise ZeroDivisionError; skip the shear and
    # only resize in that case.
    if abs(moments['mu02']) < 1e-2:
        return imutils.resize(image,width=width)

    skew = moments['mu11'] / moments['mu02']
    M = np.float32([
        [1,skew,-0.5 * w * skew],
        [0,1,0]])
    image = cv2.warpAffine(image,M,(w,h),flags = cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR)
    image = imutils.resize(image,width=width)
    return image


def center_extent(image,size):
    """Place ``image`` on a ``size`` canvas, centred on its centre of mass.

    Args:
        image: Single-channel image.
        size: ``(width, height)`` of the output canvas.

    Returns:
        A ``uint8`` canvas of the requested size containing the resized image.
    """
    (eW,eH) = size

    # Resize along the larger dimension so the image fits inside the canvas.
    if image.shape[1] > image.shape[0]:
        image = imutils.resize(image,width=eW)
    else:
        image = imutils.resize(image,height=eH)

    # Paste the resized image into the middle of an empty canvas.
    extent = np.zeros((eH,eW),dtype = 'uint8')
    offsetX = (eW - image.shape[1]) // 2
    offsetY = (eH - image.shape[0]) // 2
    extent[offsetY:offsetY + image.shape[0],offsetX:offsetX + image.shape[1]] = image

    # Translate the canvas so the centre of mass lands on the canvas centre.
    CM = mahotas.center_of_mass(extent)
    (cY,cX) = np.round(CM).astype('int32')
    (dX,dY) = ((size[0] // 2) - cX,(size[1] // 2) - cY)
    # 2x3 translation matrix; the garbled original was not valid Python and
    # did not pass the matrix to warpAffine.
    M = np.float32([[1,0,dX],[0,1,dY]])
    extent = cv2.warpAffine(extent,M,size)
    return extent

imutils.py

imutils

我使用的数据可以在这里找到（train.csv 文件），并通过下面这个脚本将其缩减为 5000 行：

# -*- coding: utf-8 -*-
"""
Created on Tue Sep 29 16:27:16 2020

@author: User
"""

import numpy as np
import cv2

def translate(image,x,y):
    """Shift ``image`` by ``x`` pixels horizontally and ``y`` pixels vertically.

    The output has the same width and height as the input.
    """
    # 2x3 affine translation matrix; the garbled original was not valid
    # Python and did not pass the matrix to warpAffine.
    M = np.float32([[1,0,x],[0,1,y]])
    shifted = cv2.warpAffine(image,M,(image.shape[1],image.shape[0]))
    return shifted

def rotate(image,angle,center=None,scale=1.0):
    """Rotate ``image`` by ``angle`` degrees around ``center``.

    Args:
        image: Image to rotate.
        angle: Rotation angle in degrees, passed to ``cv2.getRotationMatrix2D``.
        center: ``(x, y)`` rotation centre; defaults to the image centre.
        scale: Scale factor applied together with the rotation.

    Returns:
        The rotated image, with the same width and height as the input.
    """
    (h,w) = image.shape[:2]
    # Explicit None check so only an omitted centre triggers the default.
    if center is None:
        center = (w // 2,h // 2)
    # The garbled original dropped ``angle`` here and the matrix/size below.
    M = cv2.getRotationMatrix2D(center,angle,scale)
    rotated = cv2.warpAffine(image,M,(w,h))
    return rotated

def resize(image,width=None,height=None,inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    If neither ``width`` nor ``height`` is given the image is returned
    unchanged. If ``width`` is given it takes precedence and ``height`` is
    ignored; otherwise the image is scaled to ``height``.
    """
    src_h, src_w = image.shape[:2]

    if width is None:
        if height is None:
            # Nothing requested: hand the input back untouched.
            return image
        scale = height / float(src_h)
        target = (int(src_w * scale), height)
    else:
        scale = width / float(src_w)
        target = (width, int(src_h * scale))

    return cv2.resize(image, target, interpolation=inter)

解决方法

这个问题我其实早就想明白了，只是现在才有机会发帖，所以想分享一下我的发现。

我认为当我试图“重塑”数组时，我实际上是在重塑错误的数组，这就是为什么它一直给我一个错误。

所以我想将我拥有的一维数组转换为二维数组，我选择了这一行：digit = model.predict(hist)[0]

并将其更改为：digit = model.predict(hist.reshape(1,-1))[0]

HOG手写数字识别不起作用

如何解决HOG手写数字识别不起作用

解决方法

相关推荐