1. Showing the Result

2. Implementation Steps
Basic idea
In fact, it comes down to two things: first, being able to recognize the digits, and second, being able to split the image into individual digits.

For image recognition, the usual approach is a CNN (convolutional neural network).

For image segmentation, the usual approach is the horizontal and vertical projection method.
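To make the projection idea concrete, here is a tiny illustration of my own (not from the original figure): sum the foreground pixels of each row; rows of handwriting show up as bands of non-zero sums, and the blank gaps between them are where we cut.

import numpy as np
# A toy 6x8 binary "page": two lines of "text" separated by a blank row
page = np.array([[0, 1, 1, 0, 0, 1, 1, 0],
                 [0, 1, 1, 0, 0, 1, 1, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0],
                 [1, 1, 0, 0, 1, 1, 0, 0],
                 [1, 1, 0, 0, 1, 1, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0]])
print(page.sum(axis=1))  # [4 4 0 4 4 0] -> rows 0-1 and 3-4 are text, row 2 is the gap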

2.1 Preparing the data
When it comes to a boyfriend, rather than a smooth-talking playboy, you're better off finding a quiet IT guy and shaping him into what you want with your own hands. The same goes for training data: instead of hunting for a ready-made dataset, we generate our own. There are 15 character classes:
Index:     0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
Character: 0 1 2 3 4 5 6 7 8 9 =  +  -  ×  ÷

2.1.1 Preparing the fonts

2.1.2 Generating the images
from __future__ import print_function
from PIL import Image
from PIL import ImageFont
from PIL import ImageDraw
import os
import shutil
import time

# %% The characters to generate
label_dict = {0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7',
              8: '8', 9: '9', 10: '=', 11: '+', 12: '-', 13: '×', 14: '÷'}

# One folder per character class
for value, char in label_dict.items():
    train_images_dir = "dataset" + "/" + str(value)
    if os.path.isdir(train_images_dir):
        shutil.rmtree(train_images_dir)
    os.makedirs(train_images_dir)

# %% Generate the images
def makeImage(label_dict, font_path, width=24, height=24, rotate=0):
    # Walk through the label/character pairs
    for value, char in label_dict.items():
        # Create a 24*24 image with a black background
        img = Image.new("RGB", (width, height), "black")
        draw = ImageDraw.Draw(img)
        # Load a font at 90% of the image width
        font = ImageFont.truetype(font_path, int(width * 0.9))
        # Measure the rendered character
        font_width, font_height = draw.textsize(char, font)
        # Compute x, y so the character sits at the centre of the image
        x = (width - font_width - font.getoffset(char)[0]) / 2
        y = (height - font_height - font.getoffset(char)[1]) / 2
        # Draw the character: where, what, which colour, which font
        draw.text((x, y), char, (255, 255, 255), font)
        # Apply the rotation angle
        img = img.rotate(rotate)
        # Save as dataset/<label>/img-<label>_r-<rotation>_<timestamp>.png
        time_value = int(round(time.time() * 1000))
        img_path = "dataset/{}/img-{}_r-{}_{}.png".format(value, value, rotate, time_value)
        img.save(img_path)

# %% Folder containing the fonts
font_dir = "./fonts"
for font_name in os.listdir(font_dir):
    # Generate a batch of images for every font
    path_font_file = os.path.join(font_dir, font_name)
    # Rotation angles from -10 to 10 degrees, one batch per angle
    for k in range(-10, 10, 1):
        # One image per character
        makeImage(label_dict, path_font_file, rotate=k)
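With 15 characters and 20 rotation angles, each font file yields 15 × 20 = 300 images. The article doesn't say how many font files sit in ./fonts; assuming 13 of them, that works out to 300 × 13 = 3,900 images, which matches the "Found 3900 files belonging to 15 classes" line in the training log further down.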



2.2 Training
2.2.1 Building the model
Look at the code first: to an outsider it feels deep and mysterious, while insiders quietly smile.
# %% Imports
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
import pathlib
import cv2

# %% Build the model
def create_model():
    model = Sequential([
        layers.experimental.preprocessing.Rescaling(1./255, input_shape=(24, 24, 1)),
        layers.Conv2D(24, 3, activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, 3, activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(15)
    ])
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    return model

2.2.2 Convolution layer: Conv2D
Think of it as investigators from different departments, each one collecting and sorting the specific data inside its own small patch. Our input is an image made of pixels; in Rescaling(1./255, input_shape=(24, 24, 1)), input_shape says the input is a 24*24-pixel image with 1 channel (a colour RGB image would have 3), and the rescaling divides every pixel value by 255 so it lands between 0 and 1.
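As a small sanity check of my own (not from the original article): with a 24×24×1 input, 24 filters of size 3×3 produce a 22×22×24 output and 24 × (3×3×1 + 1) = 240 parameters, exactly the conv2d_4 line in the summary below.

# Minimal sketch: verify the first conv layer's output shape and parameter count
import tensorflow as tf
x = tf.zeros((1, 24, 24, 1))                        # one dummy 24x24 grayscale image
conv = tf.keras.layers.Conv2D(24, 3, activation='relu')
print(conv(x).shape)        # (1, 22, 22, 24): a 3x3 kernel trims one pixel on each side
print(conv.count_params())  # 240 = 24 filters * (3*3*1 weights + 1 bias)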




2.2.3 Pooling layer: MaxPooling2D
Put plainly, it's a kind of rounding off: keep only the strongest value in each small window and throw the rest away, which shrinks the feature map.
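A tiny illustration of my own of what MaxPooling2D((2, 2)) actually does: each non-overlapping 2×2 window is collapsed to its maximum value, halving both width and height.

import numpy as np
import tensorflow as tf
# A 4x4 toy "image": max pooling keeps only the largest value in each 2x2 window
x = np.array([[1, 2, 5, 6],
              [3, 4, 7, 8],
              [9, 10, 13, 14],
              [11, 12, 15, 16]], dtype=np.float32).reshape(1, 4, 4, 1)
pooled = tf.keras.layers.MaxPooling2D((2, 2))(x)
print(pooled.numpy().reshape(2, 2))  # [[ 4.  8.] [12. 16.]]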


2.2.4 Fully connected layer: Dense
"Of three thousand rivers, take but one ladle": the dense layers boil all the extracted features down to scores for the 15 classes.
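Another quick check of my own against the summary below: flattening the 4×4×64 feature map gives 1024 values, so the first dense layer has 1024 × 128 + 128 = 131,200 parameters and the last one 128 × 15 + 15 = 1,935, matching the dense_4 and dense_5 rows.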

_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
rescaling_2 (Rescaling)      (None, 24, 24, 1)         0
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 22, 22, 24)        240
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 11, 11, 24)        0
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 9, 9, 64)          13888
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 4, 4, 64)          0
_________________________________________________________________
flatten_2 (Flatten)          (None, 1024)              0
_________________________________________________________________
dense_4 (Dense)              (None, 128)               131200
_________________________________________________________________
dense_5 (Dense)              (None, 15)                1935
=================================================================
Total params: 147,263
Trainable params: 147,263
Non-trainable params: 0
_________________________________________________________________
2.2.5 Running the training
Just run it.
# The folder holding all the generated images
data_dir = pathlib.Path('dataset')
# Build a dataset from the image folders
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,                 # where to read the data from
    color_mode="grayscale",   # read the images as grayscale
    image_size=(24, 24),      # image size
    batch_size=32)            # number of images per batch
# The class names correspond to the sub-folders under dataset/
class_names = train_ds.class_names
# Save the class names for later use at prediction time
np.save("class_name.npy", class_names)
# Cache, shuffle and prefetch the dataset
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
# Create the model
model = create_model()
# Train: epochs=10 means the whole dataset is seen 10 times
model.fit(train_ds, epochs=10)
# Save the trained weights
model.save_weights('checkpoint/char_checkpoint')
Found 3900 files belonging to 15 classes.
Epoch 1/10 122/122 [=========] - 2s 19ms/step - loss: 0.5795 - accuracy: 0.8615
Epoch 2/10 122/122 [=========] - 2s 18ms/step - loss: 0.0100 - accuracy: 0.9992
Epoch 3/10 122/122 [=========] - 2s 19ms/step - loss: 0.0027 - accuracy: 1.0000
Epoch 4/10 122/122 [=========] - 2s 19ms/step - loss: 0.0013 - accuracy: 1.0000
Epoch 5/10 122/122 [=========] - 2s 20ms/step - loss: 8.4216e-04 - accuracy: 1.0000
Epoch 6/10 122/122 [=========] - 2s 18ms/step - loss: 5.5273e-04 - accuracy: 1.0000
Epoch 7/10 122/122 [=========] - 3s 21ms/step - loss: 4.0966e-04 - accuracy: 1.0000
Epoch 8/10 122/122 [=========] - 2s 20ms/step - loss: 3.0308e-04 - accuracy: 1.0000
Epoch 9/10 122/122 [=========] - 3s 23ms/step - loss: 2.3446e-04 - accuracy: 1.0000
Epoch 10/10 122/122 [=========] - 3s 21ms/step - loss: 1.8971e-04 - accuracy: 1.0000
After training, the checkpoint folder contains:
char_checkpoint.data-00000-of-00001
char_checkpoint.index
checkpoint
2.3 Prediction
Finally, time to enjoy the fruits of our labour.
# The images to recognize
img1 = cv2.imread('img1.png', 0)
img2 = cv2.imread('img2.png', 0)
imgs = np.array([img1, img2])
# Build the model
model = create_model()
# Load the weights trained earlier
model.load_weights('checkpoint/char_checkpoint')
# Load the class names
class_name = np.load('class_name.npy')
# Run the prediction and get the raw scores
predicts = model.predict(imgs)
results = []                      # array holding the results
for predict in predicts:          # walk through each prediction
    index = np.argmax(predict)    # position of the highest score
    result = class_name[index]    # map it back to a character
    results.append(result)
print(results)
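One hedged caveat of my own, not from the original article: the model was built with input_shape=(24, 24, 1), so img1.png and img2.png are assumed to already be 24×24 grayscale crops. If they aren't, resize them first, and depending on your TensorFlow version you may also need an explicit channel axis:

# Minimal sketch, assuming the crops may not be exactly 24x24 yet
img1 = cv2.resize(img1, (24, 24))
img2 = cv2.resize(img2, (24, 24))
imgs = np.array([img1, img2])
imgs = np.expand_dims(imgs, axis=-1)  # shape (2, 24, 24, 1) to match the model input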


Next question: given a big worksheet image like the one below, how do we chop it up into single-character images?

2.4 Segmenting the image


2.4.1 The projection method
The most effective methods are usually implemented with loops.

import numpy as np
import cv2
from PIL import Image, ImageDraw, ImageFont
import PIL
import matplotlib.pyplot as plt
import os
import shutil
from numpy.core.records import array
from numpy.core.shape_base import block
import time

# Y-axis projection of the whole image; the input is a binarized, inverted image array
def img_y_shadow(img_b):
    ### Compute the projection ###
    (h, w) = img_b.shape
    # One counter per image row, recording how many foreground pixels that row has
    a = [0 for z in range(0, h)]
    # Walk every row and count its foreground (white) pixels
    for i in range(0, h):
        for j in range(0, w):
            if img_b[i, j] == 255:
                a[i] += 1
    return a

# Show an image
def img_show_array(a):
    plt.imshow(a)
    plt.show()

# Show a projection; arr is the projection array, direction is the 'x' or 'y' axis
def show_shadow(arr, direction='x'):
    a_max = max(arr)
    if direction == 'x':
        # projection along the x axis
        a_shadow = np.zeros((a_max, len(arr)), dtype=int)
        for i in range(0, len(arr)):
            if arr[i] == 0:
                continue
            for j in range(0, arr[i]):
                a_shadow[j][i] = 255
    elif direction == 'y':
        # projection along the y axis
        a_shadow = np.zeros((len(arr), a_max), dtype=int)
        for i in range(0, len(arr)):
            if arr[i] == 0:
                continue
            for j in range(0, arr[i]):
                a_shadow[i][j] = 255
    img_show_array(a_shadow)

# Read the image
img_path = 'question.jpg'
img = cv2.imread(img_path, 0)
thresh = 200
# Binarize and invert
ret, img_b = cv2.threshold(img, thresh, 255, cv2.THRESH_BINARY_INV)

img_y_shadow_a = img_y_shadow(img_b)
show_shadow(img_y_shadow_a, 'y')   # only needed if you want to see the projection
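The nested loops above are easy to follow but slow on a large page. As an aside of my own (not part of the original article), the same row projection can be computed in one vectorized line:

# Equivalent, vectorized row projection: count the white pixels in every row
img_y_shadow_a = (img_b == 255).sum(axis=1).tolist()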

2.4.2 Finding the regions from the projection
The most effective methods, again, have to be implemented with loops.
# Find the text rows in an image: takes the projection list and returns the marked
# regions as coordinates [[left, top, right, bottom]]
def img2rows(a, w, h):
    ### Split into rows based on the projection ###
    inLine = False    # whether a row is currently being cut
    start = 0         # start index of the current cut
    mark_boxs = []
    for i in range(0, len(a)):
        if inLine == False and a[i] > 10:
            inLine = True
            start = i
        # Record the selected region [left, top, right, bottom]: top and bottom come from
        # the projection, left and right span the whole image width
        elif i - start > 5 and a[i] < 10 and inLine:
            inLine = False
            if i - start > 10:
                top = max(start - 1, 0)
                bottom = min(h, i + 1)
                box = [0, top, w, bottom]
                mark_boxs.append(box)
    return mark_boxs


(img_h, img_w) = img.shape
row_mark_boxs = img2rows(img_y_shadow_a, img_w, img_h)
print(row_mark_boxs)
2.4.3 Cutting the image by region
The most effective methods, in the end, are implemented with loops too. This is exactly where the computer shows its strength.
# Crop an image; img is the image array, mark_boxs are the marked regions
def cut_img(img, mark_boxs):
    img_items = []    # holds the cropped pieces
    for i in range(0, len(mark_boxs)):
        img_org = img.copy()
        box = mark_boxs[i]
        # Crop out the region
        img_item = img_org[box[1]:box[3], box[0]:box[2]]
        img_items.append(img_item)
    return img_items

# Save a list of images
def save_imgs(dir_name, imgs):
    if os.path.exists(dir_name):
        shutil.rmtree(dir_name)
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    img_paths = []
    for i in range(0, len(imgs)):
        file_path = dir_name + '/part_' + str(i) + '.jpg'
        cv2.imwrite(file_path, imgs[i])
        img_paths.append(file_path)
    return img_paths

# Cut out the rows and save them
row_imgs = cut_img(img, row_mark_boxs)
imgs = save_imgs('rows', row_imgs)   # only needed if you want to keep the crops on disk
print(imgs)

2.4.4 Loops cut through the grease
Loops again. We've mastered cutting the page horizontally into rows; cutting each row image vertically into three blocks works exactly the same way.


kernel = np.ones((3, 3), np.uint8)                     # dilation kernel
row_img_b = cv2.dilate(img_b, kernel, iterations=6)    # dilate the image 6 times
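divImg below also relies on an x-axis projection helper, img_x_shadow, which isn't listed in this section. A minimal sketch, assuming it simply mirrors img_y_shadow but counts per column instead of per row:

# X-axis projection: one counter per column of the binarized, inverted image
def img_x_shadow(img_b):
    (h, w) = img_b.shape
    a = [0 for z in range(0, w)]
    for j in range(0, w):
        for i in range(0, h):
            if img_b[i, j] == 255:
                a[j] += 1
    return a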



def divImg(img_path, save_file=False):
    img_o = cv2.imread(img_path, 1)   # original colour image
    img = cv2.imread(img_path, 0)     # grayscale image
    (img_h, img_w) = img.shape
    thresh = 200
    # Binarize the whole image (inverted), used for splitting into rows
    ret, img_b = cv2.threshold(img, thresh, 255, cv2.THRESH_BINARY_INV)
    # Compute the projection and cut the whole page into rows
    img_y_shadow_a = img_y_shadow(img_b)
    row_mark_boxs = img2rows(img_y_shadow_a, img_w, img_h)
    # Cut the rows out of the original (grayscale) image
    row_imgs = cut_img(img, row_mark_boxs)
    all_mark_boxs = []
    all_char_imgs = []
    # =============== cut each row into blocks ======================
    for i in range(0, len(row_imgs)):
        row_img = row_imgs[i]
        (row_img_h, row_img_w) = row_img.shape
        # Binarize the row image (inverted), used for splitting into blocks
        ret, row_img_b = cv2.threshold(row_img, thresh, 255, cv2.THRESH_BINARY_INV)
        kernel = np.ones((3, 3), np.uint8)
        # Dilate 6 times so the characters of one problem merge into a single block
        row_img_b_d = cv2.dilate(row_img_b, kernel, iterations=6)
        img_x_shadow_a = img_x_shadow(row_img_b_d)
        block_mark_boxs = row2blocks(img_x_shadow_a, row_img_w, row_img_h)
        row_char_boxs = []
        row_char_imgs = []
        # Cut the blocks out of the (grayscale) row image
        block_imgs = cut_img(row_img, block_mark_boxs)
        if save_file:
            b_imgs = save_imgs('cuts/row_' + str(i), block_imgs)   # only if keeping crops
            print(b_imgs)
        # ============= cut each block into characters ====================
        for j in range(0, len(block_imgs)):
            block_img = block_imgs[j]
            (block_img_h, block_img_w) = block_img.shape
            # Binarize the block, since we are about to cut out the characters
            ret, block_img_b = cv2.threshold(block_img, thresh, 255, cv2.THRESH_BINARY_INV)
            block_img_x_shadow_a = img_x_shadow(block_img_b)
            row_top = row_mark_boxs[i][1]
            block_left = block_mark_boxs[j][0]
            char_mark_boxs, abs_char_mark_boxs = block2chars(block_img_x_shadow_a, block_img_w, block_img_h, row_top, block_left)
            row_char_boxs.append(abs_char_mark_boxs)
            # The characters are cut from the binarized block
            # (note: a third argument is passed here, an extended form of cut_img not shown above)
            char_imgs = cut_img(block_img_b, char_mark_boxs, True)
            row_char_imgs.append(char_imgs)
            if save_file:
                c_imgs = save_imgs('cuts/row_' + str(i) + '/blocks_' + str(j), char_imgs)   # only if keeping crops
                print(c_imgs)
        all_mark_boxs.append(row_char_boxs)
        all_char_imgs.append(row_char_imgs)
    return all_mark_boxs, all_char_imgs, img_o
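divImg also calls row2blocks and block2chars, which the article doesn't list in this section. As a hedged sketch only, following the same start/stop pattern as img2rows (the gap thresholds are my own assumptions): row2blocks splits a dilated row at wide horizontal gaps, and block2chars splits a block at every gap, also returning coordinates made absolute with row_top and block_left.

# Sketch: split one row into blocks using the x projection of the dilated row image
def row2blocks(a, w, h):
    inLine = False
    start = 0
    mark_boxs = []
    for i in range(0, len(a)):
        if not inLine and a[i] > 0:
            inLine = True
            start = i
        elif inLine and a[i] == 0 and i - start > 10:
            inLine = False
            mark_boxs.append([max(start - 1, 0), 0, min(w, i + 1), h])
    return mark_boxs

# Sketch: split one block into characters; returns boxes relative to the block
# and boxes in absolute page coordinates (shifted by row_top and block_left)
def block2chars(a, w, h, row_top, block_left):
    inLine = False
    start = 0
    mark_boxs = []
    abs_mark_boxs = []
    for i in range(0, len(a)):
        if not inLine and a[i] > 0:
            inLine = True
            start = i
        elif inLine and a[i] == 0:
            inLine = False
            left, right = max(start - 1, 0), min(w, i + 1)
            mark_boxs.append([left, 0, right, h])
            abs_mark_boxs.append([block_left + left, row_top, block_left + right, row_top + h])
    return mark_boxs, abs_mark_boxs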

2.5 Recognition
Loops, loops, and yet more damn loops!
# path: the worksheet image; save: whether to keep intermediate crops (both assumed set earlier)
all_mark_boxs, all_char_imgs, img_o = divImg(path, save)
model = cnn.create_model()
model.load_weights('checkpoint/char_checkpoint')
class_name = np.load('class_name.npy')
# Walk through the rows
for i in range(0, len(all_char_imgs)):
    row_imgs = all_char_imgs[i]
    # Walk through the blocks
    for j in range(0, len(row_imgs)):
        block_imgs = row_imgs[j]
        block_imgs = np.array(block_imgs)
        results = cnn.predict(model, block_imgs, class_name)
        print('recognize result:', results)
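Here create_model and predict live in a separate cnn module that the article doesn't show at this point. A minimal sketch of what cnn.predict might look like, assuming it just batches the argmax logic already used in section 2.3:

# Sketch of cnn.predict: run the model over a batch of character crops and
# map each argmax index back to its class name
def predict(model, imgs, class_name):
    imgs = np.array(imgs, dtype=np.float32)
    if imgs.ndim == 3:                      # add the channel axis if it's missing
        imgs = np.expand_dims(imgs, axis=-1)
    predicts = model.predict(imgs)
    return [class_name[np.argmax(p)] for p in predicts]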

recognize result: ['1', '0', '12', '2', '10']
recognize result: ['8', '12', '6', '10']
recognize result: ['1', '0', '12', '7', '10']
recognize result: ['1', '0', '-', '2', '=']
recognize result: ['8', '-', '6', '=']
recognize result: ['1', '0', '-', '7', '=']

2.6 Calculating and giving feedback
2.6.1 Calculation
# Evaluate the expression and return the result; example chars: ['8', '-', '6', '=']
def calculation(chars):
    cstr = ''.join(chars)
    result = ''
    if "=" in cstr:
        # There is an equals sign
        str_arr = cstr.split('=')
        c_str = str_arr[0]
        r_str = str_arr[1]
        c_str = c_str.replace("×", "*")
        c_str = c_str.replace("÷", "/")
        try:
            c_r = int(eval(c_str))
        except Exception as e:
            print("Exception", e)
            return result    # bail out if the expression can't be evaluated
        if r_str == "":
            result = c_r
        else:
            if str(c_r) == str(r_str):
                result = "√"
            else:
                result = "×"
    return result
recognize result: ['8', '×', '4', '=']
calculate result: 32
recognize result: ['2', '-', '1', '=', '1']
calculate result: √
recognize result: ['1', '0', '-', '5', '=']
calculate result: 5
2.6.2 Feedback
# Draw text onto an image
def cv2ImgAddText(img, text, left, top, textColor=(255, 0, 0), textSize=20):
    if isinstance(img, np.ndarray):
        # If this is an OpenCV image, convert it to a PIL image first
        img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    # Create a drawing object for the image
    draw = ImageDraw.Draw(img)
    # The font to draw with
    fontStyle = ImageFont.truetype("fonts/fangzheng_shusong.ttf", textSize, encoding="utf-8")
    # Draw the text
    draw.text((left, top), text, textColor, font=fontStyle)
    # Convert back to OpenCV format
    return cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)

# Get the crop coordinates, the cropped character images and the original image
# (path is the worksheet image, save controls whether intermediate crops are written to disk)
all_mark_boxs, all_char_imgs, img_o = divImg(path, save)
# Restore the model for recognition
model = cnn.create_model()
model.load_weights('checkpoint/char_checkpoint')
class_name = np.load('class_name.npy')
# Walk through the rows
for i in range(0, len(all_char_imgs)):
    row_imgs = all_char_imgs[i]
    # Walk through the blocks
    for j in range(0, len(row_imgs)):
        block_imgs = row_imgs[j]
        block_imgs = np.array(block_imgs)
        # Recognize the characters
        results = cnn.predict(model, block_imgs, class_name)
        print('recognize result:', results)
        # Evaluate the expression
        result = calculation(results)
        print('calculate result:', result)
        # Coordinates of the characters in this block
        block_mark = all_mark_boxs[i][j]
        # The answer is written right after the last character of the block
        answer_box = block_mark[-1]
        # Position and size of that last character
        x = answer_box[2]
        y = answer_box[3]
        iw = answer_box[2] - answer_box[0]
        ih = answer_box[3] - answer_box[1]
        # Font size for the answer
        textSize = max(iw, ih)
        # Text colour depends on the result
        if str(result) == "√":
            color = (0, 255, 0)
        elif str(result) == "×":
            color = (255, 0, 0)
        else:
            color = (192, 192, 192)
        # Write the result onto the original image
        img_o = cv2ImgAddText(img_o, str(result), answer_box[2], answer_box[1], color, textSize)
# Save the original image with all the results written on it
cv2.imwrite('result.jpg', img_o)
