mirror of
https://github.com/MarcZierle/photo-log-backend.git
synced 2025-01-07 14:57:58 +00:00
426 lines
13 KiB
Python
426 lines
13 KiB
Python
|
import cv2
|
||
|
import imutils
|
||
|
import random
|
||
|
import numpy as np
|
||
|
import math
|
||
|
from PIL import Image, ImageDraw
|
||
|
import itertools
|
||
|
import pytesseract
|
||
|
|
||
|
|
||
|
def preprocess_image(image, blur_params=(20, 20, 25), canny_params=(25, 250)):
    """
    Turns the image into grayscale, applies a bilateral filter
    to smooth out unimportant parts of the image, and returns
    the Canny filtered result.
    image: cv2.imread image (converted with COLOR_BGR2GRAY)
    blur_params: optional; sequence of the three values handed to
                 cv2.bilateralFilter after the image (size, color, space)
    canny_params: optional; sequence of the two thresholds handed to cv2.Canny
    """
    # The defaults are tuples on purpose: list defaults are created once and
    # shared between all calls of the function.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.bilateralFilter(gray, blur_params[0], blur_params[1], blur_params[2])
    return cv2.Canny(smoothed, canny_params[0], canny_params[1])
|
||
|
|
||
|
|
||
|
def hough_transform(edged):
    """
    Returns an image (nested list) representing the hough space of the edge image.
    Every pixel (x, y) with a value > 0 votes for all lines
    x * cos(alpha) + y * sin(alpha) = d that pass through it.
    edged: Canny filtered cv2 image (2-D array)

    The result has 314 rows, row r standing for alpha = r / 200 rad, and
    2 * ceil(image diagonal) columns, column c standing for
    d = c - ceil(image diagonal).
    """
    num_angles = 157 * 2
    max_d = np.sqrt(edged.shape[0]**2 + edged.shape[1]**2)
    d_offset = math.ceil(max_d)

    # sin/cos depend on the angle index only, so compute them once instead of
    # once per edge pixel.
    trig_table = [
        (math.cos(alpha / 2.0 / 100.0), math.sin(alpha / 2.0 / 100.0))
        for alpha in range(num_angles)
    ]

    hough_space = [[0] * (2 * d_offset) for _ in range(num_angles)]

    # argwhere yields (row, column) = (y, x) of the edge pixels; tolist() turns
    # the coordinates into plain Python ints so round() returns an int index.
    for y, x in np.argwhere(np.asarray(edged) > 0).tolist():
        for votes, (cos_a, sin_a) in zip(hough_space, trig_table):
            votes[round(x * cos_a + y * sin_a) + d_offset] += 1
    return hough_space
|
||
|
|
||
|
|
||
|
# TODO: fix removal on "the other side" of the hough space; cut-outs that
# cross the border are currently mirrored back (abs() on the index) or dropped.
def get_max_params(hough_space, num_params, rm_radius=7):
    """
    Iterates over the maxima of hough space image.
    After each maximum found a circle of the radius rm_radius is being "cut out"
    of the hough space image (the cells are set to -inf).
    Returns a tuple (params, hough_space): params is a list of tuples
    containing the maximum parameters (alpha, d), hough_space is the passed
    hough space with the cut-outs applied (it is modified in place).
    Fewer than num_params tuples are returned if the whole hough space has
    been cut out before num_params maxima were found.
    hough_space: hough space image (2-D nested list)
    num_params: number of the maxima to be found
    rm_radius: optional = 7; the radius to be cut out around the maximum found
    """
    # Work on one float array instead of re-converting the nested list for
    # every maximum; the list is kept in sync because it is handed back.
    hough_array = np.array(hough_space, dtype=float)
    if hough_array.ndim != 2 or hough_array.size == 0:
        return [], hough_space
    num_rows, num_cols = hough_array.shape

    # Offsets of all cells inside the circle that is cut out around a maximum.
    span = range(-rm_radius, rm_radius + 1)
    disc = [
        (dy, dx)
        for dy in span
        for dx in span
        if math.sqrt(dy**2 + dx**2) <= rm_radius
    ]

    all_max_params = []
    for _ in range(num_params):
        row, col = np.unravel_index(hough_array.argmax(), hough_array.shape)
        row, col = int(row), int(col)

        # Everything has been cut out already: there is no maximum left.
        if hough_array[row, col] == -math.inf:
            break

        alpha = row / 2.0 / 100.0
        d = col - num_cols / 2.0
        all_max_params.append((alpha, d))

        for dy, dx in disc:
            cut_row = abs(row + dy)
            cut_col = abs(col + dx)
            # cells beyond the far border are skipped
            if cut_row < num_rows and cut_col < num_cols:
                hough_space[cut_row][cut_col] = -math.inf
                hough_array[cut_row, cut_col] = -math.inf

    return all_max_params, hough_space
|
||
|
|
||
|
|
||
|
def _draw_hough_space(hough_space):
    """
    Renders the hough space as a gray PIL image (debug helper).
    Every cell is scaled so that the largest value becomes 255; cells that
    were cut out (-inf) are drawn black. The passed hough space is not modified.
    hough_space: hough space image (2-D nested list)
    """
    width = len(hough_space[0])
    height = len(hough_space)
    max_val = float(np.amax(hough_space))

    img = Image.new(mode="RGB", size=(width, height))
    pixels = img.load()
    for y, row in enumerate(hough_space):
        for x in range(width):
            votes = row[x]
            # Without any positive maximum there is nothing to scale against
            # (dividing by a maximum of 0 would raise ZeroDivisionError).
            if votes == -math.inf or max_val <= 0:
                val = 0
            else:
                val = int(votes / max_val * 255)
            pixels[x, y] = (val, val, val)
    return img
|
||
|
|
||
|
|
||
|
def _draw_lines(image, all_max_params, resize_height):
    """
    Draws the lines given by hough space parameters onto a resized copy of
    the image and returns it as PIL image (debug helper).
    image: cv2 image (converted with COLOR_BGR2RGB)
    all_max_params: (alpha, d) tuple list of hough space parameters
    resize_height: height of the returned image; the width keeps the aspect ratio
    """
    img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    new_width = int(resize_height * img.size[0] / img.size[1])
    # Image.ANTIALIAS was an alias of LANCZOS and no longer exists in Pillow 10+.
    img = img.resize((new_width, resize_height), Image.LANCZOS)
    pixels = img.load()

    dist = 0.5  # line thickness (distance image pixels to line coordinates)
    for (alpha, d) in all_max_params:
        cos_a = math.cos(alpha)
        sin_a = math.sin(alpha)
        for y in range(img.size[1]):
            for x in range(img.size[0]):
                val = (x * cos_a + y * sin_a) - d
                if -dist <= val <= dist:
                    pixels[x, y] = (0, 200, 255)

    return img
|
||
|
|
||
|
|
||
|
def get_intersect(a1, a2, b1, b2):
    """
    Computes where the line through a1 and a2 crosses the line through
    b1 and b2, using homogeneous coordinates.
    Returns (x, y), or (inf, inf) if the lines do not cross (parallel).
    a1: [x, y] a point on the first line
    a2: [x, y] another point on the first line
    b1: [x, y] a point on the second line
    b2: [x, y] another point on the second line
    """
    def _homogeneous(point):
        # [x, y] -> [x, y, 1]
        return np.append(np.asarray(point, dtype=float), 1.0)

    first_line = np.cross(_homogeneous(a1), _homogeneous(a2))
    second_line = np.cross(_homogeneous(b1), _homogeneous(b2))
    px, py, pz = np.cross(first_line, second_line)

    if pz != 0:
        return (px / pz, py / pz)
    # a vanishing third component means there is no finite intersection
    return (float('inf'), float('inf'))
|
||
|
|
||
|
|
||
|
def insert_intersection(intersections, params1, params2, inters_pt):
    """
    Registers an intersection for both lines involved.
    intersections: dict mapping line parameters to a list of
                   (other line parameters, intersection point) tuples; updated in place
    params1: parameters of the first line (used as dict key)
    params2: parameters of the second line (used as dict key)
    inters_pt: the point in which both lines intersect
    """
    for line, other_line in ((params1, params2), (params2, params1)):
        intersections.setdefault(line, []).append((other_line, inters_pt))
|
||
|
|
||
|
|
||
|
def find_intersections(all_max_params, img_size, allowed_angle_diff=3):
    """
    Takes hough space parameters of found lines and returns the points in
    which (nearly) perpendicular lines intersect inside of the image.
    Returns a tuple (intersections_by_lines, all_intersections):
    intersections_by_lines maps every (alpha, d) line to a list of
    (other line, intersection point) tuples, all_intersections is the list
    of all (x, y) intersection points, rounded to integers.
    all_max_params: (alpha, d) tuple list of hough space parameters
    img_size: the size (height, width, ...) of the image in which the
              intersections are to be found in
    allowed_angle_diff: optional = 3; maximum deviation in degrees from a
                        right angle for two lines to count as intersecting
    """
    intersections_by_lines = {}
    all_intersections = []

    # Every unordered pair of lines is looked at exactly once.
    for (alpha1, d1), (alpha2, d2) in itertools.combinations(all_max_params, 2):
        if alpha1 == alpha2 and d1 == d2:
            continue

        # alpha is the direction of a line's normal, so the angle between two
        # lines is the difference of their alphas (folded into [0, pi)).
        angle = math.degrees(abs(alpha1 - alpha2) % math.pi)
        if abs(angle - 90) > allowed_angle_diff:
            continue

        # Solve  x*cos(alpha1) + y*sin(alpha1) = d1
        #        x*cos(alpha2) + y*sin(alpha2) = d2   with Cramer's rule.
        # This replaces the former construction from random points on the
        # lines, which gave varying results and lost intersections.
        cos1, sin1 = math.cos(alpha1), math.sin(alpha1)
        cos2, sin2 = math.cos(alpha2), math.sin(alpha2)
        det = cos1 * sin2 - sin1 * cos2
        if det == 0:  # lines are parallel
            continue
        x = (d1 * sin2 - d2 * sin1) / det
        y = (d2 * cos1 - d1 * cos2) / det

        # is the intersection outside of the image?
        if x < 0 or x >= img_size[1]:
            continue
        if y < 0 or y >= img_size[0]:
            continue

        inters = (round(float(x)), round(float(y)))
        all_intersections.append(inters)
        insert_intersection(intersections_by_lines, (alpha1, d1), (alpha2, d2), inters)

    return intersections_by_lines, all_intersections
|
||
|
|
||
|
|
||
|
def _draw_intersections(image, intersections):
    """
    Returns a copy of the PIL image in which every intersection point is
    marked by a small green dot (debug helper).
    image: PIL image to draw onto (left untouched)
    intersections: list of (x, y) points
    """
    canvas = image.copy()
    green = (0, 255, 0)
    radius = 1
    pen = ImageDraw.Draw(canvas)
    for point in intersections:
        bounds = (
            point[0] - radius,
            point[1] - radius,
            point[0] + radius,
            point[1] + radius,
        )
        pen.ellipse(bounds, fill=green, outline=green)
    return canvas
|
||
|
|
||
|
|
||
|
def get_n_connections(start, intersections_by_lines, n):
    """
    Collects all paths that begin at the line start and follow n
    intersections to other lines, without coming back to start.
    Returns a list of paths, each being a list of n+1 lines.
    start: the line (dict key) to begin with
    intersections_by_lines: dict mapping a line to a list of
                            (other line, intersection point) tuples
    n: number of steps to take; n <= 0 yields the single path [start]
    """
    if n <= 0:
        return [[start]]

    return [
        [start] + tail
        for neighbor in intersections_by_lines[start]
        for tail in get_n_connections(neighbor[0], intersections_by_lines, n - 1)
        if start not in tail
    ]
|
||
|
|
||
|
|
||
|
def get_cycles(intersections_by_lines, n=4):
    """
    Finds closed paths of n lines in which consecutive lines intersect.
    Only cycles that run through the first line of intersections_by_lines
    are searched for.
    Returns a list of cycles, each being a list of n+1 lines whose first and
    last entry is the start line; an empty list if there are no intersections.
    intersections_by_lines: dict mapping a line to a list of
                            (other line, intersection point) tuples
    n: optional = 4; number of lines forming a cycle
    """
    # Without any intersecting lines there is no line to start from.
    if not intersections_by_lines:
        return []

    start = next(iter(intersections_by_lines))
    connections = get_n_connections(start, intersections_by_lines, n=n-1)

    cycles = []
    for connection in connections:
        last_vertex = connection[-1]
        # can we get back to the beginning?
        if any(con[0] == start for con in intersections_by_lines[last_vertex]):
            cycles.append(connection + [start])

    return cycles
|
||
|
|
||
|
|
||
|
def get_intersection_points_from_lines(rects_lines, intersections_by_lines):
    """
    Translates rectangles given by their lines into rectangles given by
    their corner points.
    Returns one list of points per rectangle: the intersection points of each
    two consecutive lines, followed by the first point again.
    rects_lines: list of rectangles, each being a list of lines
    intersections_by_lines: dict mapping a line to a list of
                            (other line, intersection point) tuples
    """
    rects_points = []
    for lines in rects_lines:
        corners = []
        for current_line, next_line in zip(lines, lines[1:]):
            matches = [
                con for con in intersections_by_lines[current_line]
                if con[0] == next_line
            ]
            corners.append(matches[0][1])
        # close the polygon by repeating its first corner
        corners.append(corners[0])
        rects_points.append(corners)
    return rects_points
|
||
|
|
||
|
|
||
|
def remove_small_rects(rects_points, image_shape, min_image_coverage=0.3):
    """
    Keeps only the rectangles that cover a sufficient part of the image.
    The area of a rectangle is taken as the product of the lengths of its
    first two sides (points 0-1 and 1-2).
    rects_points: list of rectangles, each being a list of (x, y) points
    image_shape: shape of the image; the first two entries are multiplied to get its area
    min_image_coverage: optional = 0.3; minimal rectangle area relative to the image area
    """
    def _distance(p, q):
        return math.sqrt((p[0] - q[0])**2 + (p[1] - q[1])**2)

    min_area = min_image_coverage * (image_shape[0] * image_shape[1])
    return [
        rect for rect in rects_points
        if _distance(rect[0], rect[1]) * _distance(rect[1], rect[2]) >= min_area
    ]
|
||
|
|
||
|
|
||
|
def _draw_rects(rects_points, image):
    """
    Returns a copy of the PIL image with the outline of every rectangle
    drawn in yellow (debug helper).
    rects_points: list of rectangles, each being a list of (x, y) points
    image: PIL image to draw onto (left untouched)
    """
    canvas = image.copy()
    pen = ImageDraw.Draw(canvas)
    for corners in rects_points:
        outline = [(corner[0], corner[1]) for corner in corners]
        pen.line(outline, width=2, fill='yellow')
    return canvas
|
||
|
|
||
|
|
||
|
def crop_warp_image(image, rect):
    """
    Cuts the quadrilateral rect out of the image and warps it into an
    upright rectangle.
    image: cv2 image
    rect: four (x, y) corner points in any order
    """
    corners = np.array(rect, dtype="float32").reshape(4, 2)

    # The corner with the smallest x+y is taken as top-left, the one with the
    # largest x+y as bottom-right; smallest / largest y-x give top-right /
    # bottom-left.
    coord_sum = corners.sum(axis=1)
    coord_diff = np.diff(corners, axis=1)
    ordered_points = np.array([
        corners[np.argmin(coord_sum)],
        corners[np.argmin(coord_diff)],
        corners[np.argmax(coord_sum)],
        corners[np.argmax(coord_diff)],
    ], dtype="float32")
    (tl, tr, br, bl) = ordered_points

    def _side_length(p, q):
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # The output is as large as the longer one of each two opposite sides.
    out_width = max(int(_side_length(br, bl)), int(_side_length(tr, tl)))
    out_height = max(int(_side_length(tr, br)), int(_side_length(tl, bl)))

    dst_rect = np.array([
        [0, 0],
        [out_width - 1, 0],
        [out_width - 1, out_height - 1],
        [0, out_height - 1]], dtype="float32")

    transform = cv2.getPerspectiveTransform(ordered_points, dst_rect)
    return cv2.warpPerspective(image, transform, (out_width, out_height))
|
||
|
|
||
|
|
||
|
def get_rect_bounding_box(rect):
    """
    Returns the axis-aligned bounding box of the given points as list of
    its four [x, y] corners: top-left, top-right, bottom-right, bottom-left
    (with y growing downwards).
    rect: list of (x, y) points
    """
    xs = [point[0] for point in rect]
    ys = [point[1] for point in rect]
    left, right = min(xs), max(xs)
    top, bottom = min(ys), max(ys)
    return [[left, top], [right, top], [right, bottom], [left, bottom]]
|
||
|
|
||
|
|
||
|
def _cv2ImageToPIL(cv2_image):
    """
    Wraps the cv2 image (array) into a PIL image.
    The channel order is kept as it is, no color conversion takes place.
    """
    return Image.fromarray(cv2_image)
|
||
|
|
||
|
|
||
|
def _PILImageToCv2(pil_image):
|
||
|
cv2_image = np.asarray(pil_image)
|
||
|
return cv2_image
|
||
|
|
||
|
|
||
|
def autocrop(original_image, DEBUG=False):
    """
    Automatically crops an image to size of the flip chart in the given image.
    Returns a tuple (cropped_image, best_rect, bbox, intersections):
    cropped_image: the cropped PIL image, or the uncropped image if no
                   rectangle could be found or cropping failed
    best_rect: the four (x, y) corners of the rectangle cropped to, or None
    bbox: the bounding box of best_rect as four [x, y] corners, or None
    intersections: list of all [x, y] line intersections that were found
    All coordinates refer to the original (not the shrunken) image.
    original_image: the PIL image to crop
    DEBUG: optional = False; if set, intermediate images are written to the
           directory debug_out/
    """

    resize_height = 300
    blur_params = [35, 15, 40]  # size, color, space
    canny_params = [80, 250]
    num_hough_params = 50
    hough_rm_radius = 20
    line_angle_diff = 3
    min_image_coverage = 0.3

    # NOTE(review): the array keeps the channel order of the PIL image, while
    # preprocess_image and _draw_lines convert from BGR - confirm this is intended.
    original_image = _PILImageToCv2(original_image)

    # shrink image to a smaller size to speed up autocropping
    ratio = original_image.shape[0] / float(resize_height)
    image_small = imutils.resize(original_image, height=resize_height)

    edge_image = preprocess_image(image_small, blur_params=blur_params, canny_params=canny_params)

    if DEBUG:
        cv2.imwrite('debug_out/edges.png', edge_image)

    hough_space = hough_transform(edge_image)
    all_max_params, hough_space = get_max_params(hough_space, num_params=num_hough_params, rm_radius=hough_rm_radius)

    if DEBUG:
        hough_space_image = _draw_hough_space(hough_space)
        hough_space_image.save("debug_out/hough.png")
        image_lines = _draw_lines(original_image, all_max_params, resize_height)

    intersections_by_lines, all_intersections = find_intersections(all_max_params, image_small.shape, allowed_angle_diff=line_angle_diff)

    if DEBUG:
        image_lines = _draw_intersections(image_lines, all_intersections)

    # get_cycles needs at least one line to start from; without any
    # intersection there cannot be a rectangle either.
    if intersections_by_lines:
        rects_lines = get_cycles(intersections_by_lines)
    else:
        rects_lines = []
    rects_points = get_intersection_points_from_lines(rects_lines, intersections_by_lines)
    rects_points = remove_small_rects(rects_points, image_small.shape, min_image_coverage=min_image_coverage)

    if DEBUG:
        image_lines = _draw_rects(rects_points, image_lines)
        image_lines.save("debug_out/line.png")

    # TODO: find best rectangle
    try:
        best_rect = rects_points[0]
        # scale the corners from the shrunken image back to the original one
        best_rect = [(round(pt[0]*ratio), round(pt[1]*ratio)) for pt in best_rect]
        # the fifth point only repeats the first one
        best_rect = best_rect[:4]

        cropped_image = crop_warp_image(original_image, best_rect)

        cropped_image = _cv2ImageToPIL(cropped_image)

        bbox = get_rect_bounding_box(best_rect)
    except Exception:
        # Best effort: whatever goes wrong (no rectangle found, warping
        # fails), hand back the uncropped image instead of raising.
        cropped_image = _cv2ImageToPIL(original_image)
        best_rect = None
        bbox = None

    intersections_as_list = [[round(i[0]*ratio), round(i[1]*ratio)] for i in all_intersections]

    return cropped_image, best_rect, bbox, intersections_as_list
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    # Manual test run: crop one of the example images with debug output
    # enabled ('example_imgs/bill.png' is another sample to try).
    sample = Image.open('example_imgs/image4.jpg')

    result_image, *details = autocrop(sample, DEBUG=True)

    result_image.save('debug_out/cropped.png')
    # rectangle, bounding box and intersections, one per line
    for detail in details:
        print(detail)
|