# backend/photo_log/autocrop/autocrop.py
# (repository listing metadata: 426 lines, 13 KiB, Python, executable file)

import itertools
import math
import os
import random

import cv2
import imutils
import numpy as np
import pytesseract
from PIL import Image, ImageDraw
def preprocess_image(image, blur_params=(20, 20, 25), canny_params=(25, 250)):
    """
    Turns the image into grayscale, applies a bilateral filter
    to smooth out unimportant parts of the image, and returns
    the Canny filtered result.
    image: cv2.imread image (converted with COLOR_BGR2GRAY)
    blur_params: optional; the three values passed on to cv2.bilateralFilter
                 after the image (diameter, sigma color, sigma space)
    canny_params: optional; the two threshold values passed on to cv2.Canny
    Returns the edge image produced by cv2.Canny.
    """
    # The defaults are tuples instead of lists so they cannot be mutated
    # between calls; they are only ever indexed, so lists still work as
    # arguments.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.bilateralFilter(
        gray,
        blur_params[0],
        blur_params[1],
        blur_params[2],
    )
    return cv2.Canny(gray, canny_params[0], canny_params[1])
def hough_transform(edged):
    """
    Returns an image (list of rows) representing the hough space of the
    edge image.
    Every non-zero pixel (x, y) votes for all lines
    x * cos(alpha) + y * sin(alpha) = d that pass through it.
    edged: Canny filtered cv2 image (2D array)
    Returns a list of 314 rows, one per alpha step of 0.005 rad
    (alpha = row / 200). Each row holds 2 * ceil(diagonal) integer vote
    counters; the votes for a distance d are stored in column
    round(d) + ceil(diagonal), so negative distances fit as well.
    """
    num_angle_steps = 157 * 2
    # length of the image diagonal = largest possible |d|
    d_offset = math.ceil(np.sqrt(edged.shape[0]**2 + edged.shape[1]**2))
    hough_space = [[0] * (2 * d_offset) for _ in range(num_angle_steps)]

    # cos/sin depend only on the angle step, so compute them once instead
    # of once per edge pixel.
    trig_table = [
        (math.cos(step / 2.0 / 100.0), math.sin(step / 2.0 / 100.0))
        for step in range(num_angle_steps)
    ]

    # argwhere yields the (row, column) = (y, x) index of every edge pixel in
    # row-major order; tolist() turns the indices into plain Python ints.
    for y, x in np.argwhere(np.asarray(edged) > 0).tolist():
        for votes, (cos_alpha, sin_alpha) in zip(hough_space, trig_table):
            d = x * cos_alpha + y * sin_alpha
            votes[round(d) + d_offset] += 1
    return hough_space
# TODO: the cut-out around a maximum is clipped at the borders of the hough
# space; it is not continued on "the other side" of the hough space.
def get_max_params(hough_space, num_params, rm_radius=7):
    """
    Iterates over the maxima of the hough space image.
    After each maximum found, a circle of the radius rm_radius is "cut out"
    of the hough space image (its cells are set to -inf).
    Returns a tuple (all_max_params, hough_space): a list of tuples containing
    the maximum parameters (alpha, d), and the hough_space that was passed in,
    which is modified in place.
    Stops early when every cell has been cut out, so fewer than num_params
    tuples may be returned.
    hough_space: hough space image (list of rows)
    num_params: number of the maxima to be found
    rm_radius: optional = 7; the radius to be cut out around the maximum found
    """
    all_max_params = []
    # Work on one float array that is kept in sync with hough_space instead
    # of converting the nested list again for every maximum.
    hough_array = np.array(hough_space, dtype=float)
    if hough_array.size == 0:
        return all_max_params, hough_space
    num_rows, num_cols = hough_array.shape

    # Offsets of all cells that lie inside the circle to be removed.
    radius_sq = rm_radius ** 2
    circle_offsets = [
        (dy, dx)
        for dy in range(-rm_radius, rm_radius + 1)
        for dx in range(-rm_radius, rm_radius + 1)
        if dy * dy + dx * dx <= radius_sq
    ]

    for _ in range(num_params):
        flat_index = hough_array.argmax()
        row, col = (int(v) for v in np.unravel_index(flat_index, hough_array.shape))
        # The check must look at the maximum *value*: once it is -inf the
        # whole hough space has been cut out and no real maximum is left.
        if hough_array[row, col] == -math.inf:
            break
        alpha = row / 2.0 / 100.0
        d = col - num_cols / 2.0
        all_max_params.append((alpha, d))
        for dy, dx in circle_offsets:
            r, c = row + dy, col + dx
            # cells outside of the hough space are skipped
            if 0 <= r < num_rows and 0 <= c < num_cols:
                hough_space[r][c] = -math.inf
                hough_array[r, c] = -math.inf
    return all_max_params, hough_space
def _draw_hough_space(hough_space):
    """
    Debug helper: renders the hough space as a gray scale PIL image.
    Each cell is scaled so that the largest value becomes 255; cells that
    were cut out (-inf) are drawn black.
    hough_space: hough space image (list of rows); it is not modified
    Returns an RGB PIL image of size (row length, number of rows).
    """
    max_val = float(np.amax(hough_space))
    # With a maximum of 0 (no votes left) or -inf (everything cut out) there
    # is nothing to scale by; draw everything black instead of dividing by 0.
    can_scale = max_val > 0
    img = Image.new(mode="RGB", size=(len(hough_space[0]), len(hough_space)))
    pixels = img.load()
    for y, row in enumerate(hough_space):
        for x, votes in enumerate(row):
            if can_scale and votes != -math.inf:
                val = int(votes / max_val * 255)
            else:
                val = 0
            pixels[x, y] = (val, val, val)
    return img
def _draw_lines(image, all_max_params, resize_height):
    """
    Debug helper: draws the lines given by hough space parameters onto a
    resized copy of the image.
    image: cv2 image; its channels are swapped with COLOR_BGR2RGB
           NOTE(review): autocrop passes an array created from a PIL image,
           which is presumably already RGB - confirm the channel order.
    all_max_params: (alpha, d) tuple list of hough space parameters
    resize_height: height of the returned image; the width keeps the
                   aspect ratio
    Returns the resized PIL image with the lines drawn in.
    """
    img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    new_width = int(resize_height * img.size[0] / img.size[1])
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter under its current name.
    img = img.resize((new_width, resize_height), Image.LANCZOS)
    pixels = img.load()
    width, height = img.size
    dist = 0.5  # line thickness (distance image pixels to line coordinates)
    for (alpha, d) in all_max_params:
        # cos/sin are constant for one line, so they are computed per line
        cos_alpha = math.cos(alpha)
        sin_alpha = math.sin(alpha)
        for y in range(height):
            for x in range(width):
                val = (x * cos_alpha + y * sin_alpha) - d
                if -dist <= val <= dist:
                    pixels[x, y] = (0, 200, 255)
    return img
def get_intersect(a1, a2, b1, b2):
    """
    Computes where the line through a1 and a2 crosses the line through
    b1 and b2, using homogeneous coordinates.
    a1: [x, y] a point on the first line
    a2: [x, y] another point on the first line
    b1: [x, y] a point on the second line
    b2: [x, y] another point on the second line
    Returns the (x, y) point of intersection, or (inf, inf) when the third
    homogeneous coordinate is 0 (parallel lines).
    """
    points = np.vstack([a1, a2, b1, b2])
    # append a column of ones: (x, y) -> (x, y, 1)
    homogeneous = np.column_stack((points, np.ones(4)))
    first_line = np.cross(homogeneous[0], homogeneous[1])
    second_line = np.cross(homogeneous[2], homogeneous[3])
    # the cross product of two lines is their common point
    x, y, z = np.cross(first_line, second_line)
    if z != 0:
        return (x/z, y/z)
    return (float('inf'), float('inf'))
def insert_intersection(intersections, params1, params2, inters_pt):
    """
    Records in the dict intersections that the two lines meet in inters_pt.
    The entry is stored for both lines, each pointing to the other one.
    intersections: dict mapping line params to a list of
                   (other line params, intersection point) tuples;
                   modified in place
    params1: params of the first line
    params2: params of the second line
    inters_pt: the point in which both lines intersect
    """
    intersections.setdefault(params1, []).append((params2, inters_pt))
    intersections.setdefault(params2, []).append((params1, inters_pt))


def find_intersections(all_max_params, img_size, allowed_angle_diff=3):
    """
    Takes hough space parameters of found lines and returns the intersection
    points of all pairs of lines that cross inside the image at
    (approximately) a right angle.
    Returns a tuple (intersections_by_lines, all_intersections):
    a dict mapping each line (alpha, d) to a list of
    (other line, intersection point) tuples, and the list of all
    intersection points as rounded (x, y) tuples.
    all_max_params: (alpha, d) tuple list of hough space parameters
    img_size: the size (height, width, ...) of the image in which the
              intersections are to be found
    allowed_angle_diff: optional = 3; the largest allowed deviation from a
                        right angle in degrees
    """
    intersections_by_lines = {}
    all_intersections = []
    # Every unordered pair of lines is looked at once.
    for (alpha1, d1), (alpha2, d2) in itertools.combinations(all_max_params, 2):
        if alpha1 == alpha2 and d1 == d2:
            continue

        # The angle between two lines equals the angle between their normals
        # (alpha1 - alpha2). asin(cos(.)) is its distance to 90 degrees,
        # no matter which direction the lines are regarded in.
        cos_between = max(-1.0, min(1.0, math.cos(alpha1 - alpha2)))
        angle_diff = math.degrees(abs(math.asin(cos_between)))
        if angle_diff > allowed_angle_diff:
            continue

        # Solve  x*cos(alpha1) + y*sin(alpha1) = d1
        #        x*cos(alpha2) + y*sin(alpha2) = d2
        # directly (Cramer's rule). This is deterministic and avoids sampling
        # points on the lines, which needed a division by cos(alpha).
        cos1, sin1 = math.cos(alpha1), math.sin(alpha1)
        cos2, sin2 = math.cos(alpha2), math.sin(alpha2)
        det = cos1 * sin2 - sin1 * cos2
        if det == 0:  # lines are parallel
            continue
        x = (d1 * sin2 - d2 * sin1) / det
        y = (d2 * cos1 - d1 * cos2) / det

        # is the intersection outside of the image?
        if x < 0 or x >= img_size[1]:
            continue
        if y < 0 or y >= img_size[0]:
            continue

        inters = (round(x), round(y))
        all_intersections.append(inters)
        insert_intersection(intersections_by_lines, (alpha1, d1), (alpha2, d2), inters)
    return intersections_by_lines, all_intersections
def _draw_intersections(image, intersections):
    """
    Debug helper: marks every intersection point with a small green dot.
    image: PIL image; it is copied, the original stays untouched
    intersections: list of (x, y) points
    Returns the copy of the image with the points drawn in.
    """
    annotated = image.copy()
    draw = ImageDraw.Draw(annotated)
    pt_radius = 1
    color = (0, 255, 0)
    for inters in intersections:
        cx, cy = inters[0], inters[1]
        bounds = (cx - pt_radius, cy - pt_radius, cx + pt_radius, cy + pt_radius)
        draw.ellipse(bounds, fill=color, outline=color)
    return annotated
def get_n_connections(start, intersections_by_lines, n):
    """
    Collects all paths of n steps that begin at the line start and never
    visit start again, following the intersections between lines.
    start: the line the paths begin at
    intersections_by_lines: dict mapping each line to a list of
                            (other line, intersection point) tuples
    n: number of steps; for n <= 0 the only path is [start]
    Returns a list of paths, each path being a list of lines.
    """
    if n <= 0:
        return [[start]]
    return [
        [start] + tail
        for entry in intersections_by_lines[start]
        for tail in get_n_connections(entry[0], intersections_by_lines, n - 1)
        if start not in tail
    ]
def get_cycles(intersections_by_lines, n=4):
    """
    Finds closed paths of n lines in which consecutive lines intersect.
    Only cycles through the first line of intersections_by_lines are
    searched.
    intersections_by_lines: dict mapping each line to a list of
                            (other line, intersection point) tuples
    n: optional = 4; number of lines in a cycle
    Returns a list of cycles; each cycle is a list of n + 1 lines which
    begins and ends with the start line. Returns an empty list when there
    are no lines at all.
    """
    # Without any intersecting lines there is no start line to pick.
    if not intersections_by_lines:
        return []
    start = next(iter(intersections_by_lines))
    connections = get_n_connections(start, intersections_by_lines, n=n-1)
    cycles = []
    for connection in connections:
        last_vertex = connection[-1]
        # can we get back to the beginning?
        if start in [con[0] for con in intersections_by_lines[last_vertex]]:
            cycles.append(connection + [start])
    return cycles
def get_intersection_points_from_lines(rects_lines, intersections_by_lines):
    """
    Translates cycles of lines into the corner points of the shapes they
    enclose.
    rects_lines: list of cycles, each a list of lines in which consecutive
                 lines intersect
    intersections_by_lines: dict mapping each line to a list of
                            (other line, intersection point) tuples
    Returns one list of points per cycle: the intersection point of every
    pair of consecutive lines, followed by the first point again so that
    the outline is closed.
    """
    rects_points = []
    for cycle in rects_lines:
        corners = [
            [entry[1] for entry in intersections_by_lines[current] if entry[0] == following][0]
            for current, following in zip(cycle, cycle[1:])
        ]
        # repeat the first corner to close the outline
        corners.append(corners[0])
        rects_points.append(corners)
    return rects_points
def remove_small_rects(rects_points, image_shape, min_image_coverage=0.3):
    """
    Keeps only the rectangles that cover a large enough part of the image.
    The area of a rectangle is taken as the product of the lengths of its
    first two sides (points 0-1 and points 1-2).
    rects_points: list of rectangles, each a list of (x, y) points
    image_shape: shape of the image; the first two entries are multiplied
                 to get the image area
    min_image_coverage: optional = 0.3; smallest allowed ratio of
                        rectangle area to image area
    Returns the list of rectangles that are large enough.
    """
    min_area = min_image_coverage * (image_shape[0] * image_shape[1])

    def side_length(p, q):
        # euclidean distance between two points
        return math.sqrt((p[0]-q[0])**2 + (p[1]-q[1])**2)

    return [
        rect
        for rect in rects_points
        if side_length(rect[0], rect[1]) * side_length(rect[1], rect[2]) >= min_area
    ]
def _draw_rects(rects_points, image):
    """
    Debug helper: draws the outline of every rectangle in yellow.
    rects_points: list of rectangles, each a list of (x, y) points
    image: PIL image; it is copied, the original stays untouched
    Returns the copy of the image with the outlines drawn in.
    """
    canvas = image.copy()
    pen = ImageDraw.Draw(canvas)
    for corners in rects_points:
        outline = [(pt[0], pt[1]) for pt in corners]
        pen.line(outline, width=2, fill='yellow')
    return canvas
def crop_warp_image(image, rect):
    """
    Cuts the quadrilateral rect out of the image and warps it into an
    upright rectangle.
    image: cv2 image
    rect: four [x, y] corner points in any order
    Returns the warped cv2 image. Its width and height are the (truncated)
    lengths of the longer of the two opposite sides.
    """
    corners = np.array(rect, dtype="float32").reshape(4, 2)

    # Sort the corners into top-left, top-right, bottom-right, bottom-left:
    # x + y is smallest for top-left and largest for bottom-right,
    # y - x is smallest for top-right and largest for bottom-left.
    coord_sum = corners.sum(axis=1)
    coord_diff = np.diff(corners, axis=1)
    ordered_points = np.zeros((4, 2), dtype="float32")
    ordered_points[0] = corners[np.argmin(coord_sum)]
    ordered_points[1] = corners[np.argmin(coord_diff)]
    ordered_points[2] = corners[np.argmax(coord_sum)]
    ordered_points[3] = corners[np.argmax(coord_diff)]
    tl, tr, br, bl = ordered_points

    def side_length(p, q):
        # euclidean distance between two corners
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    maxWidth = max(int(side_length(br, bl)), int(side_length(tr, tl)))
    maxHeight = max(int(side_length(tr, br)), int(side_length(tl, bl)))

    # corners of the output image, in the same order as ordered_points
    dst_rect = np.array(
        [
            [0, 0],
            [maxWidth - 1, 0],
            [maxWidth - 1, maxHeight - 1],
            [0, maxHeight - 1],
        ],
        dtype="float32",
    )
    transform = cv2.getPerspectiveTransform(ordered_points, dst_rect)
    return cv2.warpPerspective(image, transform, (maxWidth, maxHeight))
def get_rect_bounding_box(rect):
    """
    Computes the axis-aligned bounding box of the given points.
    rect: list of (x, y) points
    Returns the four corners of the bounding box as [x, y] lists in the
    order (min x, min y), (max x, min y), (max x, max y), (min x, max y).
    """
    xs = [pt[0] for pt in rect]
    ys = [pt[1] for pt in rect]
    left, right = min(xs), max(xs)
    top, bottom = min(ys), max(ys)
    return [[left, top], [right, top], [right, bottom], [left, bottom]]
def _cv2ImageToPIL(cv2_image):
    """
    Wraps an image array into a PIL image.
    The channel order is left as it is (no BGR -> RGB conversion is done).
    cv2_image: the image array to convert
    """
    return Image.fromarray(cv2_image)
def _PILImageToCv2(pil_image):
cv2_image = np.asarray(pil_image)
return cv2_image
def autocrop(original_image, DEBUG=False):
    """
    Automatically crops an image to size of the flip chart in the given image,
    and returns a cropped PIL image.
    original_image: the PIL image to crop
    DEBUG: optional = False; when set, intermediate images are written to
           the directory debug_out/ (created if missing)
    Returns a tuple (cropped_image, best_rect, bbox, intersections_as_list):
    cropped_image: the cropped PIL image; the unchanged input image when no
                   rectangle was found or the cropping failed
    best_rect: the four (x, y) corners of the rectangle used, in coordinates
               of the original image, or None
    bbox: the bounding box of best_rect, or None
    intersections_as_list: all line intersections found as [x, y] lists in
                           coordinates of the original image
    """
    resize_height = 300
    blur_params = [35,15,40] # size, color, space
    canny_params = [80,250]
    num_hough_params = 50
    hough_rm_radius = 20
    line_angle_diff = 3
    min_image_coverage = 0.3

    original_image = _PILImageToCv2(original_image)
    if DEBUG:
        # saving the debug images fails when the directory does not exist
        os.makedirs('debug_out', exist_ok=True)

    # shrink image to a smaller size to speed up autocropping
    ratio = original_image.shape[0] / float(resize_height)
    image_small = imutils.resize(original_image, height=resize_height)

    edge_image = preprocess_image(image_small, blur_params=blur_params, canny_params=canny_params)
    if DEBUG:
        cv2.imwrite('debug_out/edges.png', edge_image)

    hough_space = hough_transform(edge_image)
    all_max_params, hough_space = get_max_params(hough_space, num_params=num_hough_params, rm_radius=hough_rm_radius)
    if DEBUG:
        hough_space_image = _draw_hough_space(hough_space)
        hough_space_image.save("debug_out/hough.png")
        image_lines = _draw_lines(original_image, all_max_params, resize_height)

    intersections_by_lines, all_intersections = find_intersections(all_max_params, image_small.shape, allowed_angle_diff=line_angle_diff)
    if DEBUG:
        image_lines = _draw_intersections(image_lines, all_intersections)

    # Without any intersection there is no line to start the search for
    # rectangles from; continue with an empty result instead of failing.
    if intersections_by_lines:
        rects_lines = get_cycles(intersections_by_lines)
    else:
        rects_lines = []
    rects_points = get_intersection_points_from_lines(rects_lines, intersections_by_lines)
    rects_points = remove_small_rects(rects_points, image_small.shape, min_image_coverage=min_image_coverage)
    if DEBUG:
        image_lines = _draw_rects(rects_points, image_lines)
        image_lines.save("debug_out/line.png")

    cropped_image = None
    best_rect = None
    bbox = None
    # TODO: find best rectangle
    if rects_points:
        # scale the corners back to the original image; the fifth point only
        # repeats the first one
        candidate = [(round(pt[0]*ratio), round(pt[1]*ratio)) for pt in rects_points[0]]
        candidate = candidate[:4]
        try:
            cropped_image = _cv2ImageToPIL(crop_warp_image(original_image, candidate))
        except Exception:
            # Deliberate best effort: when the rectangle cannot be warped,
            # the uncropped image is returned below.
            cropped_image = None
        else:
            best_rect = candidate
            bbox = get_rect_bounding_box(candidate)
    if cropped_image is None:
        cropped_image = _cv2ImageToPIL(original_image)

    intersections_as_list = [[round(i[0]*ratio), round(i[1]*ratio)] for i in all_intersections]
    return cropped_image, best_rect, bbox, intersections_as_list
if __name__ == "__main__":
    # Manual test run: crops one example image with debug output enabled,
    # saves the result and prints what was found.
    # Another example image: 'example_imgs/bill.png'
    img = Image.open('example_imgs/image4.jpg')
    cropped_image, rect, bbox, intersections = autocrop(img, DEBUG=True)
    cropped_image.save('debug_out/cropped.png')
    for result in (rect, bbox, intersections):
        print(result)