# backend/photo_log/autocrop/autocrop.py

import cv2
import imutils
import random
import numpy as np
import math
from PIL import Image, ImageDraw
import itertools
import pytesseract
def preprocess_image(image, blur_params=(20, 20, 25), canny_params=(25, 250)):
    """
    Turns the image into grayscale, applies a bilateral filter
    to smooth out unimportant parts of the image, and returns
    the Canny filtered result.

    image: cv2.imread image (converted to grayscale with cv2.COLOR_BGR2GRAY)
    blur_params: optional; sequence of the three values passed to
                 cv2.bilateralFilter after the image (d, sigmaColor, sigmaSpace)
    canny_params: optional; sequence of the two thresholds passed to cv2.Canny
    """
    # The defaults are tuples rather than lists so that no mutable object is
    # shared between calls; the values are only ever read by index.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.bilateralFilter(gray, blur_params[0], blur_params[1], blur_params[2])
    edged = cv2.Canny(gray, canny_params[0], canny_params[1])
    return edged
def hough_transform(edged):
    """
    Returns an image representing the hough space of the edge image.

    The result is a list of 314 rows (one per angle index), each a list of
    2 * ceil(image diagonal) vote counters. Angle index a stands for the angle
    a / 200 rad, and a distance d is stored in column round(d) + ceil(diagonal).

    edged: Canny filtered cv2 image (2D array; every pixel > 0 is an edge pixel)
    """
    # NOTE(review): 314 angle indexes at a / 200 rad only cover 0 .. 1.565 rad;
    # confirm that this is the intended angle range.
    num_angles = 157 * 2
    max_d = np.sqrt(edged.shape[0]**2 + edged.shape[1]**2)
    d_offset = math.ceil(max_d)
    hough_space = [[0] * (2 * d_offset) for _ in range(num_angles)]

    # The trigonometric values only depend on the angle index, so compute them
    # once instead of once per edge pixel.
    trig_table = [
        (math.cos(alpha / 2.0 / 100.0), math.sin(alpha / 2.0 / 100.0))
        for alpha in range(num_angles)
    ]

    # Only edge pixels vote; np.nonzero yields their (row, column) coordinates.
    edge_rows, edge_cols = np.nonzero(np.asarray(edged) > 0)
    for y, x in zip(edge_rows.tolist(), edge_cols.tolist()):
        for votes, (cos_a, sin_a) in zip(hough_space, trig_table):
            d = x * cos_a + y * sin_a
            votes[round(d) + d_offset] += 1
    return hough_space
# TODO: also cut out the maximum on "the other side" of the Hough space (wrap-around for negative indexes)
def get_max_params(hough_space, num_params, rm_radius=7):
    """
    Iterates over the maxima of the hough space image.
    After each maximum found, a circle of the radius rm_radius is "cut out"
    of the hough space image by setting its cells to -inf.
    The search stops early once every cell has been cut out.

    Returns a tuple (all_max_params, hough_space): the list of (alpha, d)
    tuples of the maxima found, and the hough space that was passed in
    (modified in place).

    hough_space: hough space image (list of rows, modified in place)
    num_params: number of the maxima to be found
    rm_radius: optional = 7; the radius to be cut out around the maximum found
    """
    # Work on one float array instead of rebuilding it from the nested list
    # for every maximum; every cut is mirrored into both structures.
    hough_array = np.array(hough_space, dtype=float)
    num_rows, num_cols = hough_array.shape

    all_max_params = []
    for _ in range(num_params):
        flat_index = hough_array.argmax()
        row, col = (int(i) for i in np.unravel_index(flat_index, hough_array.shape))
        # Everything has been cut out already: there is no maximum left.
        if hough_array[row, col] == -math.inf:
            break

        alpha = row / 2.0 / 100.0
        d = col - num_cols / 2.0
        all_max_params.append((alpha, d))

        # Cut out the circle around the maximum, clipped to the array bounds.
        first_row = max(row - rm_radius, 0)
        last_row = min(row + rm_radius, num_rows - 1)
        first_col = max(col - rm_radius, 0)
        last_col = min(col + rm_radius, num_cols - 1)
        for r in range(first_row, last_row + 1):
            for c in range(first_col, last_col + 1):
                if math.sqrt((r - row)**2 + (c - col)**2) <= rm_radius:
                    hough_space[r][c] = -math.inf
                    hough_array[r, c] = -math.inf
    return all_max_params, hough_space
def _draw_hough_space(hough_space):
    """
    Renders the hough space as a grayscale PIL image (debug helper).
    Every cell is scaled so that the largest value becomes 255.
    Cells that were cut out (-inf) are reset to 0 in hough_space itself.

    hough_space: hough space image (list of rows, modified in place)
    """
    max_val = float(np.amax(hough_space))
    width = len(hough_space[0])
    img = Image.new(mode="RGB", size=(width, len(hough_space)))
    pixels = img.load()
    for y, row in enumerate(hough_space):
        for x in range(width):
            if row[x] == -math.inf:
                row[x] = 0
            # Without any positive value there is nothing to scale by
            # (dividing by a maximum of 0 would raise); draw black instead.
            val = int(row[x] / max_val * 255) if max_val > 0 else 0
            pixels[x, y] = (val, val, val)
    return img
def _draw_lines(image, all_max_params, resize_height):
    """
    Draws the lines given by their hough space parameters onto a resized
    copy of the image and returns it as PIL image (debug helper).

    image: cv2 image (channels are swapped with cv2.COLOR_BGR2RGB before drawing)
    all_max_params: (alpha, d) tuple list of hough space parameters
    resize_height: height of the returned image; the width keeps the aspect ratio
    """
    img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    new_width = int(resize_height * img.size[0] / img.size[1])
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    img = img.resize((new_width, resize_height), Image.LANCZOS)
    pixels = img.load()
    dist = 0.5  # line thickness (distance image pixels to line coordinates)
    for (alpha, d) in all_max_params:
        # constant per line, so computed outside of the pixel loops
        cos_a = math.cos(alpha)
        sin_a = math.sin(alpha)
        for y in range(img.size[1]):
            for x in range(img.size[0]):
                val = (x * cos_a + y * sin_a) - d
                if -dist <= val <= dist:
                    pixels[x, y] = (0, 200, 255)
    return img
def get_intersect(a1, a2, b1, b2):
    """
    Computes where the line through a1 and a2 crosses the line through
    b1 and b2, using homogeneous coordinates.
    Returns the point as (x, y) tuple, or (inf, inf) if the third
    homogeneous coordinate of the result is 0 (parallel lines).

    a1: [x, y] a point on the first line
    a2: [x, y] another point on the first line
    b1: [x, y] a point on the second line
    b2: [x, y] another point on the second line
    """
    points = np.vstack((a1, a2, b1, b2))
    # append a column of ones to get homogeneous coordinates
    homogeneous = np.column_stack((points, np.ones(4)))
    # the cross product of two homogeneous points is the line through them
    line_a = np.cross(homogeneous[0], homogeneous[1])
    line_b = np.cross(homogeneous[2], homogeneous[3])
    # the cross product of two lines is their point of intersection
    px, py, pz = np.cross(line_a, line_b)
    if pz != 0:
        return (px / pz, py / pz)
    return (float('inf'), float('inf'))
def insert_intersection(intersections, params1, params2, inters_pt):
    """
    Registers the intersection point of two lines in the intersections dict,
    once under each of the two lines.

    intersections: dict mapping line parameters to a list of
                   (other line parameters, intersection point) tuples
    params1: (alpha, d) parameters of the first line
    params2: (alpha, d) parameters of the second line
    inters_pt: the point in which both lines intersect
    """
    intersections.setdefault(params1, []).append((params2, inters_pt))
    intersections.setdefault(params2, []).append((params1, inters_pt))


def find_intersections(all_max_params, img_size, allowed_angle_diff=3):
    """
    Takes hough space parameters of found lines and finds the points in which
    two (nearly) perpendicular lines intersect inside of the image.

    Returns a tuple (intersections_by_lines, all_intersections):
    a dict mapping each line to a list of (other line, intersection point)
    tuples, and the list of all intersection points as rounded (x, y) tuples.

    all_max_params: (alpha, d) tuple list of hough space parameters
    img_size: the size of the image in which the intersections are to be
              found; index 0 is the height (y range), index 1 the width (x range)
    allowed_angle_diff: optional = 3; how many degrees the angle between two
                        lines may differ from 90 degrees
    """
    intersections_by_lines = {}
    all_intersections = []

    # Every unordered pair of lines is looked at exactly once.
    for (alpha1, d1), (alpha2, d2) in itertools.combinations(all_max_params, 2):
        if alpha1 == alpha2 and d1 == d2:
            continue

        # The angle between the two lines equals the angle between their
        # normals, i.e. the difference of the alphas.
        angle = math.degrees(abs(alpha1 - alpha2) % math.pi)
        if abs(angle - 90) > allowed_angle_diff:
            continue

        # Each line satisfies x * cos(alpha) + y * sin(alpha) = d. Solving both
        # equations directly (Cramer's rule) is deterministic and needs no
        # division by cos(alpha), unlike sampling random points on the lines.
        cos1, sin1 = math.cos(alpha1), math.sin(alpha1)
        cos2, sin2 = math.cos(alpha2), math.sin(alpha2)
        det = cos1 * sin2 - sin1 * cos2
        if det == 0:  # lines are parallel
            continue
        x = (d1 * sin2 - d2 * sin1) / det
        y = (d2 * cos1 - d1 * cos2) / det

        # is the intersection outside of the image?
        if x < 0 or x >= img_size[1]:
            continue
        if y < 0 or y >= img_size[0]:
            continue

        inters = (round(x), round(y))
        all_intersections.append(inters)
        insert_intersection(intersections_by_lines, (alpha1, d1), (alpha2, d2), inters)
    return intersections_by_lines, all_intersections
def _draw_intersections(image, intersections):
    """
    Returns a copy of the PIL image with a small green dot drawn on
    every intersection point (debug helper).

    image: PIL image; it is copied, not modified
    intersections: list of points, indexable as point[0] = x, point[1] = y
    """
    marked = image.copy()
    draw = ImageDraw.Draw(marked)
    green = (0, 255, 0)
    pt_radius = 1
    for point in intersections:
        x, y = point[0], point[1]
        box = (x - pt_radius, y - pt_radius, x + pt_radius, y + pt_radius)
        draw.ellipse(box, fill=green, outline=green)
    return marked
def get_n_connections(start, intersections_by_lines, n):
    """
    Returns all paths of n steps that begin at the line start and
    never come back to it, following the intersections between lines.
    Each path is a list of lines; for n <= 0 the only path is [start].

    start: the line (dict key) to begin with
    intersections_by_lines: dict mapping a line to a list of entries
                            whose first element is an intersecting line
    n: the number of steps to take
    """
    if n <= 0:
        return [[start]]
    return [
        [start] + tail
        for entry in intersections_by_lines[start]
        for tail in get_n_connections(entry[0], intersections_by_lines, n - 1)
        if start not in tail
    ]
def get_cycles(intersections_by_lines, n=4):
    """
    Returns all cycles of n lines that begin and end at the first line of
    intersections_by_lines. Each cycle is a list of n + 1 lines in which
    the first and the last entry are the same line.
    Returns an empty list if there are no lines at all.

    intersections_by_lines: dict mapping a line to a list of
                            (intersecting line, intersection point) tuples
    n: optional = 4; the number of lines per cycle
    """
    # Without any intersections there is no line to start from.
    if not intersections_by_lines:
        return []

    # Only cycles through the first line are searched.
    start = next(iter(intersections_by_lines))
    cycles = []
    for connection in get_n_connections(start, intersections_by_lines, n=n-1):
        last_vertex = connection[-1]
        # can we get back to the beginning?
        if any(con[0] == start for con in intersections_by_lines[last_vertex]):
            cycles.append(connection + [start])
    return cycles
def get_intersection_points_from_lines(rects_lines, intersections_by_lines):
    """
    Translates rectangles given as sequences of lines into sequences of
    points: for each pair of consecutive lines the point in which they
    intersect is looked up. The first point is repeated at the end.

    rects_lines: list of line sequences
    intersections_by_lines: dict mapping a line to a list of
                            (intersecting line, intersection point) tuples
    """
    rects_points = []
    for lines in rects_lines:
        corners = []
        for current_line, next_line in zip(lines, lines[1:]):
            matches = [con for con in intersections_by_lines[current_line] if con[0] == next_line]
            corners.append(matches[0][1])
        # repeat the first corner at the end
        corners.append(corners[0])
        rects_points.append(corners)
    return rects_points
def remove_small_rects(rects_points, image_shape, min_image_coverage=0.3):
    """
    Returns only those rectangles whose area is at least
    min_image_coverage times the area of the image.
    The area of a rectangle is the product of the lengths of its
    first two sides (points 0 to 1 and points 1 to 2).

    rects_points: list of rectangles, each a list of (x, y) points
    image_shape: shape of the image; index 0 and 1 are multiplied to get its area
    min_image_coverage: optional = 0.3; the minimum share of the image area
    """
    min_area = min_image_coverage * (image_shape[0] * image_shape[1])

    def side_length(p, q):
        return math.sqrt((p[0]-q[0])**2 + (p[1]-q[1])**2)

    def rect_area(rect):
        return side_length(rect[0], rect[1]) * side_length(rect[1], rect[2])

    return [rect for rect in rects_points if rect_area(rect) >= min_area]
def _draw_rects(rects_points, image):
    """
    Returns a copy of the PIL image with the outline of every
    rectangle drawn in yellow (debug helper).

    rects_points: list of rectangles, each a list of points
    image: PIL image; it is copied, not modified
    """
    canvas = image.copy()
    pen = ImageDraw.Draw(canvas)
    for points in rects_points:
        outline = [(p[0], p[1]) for p in points]
        pen.line(outline, width=2, fill='yellow')
    return canvas
def crop_warp_image(image, rect):
    """
    Cuts the area given by the four points of rect out of the image and
    warps it into an upright rectangle.

    image: cv2 image
    rect: four (x, y) points in any order
    """
    corners = np.array(rect, dtype="float32").reshape(4, 2)

    # Order the corners as top left, top right, bottom right, bottom left:
    # x + y is smallest for the top left and largest for the bottom right
    # corner, y - x is smallest for the top right and largest for the
    # bottom left corner.
    coord_sum = corners.sum(axis=1)
    coord_diff = np.diff(corners, axis=1)
    tl = corners[np.argmin(coord_sum)]
    tr = corners[np.argmin(coord_diff)]
    br = corners[np.argmax(coord_sum)]
    bl = corners[np.argmax(coord_diff)]
    ordered_points = np.array([tl, tr, br, bl], dtype="float32")

    def distance(p, q):
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # the target size is given by the longer one of two opposite sides
    max_width = max(int(distance(br, bl)), int(distance(tr, tl)))
    max_height = max(int(distance(tr, br)), int(distance(tl, bl)))

    dst_rect = np.array(
        [
            [0, 0],
            [max_width - 1, 0],
            [max_width - 1, max_height - 1],
            [0, max_height - 1],
        ],
        dtype="float32",
    )
    transform = cv2.getPerspectiveTransform(ordered_points, dst_rect)
    return cv2.warpPerspective(image, transform, (max_width, max_height))
def get_rect_bounding_box(rect):
    """
    Returns the axis-aligned bounding box of the points of rect as a list of
    its four [x, y] corners: top left, top right, bottom right, bottom left
    (with y growing downwards).

    rect: list of (x, y) points
    """
    xs = [pt[0] for pt in rect]
    ys = [pt[1] for pt in rect]
    left, right = min(xs), max(xs)
    top, bottom = min(ys), max(ys)
    return [[left, top], [right, top], [right, bottom], [left, bottom]]
def _cv2ImageToPIL(cv2_image):
    """
    Wraps the image array into a PIL image.
    The channels are passed on as they are (no color conversion).

    cv2_image: image array
    """
    return Image.fromarray(cv2_image)
def _PILImageToCv2(pil_image):
    """
    Returns the PIL image as numpy array (via np.asarray).
    The channels are passed on as they are (no color conversion).

    pil_image: PIL image
    """
    return np.asarray(pil_image)
def autocrop(original_image, DEBUG=False):
    """
    Automatically crops an image to the size of the flip chart in the given
    image. If no suitable rectangle is found (or cropping fails), the
    original image is returned uncropped.

    Returns a tuple (cropped_image, best_rect, bbox, intersections_as_list):
    cropped_image: the cropped (or original) image as PIL image
    best_rect: the four (x, y) corners of the rectangle used, or None
    bbox: the bounding box of best_rect as four [x, y] corners, or None
    intersections_as_list: all line intersections found as [x, y] lists
    All coordinates refer to the original (not the shrunken) image.

    original_image: the PIL image to crop
    DEBUG: optional = False; if set, the intermediate results are
           written as images into the directory debug_out
    """
    resize_height = 300
    blur_params = [35, 15, 40]  # size, color, space
    canny_params = [80, 250]
    num_hough_params = 50
    hough_rm_radius = 20
    line_angle_diff = 3
    min_image_coverage = 0.3

    if DEBUG:
        # The debug images cannot be written if the directory is missing.
        import os
        os.makedirs('debug_out', exist_ok=True)

    original_image = _PILImageToCv2(original_image)

    # shrink image to a smaller size to speed up autocropping
    ratio = original_image.shape[0] / float(resize_height)
    image_small = imutils.resize(original_image, height=resize_height)

    edge_image = preprocess_image(image_small, blur_params=blur_params, canny_params=canny_params)
    if DEBUG:
        cv2.imwrite('debug_out/edges.png', edge_image)

    hough_space = hough_transform(edge_image)
    all_max_params, hough_space = get_max_params(hough_space, num_params=num_hough_params, rm_radius=hough_rm_radius)
    if DEBUG:
        hough_space_image = _draw_hough_space(hough_space)
        hough_space_image.save("debug_out/hough.png")
        image_lines = _draw_lines(original_image, all_max_params, resize_height)

    intersections_by_lines, all_intersections = find_intersections(all_max_params, image_small.shape, allowed_angle_diff=line_angle_diff)
    if DEBUG:
        image_lines = _draw_intersections(image_lines, all_intersections)

    # Without any intersection there is no line to start a cycle search from.
    if intersections_by_lines:
        rects_lines = get_cycles(intersections_by_lines)
    else:
        rects_lines = []
    rects_points = get_intersection_points_from_lines(rects_lines, intersections_by_lines)
    rects_points = remove_small_rects(rects_points, image_small.shape, min_image_coverage=min_image_coverage)
    if DEBUG:
        image_lines = _draw_rects(rects_points, image_lines)
        image_lines.save("debug_out/line.png")

    cropped_image = None
    best_rect = None
    bbox = None
    # TODO: find best rectangle
    if rects_points:
        # scale the corners back to the original image; the fifth point
        # only repeats the first one
        candidate = [(round(pt[0]*ratio), round(pt[1]*ratio)) for pt in rects_points[0]]
        candidate = candidate[:4]
        try:
            cropped_image = _cv2ImageToPIL(crop_warp_image(original_image, candidate))
        except Exception:
            # Deliberate best effort: if warping fails for whatever reason,
            # fall back to the uncropped image below.
            cropped_image = None
        else:
            best_rect = candidate
            bbox = get_rect_bounding_box(candidate)
    if cropped_image is None:
        cropped_image = _cv2ImageToPIL(original_image)

    intersections_as_list = [[round(i[0]*ratio), round(i[1]*ratio)] for i in all_intersections]
    return cropped_image, best_rect, bbox, intersections_as_list
if __name__ == "__main__":
    # Manual check: crop a sample image with debug output enabled, save the
    # result and print the rectangle, its bounding box and the intersections.
    # (another sample image: 'example_imgs/bill.png')
    sample = Image.open('example_imgs/image4.jpg')
    result_image, rect, bbox, intersections = autocrop(sample, DEBUG=True)
    result_image.save('debug_out/cropped.png')
    for value in (rect, bbox, intersections):
        print(value)