-
Notifications
You must be signed in to change notification settings - Fork 1
/
epis.py
369 lines (290 loc) · 11.9 KB
/
epis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
from cupyx.scipy.sparse import identity, csr_matrix, hstack, vstack
from cupyx.scipy.sparse.linalg import spsolve
from pathlib import Path
import numpy as np
import cupy as cp
import cv2
import sys
import random
def affinity(pi_lab: np.ndarray, pj_lab: np.ndarray, p_kappa: float, p_sigma: float) -> float:
"""
Compute the affinity between two CIE-LAB pixels. In other words, how similar they are.
A higher affinity indicates more similarity, vice-versa.
Parameters:
pi_lab - a pixel of shape (3)
pj_lab - a pixel of shape (3)
p_kappa - ?
p_sigma - lightness weight
Output:
affinity - the affinity between CIE-LAB pixels p_i and p_j, value ranges from [0.0, 1.0]
"""
pi_lab[0] = pi_lab[0] * p_kappa
pj_lab[0] = pj_lab[0] * p_kappa
return np.exp(-p_sigma * np.linalg.norm(pi_lab - pj_lab))
def compute_pairs(
img_lab: np.ndarray,
p_h: int,
p_kappa: float, p_sigma: float
) -> (cp.ndarray, cp.ndarray, cp.ndarray, cp.ndarray):
"""
Computes several helpful arrays used in the efficient construction of the global matrix (M) used in compute_L().
This is orders of magnitude faster to compute than an iterative approach to constructing the sparse global matrix.
Pixel_i is the kth pixel in the image. Pixel_j is a pixel within Pixel_i's neighborhood.
val_pos[k] = affinity(pair1[k], pair2[k])
val_neg[k] = -1 * affinity(pair1[k], pair2[k])
TODO: compute pair1, pair2, val_pos, val_neg using matrix vectorization instead of iteratively.
Parameters:
img_lab - a np.ndarray of shape (height, width, 3) where dimension [2] is a CIE-LAB pixel.
p_h - the size of local neighborhood used in creating the M matrix
p_kappa - ?
p_sigma - lightness weight in affinity function
Output:
pair1 - an array of absolute positions of pixel_i corresponding to pair2
pair2 - an array of absolute positions of pixel_j corresponding to pair1
val_pos - an array of affinities between pixel_i and pixel_j corresponding to pair1 and pair2
val_neg - an array of negative affinities between pixel_i and pixel_j corresponding to pair1 and pair2
"""
n = img_lab.shape[0] * img_lab.shape[1]
m_l = n * (p_h ** 2)
pair1 = cp.zeros((m_l), dtype=cp.uint64)
pair2 = cp.zeros((m_l), dtype=cp.uint64)
val_pos = cp.zeros((m_l), dtype=cp.float64)
val_neg = cp.zeros((m_l), dtype=cp.float64)
k = 0
for y1 in range(0, img_lab.shape[0]):
for x1 in range(0, img_lab.shape[1]):
pixel_i = img_lab[y1, x1]
i = y1 * img_lab.shape[1] + x1
for y2 in range(y1, y1 + p_h):
for x2 in range(x1, x1 + p_h):
if y2 < img_lab.shape[0] and x2 < img_lab.shape[1]:
pixel_j = img_lab[y2, x2]
j = y2 * img_lab.shape[1] + x2
pair1[k] = i
pair2[k] = j
result = affinity(pixel_i, pixel_j, p_kappa, p_sigma)
val_pos[k] = result
val_neg[k] = -result
k += 1
return (pair1, pair2, val_pos, val_neg)
def compute_max_grad(img: np.ndarray, pair1: np.ndarray, pair2: np.ndarray) -> np.ndarray:
"""
Computes the maximum gradient along the lines between pixel_i and pixel_j represented by pair1 and pair2 respectively.
Parameters:
img - a np.ndarray of shape (height, width, 3) where dimension [2] is a RGB pixel.
pair1 - an array of absolute positions of pixel_i corresponding to pair2
pair2 - an array of absolute positions of pixel_j corresponding to pair1
Output:
max_gradients - a np.ndarray of shape (len(pair1), 1) where dimension [1] is the maximum gradient along
the path between pixel_i and pixel_j.
"""
img = img.copy()
img = img / 255.0
gradients_x = np.mean(cv2.Sobel(src=img, ddepth=-1, dx=1, dy=0, ksize=3), 2)
gradients_y = np.mean(cv2.Sobel(src=img, ddepth=-1, dx=0, dy=1, ksize=3), 2)
gradients = np.sqrt(np.square(gradients_x) + np.square(gradients_y))
max_gradients = np.zeros_like(pair1)
i = 0
for (p1, p2) in zip(pair1, pair2):
x1 = int(p1 % img.shape[1])
y1 = int(p1 // img.shape[1])
x2 = int(p2 % img.shape[1])
y2 = int(p2 // img.shape[1])
if x2 > x1 and y2 > y1:
xstep = 1
ystep = 1
elif x2 > x1 and y2 < y1:
xstep = 1
ystep = -1
elif x2 < x1 and y2 > y1:
xstep = -1
ystep = 1
else:
xstep = -1
ystep = -1
dx = x1
dy = y1
max_gradient_along_line = 0.0
j = 0
while dx < x2 and dy < y2:
if j > 5:
break
if gradients[dy, dx] > max_gradient_along_line:
max_gradient_along_line = gradients[dy, dx]
dx += xstep
dy += ystep
j += 1
max_gradients[i] = max_gradient_along_line
i += 1
return max_gradients
def compute_L(
img: np.ndarray,
p_h: int,
p_kappa: float, p_sigma: float, p_eta: float, preserve_edges: bool
) -> csr_matrix:
"""
Compute the local sparsity matrix (L) used in the split-bergman algorithm.
Parameters:
img - a np.ndarray of shape (height, width, 3) where dimension [2] is a RGB pixel.
p_h - the size of local neighborhood for computing pairs
p_kappa - ?
p_sigma - lightness weight used in affinity()
p_eta - weight for maximum gradient
preserve_edges - controls whether to preserve edges
Output:
L - a csr_matrix that is an identity of M matrices.
"""
img = cp.asnumpy(img)
n = img.shape[0] * img.shape[1]
m_l = n * (p_h ** 2)
img_lab = cp.array(cv2.cvtColor(img, cv2.COLOR_BGR2LAB), dtype=cp.float64)
img_lab[:,:,0] = (img_lab[:,:,0] / 100.0) * 10.0
img_lab[:,:,1] = (img_lab[:,:,1] / 220.0) * 120.0
img_lab[:,:,2] = (img_lab[:,:,2] / 220.0) * 120.0
pair1, pair2, val_pos, val_neg = compute_pairs(img_lab, p_h, p_kappa, p_sigma)
if preserve_edges:
max_gradients = cp.array(compute_max_grad(img, cp.asnumpy(pair1), cp.asnumpy(pair2)))
max_gradients = np.exp((max_gradients * p_eta) * -p_sigma)
val_pos = cp.maximum(val_pos, max_gradients)
val_neg = cp.minimum(val_neg, -1 * max_gradients)
rows = cp.asarray(np.arange(start=0, stop=m_l, dtype=np.uint64), dtype=cp.uint64)
rows = cp.hstack([rows, rows])
cols = cp.hstack([pair1, pair2])
vals = cp.hstack([val_pos, val_neg])
M = csr_matrix((vals, (rows, cols)), shape=(m_l, n), dtype=cp.float64)
O = csr_matrix((m_l, n), dtype=cp.float64)
return vstack(
[
hstack([M, O, O]),
hstack([O, M, O]),
hstack([O, O, M])
],
)
def rgb2z(img: np.ndarray) -> np.ndarray:
"""
Convert a rgb image of shape (height, width, 3) into an array of shape (height * width * 3)
where each of the R, G, and B values are concatenated to form a single 1-D array.
Parameters:
img - a np.ndarray of shape (height, width, 3)
Output:
z - a np.ndarray of shape (height * width * 3)
"""
z_r = img[:,:,0].flatten()
z_g = img[:,:,1].flatten()
z_b = img[:,:,2].flatten()
return cp.concatenate((z_r.T, z_g.T, z_b.T)).T
def z2rgb(z: np.ndarray, height, width):
"""
Convert an array of shape (height * width * 3) that is the concatenation of the R, G, and B values of a image
back into an image of shape (height, width, 3)
Parameters:
z - a np.ndarray of shape (height * width * 3)
height - the original height of the image
width - the original width of the image
Output:
img - a np.ndarray of shape (height, width, 3)
"""
assert z.shape[0] == (height * width * 3)
length = z.shape[0] // 3
z_r = z[0:length]
z_g = z[length:2*length]
z_b = z[2*length:]
img = cp.zeros((height, width, 3), dtype=np.uint8)
img[:,:,0] = z_r.reshape((height, width))
img[:,:,1] = z_g.reshape((height, width))
img[:,:,2] = z_b.reshape((height, width))
return img
def shrink(y, p_gamma):
"""
???
Parameters:
y - ?
p_gamma - the regularization term weight
Output:
d - ?
"""
y_norm = cp.linalg.norm(y)
return (y / y_norm) * max(y_norm - p_gamma, 0)
def image_flatten(
img: np.ndarray,
p_iter: int, p_h: int,
p_alpha: float, p_epsilon: float, p_theta: float, p_lambda: float, p_kappa: float, p_sigma: float, p_eta: float,
preserve_edges: bool
) -> np.ndarray:
"""
L1 Flatten an image using the provided parameters.
Parameters:
img - a np.ndarray of shape (height, width, 3)
p_iter - the number of iterations for the L1 energy optimization algorithm
p_h - the size of local neighborhood used in creating the M matrix
p_alpha - the local sparseness weight
p_epsilon - a threshold to control the L1 energy optimization algorithm to stop
p_theta - the image approximation term
p_lambda - the regularization term weight (controls how "flat" the image should become)
p_kappa - ?
p_sigma - lightness weight in affinity function
p_eta - weight for maximum gradient (controls the prevalence of the gradients used for edge-preserving)
preserve_edges - controls whether edge preserving is applied
Output:
img_flat - a np.ndarray of shape (height, width, 3) that is the flattened version of `img`
"""
zin = rgb2z(cp.asarray(img))
z = cp.zeros((3, zin.shape[0]), dtype=cp.float64)
z[0] = zin
L = p_alpha * compute_L(z2rgb(z[0], img.shape[0], img.shape[1]), p_h, p_kappa, p_sigma, p_eta, preserve_edges)
LT = L.T
LTL = LT @ L
d = cp.zeros((3, L.shape[0]), dtype=cp.float64)
b = cp.zeros((3, L.shape[0]), dtype=cp.float64)
I = identity(LTL.shape[0], dtype=cp.float64, format='csr')
A = p_theta * I + p_lambda * LTL
i = 0
print("\n---- OPTIMIZATION ------------------")
while i < p_iter and cp.linalg.norm(z[1] - z[0]) > p_epsilon:
print("Iteration {}".format(i))
print("Difference: {}\n".format(cp.linalg.norm(z[1] - z[0])))
v = p_theta * zin + cp.asarray(p_lambda * LT @ (d[1] - b[1]))
z[2] = spsolve(A, v)
d[2] = shrink(L @ z[2] + b[1], 1.0 / p_lambda)
b[2] = b[1] + L @ z[2] - d[2]
for j in range(0, 2):
z[j] = z[j+1]
d[j] = d[j+1]
b[j] = b[j+1]
i = i + 1
return z2rgb(z[2], img.shape[0], img.shape[1])
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: python epis.py [filepath of image]")
sys.exit(1)
filepath = Path(sys.argv[1])
if not filepath.exists():
print("Filepath `{}` does not exist.".format(filepath))
sys.exit(1)
if filepath.is_dir():
print("Filepath `{}` is a directory. Filepath must be a file.".format(filepath))
sys.exit(1)
if not filepath.suffix in [".jpg", ".jpeg", ".png", ".webp"]:
print("Filepath `{}` is not a supported filetype.".format(filepath))
sys.exit(1)
img = cv2.imread(cv2.samples.findFile(str(filepath)))
assert img is not None, "File `{}` could not be read".format(filepath)
# default parameters
p_iter = 4
p_h = 5
p_alpha = 20.0
p_epsilon = 0.01
p_theta = 50.0
p_lambda = 128.0
p_kappa = 1.0
p_sigma = 0.9
p_eta = 0.1
preserve_edges = False
img_flat = cp.asnumpy(image_flatten(
img, p_iter, p_h, p_alpha, p_epsilon, p_theta, p_lambda, p_kappa, p_sigma, p_eta, preserve_edges
))
if preserve_edges:
new_stem = filepath.stem + "_smoothed"
else:
new_stem = filepath.stem + "_flattened"
cv2.imwrite(str(filepath.with_stem(new_stem)), img_flat)