/* Fixed-point bilinear interpolation parameters: weights are stored as Q11
 * integers, so INTER_RESIZE_COEF_SCALE == (1 << INTER_RESIZE_COEF_BITS). */
90 #define INTER_RESIZE_COEF_BITS 11
91 #define INTER_RESIZE_COEF_SCALE (1 << 11)
/* Maximum filter tap count supported by the fixed-size row caches below. */
92 #define NE10_MAX_ESIZE 16
94 static inline ne10_uint32_t ne10_align_size (ne10_int32_t sz, ne10_int32_t n)
96 return (sz + n - 1) & -n;
99 static inline ne10_int32_t ne10_floor (ne10_float32_t a)
101 return ( ( (a) >= 0) ? ( (ne10_int32_t) a) : ( (ne10_int32_t) a - 1));
104 static inline ne10_int32_t ne10_clip (ne10_int32_t x, ne10_int32_t a, ne10_int32_t b)
106 return (x >= a ? (x < b ? x : b - 1) : a);
109 static inline ne10_uint8_t ne10_cast_op (ne10_int32_t val)
111 ne10_int32_t bits = INTER_RESIZE_COEF_BITS * 2;
112 ne10_int32_t SHIFT = bits;
113 ne10_int32_t DELTA = 1 << (bits - 1) ;
114 ne10_int32_t temp = NE10_MIN (255, NE10_MAX (0, (val + DELTA) >> SHIFT));
115 return (ne10_uint8_t) (temp);
/*
 * Horizontal (along-x) pass of the separable bilinear resize.
 *
 * For each source row it produces one intermediate row of 32-bit
 * fixed-point values: D[dx] = S[sx] * a0 + S[sx + cn] * a1, where
 * sx = xofs[dx] and (a0, a1) are the Q11 weights at alpha[2*dx .. 2*dx+1].
 * Columns at dx >= xmax would read past the right edge of the source, so
 * the last source pixel is replicated at full weight
 * (INTER_RESIZE_COEF_SCALE).
 *
 * NOTE(review): several lines of this function are missing from the
 * excerpt (the dst/count/swidth/dwidth/cn/xmin/xmax parameters, the
 * stores of t0/t1, and the branch structure separating the two-row and
 * one-row paths) -- the comments below cover only the visible lines.
 */
118 static void ne10_img_hresize_linear_c (
const ne10_uint8_t** src,
121 const ne10_int32_t* xofs,
122 const ne10_int16_t* alpha,
131 ne10_int32_t dx0 = 0;
/* Two-row path: filter a pair of source rows in one sweep. */
137 const ne10_uint8_t *S0 = src[k], *S1 = src[k + 1];
138 ne10_int32_t *D0 = dst[k], *D1 = dst[k + 1];
139 for (dx = dx0; dx < xmax; dx++)
141 ne10_int32_t sx = xofs[dx];
142 ne10_int32_t a0 = alpha[dx * 2], a1 = alpha[dx * 2 + 1];
143 ne10_int32_t t0 = S0[sx] * a0 + S0[sx + cn] * a1;
144 ne10_int32_t t1 = S1[sx] * a0 + S1[sx + cn] * a1;
/* Right border: replicate the last source pixel at full Q11 weight. */
149 for (; dx < dwidth; dx++)
151 ne10_int32_t sx = xofs[dx];
152 D0[dx] = (ne10_int32_t) S0[sx] * INTER_RESIZE_COEF_SCALE;
153 D1[dx] = (ne10_int32_t) S1[sx] * INTER_RESIZE_COEF_SCALE;
/* One-row path: same computation for a single remaining row. */
161 const ne10_uint8_t *S = src[k];
162 ne10_int32_t *D = dst[k];
163 for (dx = 0; dx < xmax; dx++)
165 ne10_int32_t sx = xofs[dx];
166 D[dx] = S[sx] * alpha[dx * 2] + S[sx + cn] * alpha[dx * 2 + 1];
/* Right border for the one-row path. */
169 for (; dx < dwidth; dx++)
170 D[dx] = (ne10_int32_t) S[xofs[dx]] * INTER_RESIZE_COEF_SCALE;
175 static void ne10_img_vresize_linear_c (
const ne10_int32_t** src, ne10_uint8_t* dst,
const ne10_int16_t* beta, ne10_int32_t width)
177 ne10_int32_t b0 = beta[0], b1 = beta[1];
178 const ne10_int32_t *S0 = src[0], *S1 = src[1];
181 for (; x <= width - 4; x += 4)
184 t0 = S0[x] * b0 + S1[x] * b1;
185 t1 = S0[x + 1] * b0 + S1[x + 1] * b1;
186 dst[x] = ne10_cast_op (t0);
187 dst[x + 1] = ne10_cast_op (t1);
188 t0 = S0[x + 2] * b0 + S1[x + 2] * b1;
189 t1 = S0[x + 3] * b0 + S1[x + 3] * b1;
190 dst[x + 2] = ne10_cast_op (t0);
191 dst[x + 3] = ne10_cast_op (t1);
194 for (; x < width; x++)
195 dst[x] = ne10_cast_op (S0[x] * b0 + S1[x] * b1);
/*
 * Generic (scalar C) driver for separable linear image resizing.
 *
 * Walks destination rows top to bottom.  For each output row it gathers
 * the ksize source rows the vertical filter needs, horizontally filters
 * only those rows not already cached from the previous iteration, then
 * vertically blends the cached rows into the destination with the beta
 * weights.
 *
 * NOTE(review): several declarations and statements fall outside this
 * excerpt (dst, xmin, xmax, ksize, srcw/srch/dstw/dsth parameters, the
 * prev_sy initialisation, and the buffer release) -- comments cover the
 * visible lines only.
 */
198 static void ne10_img_resize_generic_linear_c (ne10_uint8_t* src,
200 const ne10_int32_t* xofs,    /* per-output-column source x offset */
201 const ne10_int16_t* _alpha,  /* Q11 horizontal weight pairs */
202 const ne10_int32_t* yofs,    /* per-output-row source y offset */
203 const ne10_int16_t* _beta,   /* Q11 vertical weight pairs */
209 ne10_int32_t srcstep,        /* source row stride in bytes */
212 ne10_int32_t channels)
215 const ne10_int16_t* alpha = _alpha;
216 const ne10_int16_t* beta = _beta;
217 ne10_int32_t cn = channels;
/* Intermediate rows are padded to 16 elements, output rows to 4. */
221 ne10_int32_t bufstep = (ne10_int32_t) ne10_align_size (dstw, 16);
222 ne10_int32_t dststep = (ne10_int32_t) ne10_align_size (dstw, 4);
/* One 32-bit intermediate row per vertical filter tap. */
225 ne10_int32_t *buffer_ = (ne10_int32_t*) NE10_MALLOC (bufstep * ksize *
sizeof (ne10_int32_t));
227 const ne10_uint8_t* srows[NE10_MAX_ESIZE];
228 ne10_int32_t* rows[NE10_MAX_ESIZE];
229 ne10_int32_t prev_sy[NE10_MAX_ESIZE];
/* Carve the scratch buffer into per-tap intermediate rows. */
234 for (k = 0; k < ksize; k++)
237 rows[k] = (ne10_int32_t*) buffer_ + bufstep * k;
241 for (dy = 0; dy < dsth; dy++, beta += ksize)
243 ne10_int32_t sy0 = yofs[dy], k, k0 = ksize, k1 = 0, ksize2 = ksize / 2;
245 for (k = 0; k < ksize; k++)
/* Clamp the source row index into [0, srch). */
247 ne10_int32_t sy = ne10_clip (sy0 - ksize2 + 1 + k, 0, srch);
/* If this source row was filtered on a previous iteration, reuse it. */
248 for (k1 = NE10_MAX (k1, k); k1 < ksize; k1++)
250 if (sy == prev_sy[k1])
253 memcpy (rows[k], rows[k1], bufstep *
sizeof (rows[0][0]));
/* Remember the first row that still needs horizontal filtering. */
258 k0 = NE10_MIN (k0, k);
259 srows[k] = (
const ne10_uint8_t*) (src + srcstep * sy);
/* Horizontally filter only rows k0..ksize-1; earlier ones are cached. */
264 ne10_img_hresize_linear_c (srows + k0, rows + k0, ksize - k0, xofs, alpha,
265 srcw, dstw, cn, xmin, xmax);
/* Vertically blend the cached rows into destination row dy. */
267 ne10_img_vresize_linear_c ( (
const ne10_int32_t**) rows, (ne10_uint8_t*) (dst + dststep * dy), beta, dstw);
/*
 * Precompute per-column / per-row source offsets and Q11 interpolation
 * weights for the linear resize.
 *
 * For destination column dx the sampling centre in source space is
 * fx = (dx + 0.5) * scale_x - 0.5; sx = floor(fx) gives the left tap and
 * the fractional part becomes the weight pair.  Rows are handled the same
 * way with scale_y.
 *
 * NOTE(review): the lines that fill cbuf[] with the float weight pair and
 * the left-border handling are missing from this excerpt -- comments
 * cover the visible lines only.
 */
273 static void ne10_img_resize_cal_offset_linear (ne10_int32_t* xofs,
274 ne10_int16_t* ialpha,
285 ne10_int32_t channels)
287 ne10_float32_t inv_scale_x = (ne10_float32_t) dstw / srcw;
288 ne10_float32_t inv_scale_y = (ne10_float32_t) dsth / srch;
290 ne10_int32_t cn = channels;
/* Source step per destination pixel along each axis. */
291 ne10_float32_t scale_x = 1. / inv_scale_x;
292 ne10_float32_t scale_y = 1. / inv_scale_y;
293 ne10_int32_t k, sx, sy, dx, dy;
296 ne10_float32_t fx, fy;
/* Float weights staged here before conversion to Q11. */
298 ne10_float32_t cbuf[NE10_MAX_ESIZE];
300 for (dx = 0; dx < dstw; dx++)
/* Map the destination pixel centre back into source coordinates. */
302 fx = (ne10_float32_t) ( (dx + 0.5) * scale_x - 0.5);
303 sx = ne10_floor (fx);
/* Columns whose taps spill past the right edge: shrink *xmax so the
 * horizontal pass switches to border replication there, and pin the
 * offset to the last source pixel. */
313 if (sx + ksize2 >= srcw)
315 *xmax = NE10_MIN (*xmax, dx);
317 fx = 0, sx = srcw - 1;
/* Expand the (premultiplied) offset for every channel of this pixel. */
320 for (k = 0, sx *= cn; k < cn; k++)
321 xofs[dx * cn + k] = sx + k;
/* Convert float weights to Q11 and replicate them per channel. */
326 for (k = 0; k < ksize; k++)
327 ialpha[dx * cn * ksize + k] = (ne10_int16_t) (cbuf[k] * INTER_RESIZE_COEF_SCALE);
328 for (; k < cn * ksize; k++)
329 ialpha[dx * cn * ksize + k] = ialpha[dx * cn * ksize + k - ksize];
/* Vertical offsets and Q11 weights, one set per destination row. */
332 for (dy = 0; dy < dsth; dy++)
334 fy = (ne10_float32_t) ( (dy + 0.5) * scale_y - 0.5);
335 sy = ne10_floor (fy);
343 for (k = 0; k < ksize; k++)
344 ibeta[dy * ksize + k] = (ne10_int16_t) (cbuf[k] * INTER_RESIZE_COEF_SCALE);
/*
 * Public C entry point (the head of the signature is outside this
 * excerpt): bilinear resize of 8-bit RGBA data.  Allocates one scratch
 * buffer holding the x/y offset tables and the Q11 weight tables, fills
 * them with ne10_img_resize_cal_offset_linear, then runs the generic
 * scalar driver.
 */
368 ne10_uint32_t dst_width,
369 ne10_uint32_t dst_height,
371 ne10_uint32_t src_width,
372 ne10_uint32_t src_height,
373 ne10_uint32_t src_stride)
375 ne10_int32_t dstw = dst_width;
376 ne10_int32_t dsth = dst_height;
377 ne10_int32_t srcw = src_width;
378 ne10_int32_t srch = src_height;
383 ne10_int32_t xmin = 0;
/* Columns at or beyond xmax need right-border replication. */
384 ne10_int32_t xmax = dstw;
385 ne10_int32_t width = dstw * cn;
387 ne10_int32_t ksize = 0, ksize2;
/* NOTE(review): ksize is presumably assigned its real tap count (2 for
 * bilinear) on a line missing from this excerpt -- verify; with ksize
 * still 0 the allocation and table layout below would be degenerate. */
/* Single allocation: xofs/yofs (int32) followed by ialpha/ibeta (int16). */
391 ne10_uint8_t *buffer_ = (ne10_uint8_t*) NE10_MALLOC ( (width + dsth) * (
sizeof (ne10_int32_t) +
sizeof (ne10_float32_t) * ksize));
393 ne10_int32_t* xofs = (ne10_int32_t*) buffer_;
394 ne10_int32_t* yofs = xofs + width;
395 ne10_int16_t* ialpha = (ne10_int16_t*) (yofs + dsth);
396 ne10_int16_t* ibeta = ialpha + width * ksize;
398 ne10_img_resize_cal_offset_linear (xofs, ialpha, yofs, ibeta, &xmin, &xmax, ksize, ksize2, srcw, srch, dstw, dsth, cn);
400 ne10_img_resize_generic_linear_c (src, dst, xofs, ialpha, yofs, ibeta, xmin, xmax, ksize, srcw, srch, src_stride, dstw, dsth, cn);
/* NEON implementations of the two filter passes, defined elsewhere.
 * The 4-channel horizontal variant targets interleaved RGBA rows; the
 * vertical variant matches ne10_img_vresize_linear_c's contract. */
404 extern void ne10_img_hresize_4channels_linear_neon (
const ne10_uint8_t** src,
407 const ne10_int32_t* xofs,
408 const ne10_int16_t* alpha,
414 extern void ne10_img_vresize_linear_neon (
const ne10_int32_t** src, ne10_uint8_t* dst,
const ne10_int16_t* beta, ne10_int32_t width);
/*
 * NEON driver for separable linear image resizing.  Same control flow as
 * ne10_img_resize_generic_linear_c, but dispatches the filter passes to
 * the NEON kernels; both a NEON 4-channel horizontal call and the scalar
 * fallback appear below.
 *
 * NOTE(review): several declarations and statements fall outside this
 * excerpt (dst, xmin, xmax, ksize, srcw/srch/dstw/dsth parameters, the
 * prev_sy initialisation, the condition selecting between the two
 * horizontal calls, and the buffer release) -- comments cover the visible
 * lines only.
 */
416 static void ne10_img_resize_generic_linear_neon (ne10_uint8_t* src,
418 const ne10_int32_t* xofs,    /* per-output-column source x offset */
419 const ne10_int16_t* _alpha,  /* Q11 horizontal weight pairs */
420 const ne10_int32_t* yofs,    /* per-output-row source y offset */
421 const ne10_int16_t* _beta,   /* Q11 vertical weight pairs */
427 ne10_int32_t srcstep,        /* source row stride in bytes */
430 ne10_int32_t channels)
433 const ne10_int16_t* alpha = _alpha;
434 const ne10_int16_t* beta = _beta;
435 ne10_int32_t cn = channels;
/* Intermediate rows are padded to 16 elements, output rows to 4. */
439 ne10_int32_t bufstep = (ne10_int32_t) ne10_align_size (dstw, 16);
440 ne10_int32_t dststep = (ne10_int32_t) ne10_align_size (dstw, 4);
/* One 32-bit intermediate row per vertical filter tap. */
443 ne10_int32_t *buffer_ = (ne10_int32_t*) NE10_MALLOC (bufstep * ksize *
sizeof (ne10_int32_t));
445 const ne10_uint8_t* srows[NE10_MAX_ESIZE];
446 ne10_int32_t* rows[NE10_MAX_ESIZE];
447 ne10_int32_t prev_sy[NE10_MAX_ESIZE];
/* Carve the scratch buffer into per-tap intermediate rows. */
452 for (k = 0; k < ksize; k++)
455 rows[k] = (ne10_int32_t*) buffer_ + bufstep * k;
459 for (dy = 0; dy < dsth; dy++, beta += ksize)
461 ne10_int32_t sy0 = yofs[dy], k, k0 = ksize, k1 = 0, ksize2 = ksize / 2;
463 for (k = 0; k < ksize; k++)
/* Clamp the source row index into [0, srch). */
465 ne10_int32_t sy = ne10_clip (sy0 - ksize2 + 1 + k, 0, srch);
/* If this source row was filtered on a previous iteration, reuse it. */
466 for (k1 = NE10_MAX (k1, k); k1 < ksize; k1++)
468 if (sy == prev_sy[k1])
471 memcpy (rows[k], rows[k1], bufstep *
sizeof (rows[0][0]));
/* Remember the first row that still needs horizontal filtering. */
476 k0 = NE10_MIN (k0, k);
477 srows[k] = (
const ne10_uint8_t*) (src + srcstep * sy);
/* NEON fast path for interleaved 4-channel rows... */
484 ne10_img_hresize_4channels_linear_neon (srows + k0, rows + k0, ksize - k0, xofs, alpha,
485 srcw, dstw, cn, xmin, xmax);
/* ...and the scalar fallback for other channel counts. */
487 ne10_img_hresize_linear_c (srows + k0, rows + k0, ksize - k0, xofs, alpha,
488 srcw, dstw, cn, xmin, xmax);
/* Vertically blend the cached rows into destination row dy (NEON). */
490 ne10_img_vresize_linear_neon ( (
const ne10_int32_t**) rows, (ne10_uint8_t*) (dst + dststep * dy), beta, dstw);
/*
 * Public NEON entry point (the head of the signature is outside this
 * excerpt): bilinear resize of 8-bit RGBA data.  Builds the same
 * offset/weight tables as the C version, then runs the NEON generic
 * driver.
 */
509 ne10_uint32_t dst_width,
510 ne10_uint32_t dst_height,
512 ne10_uint32_t src_width,
513 ne10_uint32_t src_height,
514 ne10_uint32_t src_stride)
516 ne10_int32_t dstw = dst_width;
517 ne10_int32_t dsth = dst_height;
518 ne10_int32_t srcw = src_width;
519 ne10_int32_t srch = src_height;
524 ne10_int32_t xmin = 0;
/* Columns at or beyond xmax need right-border replication. */
525 ne10_int32_t xmax = dstw;
526 ne10_int32_t width = dstw * cn;
528 ne10_int32_t ksize = 0, ksize2;
/* NOTE(review): ksize is presumably assigned its real tap count (2 for
 * bilinear) on a line missing from this excerpt -- verify; with ksize
 * still 0 the allocation and table layout below would be degenerate. */
/* Single allocation: xofs/yofs (int32) followed by ialpha/ibeta (int16). */
532 ne10_uint8_t *buffer_ = (ne10_uint8_t*) NE10_MALLOC ( (width + dsth) * (
sizeof (ne10_int32_t) +
sizeof (ne10_float32_t) * ksize));
534 ne10_int32_t* xofs = (ne10_int32_t*) buffer_;
535 ne10_int32_t* yofs = xofs + width;
536 ne10_int16_t* ialpha = (ne10_int16_t*) (yofs + dsth);
537 ne10_int16_t* ibeta = ialpha + width * ksize;
539 ne10_img_resize_cal_offset_linear (xofs, ialpha, yofs, ibeta, &xmin, &xmax, ksize, ksize2, srcw, srch, dstw, dsth, cn);
541 ne10_img_resize_generic_linear_neon (src, dst, xofs, ialpha, yofs, ibeta, xmin, xmax, ksize, srcw, srch, src_stride, dstw, dsth, cn);
void ne10_img_resize_bilinear_rgba_neon(ne10_uint8_t *dst, ne10_uint32_t dst_width, ne10_uint32_t dst_height, ne10_uint8_t *src, ne10_uint32_t src_width, ne10_uint32_t src_height, ne10_uint32_t src_stride)
Bilinear resize of 8-bit RGBA image data (NEON-accelerated implementation).
void ne10_img_resize_bilinear_rgba_c(ne10_uint8_t *dst, ne10_uint32_t dst_width, ne10_uint32_t dst_height, ne10_uint8_t *src, ne10_uint32_t src_width, ne10_uint32_t src_height, ne10_uint32_t src_stride)
Bilinear resize of 8-bit RGBA image data (portable scalar C implementation).