/*
 * ne10_img_boxfilter_row_border
 *
 * Horizontal box-filter pass for the pixels at the start and end of every row.
 * Pixels are addressed as x * RGBA_CH + k, i.e. RGBA_CH interleaved channels
 * per pixel, and rows are src_stride / dst_stride elements apart.
 *
 * The border widths are computed here and handed back to the caller through
 * border_l_p and border_r_p.
 *
 * NOTE(review): this listing is incomplete -- braces, the parameter lines for
 * dst, src_sz, kernel and anchor, the declaration of x, y, k, the
 * initialisation of sum[], and several expression continuations are not
 * visible. Presumably sum[k] is reset to 0 before each accumulation; confirm
 * against the full file.
 */
83 void ne10_img_boxfilter_row_border (
const ne10_uint8_t* src,
86 ne10_int32_t src_stride,
87 ne10_int32_t dst_stride,
90 ne10_int32_t *border_l_p,
91 ne10_int32_t *border_r_p)
/* The kernel may not be wider than one image row. */
93 assert (kernel.x <= src_sz.x);
/*
 * Border widths: anchor.x pixels at the start of the row, and the part of the
 * kernel that lies past the anchor at the end of the row.
 */
96 *border_l_p = anchor.x;
97 *border_r_p = kernel.x - (anchor.x + 1);
101 const ne10_uint8_t *src_row;
102 ne10_uint8_t *dst_row;
/* Process the image one row at a time. */
104 for (y = 0; y < src_sz.y; y++)
106 src_row = src + y * src_stride;
107 dst_row = dst + y * dst_stride;
/* One floating-point running sum per channel. */
108 ne10_float32_t sum[RGBA_CH];
/*
 * Start-of-row border. `offset` is the number of pixels accumulated before the
 * first output pixel is produced.
 */
111 ne10_int32_t offset = kernel.x - *border_l_p - 1;
112 for (k = 0; k < RGBA_CH; k++)
115 for (x = 0; x < offset; x++)
117 sum[k] += * (src_row + x * RGBA_CH + k);
/*
 * For each start-of-row border pixel x: add source pixel (offset + x) to the
 * running sum and store the sum divided by a value that is cut off in this
 * listing (presumably the number of pixels summed so far -- confirm).
 */
121 for (k = 0; k < RGBA_CH; k++)
123 for (x = 0; x < *border_l_p; x++)
125 sum[k] += * (src_row + (offset + x) *
127 * (dst_row + x * RGBA_CH + k) = sum[k] /
/* End-of-row border: accumulate the last kernel.x pixels of the row. */
133 for (k = 0; k < RGBA_CH; k++)
136 for (x = 0; x < kernel.x; x++)
138 sum[k] += * (src_row + (src_sz.x - kernel.x + x) *
/*
 * For each end-of-row border pixel x: drop source pixel
 * (src_sz.x - kernel.x + x) from the running sum and write the result to
 * destination pixel (src_sz.x - *border_r_p + x).
 *
 * NOTE(review): the sum is divided by the full kernel.x here even though
 * samples have already been subtracted from it -- confirm this is the
 * intended normalisation.
 */
143 for (k = 0; k < RGBA_CH; k++)
145 for (x = 0; x < *border_r_p; x++)
147 sum[k] -= * (src_row + (src_sz.x - kernel.x + x) *
149 * (dst_row + (src_sz.x - *border_r_p + x) *
150 RGBA_CH + k) = sum[k] / kernel.x;
/*
 * ne10_img_boxfilter_row_c
 *
 * Horizontal box-filter pass for the interior of every row: destination
 * pixels from index border_l up to (but not including) src_sz.x - border_r.
 * A per-channel running sum is seeded from the first kernel.x pixels of the
 * row and then slid along the row one channel sample at a time.
 *
 * NOTE(review): this listing is incomplete -- braces, the parameter lines for
 * dst, src_sz, kernel and anchor, the initialisation of sum[], the update of k
 * inside the sliding loop and some expression continuations are not visible.
 */
156 void ne10_img_boxfilter_row_c (
const ne10_uint8_t *src,
159 ne10_int32_t src_stride,
160 ne10_int32_t dst_stride,
163 ne10_int32_t border_l,
164 ne10_int32_t border_r)
/* Kernel width must be positive, below 1 << 16, and no wider than a row. */
167 assert ( (kernel.x > 0) && (kernel.x < (1 << 16)));
168 assert (kernel.x <= src_sz.x);
170 ne10_int32_t x, y, k;
/* Process the image one row at a time. */
172 for (y = 0; y < src_sz.y; y++)
174 const ne10_uint8_t *src_row = src + y * src_stride;
175 ne10_uint8_t *dst_row = dst + y * dst_stride;
/* One floating-point running sum per channel. */
176 ne10_float32_t sum[RGBA_CH];
/*
 * Seed: sum the first kernel.x pixels of the row for each channel and store
 * the first interior result at destination pixel border_l. The divisor is cut
 * off in this listing.
 */
178 for (k = 0; k < RGBA_CH; k++)
182 for (x = 0; x < kernel.x; x++)
184 sum[k] += * (src_row + x * RGBA_CH + k);
187 * (dst_row + border_l * RGBA_CH + k) = sum[k] /
/*
 * Element offsets from the current position to the sample leaving the window
 * (prev, behind) and the sample entering it (next, ahead).
 */
191 ne10_uint32_t prev = (anchor.x + 1) * RGBA_CH;
192 ne10_uint32_t next = (kernel.x - anchor.x - 1) * RGBA_CH;
/* Sliding starts one pixel after the seeded output pixel. */
193 const ne10_uint8_t *src_pixel = src_row + (1 + border_l) * RGBA_CH;
194 const ne10_uint8_t *src_pixel_end = src_row + (src_sz.x - border_r) *
196 ne10_uint8_t *dst_pixel = dst_row + (1 + border_l) * RGBA_CH;
/*
 * Both pointers advance by one element (one channel sample) per iteration.
 * Each step adds the entering sample, subtracts the leaving one, and stores
 * the running sum divided by kernel.x. How k cycles through the channels is
 * not visible here.
 */
198 for (k = 0; src_pixel < src_pixel_end; src_pixel++, dst_pixel++)
200 sum[k] += src_pixel[next] - * (src_pixel - prev);
201 *dst_pixel = sum[k] / kernel.x;
/*
 * ne10_img_boxfilter_col_border
 *
 * Vertical box-filter pass for the pixels at the top and bottom of every
 * column. A column starts at x * RGBA_CH and successive rows are src_stride /
 * dst_stride elements apart.
 *
 * The border heights are computed here and handed back to the caller through
 * border_t_p and border_b_p.
 *
 * NOTE(review): this listing is incomplete -- braces, the parameter lines for
 * dst, src_sz, kernel and anchor, the initialisation of sum[], and several
 * expression continuations (including the final store) are not visible.
 */
208 void ne10_img_boxfilter_col_border (
const ne10_uint8_t *src,
211 ne10_int32_t src_stride,
212 ne10_int32_t dst_stride,
215 ne10_int32_t *border_t_p,
216 ne10_int32_t *border_b_p)
/* The kernel may not be taller than the image. */
218 assert (kernel.y <= src_sz.y);
/*
 * Border heights: anchor.y rows at the top, and the part of the kernel that
 * lies past the anchor at the bottom.
 */
221 *border_t_p = anchor.y;
222 *border_b_p = kernel.y - (anchor.y + 1);
224 ne10_int32_t x, y, k;
225 const ne10_uint8_t *src_col;
226 ne10_uint8_t *dst_col;
/* Process the image one pixel column at a time. */
228 for (x = 0; x < src_sz.x; x++)
230 src_col = src + x * RGBA_CH;
231 dst_col = dst + x * RGBA_CH;
/* One floating-point running sum per channel. */
232 ne10_float32_t sum[RGBA_CH];
/*
 * Top border. `offset` is the number of rows accumulated before the first
 * output row is produced.
 */
235 ne10_int32_t offset = kernel.y - *border_t_p - 1;
236 for (k = 0; k < RGBA_CH; k++)
240 for (y = 0; y < offset; y++)
242 sum[k] += * (src_col + y * src_stride + k);
/*
 * For each top-border row y: add source row (offset + y) to the running sum
 * and store the sum divided by a value that is cut off in this listing
 * (presumably the number of rows summed so far -- confirm).
 */
246 for (k = 0; k < RGBA_CH; k++)
248 for (y = 0; y < *border_t_p; y++)
250 sum[k] += * (src_col + (offset + y) *
252 * (dst_col + y * dst_stride + k) = sum[k] /
/* Bottom border: accumulate the last kernel.y rows of the column. */
258 for (k = 0; k < RGBA_CH; k++)
261 for (y = 0; y < kernel.y; y++)
263 sum[k] += * (src_col + (src_sz.y - kernel.y + y) *
/*
 * For each bottom-border row y: drop source row (src_sz.y - kernel.y + y)
 * from the running sum and write to destination row
 * (src_sz.y - *border_b_p + y). The stored expression is cut off here.
 */
268 for (k = 0; k < RGBA_CH; k++)
270 for (y = 0; y < *border_b_p; y++)
272 sum[k] -= * (src_col + (src_sz.y - kernel.y + y) *
274 * (dst_col + (src_sz.y - *border_b_p + y) * dst_stride + k) =
/*
 * ne10_img_boxfilter_col_c
 *
 * Vertical box-filter pass for the interior of every column: destination rows
 * from index border_t up to a bound derived from src_sz.y - border_b. A
 * per-channel running sum is seeded from the first kernel.y rows of the
 * column and then slid down the column one row at a time.
 *
 * NOTE(review): this listing is incomplete -- braces, the parameter lines for
 * dst, src_sz, kernel and anchor, the initialisation of sum[], and the tail of
 * the src_end expression are not visible.
 */
281 void ne10_img_boxfilter_col_c (
const ne10_uint8_t *src,
284 ne10_int32_t src_stride,
285 ne10_int32_t dst_stride,
288 ne10_int32_t border_t,
289 ne10_int32_t border_b)
/* Kernel height must be positive, below 1 << 16, and no taller than the image. */
292 assert ( (kernel.y > 0) && (kernel.y < (1 << 16)));
293 assert (kernel.y <= src_sz.y);
295 ne10_int32_t x, y, k;
/* Process the image one pixel column at a time. */
297 for (x = 0; x < src_sz.x; x++)
299 const ne10_uint8_t *src_col = src + x * RGBA_CH;
300 ne10_uint8_t *dst_col = dst + x * RGBA_CH;
/* One floating-point running sum per channel. */
301 ne10_float32_t sum[RGBA_CH];
/*
 * Seed: sum the first kernel.y rows of the column for each channel and store
 * the average at destination row border_t.
 */
303 for (k = 0; k < RGBA_CH; k++)
307 for (y = 0; y < kernel.y; y++)
309 sum[k] += * (src_col + y * src_stride + k);
312 * (dst_col + border_t * dst_stride + k) = sum[k] / kernel.y;
/*
 * Element offsets from the current row to the row leaving the window (prev,
 * above) and the row entering it (next, below), both in units of src_stride.
 */
315 ne10_uint32_t prev = (anchor.y + 1) * src_stride;
316 ne10_uint32_t next = (kernel.y - anchor.y - 1) * src_stride;
/* Sliding starts one row after the seeded output row. */
317 const ne10_uint8_t *src_pixel = src_col + (1 + border_t) * src_stride;
318 const ne10_uint8_t *src_end = src_col + (src_sz.y - border_b) *
320 ne10_uint8_t *dst_pixel = dst_col + (1 + border_t) * dst_stride;
/*
 * Per row: for every channel, add the entering sample, subtract the leaving
 * one, and store the running sum divided by kernel.y; then step both pointers
 * down by one row.
 */
322 while (src_pixel < src_end)
324 for (k = 0; k < RGBA_CH; k++)
326 sum[k] += src_pixel[next + k] - * (src_pixel - prev + k);
327 * (dst_pixel + k) = sum[k] / kernel.y;
329 dst_pixel += dst_stride;
330 src_pixel += src_stride;
/*
 * NOTE(review): fragment of a function whose name and leading parameters are
 * not visible in this listing; braces, most call arguments and several
 * continuation lines are missing as well. From the visible lines it validates
 * its inputs, derives the kernel anchor, allocates an intermediate buffer and
 * invokes the four box-filter helpers: row border, row interior, column
 * border, column interior. Whether dst_buf is released is not visible here.
 */
357 ne10_int32_t src_stride,
358 ne10_int32_t dst_stride,
/* Border widths/heights; presumably filled in by the *_border helpers -- confirm. */
361 ne10_int32_t border_l, border_r, border_t, border_b;
/* Preconditions: non-null buffers, non-empty image, positive strides, kernel no larger than the image. */
364 assert (src != 0 && dst != 0);
365 assert (src_sz.x > 0 && src_sz.y > 0);
366 assert (src_stride > 0 && dst_stride > 0);
367 assert (kernel.x > 0 && kernel.x <= src_sz.x
368 && kernel.y > 0 && kernel.y <= src_sz.y);
/* Anchor at the kernel centre (integer division). */
370 anchor.x = kernel.x / 2;
371 anchor.y = kernel.y / 2;
/* Intermediate buffer; the size expression is cut off in this listing. */
373 ne10_uint8_t *dst_buf = (ne10_uint8_t *) malloc (
sizeof (ne10_uint8_t) *
/*
 * NOTE(review): the message uses "%d" while the argument begins with a sizeof
 * expression, whose type is size_t; "%zu" (or a cast) would match the
 * argument type.
 */
381 "ERROR: buffer allocation fails!\nallocation size: %d\n",
382 sizeof (ne10_uint8_t) *
/* Rows of the intermediate buffer are packed: src_sz.x pixels of RGBA_CH elements each. */
389 ne10_int32_t dst_buf_stride = src_sz.x * RGBA_CH;
/* Row pass reads src; its destination argument is not visible (presumably dst_buf -- confirm). */
392 ne10_img_boxfilter_row_border (src,
404 ne10_img_boxfilter_row_c (src,
/* Column pass reads dst_buf; its destination argument is not visible (presumably dst -- confirm). */
417 ne10_img_boxfilter_col_border (dst_buf,
428 ne10_img_boxfilter_col_c (dst_buf,