From f7fc4bf1a2a1ac141a6a7262f9075ab802422831 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Mon, 2 Sep 2013 18:06:45 +0800 Subject: [PATCH] Make the following changes - Add C implementations, doc and test cases for image resize/rotate - fix the bug in NEON version of image resize - add a header file for external macro definitions --- common/NE10_mask_table.c | 2 +- common/NE10_mask_table.h | 4 +- inc/NE10.h | 10 + inc/NE10_imgproc.h | 90 ++--- inc/NE10_macros.h | 57 ++++ inc/NE10_types.h | 1 + modules/CMakeLists.txt | 2 + modules/imgproc/NE10_init_imgproc.c | 44 +-- modules/imgproc/NE10_resize.c | 549 +++++++++++++++++++++++++++++++ modules/imgproc/NE10_resize.neon.s | 62 ++-- modules/imgproc/NE10_rotate.c | 316 ++++++++++++++++++ modules/imgproc/NE10_rotate.neon.s | 4 +- modules/imgproc/test/test_main.c | 59 ++++ modules/imgproc/test/test_suite_resize.c | 207 ++++++++++++ modules/imgproc/test/test_suite_rotate.c | 203 ++++++++++++ test/CMakeLists.txt | 42 +++ test/include/unit_test_common.h | 2 + test/src/unit_test_common.c | 25 ++ 18 files changed, 1571 insertions(+), 108 deletions(-) create mode 100644 inc/NE10_macros.h create mode 100644 modules/imgproc/NE10_resize.c create mode 100644 modules/imgproc/NE10_rotate.c create mode 100644 modules/imgproc/test/test_main.c create mode 100644 modules/imgproc/test/test_suite_resize.c create mode 100644 modules/imgproc/test/test_suite_rotate.c diff --git a/common/NE10_mask_table.c b/common/NE10_mask_table.c index 0db75a8..d03d6bd 100644 --- a/common/NE10_mask_table.c +++ b/common/NE10_mask_table.c @@ -68,7 +68,7 @@ const ne10_uint32_t ne10_divLookUpTable[DIV_LOOKUP_TABLE_SIZE]= 272,271,270,269,267,266,265,264,263,262,261,260,259,258,257 }; -const ne10_uint64_t ne10_vresize_mask_residual_table[VRESIZE_MASK_TABLE_SIZE] = +const ne10_uint64_t ne10_img_vresize_linear_mask_residual_table[NE10_VRESIZE_LINEAR_MASK_TABLE_SIZE] = { 0x00000000000000FF, 0x000000000000FFFF, 0x0000000000FFFFFF, 0x00000000FFFFFFFF, diff --git 
a/common/NE10_mask_table.h b/common/NE10_mask_table.h index 6381231..1b9dbdf 100644 --- a/common/NE10_mask_table.h +++ b/common/NE10_mask_table.h @@ -44,8 +44,8 @@ extern const ne10_uint32_t ne10_divLookUpTable[DIV_LOOKUP_TABLE_SIZE] \ asm ("ne10_divLookUpTable"); /* mask table for imgproc module */ -#define VRESIZE_MASK_TABLE_SIZE 7 -extern const ne10_uint64_t ne10_vresize_mask_residual_table[VRESIZE_MASK_TABLE_SIZE] \ +#define NE10_VRESIZE_LINEAR_MASK_TABLE_SIZE 7 +extern const ne10_uint64_t ne10_img_vresize_linear_mask_residual_table[NE10_VRESIZE_LINEAR_MASK_TABLE_SIZE] \ asm ("ne10_vresize_mask_residual_table"); #endif diff --git a/inc/NE10.h b/inc/NE10.h index 148bc86..02ebace 100644 --- a/inc/NE10.h +++ b/inc/NE10.h @@ -64,6 +64,10 @@ * │   │   ├── @link groupDSPs dsp module@endlink that provides a set of signal processing functions, such as complex/real FFT/IFFT, FIR and IIR * │   │   └── test * │   │   └── directory for test files + * │   ├── imgproc + * │   │   ├── @link groupIMGPROCs imgproc module@endlink that provides a set of image processing functions, such as image resize, image rotate + * │   │   └── test + * │   │   └── directory for test files * │   ├── math * │   │   ├── @link groupMaths math module@endlink that provides a set of vector/matrix algebra functions * │   │   └── test @@ -148,7 +152,13 @@ extern "C" { #endif +#include +#include +#include +#include + #include "NE10_types.h" +#include "NE10_macros.h" #include "NE10_init.h" #include "NE10_math.h" #include "NE10_dsp.h" diff --git a/inc/NE10_imgproc.h b/inc/NE10_imgproc.h index f080875..527a6ce 100644 --- a/inc/NE10_imgproc.h +++ b/inc/NE10_imgproc.h @@ -46,55 +46,57 @@ extern "C" { /* image resize functions*/ /* function pointers*/ - extern void (*ne10_vresize) (const ne10_int32_t** src, - ne10_uint8_t* dst, - const ne10_int16_t* beta, - ne10_int32_t width); - extern void (*ne10_hresize_4channels) (const ne10_uint8_t** src, - ne10_int32_t** dst, - ne10_int32_t count, - const ne10_int32_t* 
xofs, - const ne10_int16_t* alpha, - ne10_int32_t swidth, - ne10_int32_t dwidth, - ne10_int32_t cn, - ne10_int32_t xmin, - ne10_int32_t xmax); + extern void (*ne10_img_resize_bilinear_rgba) (ne10_uint8_t* dst, + ne10_uint32_t dst_width, + ne10_uint32_t dst_height, + ne10_uint8_t* src, + ne10_uint32_t src_width, + ne10_uint32_t src_height, + ne10_uint32_t src_stride); + /* C version*/ + extern void ne10_img_resize_bilinear_rgba_c (ne10_uint8_t* dst, + ne10_uint32_t dst_width, + ne10_uint32_t dst_height, + ne10_uint8_t* src, + ne10_uint32_t src_width, + ne10_uint32_t src_height, + ne10_uint32_t src_stride); /* NEON version*/ - extern void ne10_vresize_neon (const ne10_int32_t** src, - ne10_uint8_t* dst, - const ne10_int16_t* beta, - ne10_int32_t width) - asm ("ne10_vresize_neon"); - extern void ne10_hresize_4channels_neon (const ne10_uint8_t** src, - ne10_int32_t** dst, - ne10_int32_t count, - const ne10_int32_t* xofs, - const ne10_int16_t* alpha, - ne10_int32_t swidth, - ne10_int32_t dwidth, - ne10_int32_t cn, - ne10_int32_t xmin, - ne10_int32_t xmax) asm ("ne10_hresize_4channels_neon"); + extern void ne10_img_resize_bilinear_rgba_neon (ne10_uint8_t* dst, + ne10_uint32_t dst_width, + ne10_uint32_t dst_height, + ne10_uint8_t* src, + ne10_uint32_t src_width, + ne10_uint32_t src_height, + ne10_uint32_t src_stride) + asm ("ne10_img_resize_bilinear_rgba_neon"); /* image rotate functions*/ /* function pointers*/ - extern void (*ne10_img_rotate_get_quad_rangle_subpix) (ne10_uint8_t* dst, - const ne10_uint8_t* src, - ne10_int32_t swidth, - ne10_int32_t sheight, - ne10_int32_t dwidth, - ne10_int32_t dheight, - ne10_float32_t* matrix); + extern void (*ne10_img_rotate_rgba) (ne10_uint8_t* dst, + ne10_uint32_t* dst_width, + ne10_uint32_t* dst_height, + ne10_uint8_t* src, + ne10_uint32_t src_width, + ne10_uint32_t src_height, + ne10_int32_t angle); + /* C version*/ + extern void ne10_img_rotate_rgba_c (ne10_uint8_t* dst, + ne10_uint32_t* dst_width, + ne10_uint32_t* dst_height, + 
ne10_uint8_t* src, + ne10_uint32_t src_width, + ne10_uint32_t src_height, + ne10_int32_t angle); /* NEON version*/ - extern void ne10_img_rotate_get_quad_rangle_subpix_neon (ne10_uint8_t* dst, - const ne10_uint8_t* src, - ne10_int32_t swidth, - ne10_int32_t sheight, - ne10_int32_t dwidth, - ne10_int32_t dheight, - ne10_float32_t* matrix) - asm ("ne10_img_rotate_get_quad_rangle_subpix_neon"); + extern void ne10_img_rotate_rgba_neon (ne10_uint8_t* dst, + ne10_uint32_t* dst_width, + ne10_uint32_t* dst_height, + ne10_uint8_t* src, + ne10_uint32_t src_width, + ne10_uint32_t src_height, + ne10_int32_t angle) + asm ("ne10_img_rotate_rgba_neon"); #ifdef __cplusplus } diff --git a/inc/NE10_macros.h b/inc/NE10_macros.h new file mode 100644 index 0000000..4232555 --- /dev/null +++ b/inc/NE10_macros.h @@ -0,0 +1,57 @@ +/* + * Copyright 2013 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NE10 Library : inc/NE10_macros.h + */ + +/** NE10 defines a number of macros for use in its function signatures. + * The macros are defined within this header file. + */ + +#ifndef NE10_MACROS_H +#define NE10_MACROS_H + +///////////////////////////////////////////////////////// +// some external macro definitions to be exposed to the users +///////////////////////////////////////////////////////// + +#define NE10_MALLOC malloc +#define NE10_FREE free + +#define NE10_MIN(a,b) ((a)>(b)?(b):(a)) +#define NE10_MAX(a,b) ((a)<(b)?(b):(a)) + +///////////////////////////////////////////////////////// +// macro definitions for float to fixed point +///////////////////////////////////////////////////////// +#define NE10_F2I16_MAX 32767 +#define NE10_F2I16_SHIFT 15 +#define NE10_F2I16_OP(x) (ne10_int16_t)((x)*NE10_F2I16_MAX + 0.5f) +#define NE10_F2I16_SROUND(x) (((x)+(1<<(NE10_F2I16_SHIFT-1)))>>NE10_F2I16_SHIFT) + +#endif diff --git a/inc/NE10_types.h b/inc/NE10_types.h index ce49005..e93b6ac 100644 --- a/inc/NE10_types.h +++ b/inc/NE10_types.h @@ -45,6 +45,7 @@ ///////////////////////////////////////////////////////// #define NE10_OK 0 #define NE10_ERR -1 +#define NE10_PI (3.1415926535897932384626433832795) ///////////////////////////////////////////////////////// // some external definitions to be exposed to the users diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt index ed7885c..005088d 100644 --- a/modules/CMakeLists.txt +++ 
b/modules/CMakeLists.txt @@ -224,6 +224,8 @@ if(NE10_ENABLE_IMGPROC) # Add image processing C files. set(NE10_IMGPROC_C_SRCS ${PROJECT_SOURCE_DIR}/common/NE10_mask_table.c + ${PROJECT_SOURCE_DIR}/modules/imgproc/NE10_resize.c + ${PROJECT_SOURCE_DIR}/modules/imgproc/NE10_rotate.c ) # Add image processing NEON files. diff --git a/modules/imgproc/NE10_init_imgproc.c b/modules/imgproc/NE10_init_imgproc.c index 4a976b8..370f29d 100644 --- a/modules/imgproc/NE10_init_imgproc.c +++ b/modules/imgproc/NE10_init_imgproc.c @@ -33,37 +33,29 @@ ne10_result_t ne10_init_imgproc (ne10_int32_t is_NEON_available) { if (NE10_OK == is_NEON_available) { - ne10_vresize = ne10_vresize_neon; - ne10_hresize_4channels = ne10_hresize_4channels_neon; - ne10_img_rotate_get_quad_rangle_subpix = ne10_img_rotate_get_quad_rangle_subpix_neon; + ne10_img_resize_bilinear_rgba = ne10_img_resize_bilinear_rgba_neon; + ne10_img_rotate_rgba = ne10_img_rotate_rgba_neon; } else { - ; + ne10_img_resize_bilinear_rgba = ne10_img_resize_bilinear_rgba_c; + ne10_img_rotate_rgba = ne10_img_rotate_rgba_c; } return NE10_OK; } // These are actual definitions of our function pointers that are declared in inc/NE10_imgproc.h -void (*ne10_vresize) (const ne10_int32_t** src, - ne10_uint8_t* dst, - const ne10_int16_t* beta, - ne10_int32_t width); -void (*ne10_hresize_4channels) (const ne10_uint8_t** src, - ne10_int32_t** dst, - ne10_int32_t count, - const ne10_int32_t* xofs, - const ne10_int16_t* alpha, - ne10_int32_t swidth, - ne10_int32_t dwidth, - ne10_int32_t cn, - ne10_int32_t xmin, - ne10_int32_t xmax); - -void (*ne10_img_rotate_get_quad_rangle_subpix) (ne10_uint8_t* dst, - const ne10_uint8_t* src, - ne10_int32_t swidth, - ne10_int32_t sheight, - ne10_int32_t dwidth, - ne10_int32_t dheight, - ne10_float32_t* matrix); +void (*ne10_img_resize_bilinear_rgba) (ne10_uint8_t* dst, + ne10_uint32_t dst_width, + ne10_uint32_t dst_height, + ne10_uint8_t* src, + ne10_uint32_t src_width, + ne10_uint32_t src_height, + 
ne10_uint32_t src_stride); +void (*ne10_img_rotate_rgba) (ne10_uint8_t* dst, + ne10_uint32_t* dst_width, + ne10_uint32_t* dst_height, + ne10_uint8_t* src, + ne10_uint32_t src_width, + ne10_uint32_t src_height, + ne10_int32_t angle); diff --git a/modules/imgproc/NE10_resize.c b/modules/imgproc/NE10_resize.c new file mode 100644 index 0000000..62a7c46 --- /dev/null +++ b/modules/imgproc/NE10_resize.c @@ -0,0 +1,549 @@ +/* + * Copyright 2013 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* license of OpenCV */ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +/* + * NE10 Library : imgproc/NE10_resize.c + */ + +#include "NE10.h" + +/** + * @ingroup groupIMGPROCs + */ +/** + * @defgroup IMG_RESIZE Image Resize + * + * \par + * Image resize is a generic functionality in image processing. In Ne10 library, we use the bilinear interpolation algorithm (http://en.wikipedia.org/wiki/Bilinear_interpolation) to implement image resize. For C implementation, we take the cvResize function from OpenCV (http://opencv.org/) for reference. + * \par + * This set of functions implements image resize with bilinear interpolation algorithm + * for 8-bit data types. The functions operate on out-of-place buffer which use different buffer for input and output. + * + */ +#define INTER_RESIZE_COEF_BITS 11 +#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS) +#define NE10_MAX_ESIZE 16 + +static inline ne10_uint32_t ne10_align_size (ne10_int32_t sz, ne10_int32_t n) +{ + return (sz + n - 1) & -n; /* round sz up to a multiple of n; only valid when n is a power of two */ +} + +static inline ne10_int32_t ne10_floor (ne10_float32_t a) +{ + return (ne10_int32_t) a - ( (ne10_int32_t) a > a); /* truncate toward zero, then step down only if that rounded up, so exact negative integers (e.g. -1.0f) map to themselves */ +} + +static inline ne10_int32_t ne10_clip (ne10_int32_t x, ne10_int32_t a, ne10_int32_t b) +{ + return (x >= a ? (x < b ? 
x : b - 1) : a); +} + +static inline ne10_uint8_t ne10_cast_op (ne10_int32_t val) +{ + ne10_int32_t bits = INTER_RESIZE_COEF_BITS * 2; + ne10_int32_t SHIFT = bits; + ne10_int32_t DELTA = 1 << (bits - 1) ; + ne10_int32_t temp = NE10_MIN (255, NE10_MAX (0, (val + DELTA) >> SHIFT)); + return (ne10_uint8_t) (temp); /* val rounded, shifted back by 2 * INTER_RESIZE_COEF_BITS and clamped to 0..255 */ +} + +static void ne10_img_hresize_linear_c (const ne10_uint8_t** src, + ne10_int32_t** dst, + ne10_int32_t count, + const ne10_int32_t* xofs, + const ne10_int16_t* alpha, + ne10_int32_t swidth, + ne10_int32_t dwidth, + ne10_int32_t cn, + ne10_int32_t xmin, + ne10_int32_t xmax) +{ + ne10_int32_t dx, k; + + ne10_int32_t dx0 = 0; + + //for (k = 0; k <= count - 2; k++) + if (count == 2) + { + k = 0; + const ne10_uint8_t *S0 = src[k], *S1 = src[k + 1]; + ne10_int32_t *D0 = dst[k], *D1 = dst[k + 1]; + for (dx = dx0; dx < xmax; dx++) + { + ne10_int32_t sx = xofs[dx]; + ne10_int32_t a0 = alpha[dx * 2], a1 = alpha[dx * 2 + 1]; + ne10_int32_t t0 = S0[sx] * a0 + S0[sx + cn] * a1; + ne10_int32_t t1 = S1[sx] * a0 + S1[sx + cn] * a1; + D0[dx] = t0; + D1[dx] = t1; + } + + for (; dx < dwidth; dx++) + { + ne10_int32_t sx = xofs[dx]; + D0[dx] = (ne10_int32_t) S0[sx] * INTER_RESIZE_COEF_SCALE; + D1[dx] = (ne10_int32_t) S1[sx] * INTER_RESIZE_COEF_SCALE; + } + } + + //for (; k < count; k++) + if (count == 1) + { + k = 0; + const ne10_uint8_t *S = src[k]; + ne10_int32_t *D = dst[k]; + for (dx = 0; dx < xmax; dx++) + { + ne10_int32_t sx = xofs[dx]; + D[dx] = S[sx] * alpha[dx * 2] + S[sx + cn] * alpha[dx * 2 + 1]; + } + + for (; dx < dwidth; dx++) + D[dx] = (ne10_int32_t) S[xofs[dx]] * INTER_RESIZE_COEF_SCALE; + } +} + + +static void ne10_img_vresize_linear_c (const ne10_int32_t** src, ne10_uint8_t* dst, const ne10_int16_t* beta, ne10_int32_t width) +{ + ne10_int32_t b0 = beta[0], b1 = beta[1]; + const ne10_int32_t *S0 = src[0], *S1 = src[1]; + + ne10_int32_t x = 0; + for (; x <= width - 4; x += 4) + { + ne10_int32_t t0, t1; + t0 = S0[x] * b0 + S1[x] * b1; + t1 = S0[x + 1] * b0 + 
S1[x + 1] * b1; + dst[x] = ne10_cast_op (t0); + dst[x + 1] = ne10_cast_op (t1); + t0 = S0[x + 2] * b0 + S1[x + 2] * b1; + t1 = S0[x + 3] * b0 + S1[x + 3] * b1; + dst[x + 2] = ne10_cast_op (t0); + dst[x + 3] = ne10_cast_op (t1); + } + + for (; x < width; x++) + dst[x] = ne10_cast_op (S0[x] * b0 + S1[x] * b1); +} + +static void ne10_img_resize_generic_linear_c (ne10_uint8_t* src, + ne10_uint8_t* dst, + const ne10_int32_t* xofs, + const ne10_int16_t* _alpha, + const ne10_int32_t* yofs, + const ne10_int16_t* _beta, + ne10_int32_t xmin, + ne10_int32_t xmax, + ne10_int32_t ksize, + ne10_int32_t srcw, + ne10_int32_t srch, + ne10_int32_t srcstep, + ne10_int32_t dstw, + ne10_int32_t dsth, + ne10_int32_t channels) +{ + + const ne10_int16_t* alpha = _alpha; + const ne10_int16_t* beta = _beta; + ne10_int32_t cn = channels; + srcw *= cn; + dstw *= cn; + + ne10_int32_t bufstep = (ne10_int32_t) ne10_align_size (dstw, 16); + ne10_int32_t dststep = (ne10_int32_t) ne10_align_size (dstw, 4); + + + ne10_int32_t *buffer_ = (ne10_int32_t*) NE10_MALLOC (bufstep * ksize * sizeof (ne10_int32_t)); + if (!buffer_) return; /* allocation failed: the row pointers below would otherwise be derived from a null pointer */ + const ne10_uint8_t* srows[NE10_MAX_ESIZE]; + ne10_int32_t* rows[NE10_MAX_ESIZE]; + ne10_int32_t prev_sy[NE10_MAX_ESIZE]; + ne10_int32_t k, dy; + xmin *= cn; + xmax *= cn; + + for (k = 0; k < ksize; k++) + { + prev_sy[k] = -1; + rows[k] = (ne10_int32_t*) buffer_ + bufstep * k; + } + + // image resize is separable: each output row blends ksize horizontally resized source rows, which are kept in rows[] and reused when the same source row is needed again + for (dy = 0; dy < dsth; dy++, beta += ksize) + { + ne10_int32_t sy0 = yofs[dy], k, k0 = ksize, k1 = 0, ksize2 = ksize / 2; + + for (k = 0; k < ksize; k++) + { + ne10_int32_t sy = ne10_clip (sy0 - ksize2 + 1 + k, 0, srch); + for (k1 = NE10_MAX (k1, k); k1 < ksize; k1++) + { + if (sy == prev_sy[k1]) // if the sy-th row has been computed already, reuse it. 
+ { + if (k1 > k) + memcpy (rows[k], rows[k1], bufstep * sizeof (rows[0][0])); + break; + } + } + if (k1 == ksize) + k0 = NE10_MIN (k0, k); // remember the first row that needs to be computed + srows[k] = (const ne10_uint8_t*) (src + srcstep * sy); + prev_sy[k] = sy; + } + + if (k0 < ksize) + ne10_img_hresize_linear_c (srows + k0, rows + k0, ksize - k0, xofs, alpha, + srcw, dstw, cn, xmin, xmax); + + ne10_img_vresize_linear_c ( (const ne10_int32_t**) rows, (ne10_uint8_t*) (dst + dststep * dy), beta, dstw); + } + + NE10_FREE (buffer_); +} + +static void ne10_img_resize_cal_offset_linear (ne10_int32_t* xofs, + ne10_int16_t* ialpha, + ne10_int32_t* yofs, + ne10_int16_t* ibeta, + ne10_int32_t *xmin, + ne10_int32_t *xmax, + ne10_int32_t ksize, + ne10_int32_t ksize2, + ne10_int32_t srcw, + ne10_int32_t srch, + ne10_int32_t dstw, + ne10_int32_t dsth, + ne10_int32_t channels) +{ + ne10_float32_t inv_scale_x = (ne10_float32_t) dstw / srcw; + ne10_float32_t inv_scale_y = (ne10_float32_t) dsth / srch; + + ne10_int32_t cn = channels; + ne10_float32_t scale_x = 1. / inv_scale_x; + ne10_float32_t scale_y = 1. 
/ inv_scale_y; + ne10_int32_t k, sx, sy, dx, dy; + + + ne10_float32_t fx, fy; + + ne10_float32_t cbuf[NE10_MAX_ESIZE]; + + for (dx = 0; dx < dstw; dx++) + { + fx = (ne10_float32_t) ( (dx + 0.5) * scale_x - 0.5); + sx = ne10_floor (fx); + fx -= sx; + + if (sx < ksize2 - 1) + { + *xmin = dx + 1; + if (sx < 0) + fx = 0, sx = 0; + } + + if (sx + ksize2 >= srcw) + { + *xmax = NE10_MIN (*xmax, dx); + if (sx >= srcw - 1) + fx = 0, sx = srcw - 1; + } + + for (k = 0, sx *= cn; k < cn; k++) + xofs[dx * cn + k] = sx + k; + + cbuf[0] = 1.f - fx; + cbuf[1] = fx; + + for (k = 0; k < ksize; k++) + ialpha[dx * cn * ksize + k] = (ne10_int16_t) (cbuf[k] * INTER_RESIZE_COEF_SCALE); + for (; k < cn * ksize; k++) + ialpha[dx * cn * ksize + k] = ialpha[dx * cn * ksize + k - ksize]; + } + + for (dy = 0; dy < dsth; dy++) + { + fy = (ne10_float32_t) ( (dy + 0.5) * scale_y - 0.5); + sy = ne10_floor (fy); + fy -= sy; + + yofs[dy] = sy; + + cbuf[0] = 1.f - fy; + cbuf[1] = fy; + + for (k = 0; k < ksize; k++) + ibeta[dy * ksize + k] = (ne10_int16_t) (cbuf[k] * INTER_RESIZE_COEF_SCALE); + + } + +} + +/** + * @addtogroup IMG_RESIZE + * @{ + */ + +/** + * @brief image resize of 8-bit data. + * @param[out] *dst point to the destination image + * @param[in] dst_width width of destination image + * @param[in] dst_height height of destination image + * @param[in] *src point to the source image + * @param[in] src_width width of source image + * @param[in] src_height height of source image + * @param[in] src_stride stride of source buffer + * @return none. 
+ * The function implements image resize + */ +void ne10_img_resize_bilinear_rgba_c (ne10_uint8_t* dst, + ne10_uint32_t dst_width, + ne10_uint32_t dst_height, + ne10_uint8_t* src, + ne10_uint32_t src_width, + ne10_uint32_t src_height, + ne10_uint32_t src_stride) +{ + ne10_int32_t dstw = dst_width; + ne10_int32_t dsth = dst_height; + ne10_int32_t srcw = src_width; + ne10_int32_t srch = src_height; + + ne10_int32_t cn = 4; + + + ne10_int32_t xmin = 0; + ne10_int32_t xmax = dstw; + ne10_int32_t width = dstw * cn; + if (dstw <= 0 || dsth <= 0 || srcw <= 0 || srch <= 0) return; /* empty (or overflowed) dimensions: nothing to resize, and a zero source size would produce out-of-range source offsets */ + + ne10_int32_t ksize = 0, ksize2; + ksize = 2; + ksize2 = ksize / 2; + + ne10_uint8_t *buffer_ = (ne10_uint8_t*) NE10_MALLOC ( (width + dsth) * (sizeof (ne10_int32_t) + sizeof (ne10_float32_t) * ksize)); + if (!buffer_) return; /* allocation failed: the offset and coefficient tables below all live in this buffer */ + ne10_int32_t* xofs = (ne10_int32_t*) buffer_; + ne10_int32_t* yofs = xofs + width; + ne10_int16_t* ialpha = (ne10_int16_t*) (yofs + dsth); + ne10_int16_t* ibeta = ialpha + width * ksize; + + ne10_img_resize_cal_offset_linear (xofs, ialpha, yofs, ibeta, &xmin, &xmax, ksize, ksize2, srcw, srch, dstw, dsth, cn); + + ne10_img_resize_generic_linear_c (src, dst, xofs, ialpha, yofs, ibeta, xmin, xmax, ksize, srcw, srch, src_stride, dstw, dsth, cn); + NE10_FREE (buffer_); +} + +extern void ne10_img_hresize_4channels_linear_neon (const ne10_uint8_t** src, + ne10_int32_t** dst, + ne10_int32_t count, + const ne10_int32_t* xofs, + const ne10_int16_t* alpha, + ne10_int32_t swidth, + ne10_int32_t dwidth, + ne10_int32_t cn, + ne10_int32_t xmin, + ne10_int32_t xmax); +extern void ne10_img_vresize_linear_neon (const ne10_int32_t** src, ne10_uint8_t* dst, const ne10_int16_t* beta, ne10_int32_t width); + +static void ne10_img_resize_generic_linear_neon (ne10_uint8_t* src, + ne10_uint8_t* dst, + const ne10_int32_t* xofs, + const ne10_int16_t* _alpha, + const ne10_int32_t* yofs, + const ne10_int16_t* _beta, + ne10_int32_t xmin, + ne10_int32_t xmax, + ne10_int32_t ksize, + ne10_int32_t srcw, + ne10_int32_t srch, + ne10_int32_t srcstep, + ne10_int32_t 
dstw, + ne10_int32_t dsth, + ne10_int32_t channels) +{ + + const ne10_int16_t* alpha = _alpha; + const ne10_int16_t* beta = _beta; + ne10_int32_t cn = channels; + srcw *= cn; + dstw *= cn; + + ne10_int32_t bufstep = (ne10_int32_t) ne10_align_size (dstw, 16); + ne10_int32_t dststep = (ne10_int32_t) ne10_align_size (dstw, 4); + + + ne10_int32_t *buffer_ = (ne10_int32_t*) NE10_MALLOC (bufstep * ksize * sizeof (ne10_int32_t)); + if (!buffer_) return; /* allocation failed: the row pointers below would otherwise be derived from a null pointer */ + const ne10_uint8_t* srows[NE10_MAX_ESIZE]; + ne10_int32_t* rows[NE10_MAX_ESIZE]; + ne10_int32_t prev_sy[NE10_MAX_ESIZE]; + ne10_int32_t k, dy; + xmin *= cn; + xmax *= cn; + + for (k = 0; k < ksize; k++) + { + prev_sy[k] = -1; + rows[k] = (ne10_int32_t*) buffer_ + bufstep * k; + } + + // image resize is separable: each output row blends ksize horizontally resized source rows, which are kept in rows[] and reused when the same source row is needed again + for (dy = 0; dy < dsth; dy++, beta += ksize) + { + ne10_int32_t sy0 = yofs[dy], k, k0 = ksize, k1 = 0, ksize2 = ksize / 2; + + for (k = 0; k < ksize; k++) + { + ne10_int32_t sy = ne10_clip (sy0 - ksize2 + 1 + k, 0, srch); + for (k1 = NE10_MAX (k1, k); k1 < ksize; k1++) + { + if (sy == prev_sy[k1]) // if the sy-th row has been computed already, reuse it. + { + if (k1 > k) + memcpy (rows[k], rows[k1], bufstep * sizeof (rows[0][0])); + break; + } + } + if (k1 == ksize) + k0 = NE10_MIN (k0, k); // remember the first row that needs to be computed + srows[k] = (const ne10_uint8_t*) (src + srcstep * sy); + prev_sy[k] = sy; + } + + if (k0 < ksize) + { + if (cn == 4) + ne10_img_hresize_4channels_linear_neon (srows + k0, rows + k0, ksize - k0, xofs, alpha, + srcw, dstw, cn, xmin, xmax); + else + ne10_img_hresize_linear_c (srows + k0, rows + k0, ksize - k0, xofs, alpha, + srcw, dstw, cn, xmin, xmax); + } + ne10_img_vresize_linear_neon ( (const ne10_int32_t**) rows, (ne10_uint8_t*) (dst + dststep * dy), beta, dstw); + } + + NE10_FREE (buffer_); +} + +/** + * @brief image resize of 8-bit data. 
+ * @param[out] *dst point to the destination image + * @param[in] dst_width width of destination image + * @param[in] dst_height height of destination image + * @param[in] *src point to the source image + * @param[in] src_width width of source image + * @param[in] src_height height of source image + * @param[in] src_stride stride of source buffer + * @return none. + * The function implements image resize + */ +void ne10_img_resize_bilinear_rgba_neon (ne10_uint8_t* dst, + ne10_uint32_t dst_width, + ne10_uint32_t dst_height, + ne10_uint8_t* src, + ne10_uint32_t src_width, + ne10_uint32_t src_height, + ne10_uint32_t src_stride) +{ + ne10_int32_t dstw = dst_width; + ne10_int32_t dsth = dst_height; + ne10_int32_t srcw = src_width; + ne10_int32_t srch = src_height; + + ne10_int32_t cn = 4; + + + ne10_int32_t xmin = 0; + ne10_int32_t xmax = dstw; + ne10_int32_t width = dstw * cn; + if (dstw <= 0 || dsth <= 0 || srcw <= 0 || srch <= 0) return; /* empty (or overflowed) dimensions: nothing to resize, and a zero source size would produce out-of-range source offsets */ + + ne10_int32_t ksize = 0, ksize2; + ksize = 2; + ksize2 = ksize / 2; + + ne10_uint8_t *buffer_ = (ne10_uint8_t*) NE10_MALLOC ( (width + dsth) * (sizeof (ne10_int32_t) + sizeof (ne10_float32_t) * ksize)); + if (!buffer_) return; /* allocation failed: the offset and coefficient tables below all live in this buffer */ + ne10_int32_t* xofs = (ne10_int32_t*) buffer_; + ne10_int32_t* yofs = xofs + width; + ne10_int16_t* ialpha = (ne10_int16_t*) (yofs + dsth); + ne10_int16_t* ibeta = ialpha + width * ksize; + + ne10_img_resize_cal_offset_linear (xofs, ialpha, yofs, ibeta, &xmin, &xmax, ksize, ksize2, srcw, srch, dstw, dsth, cn); + + ne10_img_resize_generic_linear_neon (src, dst, xofs, ialpha, yofs, ibeta, xmin, xmax, ksize, srcw, srch, src_stride, dstw, dsth, cn); + NE10_FREE (buffer_); +} + +/** + * @} end of IMG_RESIZE group + */ diff --git a/modules/imgproc/NE10_resize.neon.s b/modules/imgproc/NE10_resize.neon.s index 255771d..18d2b28 100644 --- a/modules/imgproc/NE10_resize.neon.s +++ b/modules/imgproc/NE10_resize.neon.s @@ -44,14 +44,14 @@ */ .align 4 - .global ne10_vresize_neon + .global ne10_img_vresize_linear_neon .thumb - .extern ne10_vresize_mask_residual_table/* mask of 
store data */ + .extern ne10_img_vresize_linear_mask_residual_table/* mask of store data */ .thumb_func .equ BITS, 0x16 /* INTER_RESIZE_COEF_BITS*2 */ .equ DELTA, 0x200000 /* 1 << (INTER_RESIZE_COEF_BITS*2 - 1) */ -ne10_vresize_neon: +ne10_img_vresize_linear_neon: push {r4-r6,lr} /*ARM Registers*/ @@ -105,13 +105,13 @@ dDst_01234567 .dn d21 mov tmp, #255 vdup.32 qMax, tmp + vld1.s32 {qS0_0123, qS0_4567}, [pS0]! + vld1.s32 {qS1_0123, qS1_4567}, [pS1]! + subs width, width, #8 blt VResizeResidualLoop - vld1.s32 {qS0_0123, qS0_4567}, [pS0]! - vld1.s32 {qS1_0123, qS1_4567}, [pS1]! VResizeMainLoop: - vmul.s32 qTmp_0123, qS0_0123, dBeta0 vmul.s32 qTmp_4567, qS0_4567, dBeta0 vmla.s32 qTmp_0123, qS1_0123, dBeta1 @@ -139,18 +139,16 @@ VResizeMainLoop: vld1.s32 {qS1_0123, qS1_4567}, [pS1]! bge VResizeMainLoop +VResizeResidualLoop: adds width, width, #8 beq VResizeEnd sub width, width, #1 ldr pMask, =ne10_vresize_mask_residual_table - sub width, width, #1 add pMask, pMask, width, lsl #3 vld1.64 {dMask}, [pMask] vld1.64 {dDst_01234567}, [pDst] -VResizeResidualLoop: - vmul.s32 qTmp_0123, qS0_0123, dBeta0 vmul.s32 qTmp_4567, qS0_4567, dBeta0 vmla.s32 qTmp_0123, qS1_0123, dBeta1 @@ -171,7 +169,7 @@ VResizeResidualLoop: vmovn.I32 dTmp_4567, qTmp_4567 vmovn.I16 dTmp_01234567, qTmp_01234567 vbsl dMask, dTmp_01234567, dDst_01234567 - vst1.8 {dTmp_01234567}, [pDst] + vst1.8 {dMask}, [pDst] VResizeEnd: /*Return From Function*/ pop {r4-r6,pc} @@ -231,12 +229,12 @@ VResizeEnd: */ .align 4 - .global ne10_hresize_4channels_neon + .global ne10_img_hresize_4channels_linear_neon .thumb .thumb_func .equ INTER_RESIZE_COEF_SCALE, 0x800 /* 1 << INTER_RESIZE_COEF_BITS */ -ne10_hresize_4channels_neon: +ne10_img_hresize_4channels_linear_neon: push {r4-r10,lr} /*ARM Registers*/ @@ -284,9 +282,9 @@ qDst1_0123 .qn q10 subs tmp, count, #1 - beq HResize4Count1 -HResize4Count2: + beq ne10_img_hresize_4channels_linear_count_1 +ne10_img_hresize_4channels_linear_count_2: ldr pS0, [pSrc], #4 ldr pS1, [pSrc] 
ldr pD0, [pDst], #4 @@ -301,9 +299,6 @@ HResize4Count2: ldr xmax, [sp, #52] sub dwidth, dwidth, xmax /* calculate the residual */ - subs xmax, xmax, #4 - blt HResize4ResidualLoop2 - ldr sx, [pXofs], #16 /* for 4 channels only, xofs is changed based on channels */ add pTmp0, pS0, sx /* find the address of starting element */ add pTmp1, pS1, sx @@ -311,8 +306,10 @@ HResize4Count2: vld1.8 {dS0_01234567}, [pTmp0] vld1.8 {dS1_01234567}, [pTmp1] -HResize4MainLoop2: + subs xmax, xmax, #4 + blt ne10_img_hresize_4channels_linear_count_2_dwidth_loop +ne10_img_hresize_4channels_linear_count_2_xmax_loop: vmovl.u8 qS0_01234567, dS0_01234567 vmovl.u8 qS1_01234567, dS1_01234567 @@ -332,12 +329,12 @@ HResize4MainLoop2: vld1.8 {dS1_01234567}, [pTmp1] subs xmax, xmax, #4 - bge HResize4MainLoop2 + bge ne10_img_hresize_4channels_linear_count_2_xmax_loop cmp dwidth, #0 - beq HResize4End + beq ne10_img_hresize_4channels_linear_end -HResize4ResidualLoop2: +ne10_img_hresize_4channels_linear_count_2_dwidth_loop: vmovl.u8 qS0_01234567, dS0_01234567 vmovl.u8 qS1_01234567, dS1_01234567 @@ -355,11 +352,11 @@ HResize4ResidualLoop2: vld1.8 {dS1_01234567}, [pTmp1] subs dwidth, dwidth, #4 - bgt HResize4ResidualLoop2 + bgt ne10_img_hresize_4channels_linear_count_2_dwidth_loop - b HResize4End + b ne10_img_hresize_4channels_linear_end -HResize4Count1: +ne10_img_hresize_4channels_linear_count_1: ldr pS0, [pSrc], #4 ldr pD0, [pDst], #4 @@ -373,16 +370,15 @@ HResize4Count1: ldr xmax, [sp, #52] sub dwidth, dwidth, xmax /* calculate the residual */ - subs xmax, xmax, #4 - blt HResize4ResidualLoop1 - ldr sx, [pXofs], #16 /* for 4 channels only, xofs is changed based on channels */ add pTmp0, pS0, sx /* find the address of starting element */ vld2.16 {dAlpha_0, dAlpha_1}, [pAlpha]! 
/* alpha is repeated based on channels */ vld1.8 {dS0_01234567}, [pTmp0] -HResize4MainLoop1: + subs xmax, xmax, #4 + blt ne10_img_hresize_4channels_linear_count_1_dwidth_loop +ne10_img_hresize_4channels_linear_count_1_xmax_loop: vmovl.u8 qS0_01234567, dS0_01234567 vmull.u16 qDst0_0123, dS0_0123, dAlpha_0 @@ -396,11 +392,11 @@ HResize4MainLoop1: vld1.8 {dS0_01234567}, [pTmp0] subs xmax, xmax, #4 - bge HResize4MainLoop1 + bge ne10_img_hresize_4channels_linear_count_1_xmax_loop - cbz dwidth, HResize4End + cbz dwidth, ne10_img_hresize_4channels_linear_end -HResize4ResidualLoop1: +ne10_img_hresize_4channels_linear_count_1_dwidth_loop: vmovl.u8 qS0_01234567, dS0_01234567 vmull.u16 qDst0_0123, dS0_0123, dCoeff @@ -412,9 +408,9 @@ HResize4ResidualLoop1: vld1.8 {dS0_01234567}, [pTmp0] subs dwidth, dwidth, #4 - bgt HResize4ResidualLoop1 + bgt ne10_img_hresize_4channels_linear_count_1_dwidth_loop -HResize4End: +ne10_img_hresize_4channels_linear_end: /*Return From Function*/ pop {r4-r10,pc} diff --git a/modules/imgproc/NE10_rotate.c b/modules/imgproc/NE10_rotate.c new file mode 100644 index 0000000..123a04e --- /dev/null +++ b/modules/imgproc/NE10_rotate.c @@ -0,0 +1,316 @@ +/* + * Copyright 2013 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* license of OpenCV */ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +/* + * NE10 Library : imgproc/NE10_rotate.c + */ + +//#include +#include "NE10.h" + + +/** + * @ingroup groupIMGPROCs + */ +/** + * @defgroup IMG_ROTATE Image Rotate + * + * \par + * Image rotate is a generic functionality in image processing. For C implementation, we take the cvGetQuadrangleSubPix function from OpenCV (http://opencv.org/) for reference. + * \par + * This set of functions implements image rotate with bilinear interpolation algorithm + * for 8-bit data types. 
+ *
+ */
+/* Warps the RGBA image src (srcw x srch) into dst (dstw x dsth): dst pixel (x, y) is bilinearly sampled from
+ * src at (A11*x + A12*y + A13, A21*x + A22*y + A23), where matrix = {A11, A12, A13, A21, A22, A23}. */
+void ne10_img_rotate_get_quad_rangle_subpix_rgba_c (ne10_uint8_t *dst,
+        ne10_uint8_t *src,
+        ne10_int32_t srcw,
+        ne10_int32_t srch,
+        ne10_int32_t dstw,
+        ne10_int32_t dsth,
+        ne10_float32_t *matrix)
+{
+    ne10_uint8_t* src_data = src;
+    ne10_uint8_t* dst_data = dst;
+
+    ne10_int32_t x, y;
+    /* matrix is a row-major 2x3 affine transform that maps dst coordinates to src coordinates; */
+    /* (xs, ys) below is the running src position and advances by (A11, A21) per dst column */
+    ne10_float32_t A11 = matrix[0], A12 = matrix[1], A13 = matrix[2];
+    ne10_float32_t A21 = matrix[3], A22 = matrix[4], A23 = matrix[5];
+
+    ne10_int32_t src_step = srcw * 4;
+    ne10_int32_t dst_step = dstw * 4;
+    for (y = 0; y < dsth; y++, dst_data += dst_step)
+    {
+        ne10_float32_t xs = A12 * y + A13;
+        ne10_float32_t ys = A22 * y + A23;
+        ne10_float32_t xe = A11 * (dstw - 1) + A12 * y + A13;
+        ne10_float32_t ye = A21 * (dstw - 1) + A22 * y + A23;
+        /* fast path: both ends of this dst row map inside src with margin, so samples are read unchecked */
+        if ( (unsigned) ( (ne10_int32_t) (xs) - 1) < (unsigned) (srcw - 4) &&
+                (unsigned) ( (ne10_int32_t) (ys) - 1) < (unsigned) (srch - 4) &&
+                (unsigned) ( (ne10_int32_t) (xe) - 1) < (unsigned) (srcw - 4) &&
+                (unsigned) ( (ne10_int32_t) (ye) - 1) < (unsigned) (srch - 4))
+        {
+            for (x = 0; x < dstw; x++)
+            {
+                ne10_int32_t ixs = (ne10_int32_t) (xs);
+                ne10_int32_t iys = (ne10_int32_t) (ys);
+                const ne10_uint8_t *ptr = src_data + src_step * iys + ixs * 4;
+                /* a, b, a1: fractional offsets converted with NE10_F2I16_OP (macro defined elsewhere, presumably fixed point) */
+                ne10_int16_t a = NE10_F2I16_OP (xs - ixs);
+                ne10_int16_t b = NE10_F2I16_OP (ys - iys);
+                ne10_int16_t a1 = NE10_F2I16_OP (1.f - (xs - ixs));
+
+                ne10_uint8_t p0, p1;
+                xs += A11;
+                ys += A21;
+
+                p0 = NE10_F2I16_SROUND (ptr[0] * a1 + ptr[4] * a);
+                p1 = NE10_F2I16_SROUND (ptr[src_step] * a1 + ptr[src_step + 4] * a);
+                dst_data[x * 4] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+
+                p0 = NE10_F2I16_SROUND (ptr[1] * a1 + ptr[4 + 1] * a);
+                p1 = NE10_F2I16_SROUND (ptr[src_step + 1] * a1 + ptr[src_step + 4 + 1] * a);
+                dst_data[x * 4 + 1] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+
+                p0 = NE10_F2I16_SROUND (ptr[2] * a1 + ptr[4 + 2] * a);
+                p1 = NE10_F2I16_SROUND (ptr[src_step + 2] * a1 + ptr[src_step + 4 + 2] * a);
+                dst_data[x * 4 + 2] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+
+                p0 = NE10_F2I16_SROUND (ptr[3] * a1 + ptr[4 + 3] * a);
+                p1 = NE10_F2I16_SROUND (ptr[src_step + 3] * a1 + ptr[src_step + 4 + 3] * a);
+                dst_data[x * 4 + 3] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+            }
+        }
+        else
+        {
+            for (x = 0; x < dstw; x++)
+            {
+                ne10_int32_t ixs = (ne10_int32_t) (xs), iys = (ne10_int32_t) (ys);
+                /* same weights as the fast path; here every sample is bounds-checked and a dst pixel whose */
+                /* 2x2 src neighbourhood is not fully inside the image is skipped, i.e. left unmodified */
+                ne10_int16_t a = NE10_F2I16_OP (xs - ixs);
+                ne10_int16_t b = NE10_F2I16_OP (ys - iys);
+                ne10_int16_t a1 = NE10_F2I16_OP (1.f - (xs - ixs));
+                const ne10_uint8_t *ptr0, *ptr1;
+                xs += A11;
+                ys += A21;
+
+                if ( (unsigned) iys < (unsigned) (srch - 1))
+                {
+                    ptr0 = src_data + src_step * iys;
+                    ptr1 = ptr0 + src_step;
+                }
+                else
+                {
+                    continue;
+                }
+
+                if ( (unsigned) ixs < (unsigned) (srcw - 1))
+                {
+
+                    ne10_uint8_t p0, p1;
+
+                    ptr0 += ixs * 4;
+                    ptr1 += ixs * 4;
+
+                    p0 = NE10_F2I16_SROUND (ptr0[0] * a1 + ptr0[4] * a);
+                    p1 = NE10_F2I16_SROUND (ptr1[0] * a1 + ptr1[4] * a);
+                    dst_data[x * 4] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+
+                    p0 = NE10_F2I16_SROUND (ptr0[1] * a1 + ptr0[4 + 1] * a);
+                    p1 = NE10_F2I16_SROUND (ptr1[1] * a1 + ptr1[4 + 1] * a);
+                    dst_data[x * 4 + 1] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+
+                    p0 = NE10_F2I16_SROUND (ptr0[2] * a1 + ptr0[4 + 2] * a);
+                    p1 = NE10_F2I16_SROUND (ptr1[2] * a1 + ptr1[4 + 2] * a);
+                    dst_data[x * 4 + 2] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+
+                    p0 = NE10_F2I16_SROUND (ptr0[3] * a1 + ptr0[4 + 3] * a);
+                    p1 = NE10_F2I16_SROUND (ptr1[3] * a1 + ptr1[4 + 3] * a);
+                    dst_data[x * 4 + 3] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+                }
+            }
+        }
+    }
+}
+
+
+/**
+ * @addtogroup IMG_ROTATE
+ * @{
+ */
+/**
+ * @brief image resize of 8-bit
data.
+ * @return none.
+ * The function implements image resize
+ */
+/**
+ * @brief Rotate an RGBA image (8 bits per channel) about its centre.
+ * @param[out]  *dst          pointer to the destination image; must have room for (*dst_width) * (*dst_height) * 4 bytes
+ * @param[out]  *dst_width    receives the width of the destination image
+ * @param[out]  *dst_height   receives the height of the destination image
+ * @param[in]   *src          pointer to the source image
+ * @param[in]   src_width     width of source image
+ * @param[in]   src_height    height of source image
+ * @param[in]   angle         rotation angle in degrees
+ * @return none.
+ * The function sizes dst from |sin| and |cos| of the angle, builds the dst-to-src affine matrix and samples src at sub-pixel accuracy
+ */
+void ne10_img_rotate_rgba_c (ne10_uint8_t* dst,
+                             ne10_uint32_t* dst_width,
+                             ne10_uint32_t* dst_height,
+                             ne10_uint8_t* src,
+                             ne10_uint32_t src_width,
+                             ne10_uint32_t src_height,
+                             ne10_int32_t angle)
+{
+    ne10_float32_t radian = (angle * NE10_PI / 180.0);
+    ne10_float32_t a = sin (radian), b = cos (radian);
+    ne10_int32_t srcw = src_width;
+    ne10_int32_t srch = src_height;
+    ne10_int32_t dstw = (srch * fabs (a)) + (srcw * fabs (b)) + 1;
+    ne10_int32_t dsth = (srch * fabs (b)) + (srcw * fabs (a)) + 1;
+    /* m = {cos, sin, tx, -sin, cos, ty}; tx/ty make the dst centre (dx, dy) map to (srcw / 2, srch / 2) */
+    ne10_float32_t m[6];
+    ne10_float32_t dx = (dstw - 1) * 0.5;
+    ne10_float32_t dy = (dsth - 1) * 0.5;
+
+    m[0] = b;
+    m[1] = a;
+    m[3] = -m[1];
+    m[4] = m[0];
+    m[2] = srcw * 0.5f - m[0] * dx - m[1] * dy;
+    m[5] = srch * 0.5f - m[3] * dx - m[4] * dy;
+
+    *dst_width = dstw;
+    *dst_height = dsth;
+    ne10_img_rotate_get_quad_rangle_subpix_rgba_c (dst, src, srcw, srch, dstw, dsth, m);
+}
+
+
+extern void ne10_img_rotate_get_quad_rangle_subpix_rgba_neon (ne10_uint8_t *dst,
+        ne10_uint8_t *src,
+        ne10_int32_t srcw,
+        ne10_int32_t srch,
+        ne10_int32_t dstw,
+        ne10_int32_t dsth,
+        ne10_float32_t *matrix);
+
+/**
+ * @brief image rotate of 8-bit data.
+ * @param[out] *dst point to the destination image + * @param[out] *dst_width width of destination image + * @param[out] *dst_height height of destination image + * @param[in] *src point to the source image + * @param[in] src_width width of source image + * @param[in] src_height height of source image + * @param[in] angle angle of rotate + * @return none. + * The function extracts pixels from src at sub-pixel accuracy and stores them to dst + */ +void ne10_img_rotate_rgba_neon (ne10_uint8_t* dst, + ne10_uint32_t* dst_width, + ne10_uint32_t* dst_height, + ne10_uint8_t* src, + ne10_uint32_t src_width, + ne10_uint32_t src_height, + ne10_int32_t angle) +{ + ne10_float32_t radian = (angle * NE10_PI / 180.0); + ne10_float32_t a = sin (radian), b = cos (radian); + ne10_int32_t srcw = src_width; + ne10_int32_t srch = src_height; + ne10_int32_t dstw = (srch * fabs (a)) + (srcw * fabs (b)) + 1; + ne10_int32_t dsth = (srch * fabs (b)) + (srcw * fabs (a)) + 1; + ne10_int32_t i; + ne10_float32_t m[6]; + ne10_float32_t dx = (dstw - 1) * 0.5; + ne10_float32_t dy = (dsth - 1) * 0.5; + + m[0] = b; + m[1] = a; + m[3] = -m[1]; + m[4] = m[0]; + m[2] = srcw * 0.5f - m[0] * dx - m[1] * dy; + m[5] = srch * 0.5f - m[3] * dx - m[4] * dy; + + *dst_width = dstw; + *dst_height = dsth; + ne10_img_rotate_get_quad_rangle_subpix_rgba_neon (dst, src, srcw, srch, dstw, dsth, m); +} + +/** + * @} end of IMG_ROTATE group + */ diff --git a/modules/imgproc/NE10_rotate.neon.s b/modules/imgproc/NE10_rotate.neon.s index 69a304e..cbe6001 100644 --- a/modules/imgproc/NE10_rotate.neon.s +++ b/modules/imgproc/NE10_rotate.neon.s @@ -46,11 +46,11 @@ */ .align 4 - .global ne10_img_rotate_get_quad_rangle_subpix_neon + .global ne10_img_rotate_get_quad_rangle_subpix_rgba_neon .thumb .thumb_func -ne10_img_rotate_get_quad_rangle_subpix_neon: +ne10_img_rotate_get_quad_rangle_subpix_rgba_neon: /*ARM Registers*/ /* long-term variable */ diff --git a/modules/imgproc/test/test_main.c b/modules/imgproc/test/test_main.c 
new file mode 100644 index 0000000..cef7790 --- /dev/null +++ b/modules/imgproc/test/test_main.c @@ -0,0 +1,59 @@ +/* + * Copyright 2013 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * NE10 Library : test_main.c + */ + +#include "seatest.h" + +void test_fixture_resize (void); +void test_fixture_rotate (void); + +void all_tests (void) +{ + test_fixture_resize(); + test_fixture_rotate(); +} + + +void my_suite_setup (void) +{ + //printf("I'm done before every single test in the suite\r\n"); +} + +void my_suite_teardown (void) +{ + //printf("I'm done after every single test in the suite\r\n"); +} + +int main (ne10_int32_t argc, char** argv) +{ + suite_setup (my_suite_setup); + suite_teardown (my_suite_teardown); + return run_tests (all_tests); +} diff --git a/modules/imgproc/test/test_suite_resize.c b/modules/imgproc/test/test_suite_resize.c new file mode 100644 index 0000000..fc4d655 --- /dev/null +++ b/modules/imgproc/test/test_suite_resize.c @@ -0,0 +1,207 @@ +/* + * Copyright 2013 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NE10 Library : test_suite_resize.c + */ + +#include +#include +#include +#include + +#include "NE10_imgproc.h" +#include "seatest.h" +#include "unit_test_common.h" + +/* ---------------------------------------------------------------------- +** Global defines +** ------------------------------------------------------------------- */ +#define MEM_SIZE 256//1024 +#define TEST_COUNT 5000 + + +/* ---------------------------------------------------------------------- +** Defines each of the tests performed +** ------------------------------------------------------------------- */ + + +//input and output +static ne10_uint8_t * in_c = NULL; +static ne10_uint8_t * in_neon = NULL; + +static ne10_uint8_t * out_c = NULL; +static ne10_uint8_t * out_neon = NULL; + +static ne10_float32_t snr = 0.0f; + +void test_resize_conformance_case() +{ + ne10_int32_t srcw; + ne10_int32_t srch; + ne10_int32_t dstw; + ne10_int32_t dsth; + ne10_int32_t i; + ne10_int32_t w, h; + ne10_int32_t channels = 4; + ne10_int32_t pic_size = MEM_SIZE * MEM_SIZE * channels * sizeof (ne10_uint8_t); + ne10_float32_t PSNR = 0.0f; + + /* init input memory */ + in_c = NE10_MALLOC (pic_size); + in_neon = NE10_MALLOC (pic_size); + + /* init dst memory */ + out_c = NE10_MALLOC (pic_size); + out_neon = NE10_MALLOC (pic_size); + + for (i = 0; i < pic_size; i++) + { + in_c[i] = in_neon[i] = (rand() & 0xff); + } + + for (h = 1; h < MEM_SIZE; h++) + { + for (w = 1; w < MEM_SIZE; w++) + { + srcw = h; 
+            srch = h;
+            dstw = w;
+            dsth = w;
+
+            printf ("srcw X srch = %d X %d \n", srcw, srch);
+            printf ("dstw X dsth = %d X %d \n", dstw, dsth);
+
+            ne10_img_resize_bilinear_rgba_c (out_c, dstw, dsth, in_c, srcw, srch, srcw);
+            ne10_img_resize_bilinear_rgba_neon (out_neon, dstw, dsth, in_neon, srcw, srch, srcw);
+
+            PSNR = CAL_PSNR_UINT8 (out_c, out_neon, dstw * dsth * channels);
+            assert_false ( (PSNR < PSNR_THRESHOLD));
+        }
+    }
+    NE10_FREE (in_c);
+    NE10_FREE (in_neon);
+    NE10_FREE (out_c);
+    NE10_FREE (out_neon);
+}
+
+void test_resize_performance_case()
+{
+    ne10_int32_t srcw;
+    ne10_int32_t srch;
+    ne10_int32_t dstw;
+    ne10_int32_t dsth;
+    ne10_int32_t i;
+    ne10_int32_t w, h;
+    ne10_int32_t channels = 4;
+    ne10_int32_t pic_size = MEM_SIZE * MEM_SIZE * channels * sizeof (ne10_uint8_t);
+    ne10_int64_t time_c = 0;
+    ne10_int64_t time_neon = 0;
+
+    /* init input memory */
+    in_c = NE10_MALLOC (pic_size);
+    in_neon = NE10_MALLOC (pic_size);
+
+    /* init dst memory */
+    out_c = NE10_MALLOC (pic_size);
+    out_neon = NE10_MALLOC (pic_size);
+
+    for (i = 0; i < pic_size; i++)
+    {
+        in_c[i] = in_neon[i] = (rand() & 0xff);
+    }
+
+    for (h = 16; h < MEM_SIZE; h += 4)
+    {
+        for (w = 16; w < MEM_SIZE; w += 4)
+        {
+            srcw = h;
+            srch = h;
+            dstw = w;
+            dsth = w;
+
+            printf ("srcw X srch = %d X %d \n", srcw, srch);
+            printf ("dstw X dsth = %d X %d \n", dstw, dsth);
+
+            GET_TIME
+            (
+                time_c,
+            {
+                for (i = 0; i < TEST_COUNT; i++)
+                    ne10_img_resize_bilinear_rgba_c (out_c, dstw, dsth, in_c, srcw, srch, srcw);
+            }
+            );
+
+            GET_TIME
+            (
+                time_neon,
+            {
+                for (i = 0; i < TEST_COUNT; i++)
+                    ne10_img_resize_bilinear_rgba_neon (out_neon, dstw, dsth, in_neon, srcw, srch, srcw);
+            }
+            );
+            /* savings/speedup are not computed here; pass floating-point zeros because the format */
+            /* consumes them with %f (assuming ne10_log forwards its arguments to a printf-style call) */
+            ne10_log (__FUNCTION__, "IMAGERESIZE%20d%20lld%20lld%19.2f%%%18.2f:1\n", (h*MEM_SIZE+w), time_c, time_neon, 0.0f, 0.0f);
+
+        }
+    }
+    NE10_FREE (in_c);
+    NE10_FREE (in_neon);
+    NE10_FREE (out_c);
+    NE10_FREE (out_neon);
+}
+
+void
test_resize() +{ +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + test_resize_conformance_case(); +#endif + +#if defined PERFORMANCE_TEST + test_resize_performance_case(); +#endif +} + +static void my_test_setup (void) +{ + ne10_log_buffer_ptr = ne10_log_buffer; +} + +void test_fixture_resize (void) +{ + test_fixture_start(); // starts a fixture + + fixture_setup (my_test_setup); + + run_test (test_resize); // run tests + + test_fixture_end(); // ends a fixture +} + + + diff --git a/modules/imgproc/test/test_suite_rotate.c b/modules/imgproc/test/test_suite_rotate.c new file mode 100644 index 0000000..9e6f984 --- /dev/null +++ b/modules/imgproc/test/test_suite_rotate.c @@ -0,0 +1,203 @@ +/* + * Copyright 2013 ARM Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of ARM Limited nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NE10 Library : test_suite_rotate.c + */ + +#include +#include +#include +#include + +#include "NE10_imgproc.h" +#include "seatest.h" +#include "unit_test_common.h" + +/* ---------------------------------------------------------------------- +** Global defines +** ------------------------------------------------------------------- */ + +#define SRC_HEIGHT 512 +#define SRC_WIDTH 512 +#define DST_HEIGHT 734 //sqrt(512*512 + 512*512) + 10 +#define DST_WIDTH 734 //sqrt(512*512 + 512*512) + 10 +#define TEST_COUNT 5000 + + +/* ---------------------------------------------------------------------- +** Defines each of the tests performed +** ------------------------------------------------------------------- */ + + +//input and output +static ne10_uint8_t * in_c = NULL; +static ne10_uint8_t * in_neon = NULL; + +static ne10_uint8_t * out_c = NULL; +static ne10_uint8_t * out_neon = NULL; + +static ne10_float32_t psnr = 0.0f; + +void test_rotate_conformance_case() +{ + ne10_int32_t i; + ne10_int32_t channels = 4; + ne10_int32_t in_size = SRC_HEIGHT * SRC_WIDTH * channels; + ne10_int32_t out_size = DST_HEIGHT * DST_WIDTH * channels; + ne10_float32_t PSNR = 0.0f; + ne10_int32_t srcw = SRC_WIDTH; + ne10_int32_t srch = SRC_HEIGHT; + ne10_int32_t dstw_c, dsth_c; + ne10_int32_t dstw_neon, dsth_neon; + ne10_int32_t angle; + + /* init input memory */ + in_c = NE10_MALLOC (in_size * sizeof (ne10_uint8_t)); + in_neon = NE10_MALLOC (in_size * sizeof (ne10_uint8_t)); + 
+ /* init dst memory */ + out_c = NE10_MALLOC (out_size * sizeof (ne10_uint8_t)); + out_neon = NE10_MALLOC (out_size * sizeof (ne10_uint8_t)); + + for (i = 0; i < in_size; i++) + { + in_c[i] = in_neon[i] = (rand() & 0xff); + } + + for (angle = -360; angle <= 360; angle += 30) + { + printf ("rotate angle %d \n", angle); + + memset (out_c, 0, out_size); + ne10_img_rotate_rgba_c (out_c, &dstw_c, &dsth_c, in_c, srcw, srch, angle); + + memset (out_neon, 0, out_size); + ne10_img_rotate_rgba_neon (out_neon, &dstw_neon, &dsth_neon, in_neon, srcw, srch, angle); + + PSNR = CAL_PSNR_UINT8 (out_c, out_neon, dstw_c * dsth_c * 4); + assert_false ( (PSNR < PSNR_THRESHOLD)); + //printf ("PSNR %f \n", PSNR); + } + NE10_FREE (in_c); + NE10_FREE (in_neon); + NE10_FREE (out_c); + NE10_FREE (out_neon); +} + +void test_rotate_performance_case() +{ + ne10_int32_t i; + ne10_int32_t channels = 4; + ne10_int32_t in_size = SRC_HEIGHT * SRC_WIDTH * channels; + ne10_int32_t out_size = DST_HEIGHT * DST_WIDTH * channels; + ne10_int32_t srcw = SRC_WIDTH; + ne10_int32_t srch = SRC_HEIGHT; + ne10_int32_t dstw_c, dsth_c; + ne10_int32_t dstw_neon, dsth_neon; + ne10_int32_t angle; + ne10_int64_t time_c = 0; + ne10_int64_t time_neon = 0; + + /* init input memory */ + in_c = NE10_MALLOC (in_size * sizeof (ne10_uint8_t)); + in_neon = NE10_MALLOC (in_size * sizeof (ne10_uint8_t)); + + /* init dst memory */ + out_c = NE10_MALLOC (out_size * sizeof (ne10_uint8_t)); + out_neon = NE10_MALLOC (out_size * sizeof (ne10_uint8_t)); + + for (i = 0; i < in_size; i++) + { + in_c[i] = in_neon[i] = (rand() & 0xff); + } + + //for (angle = -360; angle <= 360; angle += 5) + for (angle = 45; angle <= 45; angle += 5) + { + printf ("rotate angle %d \n", angle); + + memset (out_c, 0, out_size); + GET_TIME + ( + time_c, + { + for (i = 0; i < TEST_COUNT; i++) + ne10_img_rotate_rgba_c (out_c, &dstw_c, &dsth_c, in_c, srcw, srch, angle); + } + ); + + memset (out_neon, 0, out_size); + GET_TIME + ( + time_neon, + { + for (i = 0; i < 
TEST_COUNT; i++) + ne10_img_rotate_rgba_neon (out_neon, &dstw_neon, &dsth_neon, in_neon, srcw, srch, angle); + } + ); + + // savings/speedup are not computed here; pass floating-point zeros because the format + // consumes them with %f (assuming ne10_log forwards its arguments to a printf-style call) + ne10_log (__FUNCTION__, "IMAGEROTATE%20d%20lld%20lld%19.2f%%%18.2f:1\n", angle, time_c, time_neon, 0.0f, 0.0f); + } + + NE10_FREE (in_c); + NE10_FREE (in_neon); + NE10_FREE (out_c); + NE10_FREE (out_neon); +} + +void test_rotate() +{ +#if defined (SMOKE_TEST)||(REGRESSION_TEST) + test_rotate_conformance_case(); +#endif + +#if defined PERFORMANCE_TEST + test_rotate_performance_case(); +#endif +} + +static void my_test_setup (void) +{ + ne10_log_buffer_ptr = ne10_log_buffer; +} + +void test_fixture_rotate (void) +{ + test_fixture_start(); // starts a fixture + + fixture_setup (my_test_setup); + + run_test (test_rotate); // run tests + + test_fixture_end(); // ends a fixture +} + + + diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9de1805..ffccb34 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -140,3 +140,45 @@ if(NE10_ENABLE_DSP) endif() endif() + +if(NE10_ENABLE_IMGPROC) + # Define imgproc test files.
+ set(NE10_TEST_IMGPROC_SRCS + ${PROJECT_SOURCE_DIR}/modules/imgproc/test/test_main.c + ${PROJECT_SOURCE_DIR}/modules/imgproc/test/test_suite_resize.c + ${PROJECT_SOURCE_DIR}/modules/imgproc/test/test_suite_rotate.c + ) + + if(NE10_BUILD_STATIC) + add_executable(NE10_imgproc_unit_test_static ${NE10_TEST_IMGPROC_SRCS} ${NE10_TEST_COMMON_SRCS}) + if(ANDROID_PLATFORM OR IOS_PLATFORM) + target_link_libraries ( + NE10_imgproc_unit_test_static + NE10 + m + ) + elseif(GNULINUX_PLATFORM) + target_link_libraries ( + NE10_imgproc_unit_test_static + NE10 + m + rt + ) + endif() + + if(NE10_SMOKE_TEST) + set_target_properties(NE10_imgproc_unit_test_static PROPERTIES + OUTPUT_NAME "NE10_imgproc_unit_test_smoke" + ) + elseif (NE10_REGRESSION_TEST) + set_target_properties(NE10_imgproc_unit_test_static PROPERTIES + OUTPUT_NAME "NE10_imgproc_unit_test_regression" + ) + elseif (NE10_PERFORMANCE_TEST) + set_target_properties(NE10_imgproc_unit_test_static PROPERTIES + OUTPUT_NAME "NE10_imgproc_unit_test_performance" + ) + endif() + endif() +endif() + diff --git a/test/include/unit_test_common.h b/test/include/unit_test_common.h index 98e78e5..56be85e 100644 --- a/test/include/unit_test_common.h +++ b/test/include/unit_test_common.h @@ -83,6 +83,7 @@ #define ERROR_MARGIN_SMALL 0x0A #define ERROR_MARGIN_LARGE 0xFF #define SNR_THRESHOLD 50.0f +#define PSNR_THRESHOLD 30.0f // What's the acceptable number of warnings in a test #define ACCEPTABLE_WARNS 12 @@ -131,6 +132,7 @@ extern int EQUALS_FLOAT( ne10_float32_t fa, ne10_float32_t fb , ne10_uint32_t er extern int GUARD_ARRAY( ne10_float32_t* array, ne10_uint32_t array_length ); extern int CHECK_ARRAY_GUARD( ne10_float32_t* array, ne10_uint32_t array_length ); extern ne10_float32_t CAL_SNR_FLOAT32(ne10_float32_t *pRef, ne10_float32_t *pTest, ne10_uint32_t buffSize); +extern ne10_float32_t CAL_PSNR_UINT8 (ne10_uint8_t *pRef, ne10_uint8_t *pTest, ne10_uint32_t buffSize); extern char ne10_log_buffer[]; extern char *ne10_log_buffer_ptr; diff 
--git a/test/src/unit_test_common.c b/test/src/unit_test_common.c index c82da1c..37a1808 100644 --- a/test/src/unit_test_common.c +++ b/test/src/unit_test_common.c @@ -200,6 +200,31 @@ ne10_float32_t CAL_SNR_FLOAT32 (ne10_float32_t *pRef, ne10_float32_t *pTest, ne1 } +/** + * @brief Calculation of PSNR + * @param pRef Pointer to the reference buffer + * @param pTest Pointer to the test buffer + * @param buffSize total number of samples + * @return ne10_float32_t PSNR, computed as 10 * log10 (255^2 / MSE) + * The function calculates the peak signal to noise ratio between the reference output + * and the test output; identical buffers make MSE zero, so the division is by zero + */ + +ne10_float32_t CAL_PSNR_UINT8 (ne10_uint8_t *pRef, ne10_uint8_t *pTest, ne10_uint32_t buffSize) +{ + ne10_float64_t mse = 0.0, max = 255.0; + ne10_uint32_t i; + ne10_float32_t PSNR; + + for (i = 0; i < buffSize; i++) + { + mse += (pRef[i] - pTest[i]) * (pRef[i] - pTest[i]); + } + mse /= buffSize; + PSNR = 10 * log10 (max*max / mse); + return (PSNR); + +} char ne10_log_buffer[1000]; char *ne10_log_buffer_ptr; -- 2.7.4