From f7fc4bf1a2a1ac141a6a7262f9075ab802422831 Mon Sep 17 00:00:00 2001
From: Yang Zhang <yang.zhang@arm.com>
Date: Mon, 2 Sep 2013 18:06:45 +0800
Subject: [PATCH] Make the following changes  - Add C implementations, doc and
 test cases for image resize/rotate  - fix the bug in NEON version of image
 resize  - add a header file for external macro definitions

---
 common/NE10_mask_table.c                 |   2 +-
 common/NE10_mask_table.h                 |   4 +-
 inc/NE10.h                               |  10 +
 inc/NE10_imgproc.h                       |  90 ++---
 inc/NE10_macros.h                        |  57 ++++
 inc/NE10_types.h                         |   1 +
 modules/CMakeLists.txt                   |   2 +
 modules/imgproc/NE10_init_imgproc.c      |  44 +--
 modules/imgproc/NE10_resize.c            | 549 +++++++++++++++++++++++++++++++
 modules/imgproc/NE10_resize.neon.s       |  62 ++--
 modules/imgproc/NE10_rotate.c            | 316 ++++++++++++++++++
 modules/imgproc/NE10_rotate.neon.s       |   4 +-
 modules/imgproc/test/test_main.c         |  59 ++++
 modules/imgproc/test/test_suite_resize.c | 207 ++++++++++++
 modules/imgproc/test/test_suite_rotate.c | 203 ++++++++++++
 test/CMakeLists.txt                      |  42 +++
 test/include/unit_test_common.h          |   2 +
 test/src/unit_test_common.c              |  25 ++
 18 files changed, 1571 insertions(+), 108 deletions(-)
 create mode 100644 inc/NE10_macros.h
 create mode 100644 modules/imgproc/NE10_resize.c
 create mode 100644 modules/imgproc/NE10_rotate.c
 create mode 100644 modules/imgproc/test/test_main.c
 create mode 100644 modules/imgproc/test/test_suite_resize.c
 create mode 100644 modules/imgproc/test/test_suite_rotate.c

diff --git a/common/NE10_mask_table.c b/common/NE10_mask_table.c
index 0db75a8..d03d6bd 100644
--- a/common/NE10_mask_table.c
+++ b/common/NE10_mask_table.c
@@ -68,7 +68,7 @@ const ne10_uint32_t ne10_divLookUpTable[DIV_LOOKUP_TABLE_SIZE]=
     272,271,270,269,267,266,265,264,263,262,261,260,259,258,257
     };
 
-const ne10_uint64_t ne10_vresize_mask_residual_table[VRESIZE_MASK_TABLE_SIZE] =
+const ne10_uint64_t ne10_img_vresize_linear_mask_residual_table[NE10_VRESIZE_LINEAR_MASK_TABLE_SIZE] =
 {
     0x00000000000000FF, 0x000000000000FFFF,
     0x0000000000FFFFFF, 0x00000000FFFFFFFF,
diff --git a/common/NE10_mask_table.h b/common/NE10_mask_table.h
index 6381231..1b9dbdf 100644
--- a/common/NE10_mask_table.h
+++ b/common/NE10_mask_table.h
@@ -44,8 +44,8 @@ extern const ne10_uint32_t ne10_divLookUpTable[DIV_LOOKUP_TABLE_SIZE] \
 asm ("ne10_divLookUpTable");
 
 /* mask table for imgproc module */
-#define VRESIZE_MASK_TABLE_SIZE    7
-extern const ne10_uint64_t ne10_vresize_mask_residual_table[VRESIZE_MASK_TABLE_SIZE] \
+#define NE10_VRESIZE_LINEAR_MASK_TABLE_SIZE    7
+extern const ne10_uint64_t ne10_img_vresize_linear_mask_residual_table[NE10_VRESIZE_LINEAR_MASK_TABLE_SIZE] \
 asm ("ne10_vresize_mask_residual_table");
 
 #endif
diff --git a/inc/NE10.h b/inc/NE10.h
index 148bc86..02ebace 100644
--- a/inc/NE10.h
+++ b/inc/NE10.h
@@ -64,6 +64,10 @@
    * âÂ Â  âÂ Â  âââ @link groupDSPs dsp module@endlink that provides a set of signal processing functions, such as complex/real FFT/IFFT, FIR and IIR
    * âÂ Â  âÂ Â  âââ test
    * âÂ Â  âÂ Â      âââ  directory for test files
+   * âÂ Â  âââ imgproc
+   * âÂ Â  âÂ Â  âââ @link groupIMGPROCs imgproc module@endlink that provides a set of image processing functions, such as image resize, image rotate
+   * âÂ Â  âÂ Â  âââ test
+   * âÂ Â  âÂ Â      âââ  directory for test files
    * âÂ Â  âââ math
    * âÂ Â  âÂ Â  âââ @link groupMaths math module@endlink that provides a set of vector/matrix algebra functions
    * âÂ Â  âÂ Â  âââ test
@@ -148,7 +152,13 @@
 extern "C" {
 #endif
 
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
 #include "NE10_types.h"
+#include "NE10_macros.h"
 #include "NE10_init.h"
 #include "NE10_math.h"
 #include "NE10_dsp.h"
diff --git a/inc/NE10_imgproc.h b/inc/NE10_imgproc.h
index f080875..527a6ce 100644
--- a/inc/NE10_imgproc.h
+++ b/inc/NE10_imgproc.h
@@ -46,55 +46,57 @@ extern "C" {
     /* image resize functions*/
 
     /* function pointers*/
-    extern void (*ne10_vresize) (const ne10_int32_t** src,
-                                 ne10_uint8_t* dst,
-                                 const ne10_int16_t* beta,
-                                 ne10_int32_t width);
-    extern void (*ne10_hresize_4channels) (const ne10_uint8_t** src,
-                                           ne10_int32_t** dst,
-                                           ne10_int32_t count,
-                                           const ne10_int32_t* xofs,
-                                           const ne10_int16_t* alpha,
-                                           ne10_int32_t swidth,
-                                           ne10_int32_t dwidth,
-                                           ne10_int32_t cn,
-                                           ne10_int32_t xmin,
-                                           ne10_int32_t xmax);
+    extern void (*ne10_img_resize_bilinear_rgba) (ne10_uint8_t* dst,
+            ne10_uint32_t dst_width,
+            ne10_uint32_t dst_height,
+            ne10_uint8_t* src,
+            ne10_uint32_t src_width,
+            ne10_uint32_t src_height,
+            ne10_uint32_t src_stride);
+    /* C version*/
+    extern void ne10_img_resize_bilinear_rgba_c (ne10_uint8_t* dst,
+            ne10_uint32_t dst_width,
+            ne10_uint32_t dst_height,
+            ne10_uint8_t* src,
+            ne10_uint32_t src_width,
+            ne10_uint32_t src_height,
+            ne10_uint32_t src_stride);
     /* NEON version*/
-    extern void ne10_vresize_neon (const ne10_int32_t** src,
-                                   ne10_uint8_t* dst,
-                                   const ne10_int16_t* beta,
-                                   ne10_int32_t width)
-        asm ("ne10_vresize_neon");
-    extern void ne10_hresize_4channels_neon (const ne10_uint8_t** src,
-            ne10_int32_t** dst,
-            ne10_int32_t count,
-            const ne10_int32_t* xofs,
-            const ne10_int16_t* alpha,
-            ne10_int32_t swidth,
-            ne10_int32_t dwidth,
-            ne10_int32_t cn,
-            ne10_int32_t xmin,
-            ne10_int32_t xmax) asm ("ne10_hresize_4channels_neon");
+    extern void ne10_img_resize_bilinear_rgba_neon (ne10_uint8_t* dst,
+            ne10_uint32_t dst_width,
+            ne10_uint32_t dst_height,
+            ne10_uint8_t* src,
+            ne10_uint32_t src_width,
+            ne10_uint32_t src_height,
+            ne10_uint32_t src_stride)
+    asm ("ne10_img_resize_bilinear_rgba_neon");
 
     /* image rotate functions*/
     /* function pointers*/
-    extern void (*ne10_img_rotate_get_quad_rangle_subpix) (ne10_uint8_t* dst,
-            const ne10_uint8_t* src,
-            ne10_int32_t swidth,
-            ne10_int32_t sheight,
-            ne10_int32_t dwidth,
-            ne10_int32_t dheight,
-            ne10_float32_t* matrix);
+    extern void (*ne10_img_rotate_rgba) (ne10_uint8_t* dst,
+                                         ne10_uint32_t* dst_width,
+                                         ne10_uint32_t* dst_height,
+                                         ne10_uint8_t* src,
+                                         ne10_uint32_t src_width,
+                                         ne10_uint32_t src_height,
+                                         ne10_int32_t angle);
+    /* C version*/
+    extern void ne10_img_rotate_rgba_c (ne10_uint8_t* dst,
+                                        ne10_uint32_t* dst_width,
+                                        ne10_uint32_t* dst_height,
+                                        ne10_uint8_t* src,
+                                        ne10_uint32_t src_width,
+                                        ne10_uint32_t src_height,
+                                        ne10_int32_t angle);
     /* NEON version*/
-    extern void ne10_img_rotate_get_quad_rangle_subpix_neon (ne10_uint8_t* dst,
-            const ne10_uint8_t* src,
-            ne10_int32_t swidth,
-            ne10_int32_t sheight,
-            ne10_int32_t dwidth,
-            ne10_int32_t dheight,
-            ne10_float32_t* matrix)
-        asm ("ne10_img_rotate_get_quad_rangle_subpix_neon");
+    extern void ne10_img_rotate_rgba_neon (ne10_uint8_t* dst,
+                                           ne10_uint32_t* dst_width,
+                                           ne10_uint32_t* dst_height,
+                                           ne10_uint8_t* src,
+                                           ne10_uint32_t src_width,
+                                           ne10_uint32_t src_height,
+                                           ne10_int32_t angle)
+    asm ("ne10_img_rotate_rgba_neon");
 
 #ifdef __cplusplus
 }
diff --git a/inc/NE10_macros.h b/inc/NE10_macros.h
new file mode 100644
index 0000000..4232555
--- /dev/null
+++ b/inc/NE10_macros.h
@@ -0,0 +1,57 @@
+/*
+ *  Copyright 2013 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : inc/NE10_macros.h
+ */
+
+/** NE10 defines a number of macros for use in its function signatures.
+ *  The macros are defined within this header file.
+ */
+
+#ifndef NE10_MACROS_H
+#define NE10_MACROS_H
+
+/////////////////////////////////////////////////////////
+// some external macro definitions to be exposed to the users
+/////////////////////////////////////////////////////////
+
+#define NE10_MALLOC malloc
+#define NE10_FREE free
+
+#define NE10_MIN(a,b) ((a)>(b)?(b):(a))
+#define NE10_MAX(a,b) ((a)<(b)?(b):(a))
+
+/////////////////////////////////////////////////////////
+// macro definitions for float to fixed point
+/////////////////////////////////////////////////////////
+#define NE10_F2I16_MAX         32767    /* scale factor applied by NE10_F2I16_OP */
+#define NE10_F2I16_SHIFT       15       /* shift applied by NE10_F2I16_SROUND */
+#define NE10_F2I16_OP(x)       ((ne10_int16_t)((x)*NE10_F2I16_MAX + 0.5f))   /* whole expansion parenthesised */
+#define NE10_F2I16_SROUND(x)   (((x)+(1<<(NE10_F2I16_SHIFT-1)))>>NE10_F2I16_SHIFT)   /* adds half, then shifts */
+
+#endif
diff --git a/inc/NE10_types.h b/inc/NE10_types.h
index ce49005..e93b6ac 100644
--- a/inc/NE10_types.h
+++ b/inc/NE10_types.h
@@ -45,6 +45,7 @@
 /////////////////////////////////////////////////////////
 #define NE10_OK 0
 #define NE10_ERR -1
+#define NE10_PI (3.1415926535897932384626433832795)
 
 /////////////////////////////////////////////////////////
 // some external definitions to be exposed to the users
diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt
index ed7885c..005088d 100644
--- a/modules/CMakeLists.txt
+++ b/modules/CMakeLists.txt
@@ -224,6 +224,8 @@ if(NE10_ENABLE_IMGPROC)
     # Add image processing C files.
     set(NE10_IMGPROC_C_SRCS
         ${PROJECT_SOURCE_DIR}/common/NE10_mask_table.c
+        ${PROJECT_SOURCE_DIR}/modules/imgproc/NE10_resize.c
+        ${PROJECT_SOURCE_DIR}/modules/imgproc/NE10_rotate.c
     )
 
     # Add image processing NEON files.
diff --git a/modules/imgproc/NE10_init_imgproc.c b/modules/imgproc/NE10_init_imgproc.c
index 4a976b8..370f29d 100644
--- a/modules/imgproc/NE10_init_imgproc.c
+++ b/modules/imgproc/NE10_init_imgproc.c
@@ -33,37 +33,29 @@ ne10_result_t ne10_init_imgproc (ne10_int32_t is_NEON_available)
 {
     if (NE10_OK == is_NEON_available)
     {
-        ne10_vresize = ne10_vresize_neon;
-        ne10_hresize_4channels = ne10_hresize_4channels_neon;
-        ne10_img_rotate_get_quad_rangle_subpix = ne10_img_rotate_get_quad_rangle_subpix_neon;
+        ne10_img_resize_bilinear_rgba = ne10_img_resize_bilinear_rgba_neon;
+        ne10_img_rotate_rgba = ne10_img_rotate_rgba_neon;
     }
     else
     {
-        ;
+        ne10_img_resize_bilinear_rgba = ne10_img_resize_bilinear_rgba_c;
+        ne10_img_rotate_rgba = ne10_img_rotate_rgba_c;
     }
     return NE10_OK;
 }
 
 // These are actual definitions of our function pointers that are declared in inc/NE10_imgproc.h
-void (*ne10_vresize) (const ne10_int32_t** src,
-                      ne10_uint8_t* dst,
-                      const ne10_int16_t* beta,
-                      ne10_int32_t width);
-void (*ne10_hresize_4channels) (const ne10_uint8_t** src,
-                                ne10_int32_t** dst,
-                                ne10_int32_t count,
-                                const ne10_int32_t* xofs,
-                                const ne10_int16_t* alpha,
-                                ne10_int32_t swidth,
-                                ne10_int32_t dwidth,
-                                ne10_int32_t cn,
-                                ne10_int32_t xmin,
-                                ne10_int32_t xmax);
-
-void (*ne10_img_rotate_get_quad_rangle_subpix) (ne10_uint8_t* dst,
-        const ne10_uint8_t* src,
-        ne10_int32_t swidth,
-        ne10_int32_t sheight,
-        ne10_int32_t dwidth,
-        ne10_int32_t dheight,
-        ne10_float32_t* matrix);
+void (*ne10_img_resize_bilinear_rgba) (ne10_uint8_t* dst,
+                                       ne10_uint32_t dst_width,
+                                       ne10_uint32_t dst_height,
+                                       ne10_uint8_t* src,
+                                       ne10_uint32_t src_width,
+                                       ne10_uint32_t src_height,
+                                       ne10_uint32_t src_stride);
+void (*ne10_img_rotate_rgba) (ne10_uint8_t* dst,
+                              ne10_uint32_t* dst_width,
+                              ne10_uint32_t* dst_height,
+                              ne10_uint8_t* src,
+                              ne10_uint32_t src_width,
+                              ne10_uint32_t src_height,
+                              ne10_int32_t angle);
diff --git a/modules/imgproc/NE10_resize.c b/modules/imgproc/NE10_resize.c
new file mode 100644
index 0000000..62a7c46
--- /dev/null
+++ b/modules/imgproc/NE10_resize.c
@@ -0,0 +1,549 @@
+/*
+ *  Copyright 2013 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* license of OpenCV */
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+/*
+ * NE10 Library : imgproc/NE10_resize.c
+ */
+
+#include "NE10.h"
+
+/**
+ * @ingroup groupIMGPROCs
+ */
+/**
+ * @defgroup IMG_RESIZE Image Resize
+ *
+ * \par
+ * Image resize is a generic functionality in image processing. In the Ne10 library, we use the bilinear interpolation algorithm (http://en.wikipedia.org/wiki/Bilinear_interpolation) to implement image resize. The C implementation is based on the cvResize function from OpenCV (http://opencv.org/).
+ * \par
+ * This set of functions implements image resize with the bilinear interpolation algorithm
+ * for 8-bit data types.  The functions operate out of place: the input and the output use different buffers.
+ *
+ */
+#define INTER_RESIZE_COEF_BITS  11                             /* fractional bits of one interpolation weight */
+#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)  /* fixed-point value of the weight 1.0 */
+#define NE10_MAX_ESIZE          16                             /* capacity of the per-tap scratch arrays */
+
+static inline ne10_uint32_t ne10_align_size (ne10_int32_t sz, ne10_int32_t n)
+{   /* Rounds sz up to a multiple of n; the mask only works when n is a power of two. */
+    return (ne10_uint32_t) ( (sz + n - 1) & ~ (n - 1));
+}
+
+static inline ne10_int32_t ne10_floor (ne10_float32_t a)
+{   /* Largest integer <= a. The cast truncates toward zero, so step down only when it overshot a. */
+    return (ne10_int32_t) a - ( (ne10_float32_t) (ne10_int32_t) a > a);
+}
+
+static inline ne10_int32_t ne10_clip (ne10_int32_t x, ne10_int32_t a, ne10_int32_t b)
+{   /* Clamps x into the half-open range [a, b): below a gives a, at or above b gives b - 1. */
+    return (x < a) ? a : ( (x >= b) ? b - 1 : x);
+}
+
+static inline ne10_uint8_t ne10_cast_op (ne10_int32_t val)
+{
+    /* val carries 2 * INTER_RESIZE_COEF_BITS fractional bits: one weight from each pass. */
+    const ne10_int32_t shift = INTER_RESIZE_COEF_BITS * 2;
+    const ne10_int32_t half = 1 << (shift - 1);     /* rounding term, 0.5 in that format */
+    const ne10_int32_t rounded = (val + half) >> shift;
+    return (ne10_uint8_t) NE10_MIN (255, NE10_MAX (0, rounded));    /* saturate to 0..255 */
+}
+
+static void ne10_img_hresize_linear_c (const ne10_uint8_t** src,
+                                       ne10_int32_t** dst,
+                                       ne10_int32_t count,
+                                       const ne10_int32_t* xofs,
+                                       const ne10_int16_t* alpha,
+                                       ne10_int32_t swidth,
+                                       ne10_int32_t dwidth,
+                                       ne10_int32_t cn,
+                                       ne10_int32_t xmin,
+                                       ne10_int32_t xmax)
+{
+    /*
+     * Horizontal pass. Each output element below xmax blends the source sample
+     * at xofs[] with the sample cn bytes further on, weighted by its alpha pair;
+     * from xmax up to dwidth the single sample is scaled by the unit weight.
+     * Only count == 1 and count == 2 do any work. swidth and xmin are not read.
+     */
+    ne10_int32_t i;
+
+    if (count == 2)
+    {
+        const ne10_uint8_t* in0 = src[0];
+        const ne10_uint8_t* in1 = src[1];
+        ne10_int32_t* out0 = dst[0];
+        ne10_int32_t* out1 = dst[1];
+
+        for (i = 0; i < xmax; i++)
+        {
+            const ne10_int32_t pos = xofs[i];
+            const ne10_int32_t w0 = alpha[2 * i];
+            const ne10_int32_t w1 = alpha[2 * i + 1];
+            const ne10_int32_t v0 = in0[pos] * w0 + in0[pos + cn] * w1;
+            const ne10_int32_t v1 = in1[pos] * w0 + in1[pos + cn] * w1;
+            out0[i] = v0;
+            out1[i] = v1;
+        }
+        for (; i < dwidth; i++)
+        {
+            const ne10_int32_t pos = xofs[i];
+            out0[i] = (ne10_int32_t) in0[pos] * INTER_RESIZE_COEF_SCALE;
+            out1[i] = (ne10_int32_t) in1[pos] * INTER_RESIZE_COEF_SCALE;
+        }
+    }
+    else if (count == 1)
+    {
+        const ne10_uint8_t* in = src[0];
+        ne10_int32_t* out = dst[0];
+
+        for (i = 0; i < xmax; i++)
+            out[i] = in[xofs[i]] * alpha[2 * i] + in[xofs[i] + cn] * alpha[2 * i + 1];
+
+        for (; i < dwidth; i++)
+            out[i] = (ne10_int32_t) in[xofs[i]] * INTER_RESIZE_COEF_SCALE;
+    }
+}
+
+
+static void ne10_img_vresize_linear_c (const ne10_int32_t** src, ne10_uint8_t* dst, const ne10_int16_t* beta, ne10_int32_t width)
+{
+    /*
+     * Vertical pass: combine the two horizontally filtered rows src[0] and
+     * src[1] with the weights beta[0] and beta[1], then let ne10_cast_op()
+     * round, shift and saturate every sum down to an 8-bit output sample.
+     *
+     * width is the number of elements to produce in dst.
+     */
+    const ne10_int32_t w0 = beta[0];
+    const ne10_int32_t w1 = beta[1];
+    const ne10_int32_t* row0 = src[0];
+    const ne10_int32_t* row1 = src[1];
+    ne10_int32_t i;
+
+    /* One element per iteration; the values match a manually unrolled loop. */
+    for (i = 0; i < width; i++)
+    {
+        const ne10_int32_t sum = row0[i] * w0 + row1[i] * w1;
+        dst[i] = ne10_cast_op (sum);
+    }
+}
+
+static void ne10_img_resize_generic_linear_c (ne10_uint8_t* src,
+        ne10_uint8_t* dst,
+        const ne10_int32_t* xofs,
+        const ne10_int16_t* _alpha,
+        const ne10_int32_t* yofs,
+        const ne10_int16_t* _beta,
+        ne10_int32_t xmin,
+        ne10_int32_t xmax,
+        ne10_int32_t ksize,
+        ne10_int32_t srcw,
+        ne10_int32_t srch,
+        ne10_int32_t srcstep,
+        ne10_int32_t dstw,
+        ne10_int32_t dsth,
+        ne10_int32_t channels)
+{
+    // Separable resize: for each output row, filter the needed source rows
+    // horizontally into 32-bit scratch rows, then blend those vertically into dst.
+    const ne10_int16_t* alpha = _alpha;
+    const ne10_int16_t* beta = _beta;
+    ne10_int32_t cn = channels;
+    srcw *= cn;     // widths and x bounds are counted in elements (pixels * channels) from here on
+    dstw *= cn;
+    xmin *= cn;
+    xmax *= cn;
+
+    ne10_int32_t bufstep = (ne10_int32_t) ne10_align_size (dstw, 16);   // elements per scratch row
+    ne10_int32_t dststep = (ne10_int32_t) ne10_align_size (dstw, 4);    // bytes per destination row
+    const ne10_uint8_t* srows[NE10_MAX_ESIZE];
+    ne10_int32_t* rows[NE10_MAX_ESIZE];
+    ne10_int32_t prev_sy[NE10_MAX_ESIZE];
+    ne10_int32_t k, dy;
+
+    ne10_int32_t *buffer_ = (ne10_int32_t*) NE10_MALLOC (bufstep * ksize * sizeof (ne10_int32_t));
+    if (buffer_ == NULL)
+        return;     // allocation failed: leave dst untouched rather than write through NULL
+
+    for (k = 0; k < ksize; k++)
+    {
+        prev_sy[k] = -1;    // -1 marks a scratch row that holds no source row yet
+        rows[k] = (ne10_int32_t*) buffer_ + bufstep * k;
+    }
+
+    // one iteration per destination row; beta advances by ksize weights each time
+    for (dy = 0; dy < dsth; dy++, beta += ksize)
+    {
+        ne10_int32_t sy0 = yofs[dy], k0 = ksize, k1 = 0, ksize2 = ksize / 2;
+
+        for (k = 0; k < ksize; k++)
+        {
+            ne10_int32_t sy = ne10_clip (sy0 - ksize2 + 1 + k, 0, srch);
+            for (k1 = NE10_MAX (k1, k); k1 < ksize; k1++)
+            {
+                if (sy == prev_sy[k1])  // if the sy-th row has been computed already, reuse it.
+                {
+                    if (k1 > k)
+                        memcpy (rows[k], rows[k1], bufstep * sizeof (rows[0][0]));
+                    break;
+                }
+            }
+            if (k1 == ksize)
+                k0 = NE10_MIN (k0, k); // remember the first row that needs to be computed
+            srows[k] = (const ne10_uint8_t*) (src + srcstep * sy);
+            prev_sy[k] = sy;
+        }
+
+        if (k0 < ksize)
+            ne10_img_hresize_linear_c (srows + k0, rows + k0, ksize - k0, xofs, alpha,
+                                       srcw, dstw, cn, xmin, xmax);
+        ne10_img_vresize_linear_c ( (const ne10_int32_t**) rows, (ne10_uint8_t*) (dst + dststep * dy), beta, dstw);
+    }
+
+    NE10_FREE (buffer_);
+}
+
+static void ne10_img_resize_cal_offset_linear (ne10_int32_t* xofs,
+        ne10_int16_t* ialpha,
+        ne10_int32_t* yofs,
+        ne10_int16_t* ibeta,
+        ne10_int32_t *xmin,
+        ne10_int32_t *xmax,
+        ne10_int32_t ksize,
+        ne10_int32_t ksize2,
+        ne10_int32_t srcw,
+        ne10_int32_t srch,
+        ne10_int32_t dstw,
+        ne10_int32_t dsth,
+        ne10_int32_t channels)
+{   /* Precomputes the source offsets and fixed-point weights used by the resize passes. */
+    ne10_float32_t inv_scale_x = (ne10_float32_t) dstw / srcw;
+    ne10_float32_t inv_scale_y = (ne10_float32_t) dsth / srch;
+
+    ne10_int32_t cn = channels;
+    ne10_float32_t scale_x = 1. / inv_scale_x;  /* source pixels per destination pixel */
+    ne10_float32_t scale_y = 1. / inv_scale_y;
+    ne10_int32_t k, sx, sy, dx, dy;
+    /* xofs/ialpha get one entry (ksize weights) per destination element, yofs/ibeta one per
+     * destination row; *xmin and *xmax are updated in place, so the caller must initialise them. */
+    ne10_float32_t fx, fy;
+
+    ne10_float32_t cbuf[NE10_MAX_ESIZE];
+
+    for (dx = 0; dx < dstw; dx++)
+    {
+        fx = (ne10_float32_t) ( (dx + 0.5) * scale_x - 0.5);    /* centre of destination column dx, in source coordinates */
+        sx = ne10_floor (fx);
+        fx -= sx;   /* keep only the fractional part */
+        /* too far left: record dx + 1 as xmin and clamp a negative sx to column 0 */
+        if (sx < ksize2 - 1)
+        {
+            *xmin = dx + 1;
+            if (sx < 0)
+                fx = 0, sx = 0;
+        }
+        /* too far right: sx + ksize2 is past the row, so cap xmax at dx and clamp sx to the last column */
+        if (sx + ksize2 >= srcw)
+        {
+            *xmax = NE10_MIN (*xmax, dx);
+            if (sx >= srcw - 1)
+                fx = 0, sx = srcw - 1;
+        }
+        /* offsets of the cn channel samples of source pixel sx */
+        for (k = 0, sx *= cn; k < cn; k++)
+            xofs[dx * cn + k] = sx + k;
+        /* the two weights: 1 - fx first, then fx */
+        cbuf[0] = 1.f - fx;
+        cbuf[1] = fx;
+        /* convert to fixed point (the cast truncates), then repeat the weights for the remaining channels */
+        for (k = 0; k < ksize; k++)
+            ialpha[dx * cn * ksize + k] = (ne10_int16_t) (cbuf[k] * INTER_RESIZE_COEF_SCALE);
+        for (; k < cn * ksize; k++)
+            ialpha[dx * cn * ksize + k] = ialpha[dx * cn * ksize + k - ksize];
+    }
+
+    for (dy = 0; dy < dsth; dy++)
+    {
+        fy = (ne10_float32_t) ( (dy + 0.5) * scale_y - 0.5);    /* same mapping as for x, applied to rows */
+        sy = ne10_floor (fy);
+        fy -= sy;
+
+        yofs[dy] = sy;  /* stored as computed; no clamping is applied here */
+
+        cbuf[0] = 1.f - fy;
+        cbuf[1] = fy;
+
+        for (k = 0; k < ksize; k++)
+            ibeta[dy * ksize + k] = (ne10_int16_t) (cbuf[k] * INTER_RESIZE_COEF_SCALE);
+
+    }
+
+}
+
+/**
+ * @addtogroup IMG_RESIZE
+ * @{
+ */
+
+/**
+ * @brief Resizes an 8-bit RGBA image with bilinear interpolation (C version).
+ * @param[out]  *dst                  points to the destination image
+ * @param[in]   dst_width             width of the destination image, in pixels
+ * @param[in]   dst_height            height of the destination image, in pixels
+ * @param[in]   *src                  points to the source image
+ * @param[in]   src_width             width of the source image, in pixels
+ * @param[in]   src_height            height of the source image, in pixels
+ * @param[in]   src_stride            distance in bytes between two source rows
+ * @return none.
+ * Returns without writing to dst when any dimension is zero or the offset tables cannot be allocated.
+ */
+void ne10_img_resize_bilinear_rgba_c (ne10_uint8_t* dst,
+                                      ne10_uint32_t dst_width,
+                                      ne10_uint32_t dst_height,
+                                      ne10_uint8_t* src,
+                                      ne10_uint32_t src_width,
+                                      ne10_uint32_t src_height,
+                                      ne10_uint32_t src_stride)
+{
+    ne10_int32_t dstw = dst_width;
+    ne10_int32_t dsth = dst_height;
+    ne10_int32_t srcw = src_width;
+    ne10_int32_t srch = src_height;
+    ne10_int32_t cn = 4;        /* RGBA: four 8-bit channels per pixel */
+    ne10_int32_t ksize = 2;     /* bilinear: two taps per direction */
+    ne10_int32_t ksize2 = ksize / 2;
+    ne10_int32_t xmin = 0;
+    ne10_int32_t xmax = dstw;
+    ne10_int32_t width = dstw * cn;
+    ne10_uint8_t *buffer_;
+
+    if (dstw <= 0 || dsth <= 0 || srcw <= 0 || srch <= 0)
+        return;     /* an empty image would make the scale factors divide by zero */
+
+    /* one block holds xofs and yofs, followed by the 16-bit ialpha and ibeta tables */
+    buffer_ = (ne10_uint8_t*) NE10_MALLOC ( (width + dsth) * (sizeof (ne10_int32_t) + sizeof (ne10_float32_t) * ksize));
+    if (buffer_ == NULL)
+        return;
+
+    ne10_int32_t* xofs = (ne10_int32_t*) buffer_;
+    ne10_int32_t* yofs = xofs + width;
+    ne10_int16_t* ialpha = (ne10_int16_t*) (yofs + dsth);
+    ne10_int16_t* ibeta = ialpha + width * ksize;
+
+    ne10_img_resize_cal_offset_linear (xofs, ialpha, yofs, ibeta, &xmin, &xmax, ksize, ksize2, srcw, srch, dstw, dsth, cn);
+    ne10_img_resize_generic_linear_c (src, dst, xofs, ialpha, yofs, ibeta, xmin, xmax, ksize, srcw, srch, src_stride, dstw, dsth, cn);
+    NE10_FREE (buffer_);
+}
+
+extern void ne10_img_hresize_4channels_linear_neon (const ne10_uint8_t** src,
+        ne10_int32_t** dst,
+        ne10_int32_t count,
+        const ne10_int32_t* xofs,
+        const ne10_int16_t* alpha,
+        ne10_int32_t swidth,
+        ne10_int32_t dwidth,
+        ne10_int32_t cn,
+        ne10_int32_t xmin,
+        ne10_int32_t xmax);
+extern void ne10_img_vresize_linear_neon (const ne10_int32_t** src, ne10_uint8_t* dst, const ne10_int16_t* beta, ne10_int32_t width);
+
+static void ne10_img_resize_generic_linear_neon (ne10_uint8_t* src,
+        ne10_uint8_t* dst,
+        const ne10_int32_t* xofs,
+        const ne10_int16_t* _alpha,
+        const ne10_int32_t* yofs,
+        const ne10_int16_t* _beta,
+        ne10_int32_t xmin,
+        ne10_int32_t xmax,
+        ne10_int32_t ksize,
+        ne10_int32_t srcw,
+        ne10_int32_t srch,
+        ne10_int32_t srcstep,
+        ne10_int32_t dstw,
+        ne10_int32_t dsth,
+        ne10_int32_t channels)
+{
+    // Separable resize: for each output row, filter the needed source rows
+    // horizontally into 32-bit scratch rows, then blend those vertically into dst.
+    const ne10_int16_t* alpha = _alpha;
+    const ne10_int16_t* beta = _beta;
+    ne10_int32_t cn = channels;
+    srcw *= cn;     // widths and x bounds are counted in elements (pixels * channels) from here on
+    dstw *= cn;
+    xmin *= cn;
+    xmax *= cn;
+
+    ne10_int32_t bufstep = (ne10_int32_t) ne10_align_size (dstw, 16);   // elements per scratch row
+    ne10_int32_t dststep = (ne10_int32_t) ne10_align_size (dstw, 4);    // bytes per destination row
+    const ne10_uint8_t* srows[NE10_MAX_ESIZE];
+    ne10_int32_t* rows[NE10_MAX_ESIZE];
+    ne10_int32_t prev_sy[NE10_MAX_ESIZE];
+    ne10_int32_t k, dy;
+
+    ne10_int32_t *buffer_ = (ne10_int32_t*) NE10_MALLOC (bufstep * ksize * sizeof (ne10_int32_t));
+    if (buffer_ == NULL)
+        return;     // allocation failed: leave dst untouched rather than write through NULL
+
+    for (k = 0; k < ksize; k++)
+    {
+        prev_sy[k] = -1;    // -1 marks a scratch row that holds no source row yet
+        rows[k] = (ne10_int32_t*) buffer_ + bufstep * k;
+    }
+
+    // one iteration per destination row; beta advances by ksize weights each time
+    for (dy = 0; dy < dsth; dy++, beta += ksize)
+    {
+        ne10_int32_t sy0 = yofs[dy], k0 = ksize, k1 = 0, ksize2 = ksize / 2;
+
+        for (k = 0; k < ksize; k++)
+        {
+            ne10_int32_t sy = ne10_clip (sy0 - ksize2 + 1 + k, 0, srch);
+            for (k1 = NE10_MAX (k1, k); k1 < ksize; k1++)
+            {
+                if (sy == prev_sy[k1])  // if the sy-th row has been computed already, reuse it.
+                {
+                    if (k1 > k)
+                        memcpy (rows[k], rows[k1], bufstep * sizeof (rows[0][0]));
+                    break;
+                }
+            }
+            if (k1 == ksize)
+                k0 = NE10_MIN (k0, k); // remember the first row that needs to be computed
+            srows[k] = (const ne10_uint8_t*) (src + srcstep * sy);
+            prev_sy[k] = sy;
+        }
+
+        if (k0 < ksize)
+        {
+            if (cn == 4)    // the NEON horizontal pass is only used for 4 channels
+                ne10_img_hresize_4channels_linear_neon (srows + k0, rows + k0, ksize - k0, xofs, alpha,
+                                                        srcw, dstw, cn, xmin, xmax);
+            else
+                ne10_img_hresize_linear_c (srows + k0, rows + k0, ksize - k0, xofs, alpha,
+                                           srcw, dstw, cn, xmin, xmax);
+        }
+        ne10_img_vresize_linear_neon ( (const ne10_int32_t**) rows, (ne10_uint8_t*) (dst + dststep * dy), beta, dstw);
+    }
+    NE10_FREE (buffer_);
+}
+
+/**
+ * @brief Resizes an 8-bit RGBA image with bilinear interpolation (NEON version).
+ * @param[out]  *dst                  points to the destination image
+ * @param[in]   dst_width             width of the destination image, in pixels
+ * @param[in]   dst_height            height of the destination image, in pixels
+ * @param[in]   *src                  points to the source image
+ * @param[in]   src_width             width of the source image, in pixels
+ * @param[in]   src_height            height of the source image, in pixels
+ * @param[in]   src_stride            distance in bytes between two source rows
+ * @return none.
+ * Returns without writing to dst when any dimension is zero or the offset tables cannot be allocated.
+ */
+void ne10_img_resize_bilinear_rgba_neon (ne10_uint8_t* dst,
+        ne10_uint32_t dst_width,
+        ne10_uint32_t dst_height,
+        ne10_uint8_t* src,
+        ne10_uint32_t src_width,
+        ne10_uint32_t src_height,
+        ne10_uint32_t src_stride)
+{
+    ne10_int32_t dstw = dst_width;
+    ne10_int32_t dsth = dst_height;
+    ne10_int32_t srcw = src_width;
+    ne10_int32_t srch = src_height;
+    ne10_int32_t cn = 4;        /* RGBA: four 8-bit channels per pixel */
+    ne10_int32_t ksize = 2;     /* bilinear: two taps per direction */
+    ne10_int32_t ksize2 = ksize / 2;
+    ne10_int32_t xmin = 0;
+    ne10_int32_t xmax = dstw;
+    ne10_int32_t width = dstw * cn;
+    ne10_uint8_t *buffer_;
+
+    if (dstw <= 0 || dsth <= 0 || srcw <= 0 || srch <= 0)
+        return;     /* an empty image would make the scale factors divide by zero */
+
+    /* one block holds xofs and yofs, followed by the 16-bit ialpha and ibeta tables */
+    buffer_ = (ne10_uint8_t*) NE10_MALLOC ( (width + dsth) * (sizeof (ne10_int32_t) + sizeof (ne10_float32_t) * ksize));
+    if (buffer_ == NULL)
+        return;
+
+    ne10_int32_t* xofs = (ne10_int32_t*) buffer_;
+    ne10_int32_t* yofs = xofs + width;
+    ne10_int16_t* ialpha = (ne10_int16_t*) (yofs + dsth);
+    ne10_int16_t* ibeta = ialpha + width * ksize;
+
+    ne10_img_resize_cal_offset_linear (xofs, ialpha, yofs, ibeta, &xmin, &xmax, ksize, ksize2, srcw, srch, dstw, dsth, cn);
+    ne10_img_resize_generic_linear_neon (src, dst, xofs, ialpha, yofs, ibeta, xmin, xmax, ksize, srcw, srch, src_stride, dstw, dsth, cn);
+    NE10_FREE (buffer_);
+}
+
+/**
+ * @} end of IMG_RESIZE group
+ */
diff --git a/modules/imgproc/NE10_resize.neon.s b/modules/imgproc/NE10_resize.neon.s
index 255771d..18d2b28 100644
--- a/modules/imgproc/NE10_resize.neon.s
+++ b/modules/imgproc/NE10_resize.neon.s
@@ -44,14 +44,14 @@
          */
 
         .align   4
-        .global   ne10_vresize_neon
+        .global   ne10_img_vresize_linear_neon
         .thumb
-        .extern ne10_vresize_mask_residual_table/* mask of store data */
+        .extern ne10_img_vresize_linear_mask_residual_table/* mask of store data */
         .thumb_func
         .equ         BITS,        0x16        /* INTER_RESIZE_COEF_BITS*2 */
         .equ         DELTA,       0x200000    /* 1 << (INTER_RESIZE_COEF_BITS*2 - 1) */
 
-ne10_vresize_neon:
+ne10_img_vresize_linear_neon:
                      push    {r4-r6,lr}
 
 /*ARM Registers*/
@@ -105,13 +105,13 @@ dDst_01234567    .dn   d21
                      mov         tmp, #255
                      vdup.32     qMax, tmp
 
+                     vld1.s32    {qS0_0123, qS0_4567}, [pS0]!
+                     vld1.s32    {qS1_0123, qS1_4567}, [pS1]!
+
                      subs        width, width, #8
                      blt         VResizeResidualLoop
 
-                     vld1.s32    {qS0_0123, qS0_4567}, [pS0]!
-                     vld1.s32    {qS1_0123, qS1_4567}, [pS1]!
 VResizeMainLoop:
-
                      vmul.s32    qTmp_0123, qS0_0123, dBeta0
                      vmul.s32    qTmp_4567, qS0_4567, dBeta0
                      vmla.s32    qTmp_0123, qS1_0123, dBeta1
@@ -139,18 +139,16 @@ VResizeMainLoop:
                      vld1.s32    {qS1_0123, qS1_4567}, [pS1]!
                      bge         VResizeMainLoop
 
+VResizeResidualLoop:
                      adds        width, width, #8
                      beq         VResizeEnd
 
                      sub         width, width, #1
                      ldr         pMask, =ne10_vresize_mask_residual_table
-                     sub         width, width, #1
                      add         pMask, pMask, width, lsl #3
                      vld1.64     {dMask}, [pMask]
                      vld1.64     {dDst_01234567}, [pDst]
 
-VResizeResidualLoop:
-
                      vmul.s32    qTmp_0123, qS0_0123, dBeta0
                      vmul.s32    qTmp_4567, qS0_4567, dBeta0
                      vmla.s32    qTmp_0123, qS1_0123, dBeta1
@@ -171,7 +169,7 @@ VResizeResidualLoop:
                      vmovn.I32    dTmp_4567, qTmp_4567
                      vmovn.I16    dTmp_01234567, qTmp_01234567
                      vbsl         dMask, dTmp_01234567, dDst_01234567
-                     vst1.8       {dTmp_01234567}, [pDst]
+                     vst1.8       {dMask}, [pDst]
 VResizeEnd:
                      /*Return From Function*/
                      pop     {r4-r6,pc}
@@ -231,12 +229,12 @@ VResizeEnd:
          */
 
         .align   4
-        .global   ne10_hresize_4channels_neon
+        .global   ne10_img_hresize_4channels_linear_neon
         .thumb
         .thumb_func
         .equ         INTER_RESIZE_COEF_SCALE,        0x800  /* 1 << INTER_RESIZE_COEF_BITS */
 
-ne10_hresize_4channels_neon:
+ne10_img_hresize_4channels_linear_neon:
                      push    {r4-r10,lr}
 
 /*ARM Registers*/
@@ -284,9 +282,9 @@ qDst1_0123       .qn   q10
 
 
                      subs        tmp, count, #1
-                     beq         HResize4Count1
-HResize4Count2:
+                     beq         ne10_img_hresize_4channels_linear_count_1
 
+ne10_img_hresize_4channels_linear_count_2:
                      ldr         pS0, [pSrc], #4
                      ldr         pS1, [pSrc]
                      ldr         pD0, [pDst], #4
@@ -301,9 +299,6 @@ HResize4Count2:
                      ldr         xmax, [sp, #52]
                      sub         dwidth, dwidth, xmax    /* calculate the residual */
 
-                     subs        xmax, xmax, #4
-                     blt         HResize4ResidualLoop2
-
                      ldr         sx, [pXofs], #16     /* for 4 channels only, xofs is changed based on channels */
                      add         pTmp0, pS0, sx     /* find the address of starting element */
                      add         pTmp1, pS1, sx
@@ -311,8 +306,10 @@ HResize4Count2:
                      vld1.8      {dS0_01234567}, [pTmp0]
                      vld1.8      {dS1_01234567}, [pTmp1]
 
-HResize4MainLoop2:
+                     subs        xmax, xmax, #4
+                     blt         ne10_img_hresize_4channels_linear_count_2_dwidth_loop
 
+ne10_img_hresize_4channels_linear_count_2_xmax_loop:
                      vmovl.u8    qS0_01234567, dS0_01234567
                      vmovl.u8    qS1_01234567, dS1_01234567
 
@@ -332,12 +329,12 @@ HResize4MainLoop2:
                      vld1.8      {dS1_01234567}, [pTmp1]
 
                      subs        xmax, xmax, #4
-                     bge         HResize4MainLoop2
+                     bge         ne10_img_hresize_4channels_linear_count_2_xmax_loop
 
                      cmp         dwidth, #0
-                     beq         HResize4End
+                     beq         ne10_img_hresize_4channels_linear_end
 
-HResize4ResidualLoop2:
+ne10_img_hresize_4channels_linear_count_2_dwidth_loop:
 
                      vmovl.u8    qS0_01234567, dS0_01234567
                      vmovl.u8    qS1_01234567, dS1_01234567
@@ -355,11 +352,11 @@ HResize4ResidualLoop2:
                      vld1.8      {dS1_01234567}, [pTmp1]
 
                      subs        dwidth, dwidth, #4
-                     bgt         HResize4ResidualLoop2
+                     bgt         ne10_img_hresize_4channels_linear_count_2_dwidth_loop
 
-                     b           HResize4End
+                     b           ne10_img_hresize_4channels_linear_end
 
-HResize4Count1:
+ne10_img_hresize_4channels_linear_count_1:
 
                      ldr         pS0, [pSrc], #4
                      ldr         pD0, [pDst], #4
@@ -373,16 +370,15 @@ HResize4Count1:
                      ldr         xmax, [sp, #52]
                      sub         dwidth, dwidth, xmax    /* calculate the residual */
 
-                     subs        xmax, xmax, #4
-                     blt         HResize4ResidualLoop1
-
                      ldr         sx, [pXofs], #16     /* for 4 channels only, xofs is changed based on channels */
                      add         pTmp0, pS0, sx     /* find the address of starting element */
                      vld2.16     {dAlpha_0, dAlpha_1}, [pAlpha]! /* alpha is repeated based on channels */
                      vld1.8      {dS0_01234567}, [pTmp0]
 
-HResize4MainLoop1:
+                     subs        xmax, xmax, #4
+                     blt         ne10_img_hresize_4channels_linear_count_1_dwidth_loop
 
+ne10_img_hresize_4channels_linear_count_1_xmax_loop:
                      vmovl.u8    qS0_01234567, dS0_01234567
 
                      vmull.u16   qDst0_0123, dS0_0123, dAlpha_0
@@ -396,11 +392,11 @@ HResize4MainLoop1:
                      vld1.8      {dS0_01234567}, [pTmp0]
 
                      subs        xmax, xmax, #4
-                     bge         HResize4MainLoop1
+                     bge         ne10_img_hresize_4channels_linear_count_1_xmax_loop
 
-                     cbz         dwidth, HResize4End
+                     cbz         dwidth, ne10_img_hresize_4channels_linear_end
 
-HResize4ResidualLoop1:
+ne10_img_hresize_4channels_linear_count_1_dwidth_loop:
 
                      vmovl.u8    qS0_01234567, dS0_01234567
                      vmull.u16   qDst0_0123, dS0_0123, dCoeff
@@ -412,9 +408,9 @@ HResize4ResidualLoop1:
                      vld1.8      {dS0_01234567}, [pTmp0]
 
                      subs        dwidth, dwidth, #4
-                     bgt         HResize4ResidualLoop1
+                     bgt         ne10_img_hresize_4channels_linear_count_1_dwidth_loop
 
-HResize4End:
+ne10_img_hresize_4channels_linear_end:
                      /*Return From Function*/
                      pop     {r4-r10,pc}
 
diff --git a/modules/imgproc/NE10_rotate.c b/modules/imgproc/NE10_rotate.c
new file mode 100644
index 0000000..123a04e
--- /dev/null
+++ b/modules/imgproc/NE10_rotate.c
@@ -0,0 +1,316 @@
+/*
+ *  Copyright 2013 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* license of OpenCV */
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+/*
+ * NE10 Library : imgproc/NE10_rotate.c
+ */
+
+#include <math.h>
+#include "NE10.h"
+
+
+/**
+ * @ingroup groupIMGPROCs
+ */
+/**
+ * @defgroup IMG_ROTATE Image Rotate
+ *
+ * \par
+ * Image rotate is a generic functionality in image processing. For C implementation, we take the cvGetQuadrangleSubPix function from OpenCV (http://opencv.org/) for reference.
+ * \par
+ * This set of functions implements image rotate with bilinear interpolation algorithm
+ * for 8-bit data types.
+ *
+ */
+
+
+void ne10_img_rotate_get_quad_rangle_subpix_rgba_c (ne10_uint8_t *dst,
+        ne10_uint8_t *src,
+        ne10_int32_t srcw,
+        ne10_int32_t srch,
+        ne10_int32_t dstw,
+        ne10_int32_t dsth,
+        ne10_float32_t *matrix)
+{
+    ne10_uint8_t* src_data = src;
+    ne10_uint8_t* dst_data = dst;
+
+    ne10_int32_t x, y;
+    /* Fill dst (dstw x dsth RGBA pixels) by bilinear sampling of src at the position   */
+    /* (xs, ys) = (A11*x + A12*y + A13, A21*x + A22*y + A23) for each dst pixel (x, y). */
+    ne10_float32_t A11 = matrix[0], A12 = matrix[1], A13 = matrix[2];
+    ne10_float32_t A21 = matrix[3], A22 = matrix[4], A23 = matrix[5];
+
+    ne10_int32_t src_step = srcw * 4;
+    ne10_int32_t dst_step = dstw * 4;
+    for (y = 0; y < dsth; y++, dst_data += dst_step)
+    {
+        ne10_float32_t xs = A12 * y + A13;
+        ne10_float32_t ys = A22 * y + A23;
+        ne10_float32_t xe = A11 * (dstw - 1) + A12 * y + A13;
+        ne10_float32_t ye = A21 * (dstw - 1) + A22 * y + A23;
+        /* Fast path: both ends of the row map well inside src, so per-pixel bounds checks are skipped. */
+        if ( (unsigned) ( (ne10_int32_t) (xs) - 1) < (unsigned) (srcw - 4) &&
+                (unsigned) ( (ne10_int32_t) (ys) - 1) < (unsigned) (srch - 4) &&
+                (unsigned) ( (ne10_int32_t) (xe) - 1) < (unsigned) (srcw - 4) &&
+                (unsigned) ( (ne10_int32_t) (ye) - 1) < (unsigned) (srch - 4))
+        {
+            for (x = 0; x < dstw; x++)
+            {
+                ne10_int32_t ixs = (ne10_int32_t) (xs);
+                ne10_int32_t iys = (ne10_int32_t) (ys);
+                const ne10_uint8_t *ptr = src_data + src_step * iys + ixs * 4;
+                /* a/a1 weight the right/left column, b the lower row (converted by NE10_F2I16_OP). */
+                ne10_int16_t a = NE10_F2I16_OP (xs - ixs);
+                ne10_int16_t b = NE10_F2I16_OP (ys - iys);
+                ne10_int16_t a1 = NE10_F2I16_OP (1.f - (xs - ixs));
+
+                ne10_uint8_t p0, p1;
+                xs += A11;
+                ys += A21;
+
+                p0 = NE10_F2I16_SROUND (ptr[0] * a1 + ptr[4] * a);
+                p1 = NE10_F2I16_SROUND (ptr[src_step] * a1 + ptr[src_step + 4] * a);
+                dst_data[x * 4] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+
+                p0 = NE10_F2I16_SROUND (ptr[1] * a1 + ptr[4 + 1] * a);
+                p1 = NE10_F2I16_SROUND (ptr[src_step + 1] * a1 + ptr[src_step + 4 + 1] * a);
+                dst_data[x * 4 + 1] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+
+                p0 = NE10_F2I16_SROUND (ptr[2] * a1 + ptr[4 + 2] * a);
+                p1 = NE10_F2I16_SROUND (ptr[src_step + 2] * a1 + ptr[src_step + 4 + 2] * a);
+                dst_data[x * 4 + 2] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+
+                p0 = NE10_F2I16_SROUND (ptr[3] * a1 + ptr[4 + 3] * a);
+                p1 = NE10_F2I16_SROUND (ptr[src_step + 3] * a1 + ptr[src_step + 4 + 3] * a);
+                dst_data[x * 4 + 3] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+            }
+        }
+        else
+        {
+            for (x = 0; x < dstw; x++)
+            {
+                ne10_int32_t ixs = (ne10_int32_t) (xs), iys = (ne10_int32_t) (ys);
+                /* Same weights as the fast path; here every sample is bounds-checked and */
+                /* pixels that map outside src are skipped, leaving dst unchanged there.  */
+                ne10_int16_t  a =  NE10_F2I16_OP (xs - ixs);
+                ne10_int16_t  b =  NE10_F2I16_OP (ys - iys);
+                ne10_int16_t  a1 =  NE10_F2I16_OP (1.f - (xs - ixs));
+                const ne10_uint8_t *ptr0, *ptr1;
+                xs += A11;
+                ys += A21;
+
+                if ( (unsigned) iys < (unsigned) (srch - 1))
+                {
+                    ptr0 = src_data + src_step * iys;
+                    ptr1 = ptr0 + src_step;
+                }
+                else
+                {
+                    continue;
+                }
+
+                if ( (unsigned) ixs < (unsigned) (srcw - 1))
+                {
+
+                    ne10_uint8_t p0, p1;
+
+                    ptr0 += ixs * 4;
+                    ptr1 += ixs * 4;
+
+                    p0 = NE10_F2I16_SROUND (ptr0[0] * a1 + ptr0[4] * a);
+                    p1 = NE10_F2I16_SROUND (ptr1[0] * a1 + ptr1[4] * a);
+                    dst_data[x * 4] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+
+                    p0 = NE10_F2I16_SROUND (ptr0[1] * a1 + ptr0[4 + 1] * a);
+                    p1 = NE10_F2I16_SROUND (ptr1[1] * a1 + ptr1[4 + 1] * a);
+                    dst_data[x * 4 + 1] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+
+                    p0 = NE10_F2I16_SROUND (ptr0[2] * a1 + ptr0[4 + 2] * a);
+                    p1 = NE10_F2I16_SROUND (ptr1[2] * a1 + ptr1[4 + 2] * a);
+                    dst_data[x * 4 + 2] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+
+                    p0 = NE10_F2I16_SROUND (ptr0[3] * a1 + ptr0[4 + 3] * a);
+                    p1 = NE10_F2I16_SROUND (ptr1[3] * a1 + ptr1[4 + 3] * a);
+                    dst_data[x * 4 + 3] = NE10_F2I16_SROUND (p0 * NE10_F2I16_MAX + b * (p1 - p0));
+                }
+            }
+        }
+    }
+}
+
+
+/**
+ * @addtogroup IMG_ROTATE
+ * @{
+ */
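+/*
+ * Both entry points below compute the destination size themselves and write
+ * it to *dst_width and *dst_height before filling dst, so dst must already be
+ * large enough for the rotated image (4 bytes per pixel).
+ */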
+/**
+ * @brief Rotate an 8-bit RGBA image about its centre (C implementation).
+ * @param[out]  *dst                  point to the destination image; must hold *dst_width x *dst_height RGBA pixels
+ * @param[out]  *dst_width            receives the width of the destination image (computed here)
+ * @param[out]  *dst_height           receives the height of the destination image (computed here)
+ * @param[in]   *src                  point to the source image
+ * @param[in]   src_width             width of source image
+ * @param[in]   src_height            height of source image
+ * @param[in]   angle                 rotation angle in degrees
+ * @return none.
+ * The function extracts pixels from src at sub-pixel accuracy and stores them to dst
+ */
+void ne10_img_rotate_rgba_c (ne10_uint8_t* dst,
+                             ne10_uint32_t* dst_width,
+                             ne10_uint32_t* dst_height,
+                             ne10_uint8_t* src,
+                             ne10_uint32_t src_width,
+                             ne10_uint32_t src_height,
+                             ne10_int32_t angle)
+{
+    ne10_float32_t radian = (angle * NE10_PI / 180.0);
+    ne10_float32_t a = sin (radian), b = cos (radian);
+    ne10_int32_t srcw = src_width;
+    ne10_int32_t srch = src_height;
+    ne10_int32_t dstw = (srch * fabs (a)) + (srcw * fabs (b)) + 1;   /* bounding box of the rotated source */
+    ne10_int32_t dsth = (srch * fabs (b)) + (srcw * fabs (a)) + 1;
+    /* m: row-major 2x3 affine matrix taking destination pixels back to source pixels */
+    ne10_float32_t m[6];
+    ne10_float32_t dx = (dstw - 1) * 0.5;
+    ne10_float32_t dy = (dsth - 1) * 0.5;
+
+    m[0] = b;
+    m[1] = a;
+    m[3] = -m[1];
+    m[4] = m[0];
+    m[2] = srcw * 0.5f - m[0] * dx - m[1] * dy;   /* translation: dst centre (dx, dy) maps to the */
+    m[5] = srch * 0.5f - m[3] * dx - m[4] * dy;   /* src centre (srcw / 2, srch / 2)              */
+
+    *dst_width = dstw;
+    *dst_height = dsth;
+    ne10_img_rotate_get_quad_rangle_subpix_rgba_c (dst, src, srcw, srch, dstw, dsth, m);
+}
+
+
+extern void  ne10_img_rotate_get_quad_rangle_subpix_rgba_neon (ne10_uint8_t *dst,   /* symbol exported by NE10_rotate.neon.s */
+        ne10_uint8_t *src,
+        ne10_int32_t srcw,
+        ne10_int32_t srch,
+        ne10_int32_t dstw,
+        ne10_int32_t dsth,
+        ne10_float32_t *matrix);   /* six floats; presumably the same 2x3 layout the C kernel reads - confirm in the .s file */
+
+/**
+ * @brief Rotate an 8-bit RGBA image about its centre (NEON implementation).
+ * @param[out]  *dst                  point to the destination image; must hold *dst_width x *dst_height RGBA pixels
+ * @param[out]  *dst_width            receives the width of the destination image (computed here)
+ * @param[out]  *dst_height           receives the height of the destination image (computed here)
+ * @param[in]   *src                  point to the source image
+ * @param[in]   src_width             width of source image
+ * @param[in]   src_height            height of source image
+ * @param[in]   angle                 rotation angle in degrees
+ * @return none.
+ * The function extracts pixels from src at sub-pixel accuracy and stores them to dst
+ */
+void ne10_img_rotate_rgba_neon (ne10_uint8_t* dst,
+                                ne10_uint32_t* dst_width,
+                                ne10_uint32_t* dst_height,
+                                ne10_uint8_t* src,
+                                ne10_uint32_t src_width,
+                                ne10_uint32_t src_height,
+                                ne10_int32_t angle)
+{
+    ne10_float32_t radian = (angle * NE10_PI / 180.0);
+    ne10_float32_t a = sin (radian), b = cos (radian);
+    ne10_int32_t srcw = src_width;
+    ne10_int32_t srch = src_height;
+    ne10_int32_t dstw = (srch * fabs (a)) + (srcw * fabs (b)) + 1;   /* bounding box of the rotated source */
+    ne10_int32_t dsth = (srch * fabs (b)) + (srcw * fabs (a)) + 1;
+    /* m: 2x3 affine matrix (six floats) handed to the NEON kernel; dst -> src mapping */
+    ne10_float32_t m[6];
+    ne10_float32_t dx = (dstw - 1) * 0.5;
+    ne10_float32_t dy = (dsth - 1) * 0.5;
+
+    m[0] = b;
+    m[1] = a;
+    m[3] = -m[1];
+    m[4] = m[0];
+    m[2] = srcw * 0.5f - m[0] * dx - m[1] * dy;   /* translation: dst centre (dx, dy) maps to the */
+    m[5] = srch * 0.5f - m[3] * dx - m[4] * dy;   /* src centre (srcw / 2, srch / 2)              */
+
+    *dst_width = dstw;
+    *dst_height = dsth;
+    ne10_img_rotate_get_quad_rangle_subpix_rgba_neon (dst, src, srcw, srch, dstw, dsth, m);
+}
+
+/**
+ * @} end of IMG_ROTATE group
+ */
diff --git a/modules/imgproc/NE10_rotate.neon.s b/modules/imgproc/NE10_rotate.neon.s
index 69a304e..cbe6001 100644
--- a/modules/imgproc/NE10_rotate.neon.s
+++ b/modules/imgproc/NE10_rotate.neon.s
@@ -46,11 +46,11 @@
          */
 
         .align   4
-        .global   ne10_img_rotate_get_quad_rangle_subpix_neon
+        .global   ne10_img_rotate_get_quad_rangle_subpix_rgba_neon
         .thumb
         .thumb_func
 
-ne10_img_rotate_get_quad_rangle_subpix_neon:
+ne10_img_rotate_get_quad_rangle_subpix_rgba_neon:
 
 /*ARM Registers*/
 /* long-term variable */
diff --git a/modules/imgproc/test/test_main.c b/modules/imgproc/test/test_main.c
new file mode 100644
index 0000000..cef7790
--- /dev/null
+++ b/modules/imgproc/test/test_main.c
@@ -0,0 +1,59 @@
+/*
+ *  Copyright 2013 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test_main.c
+ */
+
+#include "seatest.h"
+
+void test_fixture_resize (void);
+void test_fixture_rotate (void);
+
+void all_tests (void)            /* handed to run_tests() by main(); runs the fixtures in this order */
+{
+    test_fixture_resize();       /* image resize fixture (test_suite_resize.c) */
+    test_fixture_rotate();       /* image rotate fixture (presumably test_suite_rotate.c) */
+}
+
+
+void my_suite_setup (void)
+{
+    /* Intentionally empty. main() registers this via suite_setup(); per the original note it runs before every test in the suite. */
+}
+
+void my_suite_teardown (void)
+{
+    /* Intentionally empty. main() registers this via suite_teardown(); per the original note it runs after every test in the suite. */
+}
+
+int main (int argc, char** argv)             /* standard signature; arguments are not used */
+{
+    suite_setup (my_suite_setup);            /* hook registered as the suite setup */
+    suite_teardown (my_suite_teardown);      /* hook registered as the suite teardown */
+    return run_tests (all_tests);            /* the process exit status is whatever run_tests() returns */
+}
diff --git a/modules/imgproc/test/test_suite_resize.c b/modules/imgproc/test/test_suite_resize.c
new file mode 100644
index 0000000..fc4d655
--- /dev/null
+++ b/modules/imgproc/test/test_suite_resize.c
@@ -0,0 +1,207 @@
+/*
+ *  Copyright 2013 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test_suite_resize.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#include "NE10_imgproc.h"
+#include "seatest.h"
+#include "unit_test_common.h"
+
+/* ----------------------------------------------------------------------
+** Global defines: sizing of the test buffers and of the timing loops
+** ------------------------------------------------------------------- */
+#define MEM_SIZE        256  /* buffers hold MEM_SIZE x MEM_SIZE RGBA pixels; tested edge lengths stay below it (alternative value: 1024) */
+#define TEST_COUNT 5000      /* resize calls timed per size in the performance case */
+
+
+/* ----------------------------------------------------------------------
+** Defines each of the tests performed
+** ------------------------------------------------------------------- */
+
+
+/* Buffers shared by the test cases below; each case allocates them on entry and frees them before returning. */
+static ne10_uint8_t * in_c = NULL;
+static ne10_uint8_t * in_neon = NULL;
+/* in_*: source pixels (both filled with the same random bytes); out_*: results of the C and NEON resize. */
+static ne10_uint8_t * out_c = NULL;
+static ne10_uint8_t * out_neon = NULL;
+/* NOTE(review): snr is not referenced anywhere else in this test file. */
+static ne10_float32_t snr = 0.0f;
+
+void test_resize_conformance_case()
+{
+    ne10_int32_t srcw, srch, dstw, dsth;
+    ne10_int32_t i, w, h;
+    ne10_int32_t channels = 4;
+    ne10_int32_t pic_size = MEM_SIZE * MEM_SIZE * channels * sizeof (ne10_uint8_t);
+    ne10_int32_t alloc_failed;
+    ne10_float32_t PSNR = 0.0f;
+
+    /* init input memory */
+    in_c = NE10_MALLOC (pic_size);
+    in_neon = NE10_MALLOC (pic_size);
+
+    /* init dst memory */
+    out_c = NE10_MALLOC (pic_size);
+    out_neon = NE10_MALLOC (pic_size);
+
+    /* Report a failed allocation as a test failure instead of dereferencing NULL. */
+    alloc_failed = (in_c == NULL || in_neon == NULL || out_c == NULL || out_neon == NULL);
+    assert_false (alloc_failed);
+    if (alloc_failed)
+        goto cleanup;
+
+    for (i = 0; i < pic_size; i++)
+    {
+        in_c[i] = in_neon[i] = (rand() & 0xff);
+    }
+    /* Sweep square sizes: source h x h, destination w x w; C and NEON outputs are compared by PSNR. */
+    for (h = 1; h < MEM_SIZE; h++)
+    {
+        for (w = 1; w < MEM_SIZE; w++)
+        {
+            srcw = srch = h;
+            dstw = dsth = w;
+            printf ("srcw X srch = %d X %d \n", srcw, srch);
+            printf ("dstw X dsth = %d X %d \n", dstw, dsth);
+
+            ne10_img_resize_bilinear_rgba_c (out_c, dstw, dsth, in_c, srcw, srch, srcw);
+            ne10_img_resize_bilinear_rgba_neon (out_neon, dstw, dsth, in_neon, srcw, srch, srcw);
+            PSNR = CAL_PSNR_UINT8 (out_c, out_neon, dstw * dsth * channels);
+            assert_false ( (PSNR < PSNR_THRESHOLD));
+        }
+    }
+cleanup:   /* assumes NE10_FREE accepts NULL like free() - confirm against its definition */
+    NE10_FREE (in_c);
+    NE10_FREE (in_neon);
+    NE10_FREE (out_c);
+    NE10_FREE (out_neon);
+}
+
+void test_resize_performance_case()
+{
+    ne10_int32_t srcw, srch, dstw, dsth;
+    ne10_int32_t i, w, h;
+    ne10_int32_t channels = 4;
+    ne10_int32_t pic_size = MEM_SIZE * MEM_SIZE * channels * sizeof (ne10_uint8_t);
+    ne10_int64_t time_c = 0;
+    ne10_int64_t time_neon = 0;
+
+    /* init input memory */
+    in_c = NE10_MALLOC (pic_size);
+    in_neon = NE10_MALLOC (pic_size);
+
+    /* init dst memory */
+    out_c = NE10_MALLOC (pic_size);
+    out_neon = NE10_MALLOC (pic_size);
+
+    /* Nothing can be timed without all four buffers; skip straight to the frees. */
+    if (in_c == NULL || in_neon == NULL || out_c == NULL || out_neon == NULL)
+        goto cleanup;
+
+    for (i = 0; i < pic_size; i++)
+    {
+        in_c[i] = in_neon[i] = (rand() & 0xff);
+    }
+
+    for (h = 16; h < MEM_SIZE; h += 4)
+    {
+        for (w = 16; w < MEM_SIZE; w += 4)
+        {
+            srcw = h;
+            srch = h;
+            dstw = w;
+            dsth = w;
+
+            printf ("srcw X srch = %d X %d \n", srcw, srch);
+            printf ("dstw X dsth = %d X %d \n", dstw, dsth);
+
+            GET_TIME
+            (
+                time_c,
+            {
+                for (i = 0; i < TEST_COUNT; i++)
+                    ne10_img_resize_bilinear_rgba_c (out_c, dstw, dsth, in_c, srcw, srch, srcw);
+            }
+            );
+
+            GET_TIME
+            (
+                time_neon,
+            {
+                for (i = 0; i < TEST_COUNT; i++)
+                    ne10_img_resize_bilinear_rgba_neon (out_neon, dstw, dsth, in_neon, srcw, srch, srcw);
+            }
+            );
+            /* The last two columns (apparently a percentage and a ratio) are logged as 0.0 placeholders; */
+            /* they must be floating-point values because the format consumes them with %f.               */
+            ne10_log (__FUNCTION__, "IMAGERESIZE%20d%20lld%20lld%19.2f%%%18.2f:1\n", (h*MEM_SIZE+w), time_c, time_neon, 0.0, 0.0);
+        }
+    }
+cleanup:   /* assumes NE10_FREE accepts NULL like free() - confirm against its definition */
+    NE10_FREE (in_c);
+    NE10_FREE (in_neon);
+    NE10_FREE (out_c);
+    NE10_FREE (out_neon);
+}
+
+void test_resize()
+{
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    test_resize_conformance_case();     /* C vs NEON output comparison */
+#endif
+    /* Each case is compiled in only when the build defines the matching macro. */
+#if defined PERFORMANCE_TEST
+    test_resize_performance_case();     /* C vs NEON timing */
+#endif
+}
+
+static void my_test_setup (void)             /* registered with fixture_setup() in test_fixture_resize() */
+{
+    ne10_log_buffer_ptr = ne10_log_buffer;   /* point the log pointer back at ne10_log_buffer (both declared outside this file) */
+}
+
+void test_fixture_resize (void)         /* resize fixture; called from all_tests() in test_main.c */
+{
+    test_fixture_start();               // starts a fixture
+    /* my_test_setup resets the log pointer; it is registered here as the fixture's setup hook */
+    fixture_setup (my_test_setup);
+    /* test_resize runs whichever cases the build enabled through its #if blocks */
+    run_test (test_resize);       // run tests
+
+    test_fixture_end();                 // ends a fixture
+}
+
+
+
diff --git a/modules/imgproc/test/test_suite_rotate.c b/modules/imgproc/test/test_suite_rotate.c
new file mode 100644
index 0000000..9e6f984
--- /dev/null
+++ b/modules/imgproc/test/test_suite_rotate.c
@@ -0,0 +1,203 @@
+/*
+ *  Copyright 2013 ARM Limited
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of ARM Limited nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
+ *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
+ *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * NE10 Library : test_suite_rotate.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#include "NE10_imgproc.h"
+#include "seatest.h"
+#include "unit_test_common.h"
+
+/* ----------------------------------------------------------------------
+** Global defines
+** ------------------------------------------------------------------- */
+
+#define SRC_HEIGHT        512 //source image height in pixels
+#define SRC_WIDTH         512 //source image width in pixels
+#define DST_HEIGHT        734 //source diagonal plus margin: sqrt(512*512 + 512*512) + 10
+#define DST_WIDTH         734 //source diagonal plus margin: sqrt(512*512 + 512*512) + 10
+#define TEST_COUNT        5000 //rotations timed per implementation in the performance case
+
+
+/* ----------------------------------------------------------------------
+** Defines each of the tests performed
+** ------------------------------------------------------------------- */
+
+
+/* Source images: the test cases fill both with the same random RGBA data. */
+static ne10_uint8_t * in_c = NULL;
+static ne10_uint8_t * in_neon = NULL;
+
+/* Destination images, one per implementation (C and NEON).
+ * All four buffers are allocated and freed inside each test case. */
+static ne10_uint8_t * out_c = NULL;
+static ne10_uint8_t * out_neon = NULL;
+
+/* Conformance: C and NEON rotations of one random RGBA image must agree (PSNR) for every tested angle. */
+void test_rotate_conformance_case()
+{
+    ne10_int32_t i;
+    ne10_int32_t channels = 4;
+    ne10_int32_t in_size = SRC_HEIGHT * SRC_WIDTH * channels;
+    ne10_int32_t out_size = DST_HEIGHT * DST_WIDTH * channels;
+    ne10_float32_t PSNR = 0.0f;
+    ne10_int32_t srcw = SRC_WIDTH, srch = SRC_HEIGHT;
+    ne10_int32_t dstw_c = 0, dsth_c = 0, dstw_neon = 0, dsth_neon = 0;
+    ne10_int32_t angle, alloc_ok;
+
+    /* one source copy and one destination buffer per implementation */
+    in_c = NE10_MALLOC (in_size * sizeof (ne10_uint8_t));
+    in_neon = NE10_MALLOC (in_size * sizeof (ne10_uint8_t));
+    out_c = NE10_MALLOC (out_size * sizeof (ne10_uint8_t));
+    out_neon = NE10_MALLOC (out_size * sizeof (ne10_uint8_t));
+    /* a failed allocation is reported as a test failure and the loops below are skipped */
+    alloc_ok = (in_c != NULL) && (in_neon != NULL) && (out_c != NULL) && (out_neon != NULL);
+    assert_true (alloc_ok);
+
+    for (i = 0; alloc_ok && i < in_size; i++)
+    {
+        in_c[i] = in_neon[i] = (rand() & 0xff);
+    }
+
+    for (angle = -360; alloc_ok && angle <= 360; angle += 30)
+    {
+        printf ("rotate angle %d \n", angle);
+
+        memset (out_c, 0, out_size);
+        ne10_img_rotate_rgba_c (out_c, &dstw_c, &dsth_c, in_c, srcw, srch, angle);
+        memset (out_neon, 0, out_size);
+        ne10_img_rotate_rgba_neon (out_neon, &dstw_neon, &dsth_neon, in_neon, srcw, srch, angle);
+
+        /* comparing pixels is only meaningful when both outputs have the same size */
+        assert_true ( (dstw_c == dstw_neon) && (dsth_c == dsth_neon));
+        PSNR = CAL_PSNR_UINT8 (out_c, out_neon, dstw_c * dsth_c * 4);
+        assert_false ( (PSNR < PSNR_THRESHOLD));
+    }
+    NE10_FREE (in_c);
+    NE10_FREE (in_neon);
+    NE10_FREE (out_c);
+    NE10_FREE (out_neon);
+}
+
+/* Performance: time TEST_COUNT C and NEON rotations of one random RGBA image and log both timings. */
+void test_rotate_performance_case()
+{
+    ne10_int32_t i;
+    ne10_int32_t channels = 4;
+    ne10_int32_t in_size = SRC_HEIGHT * SRC_WIDTH * channels;
+    ne10_int32_t out_size = DST_HEIGHT * DST_WIDTH * channels;
+    ne10_int32_t srcw = SRC_WIDTH, srch = SRC_HEIGHT;
+    ne10_int32_t dstw_c, dsth_c, dstw_neon, dsth_neon;
+    ne10_int32_t angle, alloc_ok;
+    ne10_int64_t time_c = 0, time_neon = 0;
+    ne10_float32_t time_savings = 0.0f, time_speedup = 0.0f;
+
+    /* one source copy and one destination buffer per implementation */
+    in_c = NE10_MALLOC (in_size * sizeof (ne10_uint8_t));
+    in_neon = NE10_MALLOC (in_size * sizeof (ne10_uint8_t));
+    out_c = NE10_MALLOC (out_size * sizeof (ne10_uint8_t));
+    out_neon = NE10_MALLOC (out_size * sizeof (ne10_uint8_t));
+
+    /* a failed allocation is reported as a test failure and the loops below are skipped */
+    alloc_ok = (in_c != NULL) && (in_neon != NULL) && (out_c != NULL) && (out_neon != NULL);
+    assert_true (alloc_ok);
+
+    for (i = 0; alloc_ok && i < in_size; i++)
+    {
+        in_c[i] = in_neon[i] = (rand() & 0xff);
+    }
+
+    /* only the 45 degree rotation is timed */
+    for (angle = 45; alloc_ok && angle <= 45; angle += 5)
+    {
+        printf ("rotate angle %d \n", angle);
+        memset (out_c, 0, out_size);
+        GET_TIME
+        (
+            time_c,
+        {
+            for (i = 0; i < TEST_COUNT; i++)
+                ne10_img_rotate_rgba_c (out_c, &dstw_c, &dsth_c, in_c, srcw, srch, angle);
+        }
+        );
+
+        memset (out_neon, 0, out_size);
+        GET_TIME
+        (
+            time_neon,
+        {
+            for (i = 0; i < TEST_COUNT; i++)
+                ne10_img_rotate_rgba_neon (out_neon, &dstw_neon, &dsth_neon, in_neon, srcw, srch, angle);
+        }
+        );
+
+        /* the format ends in two %f fields: pass floats, not integer 0; a figure stays 0 if its divisor is not positive */
+        time_speedup = (time_neon > 0) ? (ne10_float32_t) time_c / time_neon : 0.0f;
+        time_savings = (time_c > 0) ? ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100 : 0.0f;
+        ne10_log (__FUNCTION__, "IMAGEROTATE%20d%20lld%20lld%19.2f%%%18.2f:1\n", angle, time_c, time_neon, time_savings, time_speedup);
+    }
+    NE10_FREE (in_c);
+    NE10_FREE (in_neon);
+    NE10_FREE (out_c);
+    NE10_FREE (out_neon);
+}
+
+void test_rotate()      /* runs the rotate cases selected by the build's test-mode macros */
+{
+#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
+    test_rotate_conformance_case();     /* C vs NEON output comparison */
+#endif
+    /* every mode is tested with defined; a bare (REGRESSION_TEST) would evaluate the macro's value */
+#if defined (PERFORMANCE_TEST)
+    test_rotate_performance_case();     /* C vs NEON timing */
+#endif
+}
+
+static void my_test_setup (void)
+{
+    ne10_log_buffer_ptr = &ne10_log_buffer[0];  /* point ne10_log_buffer_ptr at the first element of ne10_log_buffer */
+}
+
+void test_fixture_rotate (void)         // fixture entry point for the image rotate tests
+{
+    test_fixture_start();               // starts a fixture
+
+    fixture_setup (my_test_setup);      // passes my_test_setup to seatest as this fixture's setup hook
+
+    run_test (test_rotate);             // runs test_rotate inside the fixture
+
+    test_fixture_end();                 // ends a fixture
+}
+
+
+
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 9de1805..ffccb34 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -140,3 +140,45 @@ if(NE10_ENABLE_DSP)
     endif()
 endif()
 
+
+if(NE10_ENABLE_IMGPROC)
+    # Define imgproc test files: the test driver plus the resize and rotate suites.
+    set(NE10_TEST_IMGPROC_SRCS
+        ${PROJECT_SOURCE_DIR}/modules/imgproc/test/test_main.c
+        ${PROJECT_SOURCE_DIR}/modules/imgproc/test/test_suite_resize.c
+        ${PROJECT_SOURCE_DIR}/modules/imgproc/test/test_suite_rotate.c
+    )
+
+    if(NE10_BUILD_STATIC)  # the test executable is only defined for static builds
+        add_executable(NE10_imgproc_unit_test_static ${NE10_TEST_IMGPROC_SRCS} ${NE10_TEST_COMMON_SRCS})
+        if(ANDROID_PLATFORM OR IOS_PLATFORM)  # link against NE10 and libm
+            target_link_libraries (
+                NE10_imgproc_unit_test_static
+                NE10
+                m
+            )
+        elseif(GNULINUX_PLATFORM)  # additionally link librt
+            target_link_libraries (
+                NE10_imgproc_unit_test_static
+                NE10
+                m
+                rt
+            )
+        endif()
+
+        if(NE10_SMOKE_TEST)  # name the binary after the selected test mode
+            set_target_properties(NE10_imgproc_unit_test_static PROPERTIES
+                OUTPUT_NAME "NE10_imgproc_unit_test_smoke"
+            )
+        elseif (NE10_REGRESSION_TEST)
+            set_target_properties(NE10_imgproc_unit_test_static PROPERTIES
+                OUTPUT_NAME "NE10_imgproc_unit_test_regression"
+            )
+        elseif (NE10_PERFORMANCE_TEST)
+            set_target_properties(NE10_imgproc_unit_test_static PROPERTIES
+                OUTPUT_NAME "NE10_imgproc_unit_test_performance"
+            )
+        endif()
+    endif()
+endif()
+
diff --git a/test/include/unit_test_common.h b/test/include/unit_test_common.h
index 98e78e5..56be85e 100644
--- a/test/include/unit_test_common.h
+++ b/test/include/unit_test_common.h
@@ -83,6 +83,7 @@
 #define ERROR_MARGIN_SMALL 0x0A
 #define ERROR_MARGIN_LARGE 0xFF
 #define SNR_THRESHOLD 50.0f
+#define PSNR_THRESHOLD 30.0f
 
 // What's the acceptable number of warnings in a test
 #define ACCEPTABLE_WARNS 12
@@ -131,6 +132,7 @@ extern int EQUALS_FLOAT( ne10_float32_t fa, ne10_float32_t fb , ne10_uint32_t er
 extern int GUARD_ARRAY( ne10_float32_t* array, ne10_uint32_t array_length );
 extern int CHECK_ARRAY_GUARD( ne10_float32_t* array, ne10_uint32_t array_length );
 extern ne10_float32_t CAL_SNR_FLOAT32(ne10_float32_t *pRef, ne10_float32_t *pTest, ne10_uint32_t buffSize);
+extern ne10_float32_t CAL_PSNR_UINT8 (ne10_uint8_t *pRef, ne10_uint8_t *pTest, ne10_uint32_t buffSize);
 
 extern char ne10_log_buffer[];
 extern char *ne10_log_buffer_ptr;
diff --git a/test/src/unit_test_common.c b/test/src/unit_test_common.c
index c82da1c..37a1808 100644
--- a/test/src/unit_test_common.c
+++ b/test/src/unit_test_common.c
@@ -200,6 +200,31 @@ ne10_float32_t CAL_SNR_FLOAT32 (ne10_float32_t *pRef, ne10_float32_t *pTest, ne1
 
 }
 
+/**
+ * @brief  Calculation of PSNR
+ * @param  pRef      Pointer to the reference buffer
+ * @param  pTest     Pointer to the test buffer
+ * @param  buffSize  total number of samples
+ * @return PSNR in dB; 0 for an empty comparison, HUGE_VAL for identical buffers
+ * The function calculates the peak signal to noise ratio (peak value 255) of
+ * the test output against the reference output
+ */
+ne10_float32_t CAL_PSNR_UINT8 (ne10_uint8_t *pRef, ne10_uint8_t *pTest, ne10_uint32_t buffSize)
+{
+    ne10_float64_t mse = 0.0, max = 255.0;
+    ne10_uint32_t i;
+
+    if (buffSize == 0)
+        return 0.0f;    /* nothing was compared: report the worst score instead of NaN */
+    for (i = 0; i < buffSize; i++)
+    {
+        ne10_int32_t diff = pRef[i] - pTest[i];
+        mse += diff * diff;
+    }
+    if (mse == 0.0)
+        return (ne10_float32_t) HUGE_VAL;    /* identical buffers: avoid dividing by zero */
+    return (ne10_float32_t) (10 * log10 (max * max / (mse / buffSize)));
+}
 char ne10_log_buffer[1000];
 char *ne10_log_buffer_ptr;
 
-- 
2.7.4