Evas filters: Add template optimizable files for box blur
author Jean-Philippe Andre <jp.andre@samsung.com>
Tue, 25 Mar 2014 07:54:57 +0000 (16:54 +0900)
committer Jean-Philippe Andre <jp.andre@samsung.com>
Wed, 2 Apr 2014 05:53:00 +0000 (14:53 +0900)
The new files (i386, sse3 and neon) are basically empty and fall back
to the C version. This is just to pave the way for full low-level
optimization... if someone has the time and skills to do it :)

Add both Alpha and RGBA template files.

src/Makefile_Evas.am
src/lib/evas/filters/blur/blur_box_alpha_i386.c [new file with mode: 0644]
src/lib/evas/filters/blur/blur_box_alpha_neon.c [new file with mode: 0644]
src/lib/evas/filters/blur/blur_box_alpha_sse3.c [new file with mode: 0644]
src/lib/evas/filters/blur/blur_box_rgba_.c
src/lib/evas/filters/blur/blur_box_rgba_i386.c [new file with mode: 0644]
src/lib/evas/filters/blur/blur_box_rgba_neon.c [new file with mode: 0644]
src/lib/evas/filters/blur/blur_box_rgba_sse3.c [new file with mode: 0644]
src/lib/evas/filters/evas_filter_blur.c

index 540d08d..34d3315 100644 (file)
@@ -496,7 +496,14 @@ EXTRA_DIST += \
 lib/evas/filters/blur/blur_gaussian_alpha_.c \
 lib/evas/filters/blur/blur_gaussian_rgba_.c \
 lib/evas/filters/blur/blur_box_alpha_.c \
-lib/evas/filters/blur/blur_box_rgba_.c
+lib/evas/filters/blur/blur_box_alpha_i386.c \
+lib/evas/filters/blur/blur_box_alpha_sse3.c \
+lib/evas/filters/blur/blur_box_alpha_neon.c \
+lib/evas/filters/blur/blur_box_rgba_.c \
+lib/evas/filters/blur/blur_box_rgba_i386.c \
+lib/evas/filters/blur/blur_box_rgba_sse3.c \
+lib/evas/filters/blur/blur_box_rgba_neon.c
+
 
 ### Engines
 
diff --git a/src/lib/evas/filters/blur/blur_box_alpha_i386.c b/src/lib/evas/filters/blur/blur_box_alpha_i386.c
new file mode 100644 (file)
index 0000000..1664431
--- /dev/null
@@ -0,0 +1,25 @@
+#ifdef BUILD_MMX
+
+static inline void /* MMX slot for the horizontal alpha (DATA8) box blur step */
+_box_blur_alpha_horiz_step_mmx(const DATA8* restrict const srcdata,
+                               DATA8* restrict const dstdata,
+                               const int* restrict const radii,
+                               const int len,
+                               const int loops)
+{
+   // TODO: implement MMX-optimized code here and remove the following line:
+   _box_blur_alpha_horiz_step(srcdata, dstdata, radii, len, loops); // placeholder until then: passes every argument through unchanged
+}
+
+static inline void /* MMX slot for the vertical alpha (DATA8) box blur step */
+_box_blur_alpha_vert_step_mmx(const DATA8* restrict const srcdata,
+                              DATA8* restrict const dstdata,
+                              const int* restrict const radii,
+                              const int len,
+                              const int loops)
+{
+   // TODO: implement MMX-optimized code here and remove the following line:
+   _box_blur_alpha_vert_step(srcdata, dstdata, radii, len, loops); // placeholder until then: passes every argument through unchanged
+}
+
+#endif
diff --git a/src/lib/evas/filters/blur/blur_box_alpha_neon.c b/src/lib/evas/filters/blur/blur_box_alpha_neon.c
new file mode 100644 (file)
index 0000000..b8d9524
--- /dev/null
@@ -0,0 +1,25 @@
+#ifdef BUILD_NEON
+
+static inline void /* NEON slot for the horizontal alpha (DATA8) box blur step */
+_box_blur_alpha_horiz_step_neon(const DATA8* restrict const srcdata,
+                                DATA8* restrict const dstdata,
+                                const int* restrict const radii,
+                                const int len,
+                                const int loops)
+{
+   // TODO: implement NEON-optimized code here and remove the following line:
+   _box_blur_alpha_horiz_step(srcdata, dstdata, radii, len, loops); // placeholder until then: passes every argument through unchanged
+}
+
+static inline void /* NEON slot for the vertical alpha (DATA8) box blur step */
+_box_blur_alpha_vert_step_neon(const DATA8* restrict const srcdata,
+                               DATA8* restrict const dstdata,
+                               const int* restrict const radii,
+                               const int len,
+                               const int loops)
+{
+   // TODO: implement NEON-optimized code here and remove the following line:
+   _box_blur_alpha_vert_step(srcdata, dstdata, radii, len, loops); // placeholder until then: passes every argument through unchanged
+}
+
+#endif
diff --git a/src/lib/evas/filters/blur/blur_box_alpha_sse3.c b/src/lib/evas/filters/blur/blur_box_alpha_sse3.c
new file mode 100644 (file)
index 0000000..17a19f1
--- /dev/null
@@ -0,0 +1,25 @@
+#ifdef BUILD_SSE3
+
+static inline void /* SSE3 slot for the horizontal alpha (DATA8) box blur step */
+_box_blur_alpha_horiz_step_sse3(const DATA8* restrict const srcdata,
+                                DATA8* restrict const dstdata,
+                                const int* restrict const radii,
+                                const int len,
+                                const int loops)
+{
+   // TODO: implement SSE3-optimized code here and remove the following line:
+   _box_blur_alpha_horiz_step(srcdata, dstdata, radii, len, loops); // placeholder until then: passes every argument through unchanged
+}
+
+static inline void /* SSE3 slot for the vertical alpha (DATA8) box blur step */
+_box_blur_alpha_vert_step_sse3(const DATA8* restrict const srcdata,
+                               DATA8* restrict const dstdata,
+                               const int* restrict const radii,
+                               const int len,
+                               const int loops)
+{
+   // TODO: implement SSE3-optimized code here and remove the following line:
+   _box_blur_alpha_vert_step(srcdata, dstdata, radii, len, loops); // placeholder until then: passes every argument through unchanged
+}
+
+#endif
index 7c22c65..606f571 100644 (file)
@@ -8,7 +8,7 @@
 #include "../evas_filter_private.h"
 
 static inline void
-_box_blur_horiz_rgba_step(const DATA32* restrict const srcdata,
+_box_blur_rgba_horiz_step(const DATA32* restrict const srcdata,
                           DATA32* restrict const dstdata,
                           const int* restrict const radii,
                           const int len,
@@ -149,7 +149,7 @@ _box_blur_horiz_rgba_step(const DATA32* restrict const srcdata,
 }
 
 static inline void
-_box_blur_vert_rgba_step(const DATA32* restrict const srcdata,
+_box_blur_rgba_vert_step(const DATA32* restrict const srcdata,
                          DATA32* restrict const dstdata,
                          const int* restrict const radii,
                          const int len,
diff --git a/src/lib/evas/filters/blur/blur_box_rgba_i386.c b/src/lib/evas/filters/blur/blur_box_rgba_i386.c
new file mode 100644 (file)
index 0000000..7f0f76b
--- /dev/null
@@ -0,0 +1,25 @@
+#ifdef BUILD_MMX
+
+static inline void /* MMX slot for the horizontal RGBA (DATA32) box blur step */
+_box_blur_rgba_horiz_step_mmx(const DATA32* restrict const srcdata,
+                              DATA32* restrict const dstdata,
+                              const int* restrict const radii,
+                              const int len,
+                              const int loops)
+{
+   // TODO: implement MMX-optimized code here and remove the following line:
+   _box_blur_rgba_horiz_step(srcdata, dstdata, radii, len, loops); // placeholder until then: passes every argument through unchanged
+}
+
+static inline void /* MMX slot for the vertical RGBA (DATA32) box blur step */
+_box_blur_rgba_vert_step_mmx(const DATA32* restrict const srcdata,
+                             DATA32* restrict const dstdata,
+                             const int* restrict const radii,
+                             const int len,
+                             const int loops)
+{
+   // TODO: implement MMX-optimized code here and remove the following line:
+   _box_blur_rgba_vert_step(srcdata, dstdata, radii, len, loops); // placeholder until then: passes every argument through unchanged
+}
+
+#endif
diff --git a/src/lib/evas/filters/blur/blur_box_rgba_neon.c b/src/lib/evas/filters/blur/blur_box_rgba_neon.c
new file mode 100644 (file)
index 0000000..9206df5
--- /dev/null
@@ -0,0 +1,25 @@
+#ifdef BUILD_NEON
+
+static inline void /* NEON slot for the horizontal RGBA (DATA32) box blur step */
+_box_blur_rgba_horiz_step_neon(const DATA32* restrict const srcdata,
+                               DATA32* restrict const dstdata,
+                               const int* restrict const radii,
+                               const int len,
+                               const int loops)
+{
+   // TODO: implement NEON-optimized code here and remove the following line:
+   _box_blur_rgba_horiz_step(srcdata, dstdata, radii, len, loops); // placeholder until then: passes every argument through unchanged
+}
+
+static inline void /* NEON slot for the vertical RGBA (DATA32) box blur step */
+_box_blur_rgba_vert_step_neon(const DATA32* restrict const srcdata,
+                              DATA32* restrict const dstdata,
+                              const int* restrict const radii,
+                              const int len,
+                              const int loops)
+{
+   // TODO: implement NEON-optimized code here and remove the following line:
+   _box_blur_rgba_vert_step(srcdata, dstdata, radii, len, loops); // placeholder until then: passes every argument through unchanged
+}
+
+#endif
diff --git a/src/lib/evas/filters/blur/blur_box_rgba_sse3.c b/src/lib/evas/filters/blur/blur_box_rgba_sse3.c
new file mode 100644 (file)
index 0000000..03ae282
--- /dev/null
@@ -0,0 +1,25 @@
+#ifdef BUILD_SSE3
+
+static inline void /* SSE3 slot for the horizontal RGBA (DATA32) box blur step */
+_box_blur_rgba_horiz_step_sse3(const DATA32* restrict const srcdata,
+                               DATA32* restrict const dstdata,
+                               const int* restrict const radii,
+                               const int len,
+                               const int loops)
+{
+   // TODO: implement SSE3-optimized code here and remove the following line:
+   _box_blur_rgba_horiz_step(srcdata, dstdata, radii, len, loops); // placeholder until then: passes every argument through unchanged
+}
+
+static inline void /* SSE3 slot for the vertical RGBA (DATA32) box blur step */
+_box_blur_rgba_vert_step_sse3(const DATA32* restrict const srcdata,
+                              DATA32* restrict const dstdata,
+                              const int* restrict const radii,
+                              const int len,
+                              const int loops)
+{
+   // TODO: implement SSE3-optimized code here and remove the following line:
+   _box_blur_rgba_vert_step(srcdata, dstdata, radii, len, loops); // placeholder until then: passes every argument through unchanged
+}
+
+#endif
index 955fc64..7af357f 100644 (file)
@@ -34,12 +34,45 @@ _box_blur_auto_radius(int *radii, int r)
 }
 
 #include "./blur/blur_box_rgba_.c"
+#ifdef BUILD_MMX
+#include "./blur/blur_box_rgba_i386.c"
+#endif
+#ifdef BUILD_SSE3
+#include "./blur/blur_box_rgba_sse3.c"
+#endif
+#ifdef BUILD_NEON
+#include "./blur/blur_box_rgba_neon.c"
+#endif
 
 static void
 _box_blur_horiz_rgba(DATA32 *src, DATA32 *dst, int* radii, int w, int h)
 {
    DEBUG_TIME_BEGIN();
-   _box_blur_horiz_rgba_step(src, dst, radii, w, h);
+   /* Try each compiled-in variant in turn (SSE3, MMX, NEON); the first whose CPU feature check succeeds runs and jumps to 'end'. */
+#ifdef BUILD_SSE3
+   if (evas_common_cpu_has_feature(CPU_FEATURE_SSE3))
+     {
+        _box_blur_rgba_horiz_step_sse3(src, dst, radii, w, h);
+        goto end;
+     }
+#endif
+#ifdef BUILD_MMX
+   if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
+     {
+        _box_blur_rgba_horiz_step_mmx(src, dst, radii, w, h);
+        goto end;
+     }
+#endif
+#ifdef BUILD_NEON
+   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
+     {
+        _box_blur_rgba_horiz_step_neon(src, dst, radii, w, h);
+        goto end;
+     }
+#endif
+   _box_blur_rgba_horiz_step(src, dst, radii, w, h); /* reached only when no variant above ran; w is passed as len, h as loops */
+
+end: /* NOTE(review): only targeted from inside the BUILD_* sections; may be reported as an unused label if none is defined */
    DEBUG_TIME_END();
 }
 
@@ -47,7 +80,31 @@ static void
 _box_blur_vert_rgba(DATA32 *src, DATA32 *dst, int* radii, int w, int h)
 {
    DEBUG_TIME_BEGIN();
-   _box_blur_vert_rgba_step(src, dst, radii, h, w);
+   /* Try each compiled-in variant in turn (SSE3, MMX, NEON); the first whose CPU feature check succeeds runs and jumps to 'end'. */
+#ifdef BUILD_SSE3
+   if (evas_common_cpu_has_feature(CPU_FEATURE_SSE3))
+     {
+        _box_blur_rgba_vert_step_sse3(src, dst, radii, h, w);
+        goto end;
+     }
+#endif
+#ifdef BUILD_MMX
+   if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
+     {
+        _box_blur_rgba_vert_step_mmx(src, dst, radii, h, w);
+        goto end;
+     }
+#endif
+#ifdef BUILD_NEON
+   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
+     {
+        _box_blur_rgba_vert_step_neon(src, dst, radii, h, w);
+        goto end;
+     }
+#endif
+   _box_blur_rgba_vert_step(src, dst, radii, h, w); /* reached only when no variant above ran; h is passed as len, w as loops */
+
+end: /* NOTE(review): only targeted from inside the BUILD_* sections; may be reported as an unused label if none is defined */
    DEBUG_TIME_END();
 }
 
@@ -110,12 +167,45 @@ _box_blur_vert_apply_rgba(Evas_Filter_Command *cmd)
 }
 
 #include "./blur/blur_box_alpha_.c"
+#ifdef BUILD_MMX
+#include "./blur/blur_box_alpha_i386.c"
+#endif
+#ifdef BUILD_SSE3
+#include "./blur/blur_box_alpha_sse3.c"
+#endif
+#ifdef BUILD_NEON
+#include "./blur/blur_box_alpha_neon.c"
+#endif
 
 static void
 _box_blur_horiz_alpha(DATA8 *src, DATA8 *dst, int* radii, int w, int h)
 {
    DEBUG_TIME_BEGIN();
+   /* Try each compiled-in variant in turn (SSE3, MMX, NEON); the first whose CPU feature check succeeds runs and jumps to 'end'. Otherwise the plain step call below runs. */
+#ifdef BUILD_SSE3
+   if (evas_common_cpu_has_feature(CPU_FEATURE_SSE3))
+     {
+        _box_blur_alpha_horiz_step_sse3(src, dst, radii, w, h);
+        goto end;
+     }
+#endif
+#ifdef BUILD_MMX
+   if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
+     {
+        _box_blur_alpha_horiz_step_mmx(src, dst, radii, w, h);
+        goto end;
+     }
+#endif
+#ifdef BUILD_NEON
+   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
+     {
+        _box_blur_alpha_horiz_step_neon(src, dst, radii, w, h);
+        goto end;
+     }
+#endif
    _box_blur_alpha_horiz_step(src, dst, radii, w, h);
+
+end: /* NOTE(review): only targeted from inside the BUILD_* sections; may be reported as an unused label if none is defined */
    DEBUG_TIME_END();
 }
 
@@ -123,7 +213,31 @@ static void
 _box_blur_vert_alpha(DATA8 *src, DATA8 *dst, int* radii, int w, int h)
 {
    DEBUG_TIME_BEGIN();
+   /* Try each compiled-in variant in turn (SSE3, MMX, NEON); the first whose CPU feature check succeeds runs and jumps to 'end'. Every call here passes h as len and w as loops. */
+#ifdef BUILD_SSE3
+   if (evas_common_cpu_has_feature(CPU_FEATURE_SSE3))
+     {
+        _box_blur_alpha_vert_step_sse3(src, dst, radii, h, w);
+        goto end;
+     }
+#endif
+#ifdef BUILD_MMX
+   if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
+     {
+        _box_blur_alpha_vert_step_mmx(src, dst, radii, h, w);
+        goto end;
+     }
+#endif
+#ifdef BUILD_NEON
+   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
+     {
+        _box_blur_alpha_vert_step_neon(src, dst, radii, h, w);
+        goto end;
+     }
+#endif
    _box_blur_alpha_vert_step(src, dst, radii, h, w);
+
+end: /* NOTE(review): only targeted from inside the BUILD_* sections; may be reported as an unused label if none is defined */
    DEBUG_TIME_END();
 }