From 634034af4615234fc017f71a2d7c67b1b1745800 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Andre
Date: Tue, 25 Mar 2014 16:54:57 +0900
Subject: [PATCH] Evas filters: Add template optimizable files for box blur

The new files (i386, sse3 and neon) are basically empty and fallback
to the C version. This is just to pave the way for full low-level
optimization... if someone has the time and skills to do it :)

Add both Alpha and RGBA template files.
---
 src/Makefile_Evas.am                            |   9 +-
 src/lib/evas/filters/blur/blur_box_alpha_i386.c |  25 +++++
 src/lib/evas/filters/blur/blur_box_alpha_neon.c |  25 +++++
 src/lib/evas/filters/blur/blur_box_alpha_sse3.c |  25 +++++
 src/lib/evas/filters/blur/blur_box_rgba_.c      |   4 +-
 src/lib/evas/filters/blur/blur_box_rgba_i386.c  |  25 +++++
 src/lib/evas/filters/blur/blur_box_rgba_neon.c  |  25 +++++
 src/lib/evas/filters/blur/blur_box_rgba_sse3.c  |  25 +++++
 src/lib/evas/filters/evas_filter_blur.c         | 118 +++++++++++++++++++++++-
 9 files changed, 276 insertions(+), 5 deletions(-)
 create mode 100644 src/lib/evas/filters/blur/blur_box_alpha_i386.c
 create mode 100644 src/lib/evas/filters/blur/blur_box_alpha_neon.c
 create mode 100644 src/lib/evas/filters/blur/blur_box_alpha_sse3.c
 create mode 100644 src/lib/evas/filters/blur/blur_box_rgba_i386.c
 create mode 100644 src/lib/evas/filters/blur/blur_box_rgba_neon.c
 create mode 100644 src/lib/evas/filters/blur/blur_box_rgba_sse3.c

diff --git a/src/Makefile_Evas.am b/src/Makefile_Evas.am
index 540d08d..34d3315 100644
--- a/src/Makefile_Evas.am
+++ b/src/Makefile_Evas.am
@@ -496,7 +496,14 @@ EXTRA_DIST += \
 lib/evas/filters/blur/blur_gaussian_alpha_.c \
 lib/evas/filters/blur/blur_gaussian_rgba_.c \
 lib/evas/filters/blur/blur_box_alpha_.c \
-lib/evas/filters/blur/blur_box_rgba_.c
+lib/evas/filters/blur/blur_box_alpha_i386.c \
+lib/evas/filters/blur/blur_box_alpha_sse3.c \
+lib/evas/filters/blur/blur_box_alpha_neon.c \
+lib/evas/filters/blur/blur_box_rgba_.c \
+lib/evas/filters/blur/blur_box_rgba_i386.c \
+lib/evas/filters/blur/blur_box_rgba_sse3.c \
+lib/evas/filters/blur/blur_box_rgba_neon.c
+
 
 ### Engines
 
diff --git a/src/lib/evas/filters/blur/blur_box_alpha_i386.c b/src/lib/evas/filters/blur/blur_box_alpha_i386.c
new file mode 100644
index 0000000..1664431
--- /dev/null
+++ b/src/lib/evas/filters/blur/blur_box_alpha_i386.c
@@ -0,0 +1,25 @@
+#ifdef BUILD_MMX
+
+static inline void
+_box_blur_alpha_horiz_step_mmx(const DATA8* restrict const srcdata,
+                               DATA8* restrict const dstdata,
+                               const int* restrict const radii,
+                               const int len,
+                               const int loops)
+{
+   // TODO: implement optimized code here and remove the following line:
+   _box_blur_alpha_horiz_step(srcdata, dstdata, radii, len, loops);
+}
+
+static inline void
+_box_blur_alpha_vert_step_mmx(const DATA8* restrict const srcdata,
+                              DATA8* restrict const dstdata,
+                              const int* restrict const radii,
+                              const int len,
+                              const int loops)
+{
+   // TODO: implement optimized code here and remove the following line:
+   _box_blur_alpha_vert_step(srcdata, dstdata, radii, len, loops);
+}
+
+#endif
diff --git a/src/lib/evas/filters/blur/blur_box_alpha_neon.c b/src/lib/evas/filters/blur/blur_box_alpha_neon.c
new file mode 100644
index 0000000..b8d9524
--- /dev/null
+++ b/src/lib/evas/filters/blur/blur_box_alpha_neon.c
@@ -0,0 +1,25 @@
+#ifdef BUILD_NEON
+
+static inline void
+_box_blur_alpha_horiz_step_neon(const DATA8* restrict const srcdata,
+                                DATA8* restrict const dstdata,
+                                const int* restrict const radii,
+                                const int len,
+                                const int loops)
+{
+   // TODO: implement optimized code here and remove the following line:
+   _box_blur_alpha_horiz_step(srcdata, dstdata, radii, len, loops);
+}
+
+static inline void
+_box_blur_alpha_vert_step_neon(const DATA8* restrict const srcdata,
+                               DATA8* restrict const dstdata,
+                               const int* restrict const radii,
+                               const int len,
+                               const int loops)
+{
+   // TODO: implement optimized code here and remove the following line:
+   _box_blur_alpha_vert_step(srcdata, dstdata, radii, len, loops);
+}
+
+#endif
diff --git a/src/lib/evas/filters/blur/blur_box_alpha_sse3.c b/src/lib/evas/filters/blur/blur_box_alpha_sse3.c
new file mode 100644
index 0000000..17a19f1
--- /dev/null
+++ b/src/lib/evas/filters/blur/blur_box_alpha_sse3.c
@@ -0,0 +1,25 @@
+#ifdef BUILD_SSE3
+
+static inline void
+_box_blur_alpha_horiz_step_sse3(const DATA8* restrict const srcdata,
+                                DATA8* restrict const dstdata,
+                                const int* restrict const radii,
+                                const int len,
+                                const int loops)
+{
+   // TODO: implement optimized code here and remove the following line:
+   _box_blur_alpha_horiz_step(srcdata, dstdata, radii, len, loops);
+}
+
+static inline void
+_box_blur_alpha_vert_step_sse3(const DATA8* restrict const srcdata,
+                               DATA8* restrict const dstdata,
+                               const int* restrict const radii,
+                               const int len,
+                               const int loops)
+{
+   // TODO: implement optimized code here and remove the following line:
+   _box_blur_alpha_vert_step(srcdata, dstdata, radii, len, loops);
+}
+
+#endif
diff --git a/src/lib/evas/filters/blur/blur_box_rgba_.c b/src/lib/evas/filters/blur/blur_box_rgba_.c
index 7c22c65..606f571 100644
--- a/src/lib/evas/filters/blur/blur_box_rgba_.c
+++ b/src/lib/evas/filters/blur/blur_box_rgba_.c
@@ -8,7 +8,7 @@
 #include "../evas_filter_private.h"
 
 static inline void
-_box_blur_horiz_rgba_step(const DATA32* restrict const srcdata,
+_box_blur_rgba_horiz_step(const DATA32* restrict const srcdata,
                           DATA32* restrict const dstdata,
                           const int* restrict const radii,
                           const int len,
@@ -149,7 +149,7 @@ _box_blur_horiz_rgba_step(const DATA32* restrict const srcdata,
 }
 
 static inline void
-_box_blur_vert_rgba_step(const DATA32* restrict const srcdata,
+_box_blur_rgba_vert_step(const DATA32* restrict const srcdata,
                          DATA32* restrict const dstdata,
                          const int* restrict const radii,
                          const int len,
diff --git a/src/lib/evas/filters/blur/blur_box_rgba_i386.c b/src/lib/evas/filters/blur/blur_box_rgba_i386.c
new file mode 100644
index 0000000..7f0f76b
--- /dev/null
+++ b/src/lib/evas/filters/blur/blur_box_rgba_i386.c
@@ -0,0 +1,25 @@
+#ifdef BUILD_MMX
+
+static inline void
+_box_blur_rgba_horiz_step_mmx(const DATA32* restrict const srcdata,
+                              DATA32* restrict const dstdata,
+                              const int* restrict const radii,
+                              const int len,
+                              const int loops)
+{
+   // TODO: implement optimized code here and remove the following line:
+   _box_blur_rgba_horiz_step(srcdata, dstdata, radii, len, loops);
+}
+
+static inline void
+_box_blur_rgba_vert_step_mmx(const DATA32* restrict const srcdata,
+                             DATA32* restrict const dstdata,
+                             const int* restrict const radii,
+                             const int len,
+                             const int loops)
+{
+   // TODO: implement optimized code here and remove the following line:
+   _box_blur_rgba_vert_step(srcdata, dstdata, radii, len, loops);
+}
+
+#endif
diff --git a/src/lib/evas/filters/blur/blur_box_rgba_neon.c b/src/lib/evas/filters/blur/blur_box_rgba_neon.c
new file mode 100644
index 0000000..9206df5
--- /dev/null
+++ b/src/lib/evas/filters/blur/blur_box_rgba_neon.c
@@ -0,0 +1,25 @@
+#ifdef BUILD_NEON
+
+static inline void
+_box_blur_rgba_horiz_step_neon(const DATA32* restrict const srcdata,
+                               DATA32* restrict const dstdata,
+                               const int* restrict const radii,
+                               const int len,
+                               const int loops)
+{
+   // TODO: implement optimized code here and remove the following line:
+   _box_blur_rgba_horiz_step(srcdata, dstdata, radii, len, loops);
+}
+
+static inline void
+_box_blur_rgba_vert_step_neon(const DATA32* restrict const srcdata,
+                              DATA32* restrict const dstdata,
+                              const int* restrict const radii,
+                              const int len,
+                              const int loops)
+{
+   // TODO: implement optimized code here and remove the following line:
+   _box_blur_rgba_vert_step(srcdata, dstdata, radii, len, loops);
+}
+
+#endif
diff --git a/src/lib/evas/filters/blur/blur_box_rgba_sse3.c b/src/lib/evas/filters/blur/blur_box_rgba_sse3.c
new file mode 100644
index 0000000..03ae282
--- /dev/null
+++ b/src/lib/evas/filters/blur/blur_box_rgba_sse3.c
@@ -0,0 +1,25 @@
+#ifdef BUILD_SSE3
+
+static inline void
+_box_blur_rgba_horiz_step_sse3(const DATA32* restrict const srcdata,
+                               DATA32* restrict const dstdata,
+                               const int* restrict const radii,
+                               const int len,
+                               const int loops)
+{
+   // TODO: implement optimized code here and remove the following line:
+   _box_blur_rgba_horiz_step(srcdata, dstdata, radii, len, loops);
+}
+
+static inline void
+_box_blur_rgba_vert_step_sse3(const DATA32* restrict const srcdata,
+                              DATA32* restrict const dstdata,
+                              const int* restrict const radii,
+                              const int len,
+                              const int loops)
+{
+   // TODO: implement optimized code here and remove the following line:
+   _box_blur_rgba_vert_step(srcdata, dstdata, radii, len, loops);
+}
+
+#endif
diff --git a/src/lib/evas/filters/evas_filter_blur.c b/src/lib/evas/filters/evas_filter_blur.c
index 955fc64..7af357f 100644
--- a/src/lib/evas/filters/evas_filter_blur.c
+++ b/src/lib/evas/filters/evas_filter_blur.c
@@ -34,12 +34,45 @@ _box_blur_auto_radius(int *radii, int r)
 }
 
 #include "./blur/blur_box_rgba_.c"
+#ifdef BUILD_MMX
+#include "./blur/blur_box_rgba_i386.c"
+#endif
+#ifdef BUILD_SSE3
+#include "./blur/blur_box_rgba_sse3.c"
+#endif
+#ifdef BUILD_NEON
+#include "./blur/blur_box_rgba_neon.c"
+#endif
 
 static void
 _box_blur_horiz_rgba(DATA32 *src, DATA32 *dst, int* radii, int w, int h)
 {
    DEBUG_TIME_BEGIN();
-   _box_blur_horiz_rgba_step(src, dst, radii, w, h);
+
+#ifdef BUILD_SSE3
+   if (evas_common_cpu_has_feature(CPU_FEATURE_SSE3))
+     {
+        _box_blur_rgba_horiz_step_sse3(src, dst, radii, w, h);
+        goto end;
+     }
+#endif
+#ifdef BUILD_MMX
+   if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
+     {
+        _box_blur_rgba_horiz_step_mmx(src, dst, radii, w, h);
+        goto end;
+     }
+#endif
+#ifdef BUILD_NEON
+   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
+     {
+        _box_blur_rgba_horiz_step_neon(src, dst, radii, w, h);
+        goto end;
+     }
+#endif
+   _box_blur_rgba_horiz_step(src, dst, radii, w, h);
+
+end:
    DEBUG_TIME_END();
 }
 
@@ -47,7 +80,31 @@ static void
 _box_blur_vert_rgba(DATA32 *src, DATA32 *dst, int* radii, int w, int h)
 {
    DEBUG_TIME_BEGIN();
-   _box_blur_vert_rgba_step(src, dst, radii, h, w);
+
+#ifdef BUILD_SSE3
+   if (evas_common_cpu_has_feature(CPU_FEATURE_SSE3))
+     {
+        _box_blur_rgba_vert_step_sse3(src, dst, radii, h, w);
+        goto end;
+     }
+#endif
+#ifdef BUILD_MMX
+   if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
+     {
+        _box_blur_rgba_vert_step_mmx(src, dst, radii, h, w);
+        goto end;
+     }
+#endif
+#ifdef BUILD_NEON
+   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
+     {
+        _box_blur_rgba_vert_step_neon(src, dst, radii, h, w);
+        goto end;
+     }
+#endif
+   _box_blur_rgba_vert_step(src, dst, radii, h, w);
+
+end:
    DEBUG_TIME_END();
 }
 
@@ -110,12 +167,45 @@ _box_blur_vert_apply_rgba(Evas_Filter_Command *cmd)
 }
 
 #include "./blur/blur_box_alpha_.c"
+#ifdef BUILD_MMX
+#include "./blur/blur_box_alpha_i386.c"
+#endif
+#ifdef BUILD_SSE3
+#include "./blur/blur_box_alpha_sse3.c"
+#endif
+#ifdef BUILD_NEON
+#include "./blur/blur_box_alpha_neon.c"
+#endif
 
 static void
 _box_blur_horiz_alpha(DATA8 *src, DATA8 *dst, int* radii, int w, int h)
 {
    DEBUG_TIME_BEGIN();
+
+#ifdef BUILD_SSE3
+   if (evas_common_cpu_has_feature(CPU_FEATURE_SSE3))
+     {
+        _box_blur_alpha_horiz_step_sse3(src, dst, radii, w, h);
+        goto end;
+     }
+#endif
+#ifdef BUILD_MMX
+   if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
+     {
+        _box_blur_alpha_horiz_step_mmx(src, dst, radii, w, h);
+        goto end;
+     }
+#endif
+#ifdef BUILD_NEON
+   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
+     {
+        _box_blur_alpha_horiz_step_neon(src, dst, radii, w, h);
+        goto end;
+     }
+#endif
    _box_blur_alpha_horiz_step(src, dst, radii, w, h);
+
+end:
    DEBUG_TIME_END();
 }
 
@@ -123,7 +213,31 @@ static void
 _box_blur_vert_alpha(DATA8 *src, DATA8 *dst, int* radii, int w, int h)
 {
    DEBUG_TIME_BEGIN();
+
+#ifdef BUILD_SSE3
+   if (evas_common_cpu_has_feature(CPU_FEATURE_SSE3))
+     {
+        _box_blur_alpha_vert_step_sse3(src, dst, radii, h, w);
+        goto end;
+     }
+#endif
+#ifdef BUILD_MMX
+   if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
+     {
+        _box_blur_alpha_vert_step_mmx(src, dst, radii, h, w);
+        goto end;
+     }
+#endif
+#ifdef BUILD_NEON
+   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
+     {
+        _box_blur_alpha_vert_step_neon(src, dst, radii, h, w);
+        goto end;
+     }
+#endif
    _box_blur_alpha_vert_step(src, dst, radii, h, w);
+
+end:
    DEBUG_TIME_END();
 }
 
-- 
2.7.4