From cb6970038959d6263a6334be31f69c23c7685a6e Mon Sep 17 00:00:00 2001 From: Jean-Philippe Andre Date: Mon, 10 Mar 2014 18:36:28 +0900 Subject: [PATCH] Evas filters: Add optimizable blur function Prepare optimization paths for blur operations, as they are VERY costly. This simple change, when using gcc -O3 flag, boosts horizontal blur performance by > 50%, because STEP is 1 (and so, memory accesses, increments, etc... are all very simple) The objective is to have support for NEON, MMX, SSE, too, with runtime detection. --- src/Makefile_Evas.am | 2 + src/lib/evas/filters/blur/blur_gaussian_alpha_.c | 74 +++++++++++++++++++ src/lib/evas/filters/evas_filter_blur.c | 91 ++++-------------------- src/lib/evas/filters/evas_filter_private.h | 5 ++ 4 files changed, 96 insertions(+), 76 deletions(-) create mode 100644 src/lib/evas/filters/blur/blur_gaussian_alpha_.c diff --git a/src/Makefile_Evas.am b/src/Makefile_Evas.am index 41f7747..9dca181 100644 --- a/src/Makefile_Evas.am +++ b/src/Makefile_Evas.am @@ -441,6 +441,8 @@ lib/evas/filters/evas_filter_transform.c \ lib/evas/filters/evas_filter_utils.c \ lib/evas/filters/evas_filter_private.h +EXTRA_DIST += \ +lib/evas/filters/blur/blur_gaussian_alpha_.c ### Engines diff --git a/src/lib/evas/filters/blur/blur_gaussian_alpha_.c b/src/lib/evas/filters/blur/blur_gaussian_alpha_.c new file mode 100644 index 0000000..297ddb7 --- /dev/null +++ b/src/lib/evas/filters/blur/blur_gaussian_alpha_.c @@ -0,0 +1,74 @@ +/* Datatypes and MIN macro */ +#include "evas_common_private.h" +#include "../evas_filter_private.h" + +#if !defined (FUNCTION_NAME) || !defined (STEP) +# error Must define FUNCTION_NAME and STEP +#endif + +static inline void +FUNCTION_NAME(const DATA8* restrict srcdata, DATA8* restrict dstdata, + const int radius, const int len, + const int loops, const int loopstep, + const int* restrict weights, const int pow2_divider) +{ + int i, j, k, acc, divider; + const int diameter = 2 * radius + 1; + const int left = MIN(radius, len); + const int right = MIN(radius, (len - radius)); + const DATA8* restrict s; + const DATA8* restrict src; + DATA8* restrict dst; + + for (i = loops; i; --i) + { + src = srcdata; + dst = dstdata; + + // left + for (k = 0; k < left; k++, dst += STEP) + { + acc = 0; + divider = 0; + s = src; + for (j = 0; j <= k + radius; j++, s += STEP) + { + acc += (*s) * weights[j + radius - k]; + divider += weights[j + radius - k]; + } + //if (!divider) abort(); + *dst = acc / divider; + } + + // middle + for (k = radius; k < (len - radius); k++, src += STEP, dst += STEP) + { + acc = 0; + s = src; + for (j = 0; j < diameter; j++, s += STEP) + acc += (*s) * weights[j]; + *dst = acc >> pow2_divider; + } + + // right + for (k = 0; k < right; k++, dst += STEP, src += STEP) + { + acc = 0; + divider = 0; + s = src; + for (j = 0; j < 2 * radius - k; j++, s += STEP) + { + acc += (*s) * weights[j]; + divider += weights[j]; + } + //if (!divider) abort(); + *dst = acc / divider; + } + + dstdata += loopstep; + srcdata += loopstep; + } +} + +#undef FUNCTION_NAME +#undef STEP diff --git a/src/lib/evas/filters/evas_filter_blur.c b/src/lib/evas/filters/evas_filter_blur.c index 6d07813..b29654a 100644 --- a/src/lib/evas/filters/evas_filter_blur.c +++ b/src/lib/evas/filters/evas_filter_blur.c @@ -380,62 +380,6 @@ _sin_blur_weights_get(int *weights, int *pow2_divider, int radius) } static void -_gaussian_blur_step_alpha(DATA8 *src, DATA8 *dst, int radius, int len, int step, - int *weights, int pow2_divider) -{ - int j, k, acc, divider; - DATA8 *s = src; - const int diameter = 2 * radius + 1; - int left = MIN(radius, len); - int right = MIN(radius, (len - radius)); - - // left - for (k = 0; k < left; k++, dst += step) - { - acc = 0; - divider = 0; - s = src; - for (j = 0; j <= k + radius; j++, s += step) - { - acc += (*s) * weights[j + radius - k]; - divider += weights[j + radius - k]; - } - if (!divider) goto div_zero; - *dst = acc / divider; - } - - // middle - for (k = radius; k < (len - radius); k++, src += step, dst += step) - { - acc = 0; - s = src; - for (j = 0; j < diameter; j++, s += step) - acc += (*s) * weights[j]; - *dst = acc >> pow2_divider; - } - - // right - for (k = 0; k < right; k++, dst += step, src += step) - { - acc = 0; - divider = 0; - s = src; - for (j = 0; j < 2 * radius - k; j++, s += step) - { - acc += (*s) * weights[j]; - divider += weights[j]; - } - if (!divider) goto div_zero; - *dst = acc / divider; - } - - return; - -div_zero: - CRI("Division by zero avoided! Something is very wrong here!"); -} - -static void _gaussian_blur_step_rgba(DATA32 *src, DATA32 *dst, int radius, int len, int step, int *weights, int pow2_divider) { @@ -511,45 +455,40 @@ div_zero: CRI("Division by zero avoided! Something is very wrong here!"); } +#define FUNCTION_NAME _gaussian_blur_horiz_alpha_step +#define STEP 1 +#include "./blur/blur_gaussian_alpha_.c" + static void -_gaussian_blur_horiz_alpha(DATA8 *src, DATA8 *dst, int radius, int w, int h) +_gaussian_blur_horiz_alpha(const DATA8 *src, DATA8 *dst, int radius, int w, int h) { int *weights; - int k, pow2_div = 0; + int pow2_div = 0; weights = alloca((2 * radius + 1) * sizeof(int)); _sin_blur_weights_get(weights, &pow2_div, radius); DEBUG_TIME_BEGIN(); - - for (k = h; k; k--) - { - _gaussian_blur_step_alpha(src, dst, radius, w, 1, weights, pow2_div); - dst += w; - src += w; - } - + _gaussian_blur_horiz_alpha_step(src, dst, radius, w, h, w, weights, pow2_div); DEBUG_TIME_END(); } +// w steps, loops = w --> STEP = loops +#define FUNCTION_NAME _gaussian_blur_vert_alpha_step +#define STEP loops +#include "./blur/blur_gaussian_alpha_.c" + static void -_gaussian_blur_vert_alpha(DATA8 *src, DATA8 *dst, int radius, int w, int h) +_gaussian_blur_vert_alpha(const DATA8 *src, DATA8 *dst, int radius, int w, int h) { int *weights; - int k, pow2_div = 0; + int pow2_div = 0; weights = alloca((2 * radius + 1) * sizeof(int)); _sin_blur_weights_get(weights, &pow2_div, radius); DEBUG_TIME_BEGIN(); - - for (k = w; k; k--) - { - _gaussian_blur_step_alpha(src, dst, radius, h, w, weights, pow2_div); - dst += 1; - src += 1; - } - + _gaussian_blur_vert_alpha_step(src, dst, radius, h, w, 1, weights, pow2_div); DEBUG_TIME_END(); } diff --git a/src/lib/evas/filters/evas_filter_private.h b/src/lib/evas/filters/evas_filter_private.h index 6be8b63..45cc5c2 100644 --- a/src/lib/evas/filters/evas_filter_private.h +++ b/src/lib/evas/filters/evas_filter_private.h @@ -29,6 +29,11 @@ #define GREEN_OF(a) (((a) >> 8) & 0xff) #define BLUE_OF(a) ((a) & 0xff) +// The 'restrict' keyword is part of C99 +#if __STDC_VERSION__ < 199901L +# define restrict +#endif + // Helpers #define ENFN ctx->evas->engine.func #define ENDT ctx->evas->engine.data.output -- 2.7.4