Evas filters: Optimize RGBA blur as well
author Jean-Philippe Andre <jp.andre@samsung.com>
Wed, 12 Mar 2014 04:55:44 +0000 (13:55 +0900)
committer Jean-Philippe Andre <jp.andre@samsung.com>
Wed, 12 Mar 2014 05:08:02 +0000 (14:08 +0900)
As with the alpha blur, use a combination of box blurs,
and put all of that code into optimizable functions.

src/lib/evas/filters/blur/blur_box_alpha_.c
src/lib/evas/filters/blur/blur_box_rgba_.c
src/lib/evas/filters/evas_filter.c
src/lib/evas/filters/evas_filter_blur.c

index 4a9facd..71ac943 100644 (file)
@@ -1,6 +1,7 @@
 /* @file blur_box_alpha_.c
- * Defines the following function:
- * _box_blur_alpha_step
+ * Defines the following functions:
+ * _box_blur_alpha_horiz_step
+ * _box_blur_alpha_vert_step
  */
 
 #include "evas_common_private.h"
index b930d6e..95d381a 100644 (file)
 #include "evas_common_private.h"
 #include "../evas_filter_private.h"
 
-#if !defined (FUNCTION_NAME) || !defined (STEP)
-# error Must define FUNCTION_NAME and STEP
-#endif
-
 static inline void
-FUNCTION_NAME(const DATA32* restrict src, DATA32* restrict dst,
-              const int radius, const int len,
-              const int loops, const int loopstep)
+_box_blur_horiz_rgba_step(const DATA32* restrict const srcdata,
+                          DATA32* restrict const dstdata,
+                          const int* restrict const radii,
+                          const int len,
+                          const int loops)
 {
-   DEFINE_DIVIDER(2 * radius + 1);
-   const int left = MIN(radius, len);
-   const int right = MIN(radius, (len - radius));
+   const DATA32* restrict src;
+   DATA32* restrict dst;
+   DATA32* restrict span1;
+   DATA32* restrict span2;
+
+#if DIV_USING_BITSHIFT
+   int pow2_shifts[6] = {0};
+   int numerators[6] = {0};
+   for (int run = 0; radii[run]; run++)
+     {
+        const int div = radii[run] * 2 + 1;
+        pow2_shifts[run] = evas_filter_smallest_pow2_larger_than(div << 10);
+        numerators[run] = (1 << pow2_shifts[run]) / (div);
+     }
+#endif
 
-   for (int l = loops; l; --l)
+   span1 = alloca(len * sizeof(DATA32));
+   span2 = alloca(len * sizeof(DATA32));
+
+   // For each line, apply as many blurs as requested
+   for (int l = 0; l < loops; l++)
      {
-        int acc[4] = {0};
-        int x, k;
-        int divider;
+        int run;
 
-        const DATA8* restrict sl = (DATA8 *) src;
-        const DATA8* restrict sr = (DATA8 *) src;
-        DATA8* restrict d = (DATA8 *) dst;
+        // New line: reset source & destination pointers
+        src = srcdata + len * l;
+        if (!radii[1]) // Only one run
+          dst = dstdata + len * l;
+        else
+          dst = span1;
 
-        // Read-ahead
-        for (x = left; x; x--)
+        // Apply blur with current radius
+        for (run = 0; radii[run]; run++)
           {
-             for (k = 0; k < 4; k++)
-               acc[k] += sr[k];
-             sr += STEP;
+             const int radius = radii[run];
+             const int left = MIN(radius, len);
+             const int right = MIN(radius, (len - radius));
+
+#if DIV_USING_BITSHIFT
+             const int pow2 = pow2_shifts[run];
+             const int numerator = numerators[run];
+#else
+             const int divider = 2 * radius + 1;
+#endif
+
+             const DATA8* restrict sl = (DATA8 *) src;
+             const DATA8* restrict sr = (DATA8 *) src;
+             DATA8* restrict d = (DATA8 *) dst;
+             int acc[4] = {0};
+             int x, k;
+
+             // Read-ahead
+             for (x = left; x; x--)
+               {
+                  for (k = 0; k < 4; k++)
+                    acc[k] += sr[k];
+                  sr += sizeof(DATA32);
+               }
+
+             // Left
+             for (x = 0; x < left; x++)
+               {
+                  for (k = 0; k < 4; k++)
+                    acc[k] += sr[k];
+                  sr += sizeof(DATA32);
+
+                  const int divider = x + left + 1;
+                  d[ALPHA] = acc[ALPHA] / divider;
+                  d[RED]   = acc[RED]   / divider;
+                  d[GREEN] = acc[GREEN] / divider;
+                  d[BLUE]  = acc[BLUE]  / divider;
+                  d += sizeof(DATA32);
+               }
+
+             // Main part
+             for (x = len - (2 * radius); x > 0; x--)
+               {
+                  for (k = 0; k < 4; k++)
+                    acc[k] += sr[k];
+                  sr += sizeof(DATA32);
+
+                  d[ALPHA] = DIVIDE(acc[ALPHA]);
+                  d[RED]   = DIVIDE(acc[RED]);
+                  d[GREEN] = DIVIDE(acc[GREEN]);
+                  d[BLUE]  = DIVIDE(acc[BLUE]);
+                  d += sizeof(DATA32);
+
+                  for (k = 0; k < 4; k++)
+                    acc[k] -= sl[k];
+                  sl += sizeof(DATA32);
+               }
+
+             // Right part
+             for (x = right; x; x--)
+               {
+                  const int divider = x + right;
+                  d[ALPHA] = acc[ALPHA] / divider;
+                  d[RED]   = acc[RED]   / divider;
+                  d[GREEN] = acc[GREEN] / divider;
+                  d[BLUE]  = acc[BLUE]  / divider;
+                  d += sizeof(DATA32);
+
+                  for (k = 0; k < 4; k++)
+                    acc[k] -= sl[k];
+                  sl += sizeof(DATA32);
+               }
+
+             // More runs to go: swap spans
+             if (radii[run + 1])
+               {
+                  src = dst;
+                  if (radii[run + 2])
+                    {
+                       // Two more runs: swap
+                       DATA32* swap = span1;
+                       span1 = span2;
+                       span2 = swap;
+                       dst = span1;
+                    }
+                  else
+                    {
+                       // Last run: write directly to dstdata
+                       dst = dstdata + len * l;
+                    }
+               }
           }
+     }
+}
+
+static inline void
+_box_blur_vert_rgba_step(const DATA32* restrict const srcdata,
+                         DATA32* restrict const dstdata,
+                         const int* restrict const radii,
+                         const int len,
+                         const int loops)
+{
+   /* Note: This function tries to optimize cache hits by working on
+    * contiguous horizontal spans.
+    */
 
-        // Left
-        for (x = 0; x < left; x++)
+   const int step = loops;
+   DATA32* restrict src;
+   DATA32* restrict dst;
+   DATA32* restrict span1;
+   DATA32* restrict span2;
+
+#if DIV_USING_BITSHIFT
+   int pow2_shifts[6] = {0};
+   int numerators[6] = {0};
+   for (int run = 0; radii[run]; run++)
+     {
+        const int div = radii[run] * 2 + 1;
+        pow2_shifts[run] = evas_filter_smallest_pow2_larger_than(div << 10);
+        numerators[run] = (1 << pow2_shifts[run]) / (div);
+     }
+#endif
+
+   span1 = alloca(len * sizeof(DATA32));
+   span2 = alloca(len * sizeof(DATA32));
+
+   // For each column, apply as many blurs as requested
+   for (int l = 0; l < loops; l++)
+     {
+        int run;
+
+        // Rotate input into work span
+        const DATA32* srcptr = srcdata + l;
+        DATA32* s = span1;
+        for (int k = len; k; --k)
           {
-             for (k = 0; k < 4; k++)
-               acc[k] += sr[k];
-             sr += STEP;
-
-             divider = x + left + 1;
-             d[ALPHA] = acc[ALPHA] / divider;
-             d[RED]   = acc[RED]   / divider;
-             d[GREEN] = acc[GREEN] / divider;
-             d[BLUE]  = acc[BLUE]  / divider;
-             d += STEP;
+             *s++ = *srcptr;
+             srcptr += step;
           }
 
-        // Main part
-        for (x = len - (2 * radius); x > 0; x--)
+        src = span1;
+        dst = span2;
+
+        // Apply blur with current radius
+        for (run = 0; radii[run]; run++)
           {
-             for (k = 0; k < 4; k++)
-               acc[k] += sr[k];
-             sr += STEP;
-
-             d[ALPHA] = DIVIDE(acc[ALPHA]);
-             d[RED]   = DIVIDE(acc[RED]);
-             d[GREEN] = DIVIDE(acc[GREEN]);
-             d[BLUE]  = DIVIDE(acc[BLUE]);
-             d += STEP;
-
-             for (k = 0; k < 4; k++)
-               acc[k] -= sl[k];
-             sl += STEP;
+             const int radius = radii[run];
+             const int left = MIN(radius, len);
+             const int right = MIN(radius, (len - radius));
+
+#if DIV_USING_BITSHIFT
+             const int pow2 = pow2_shifts[run];
+             const int numerator = numerators[run];
+#else
+             const int divider = 2 * radius + 1;
+#endif
+
+             const DATA8* restrict sl = (DATA8 *) src;
+             const DATA8* restrict sr = (DATA8 *) src;
+             DATA8* restrict d = (DATA8 *) dst;
+             int acc[4] = {0};
+             int x, k;
+
+             // Read-ahead
+             for (x = left; x; x--)
+               {
+                  for (k = 0; k < 4; k++)
+                    acc[k] += sr[k];
+                  sr += sizeof(DATA32);
+               }
+
+             // Left
+             for (x = 0; x < left; x++)
+               {
+                  for (k = 0; k < 4; k++)
+                    acc[k] += sr[k];
+                  sr += sizeof(DATA32);
+
+                  const int divider = x + left + 1;
+                  d[ALPHA] = acc[ALPHA] / divider;
+                  d[RED]   = acc[RED]   / divider;
+                  d[GREEN] = acc[GREEN] / divider;
+                  d[BLUE]  = acc[BLUE]  / divider;
+                  d += sizeof(DATA32);
+               }
+
+             // Main part
+             for (x = len - (2 * radius); x > 0; x--)
+               {
+                  for (k = 0; k < 4; k++)
+                    acc[k] += sr[k];
+                  sr += sizeof(DATA32);
+
+                  d[ALPHA] = DIVIDE(acc[ALPHA]);
+                  d[RED]   = DIVIDE(acc[RED]);
+                  d[GREEN] = DIVIDE(acc[GREEN]);
+                  d[BLUE]  = DIVIDE(acc[BLUE]);
+                  d += sizeof(DATA32);
+
+                  for (k = 0; k < 4; k++)
+                    acc[k] -= sl[k];
+                  sl += sizeof(DATA32);
+               }
+
+             // Right part
+             for (x = right; x; x--)
+               {
+                  const int divider = x + right;
+                  d[ALPHA] = acc[ALPHA] / divider;
+                  d[RED]   = acc[RED]   / divider;
+                  d[GREEN] = acc[GREEN] / divider;
+                  d[BLUE]  = acc[BLUE]  / divider;
+                  d += sizeof(DATA32);
+
+                  for (k = 0; k < 4; k++)
+                    acc[k] -= sl[k];
+                  sl += sizeof(DATA32);
+               }
+
+             // More runs to go: swap spans
+             if (radii[run + 1])
+               {
+                  DATA32* swap = src;
+                  src = dst;
+                  dst = swap;
+               }
           }
 
-        // Right part
-        for (x = right; x; x--)
+        // Last run: rotate & copy back to destination
+        DATA32* restrict dstptr = dstdata + l;
+        for (int k = len; k; --k)
           {
-             divider = x + right;
-             d[ALPHA] = acc[ALPHA] / divider;
-             d[RED]   = acc[RED]   / divider;
-             d[GREEN] = acc[GREEN] / divider;
-             d[BLUE]  = acc[BLUE]  / divider;
-             d += STEP;
-
-             for (k = 0; k < 4; k++)
-               acc[k] -= sl[k];
-             sl += STEP;
+             *dstptr = *dst++;
+             dstptr += step;
           }
-
-        src += loopstep;
-        dst += loopstep;
      }
 }
-
-#undef FUNCTION_NAME
-#undef STEP
index 67006a2..5b59fcc 100644 (file)
@@ -915,6 +915,20 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx,
    if (dy < 0) dy = 0;
    if (!dx && !dy) goto fail;
 
+   in = _filter_buffer_get(ctx, inbuf);
+   if (!in)
+     {
+        ERR("Buffer %d does not exist [input].", inbuf);
+        goto fail;
+     }
+
+   out = _filter_buffer_get(ctx, outbuf);
+   if (!out)
+     {
+        ERR("Buffer %d does not exist [output].", outbuf);
+        goto fail;
+     }
+
    switch (type)
      {
       case EVAS_FILTER_BLUR_GAUSSIAN:
@@ -926,7 +940,6 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx,
         break;
 
       case EVAS_FILTER_BLUR_DEFAULT:
-        count = 1;
 
         /* In DEFAULT mode we cheat, depending on the size of the kernel:
          * For 1px to 2px, use true Gaussian blur.
@@ -938,6 +951,7 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx,
          * needed, of course!
          */
         {
+           const Eina_Bool alpha = in->alpha_only;
            int tmp_out = outbuf;
            int tmp_in = inbuf;
            int tmp_ox = ox;
@@ -946,7 +960,7 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx,
            id = -1;
            if (dx && dy)
              {
-                tmp = evas_filter_temporary_buffer_get(ctx, 0, 0, EINA_TRUE);
+                tmp = evas_filter_temporary_buffer_get(ctx, 0, 0, alpha);
                 if (!tmp) goto fail;
                 tmp_in = tmp_out = tmp->id;
                 tmp_ox = tmp_oy = 0;
@@ -989,20 +1003,6 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx,
         goto fail;
      }
 
-   in = _filter_buffer_get(ctx, inbuf);
-   if (!in)
-     {
-        ERR("Buffer %d does not exist [input].", inbuf);
-        goto fail;
-     }
-
-   out = _filter_buffer_get(ctx, outbuf);
-   if (!out)
-     {
-        ERR("Buffer %d does not exist [output].", outbuf);
-        goto fail;
-     }
-
    if (!in->alpha_only && out->alpha_only)
      {
         ERR("Output and input don't have the same format");
index 2220c9d..36a06e1 100644 (file)
@@ -33,27 +33,21 @@ _box_blur_auto_radius(int *radii, int r)
      }
 }
 
-#define FUNCTION_NAME _box_blur_horiz_rgba_step
-#define STEP (sizeof(DATA32))
 #include "./blur/blur_box_rgba_.c"
 
 static void
-_box_blur_horiz_rgba(DATA32 *src, DATA32 *dst, int radius, int w, int h)
+_box_blur_horiz_rgba(DATA32 *src, DATA32 *dst, int* radii, int w, int h)
 {
    DEBUG_TIME_BEGIN();
-   _box_blur_horiz_rgba_step(src, dst, radius, w, h, w);
+   _box_blur_horiz_rgba_step(src, dst, radii, w, h);
    DEBUG_TIME_END();
 }
 
-#define FUNCTION_NAME _box_blur_vert_rgba_step
-#define STEP (loops * sizeof(DATA32))
-#include "./blur/blur_box_rgba_.c"
-
 static void
-_box_blur_vert_rgba(DATA32 *src, DATA32 *dst, int radius, int w, int h)
+_box_blur_vert_rgba(DATA32 *src, DATA32 *dst, int* radii, int w, int h)
 {
    DEBUG_TIME_BEGIN();
-   _box_blur_vert_rgba_step(src, dst, radius, h, w, 1);
+   _box_blur_vert_rgba_step(src, dst, radii, h, w);
    DEBUG_TIME_END();
 }
 
@@ -61,6 +55,7 @@ static Eina_Bool
 _box_blur_horiz_apply_rgba(Evas_Filter_Command *cmd)
 {
    RGBA_Image *in, *out;
+   int radii[7] = {0};
    unsigned int r;
 
    EINA_SAFETY_ON_NULL_RETURN_VAL(cmd, EINA_FALSE);
@@ -71,11 +66,16 @@ _box_blur_horiz_apply_rgba(Evas_Filter_Command *cmd)
    in = cmd->input->backing;
    out = cmd->output->backing;
 
+   if (cmd->blur.auto_count)
+     _box_blur_auto_radius(radii, r);
+   else for (int k = 0; k < cmd->blur.count; k++)
+     radii[k] = r;
+
    EINA_SAFETY_ON_NULL_RETURN_VAL(in->image.data, EINA_FALSE);
    EINA_SAFETY_ON_NULL_RETURN_VAL(out->image.data, EINA_FALSE);
    EINA_SAFETY_ON_FALSE_RETURN_VAL(out->cache_entry.w >= (2*r + 1), EINA_FALSE);
 
-   _box_blur_horiz_rgba(in->image.data, out->image.data, r,
+   _box_blur_horiz_rgba(in->image.data, out->image.data, radii,
                         in->cache_entry.w, in->cache_entry.h);
 
    return EINA_TRUE;
@@ -85,6 +85,7 @@ static Eina_Bool
 _box_blur_vert_apply_rgba(Evas_Filter_Command *cmd)
 {
    RGBA_Image *in, *out;
+   int radii[7] = {0};
    unsigned int r;
 
    EINA_SAFETY_ON_NULL_RETURN_VAL(cmd, EINA_FALSE);
@@ -95,11 +96,16 @@ _box_blur_vert_apply_rgba(Evas_Filter_Command *cmd)
    in = cmd->input->backing;
    out = cmd->output->backing;
 
+   if (cmd->blur.auto_count)
+     _box_blur_auto_radius(radii, r);
+   else for (int k = 0; k < cmd->blur.count; k++)
+     radii[k] = r;
+
    EINA_SAFETY_ON_NULL_RETURN_VAL(in->image.data, EINA_FALSE);
    EINA_SAFETY_ON_NULL_RETURN_VAL(out->image.data, EINA_FALSE);
    EINA_SAFETY_ON_FALSE_RETURN_VAL(out->cache_entry.h >= (2*r + 1), EINA_FALSE);
 
-   _box_blur_vert_rgba(in->image.data, out->image.data, r,
+   _box_blur_vert_rgba(in->image.data, out->image.data, radii,
                        in->cache_entry.w, in->cache_entry.h);
 
    return EINA_TRUE;