#include "evas_common_private.h"
#include "../evas_filter_private.h"
-#if !defined (FUNCTION_NAME) || !defined (STEP)
-# error Must define FUNCTION_NAME and STEP
-#endif
-
static inline void
-FUNCTION_NAME(const DATA32* restrict src, DATA32* restrict dst,
- const int radius, const int len,
- const int loops, const int loopstep)
+_box_blur_horiz_rgba_step(const DATA32* restrict const srcdata, // Multi-pass box blur along rows: reads rows from srcdata, writes blurred rows to dstdata
+ DATA32* restrict const dstdata, // output, indexed with the same len * l row offset as srcdata
+ const int* restrict const radii, // zero-terminated list of radii; one blur pass is applied per entry
+ const int len, // pixels per row
+ const int loops) // number of rows to process
{
- DEFINE_DIVIDER(2 * radius + 1);
- const int left = MIN(radius, len);
- const int right = MIN(radius, (len - radius));
+ const DATA32* restrict src; // input of the current pass
+ DATA32* restrict dst; // output of the current pass
+ DATA32* restrict span1; // scratch row used between passes
+ DATA32* restrict span2; // second scratch row, alternated with span1
+
+#if DIV_USING_BITSHIFT
+ int pow2_shifts[6] = {0}; // per-pass shift amounts; NOTE(review): nothing here limits radii to 6 entries - confirm callers
+ int numerators[6] = {0}; // per-pass numerators paired with pow2_shifts
+ for (int run = 0; radii[run]; run++)
+ {
+ const int div = radii[run] * 2 + 1; // full window width for this radius
+ pow2_shifts[run] = evas_filter_smallest_pow2_larger_than(div << 10); // NOTE(review): helper is defined elsewhere; presumably returns a shift count - confirm
+ numerators[run] = (1 << pow2_shifts[run]) / (div); // 2^shift / div; presumably consumed by DIVIDE - confirm
+ }
+#endif
- for (int l = loops; l; --l)
+ span1 = alloca(len * sizeof(DATA32)); // one row of scratch from alloca; NOTE(review): len is not bounded here, large rows may exhaust the stack
+ span2 = alloca(len * sizeof(DATA32));
+
+ // For each line, apply as many blurs as requested
+ for (int l = 0; l < loops; l++)
{
- int acc[4] = {0};
- int x, k;
- int divider;
+ int run;
- const DATA8* restrict sl = (DATA8 *) src;
- const DATA8* restrict sr = (DATA8 *) src;
- DATA8* restrict d = (DATA8 *) dst;
+ // New line: reset source & destination pointers
+ src = srcdata + len * l; // start of row l in the input
+ if (!radii[1]) // Only one run
+ dst = dstdata + len * l; // single pass writes straight into the output row
+ else
+ dst = span1; // first of several passes writes to scratch
- // Read-ahead
- for (x = left; x; x--)
+ // Apply one blur pass per entry of radii
+ for (run = 0; radii[run]; run++)
{
- for (k = 0; k < 4; k++)
- acc[k] += sr[k];
- sr += STEP;
+ const int radius = radii[run];
+ const int left = MIN(radius, len); // length of the read-ahead and of the growing-window phase
+ const int right = MIN(radius, (len - radius)); // length of the shrinking-window phase; NOTE(review): negative when len < radius - confirm callers prevent this
+
+#if DIV_USING_BITSHIFT
+ const int pow2 = pow2_shifts[run]; // not referenced directly below; presumably used inside DIVIDE - confirm
+ const int numerator = numerators[run]; // not referenced directly below; presumably used inside DIVIDE - confirm
+#else
+ const int divider = 2 * radius + 1; // full window width; shadowed by the local dividers in the edge phases
+#endif
+
+ const DATA8* restrict sl = (DATA8 *) src; // byte pointer to the oldest pixel still in the sum
+ const DATA8* restrict sr = (DATA8 *) src; // byte pointer to the next pixel to add to the sum
+ DATA8* restrict d = (DATA8 *) dst; // byte pointer to the next output pixel
+ int acc[4] = {0}; // running sum, one entry per byte of a pixel
+ int x, k;
+
+ // Read-ahead
+ for (x = left; x; x--)
+ {
+ for (k = 0; k < 4; k++)
+ acc[k] += sr[k];
+ sr += sizeof(DATA32); // advance one pixel, counted in bytes
+ }
+
+ // Left
+ for (x = 0; x < left; x++)
+ {
+ for (k = 0; k < 4; k++)
+ acc[k] += sr[k];
+ sr += sizeof(DATA32);
+
+ const int divider = x + left + 1; // pixels summed so far: left from read-ahead plus x + 1 added here
+ d[ALPHA] = acc[ALPHA] / divider;
+ d[RED] = acc[RED] / divider;
+ d[GREEN] = acc[GREEN] / divider;
+ d[BLUE] = acc[BLUE] / divider;
+ d += sizeof(DATA32);
+ }
+
+ // Main part
+ for (x = len - (2 * radius); x > 0; x--) // runs only when len > 2 * radius, so left == radius here
+ {
+ for (k = 0; k < 4; k++)
+ acc[k] += sr[k]; // sum now covers 2 * radius + 1 pixels
+ sr += sizeof(DATA32);
+
+ d[ALPHA] = DIVIDE(acc[ALPHA]); // NOTE(review): DIVIDE is defined elsewhere; presumably divides by the full window width - confirm
+ d[RED] = DIVIDE(acc[RED]);
+ d[GREEN] = DIVIDE(acc[GREEN]);
+ d[BLUE] = DIVIDE(acc[BLUE]);
+ d += sizeof(DATA32);
+
+ for (k = 0; k < 4; k++)
+ acc[k] -= sl[k]; // drop the oldest pixel from the sum
+ sl += sizeof(DATA32);
+ }
+
+ // Right part
+ for (x = right; x; x--)
+ {
+ const int divider = x + right; // decreases by one per iteration as pixels are dropped and none are added
+ d[ALPHA] = acc[ALPHA] / divider;
+ d[RED] = acc[RED] / divider;
+ d[GREEN] = acc[GREEN] / divider;
+ d[BLUE] = acc[BLUE] / divider;
+ d += sizeof(DATA32);
+
+ for (k = 0; k < 4; k++)
+ acc[k] -= sl[k];
+ sl += sizeof(DATA32);
+ }
+
+ // More runs to go: swap spans
+ if (radii[run + 1])
+ {
+ src = dst; // output of this pass becomes input of the next
+ if (radii[run + 2])
+ {
+ // At least two more runs: alternate the scratch spans
+ DATA32* swap = span1;
+ span1 = span2;
+ span2 = swap;
+ dst = span1; // the span that is not the new src
+ }
+ else
+ {
+ // Next run is the last: write directly to dstdata
+ dst = dstdata + len * l;
+ }
+ }
}
+ }
+}
+
+static inline void
+_box_blur_vert_rgba_step(const DATA32* restrict const srcdata, // Multi-pass box blur over strided lines: line l starts at srcdata + l, samples are loops apart
+ DATA32* restrict const dstdata, // output, written with the same start offset and stride as srcdata
+ const int* restrict const radii, // zero-terminated list of radii; one blur pass is applied per entry
+ const int len, // samples per line
+ const int loops) // number of lines, also used as the stride between samples
+{
+ /* Note: This function tries to optimize cache hits by working on
+ * contiguous horizontal spans.
+ */
- // Left
- for (x = 0; x < left; x++)
+ const int step = loops; // distance in pixels between consecutive samples of one line
+ DATA32* restrict src; // input of the current pass
+ DATA32* restrict dst; // output of the current pass
+ DATA32* restrict span1; // contiguous scratch line
+ DATA32* restrict span2; // second contiguous scratch line
+
+#if DIV_USING_BITSHIFT
+ int pow2_shifts[6] = {0}; // per-pass shift amounts; NOTE(review): nothing here limits radii to 6 entries - confirm callers
+ int numerators[6] = {0}; // per-pass numerators paired with pow2_shifts
+ for (int run = 0; radii[run]; run++)
+ {
+ const int div = radii[run] * 2 + 1; // full window width for this radius
+ pow2_shifts[run] = evas_filter_smallest_pow2_larger_than(div << 10); // NOTE(review): helper is defined elsewhere; presumably returns a shift count - confirm
+ numerators[run] = (1 << pow2_shifts[run]) / (div); // 2^shift / div; presumably consumed by DIVIDE - confirm
+ }
+#endif
+
+ span1 = alloca(len * sizeof(DATA32)); // one line of scratch from alloca; NOTE(review): len is not bounded here, long lines may exhaust the stack
+ span2 = alloca(len * sizeof(DATA32));
+
+ // For each line, apply as many blurs as requested
+ for (int l = 0; l < loops; l++)
+ {
+ int run;
+
+ // Gather the strided input line l into the contiguous work span
+ const DATA32* srcptr = srcdata + l; // first sample of line l
+ DATA32* s = span1;
+ for (int k = len; k; --k)
{
- for (k = 0; k < 4; k++)
- acc[k] += sr[k];
- sr += STEP;
-
- divider = x + left + 1;
- d[ALPHA] = acc[ALPHA] / divider;
- d[RED] = acc[RED] / divider;
- d[GREEN] = acc[GREEN] / divider;
- d[BLUE] = acc[BLUE] / divider;
- d += STEP;
+ *s++ = *srcptr;
+ srcptr += step; // next sample of the same line
}
- // Main part
- for (x = len - (2 * radius); x > 0; x--)
+ src = span1; // every pass reads from one scratch span
+ dst = span2; // and writes to the other
+
+ // Apply one blur pass per entry of radii
+ for (run = 0; radii[run]; run++)
{
- for (k = 0; k < 4; k++)
- acc[k] += sr[k];
- sr += STEP;
-
- d[ALPHA] = DIVIDE(acc[ALPHA]);
- d[RED] = DIVIDE(acc[RED]);
- d[GREEN] = DIVIDE(acc[GREEN]);
- d[BLUE] = DIVIDE(acc[BLUE]);
- d += STEP;
-
- for (k = 0; k < 4; k++)
- acc[k] -= sl[k];
- sl += STEP;
+ const int radius = radii[run];
+ const int left = MIN(radius, len); // length of the read-ahead and of the growing-window phase
+ const int right = MIN(radius, (len - radius)); // length of the shrinking-window phase; NOTE(review): negative when len < radius - confirm callers prevent this
+
+#if DIV_USING_BITSHIFT
+ const int pow2 = pow2_shifts[run]; // not referenced directly below; presumably used inside DIVIDE - confirm
+ const int numerator = numerators[run]; // not referenced directly below; presumably used inside DIVIDE - confirm
+#else
+ const int divider = 2 * radius + 1; // full window width; shadowed by the local dividers in the edge phases
+#endif
+
+ const DATA8* restrict sl = (DATA8 *) src; // byte pointer to the oldest pixel still in the sum
+ const DATA8* restrict sr = (DATA8 *) src; // byte pointer to the next pixel to add to the sum
+ DATA8* restrict d = (DATA8 *) dst; // byte pointer to the next output pixel
+ int acc[4] = {0}; // running sum, one entry per byte of a pixel
+ int x, k;
+
+ // Read-ahead
+ for (x = left; x; x--)
+ {
+ for (k = 0; k < 4; k++)
+ acc[k] += sr[k];
+ sr += sizeof(DATA32); // spans are contiguous, so advance one pixel in bytes
+ }
+
+ // Left
+ for (x = 0; x < left; x++)
+ {
+ for (k = 0; k < 4; k++)
+ acc[k] += sr[k];
+ sr += sizeof(DATA32);
+
+ const int divider = x + left + 1; // pixels summed so far: left from read-ahead plus x + 1 added here
+ d[ALPHA] = acc[ALPHA] / divider;
+ d[RED] = acc[RED] / divider;
+ d[GREEN] = acc[GREEN] / divider;
+ d[BLUE] = acc[BLUE] / divider;
+ d += sizeof(DATA32);
+ }
+
+ // Main part
+ for (x = len - (2 * radius); x > 0; x--) // runs only when len > 2 * radius, so left == radius here
+ {
+ for (k = 0; k < 4; k++)
+ acc[k] += sr[k]; // sum now covers 2 * radius + 1 pixels
+ sr += sizeof(DATA32);
+
+ d[ALPHA] = DIVIDE(acc[ALPHA]); // NOTE(review): DIVIDE is defined elsewhere; presumably divides by the full window width - confirm
+ d[RED] = DIVIDE(acc[RED]);
+ d[GREEN] = DIVIDE(acc[GREEN]);
+ d[BLUE] = DIVIDE(acc[BLUE]);
+ d += sizeof(DATA32);
+
+ for (k = 0; k < 4; k++)
+ acc[k] -= sl[k]; // drop the oldest pixel from the sum
+ sl += sizeof(DATA32);
+ }
+
+ // Right part
+ for (x = right; x; x--)
+ {
+ const int divider = x + right; // decreases by one per iteration as pixels are dropped and none are added
+ d[ALPHA] = acc[ALPHA] / divider;
+ d[RED] = acc[RED] / divider;
+ d[GREEN] = acc[GREEN] / divider;
+ d[BLUE] = acc[BLUE] / divider;
+ d += sizeof(DATA32);
+
+ for (k = 0; k < 4; k++)
+ acc[k] -= sl[k];
+ sl += sizeof(DATA32);
+ }
+
+ // More runs to go: swap spans
+ if (radii[run + 1])
+ {
+ DATA32* swap = src; // exchange roles so the next pass reads what this pass wrote
+ src = dst;
+ dst = swap;
+ }
}
- // Right part
- for (x = right; x; x--)
+ // After the last run: scatter the result back to the strided destination
+ DATA32* restrict dstptr = dstdata + l; // first output sample of line l
+ for (int k = len; k; --k)
{
- divider = x + right;
- d[ALPHA] = acc[ALPHA] / divider;
- d[RED] = acc[RED] / divider;
- d[GREEN] = acc[GREEN] / divider;
- d[BLUE] = acc[BLUE] / divider;
- d += STEP;
-
- for (k = 0; k < 4; k++)
- acc[k] -= sl[k];
- sl += STEP;
+ *dstptr = *dst++; // NOTE(review): if radii is empty, dst still points at span2, which no pass has written
+ dstptr += step;
}
-
- src += loopstep;
- dst += loopstep;
}
}
-
-#undef FUNCTION_NAME
-#undef STEP
}
}
-#define FUNCTION_NAME _box_blur_horiz_rgba_step
-#define STEP (sizeof(DATA32))
#include "./blur/blur_box_rgba_.c"
static void
-_box_blur_horiz_rgba(DATA32 *src, DATA32 *dst, int radius, int w, int h)
+_box_blur_horiz_rgba(DATA32 *src, DATA32 *dst, int* radii, int w, int h) // Timed wrapper around the horizontal step function; radii is forwarded unchanged
{
DEBUG_TIME_BEGIN();
- _box_blur_horiz_rgba_step(src, dst, radius, w, h, w);
+ _box_blur_horiz_rgba_step(src, dst, radii, w, h); // w is passed as the row length, h as the row count
DEBUG_TIME_END();
}
-#define FUNCTION_NAME _box_blur_vert_rgba_step
-#define STEP (loops * sizeof(DATA32))
-#include "./blur/blur_box_rgba_.c"
-
static void
-_box_blur_vert_rgba(DATA32 *src, DATA32 *dst, int radius, int w, int h)
+_box_blur_vert_rgba(DATA32 *src, DATA32 *dst, int* radii, int w, int h) // Timed wrapper around the vertical step function; radii is forwarded unchanged
{
DEBUG_TIME_BEGIN();
- _box_blur_vert_rgba_step(src, dst, radius, h, w, 1);
+ _box_blur_vert_rgba_step(src, dst, radii, h, w); // h is passed as the line length, w as the line count (the step function also uses it as the sample stride)
DEBUG_TIME_END();
}
_box_blur_horiz_apply_rgba(Evas_Filter_Command *cmd)
{
RGBA_Image *in, *out;
+ int radii[7] = {0};
unsigned int r;
EINA_SAFETY_ON_NULL_RETURN_VAL(cmd, EINA_FALSE);
in = cmd->input->backing;
out = cmd->output->backing;
+ if (cmd->blur.auto_count)
+ _box_blur_auto_radius(radii, r);
+ else for (int k = 0; k < cmd->blur.count; k++)
+ radii[k] = r;
+
EINA_SAFETY_ON_NULL_RETURN_VAL(in->image.data, EINA_FALSE);
EINA_SAFETY_ON_NULL_RETURN_VAL(out->image.data, EINA_FALSE);
EINA_SAFETY_ON_FALSE_RETURN_VAL(out->cache_entry.w >= (2*r + 1), EINA_FALSE);
- _box_blur_horiz_rgba(in->image.data, out->image.data, r,
+ _box_blur_horiz_rgba(in->image.data, out->image.data, radii,
in->cache_entry.w, in->cache_entry.h);
return EINA_TRUE;
_box_blur_vert_apply_rgba(Evas_Filter_Command *cmd)
{
RGBA_Image *in, *out;
+ int radii[7] = {0};
unsigned int r;
EINA_SAFETY_ON_NULL_RETURN_VAL(cmd, EINA_FALSE);
in = cmd->input->backing;
out = cmd->output->backing;
+ if (cmd->blur.auto_count)
+ _box_blur_auto_radius(radii, r);
+ else for (int k = 0; k < cmd->blur.count; k++)
+ radii[k] = r;
+
EINA_SAFETY_ON_NULL_RETURN_VAL(in->image.data, EINA_FALSE);
EINA_SAFETY_ON_NULL_RETURN_VAL(out->image.data, EINA_FALSE);
EINA_SAFETY_ON_FALSE_RETURN_VAL(out->cache_entry.h >= (2*r + 1), EINA_FALSE);
- _box_blur_vert_rgba(in->image.data, out->image.data, r,
+ _box_blur_vert_rgba(in->image.data, out->image.data, radii,
in->cache_entry.w, in->cache_entry.h);
return EINA_TRUE;