patch from Tiago Falcão - unroll loops in evas a bit.
author raster <raster@7cbeb6ba-43b4-40fd-8cce-4c39aea84d33>
Fri, 13 Mar 2009 15:18:39 +0000 (15:18 +0000)
committer raster <raster@7cbeb6ba-43b4-40fd-8cce-4c39aea84d33>
Fri, 13 Mar 2009 15:18:39 +0000 (15:18 +0000)
git-svn-id: svn+ssh://svn.enlightenment.org/var/svn/e/trunk/evas@39467 7cbeb6ba-43b4-40fd-8cce-4c39aea84d33

12 files changed:
src/lib/engines/common/evas_op_blend/op_blend_color_.c
src/lib/engines/common/evas_op_blend/op_blend_mask_color_.c
src/lib/engines/common/evas_op_blend/op_blend_pixel_.c
src/lib/engines/common/evas_op_blend/op_blend_pixel_color_.c
src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_.c
src/lib/engines/common/evas_op_copy/op_copy_color_.c
src/lib/engines/common/evas_op_copy/op_copy_mask_color_.c
src/lib/engines/common/evas_op_copy/op_copy_pixel_.c
src/lib/engines/common/evas_op_copy/op_copy_pixel_color_.c
src/lib/engines/common/evas_op_copy/op_copy_pixel_mask_.c
src/lib/include/evas_common.h
src/lib/include/evas_common_soft16.h

index 09da909..5c7ecd3 100644 (file)
@@ -4,12 +4,12 @@
 #ifdef BUILD_C
 static void
 _op_blend_c_dp(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l, a = 256 - (c >> 24);
-   while (d < e)
-     {
-       *d = c + MUL_256(a, *d);
-       d++;
-     }
+    DATA32 *e, a = 256 - (c >> 24);
+    UNROLL8_PLD_WHILE(d, l, e,
+                      {
+                         *d = c + MUL_256(a, *d);
+                         d++;
+                      });
 }
 
 #define _op_blend_caa_dp _op_blend_c_dp
@@ -31,8 +31,8 @@ init_blend_color_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_blend_pt_c_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
-       s = 256 - (c >> 24);
-       *d = c + MUL_256(s, *d);
+   s = 256 - (c >> 24);
+   *d = c + MUL_256(s, *d);
 }
 
 #define _op_blend_pt_caa_dp _op_blend_pt_c_dp
@@ -61,13 +61,13 @@ init_blend_color_pt_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_blend_rel_c_dp(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   l = 256 - (c >> 24);
-   while (d < e)
-     {
-       *d = MUL_SYM(*d >> 24, c) + MUL_256(l, *d);
-       d++;
-     }
+   DATA32 *e;
+   int alpha = 256 - (c >> 24);
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        *d = MUL_SYM(*d >> 24, c) + MUL_256(alpha, *d);
+                        d++;
+                     });
 }
 
 #define _op_blend_rel_caa_dp _op_blend_rel_c_dp
@@ -89,8 +89,8 @@ init_blend_rel_color_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_blend_rel_pt_c_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
-       s = *d >> 24;
-       *d = MUL_SYM(s, c) + MUL_256(256 - (c >> 24), *d);
+   s = *d >> 24;
+   *d = MUL_SYM(s, c) + MUL_256(256 - (c >> 24), *d);
 }
 
 #define _op_blend_rel_pt_caa_dp _op_blend_rel_pt_c_dp
index cfb9636..4313714 100644 (file)
@@ -4,48 +4,51 @@
 #ifdef BUILD_C
 static void
 _op_blend_mas_c_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   l = 256 - (c >> 24);
-   while (d < e) {
-       DATA32 a = *m;
-       switch(a)
-         {
-           case 0:
-               break;
-           case 255:
-               *d = c + MUL_256(l, *d);
-               break;
-           default:
-             {
-               DATA32 mc = MUL_SYM(a, c);
-               a = 256 - (mc >> 24);
-               *d = mc + MUL_256(a, *d);
-             }
-               break;
-         }
-       m++;  d++;
-     }
+   DATA32 *e;
+   int alpha = 256 - (c >> 24);
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        DATA32 a = *m;
+                        switch(a)
+                          {
+                          case 0:
+                             break;
+                          case 255:
+                             *d = c + MUL_256(alpha, *d);
+                             break;
+                          default:
+                               {
+                                  DATA32 mc = MUL_SYM(a, c);
+                                  a = 256 - (mc >> 24);
+                                  *d = mc + MUL_256(a, *d);
+                               }
+                             break;
+                          }
+                        m++;  d++;
+                     });
 }
 
 static void
 _op_blend_mas_can_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       l = *m;
-       switch(l)
-         {
-           case 0:
-               break;
-           case 255:
-               *d = c;
-               break;
-           default:
-               l++;
-               *d = INTERP_256(l, c, *d);
-               break;
-         }
-       m++;  d++;
-     }
+    DATA32 *e;
+    int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        alpha = *m;
+                        switch(alpha)
+                          {
+                          case 0:
+                             break;
+                          case 255:
+                             *d = c;
+                             break;
+                          default:
+                             alpha++;
+                             *d = INTERP_256(alpha, c, *d);
+                             break;
+                          }
+                        m++;  d++;
+                     });
 }
 
 #define _op_blend_mas_cn_dp _op_blend_mas_can_dp
@@ -74,14 +77,14 @@ init_blend_mask_color_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_blend_pt_mas_c_dp(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) {
-       s = MUL_SYM(m, c);
-       m = 256 - (s >> 24);
-       *d = s + MUL_256(m, *d);
+   s = MUL_SYM(m, c);
+   m = 256 - (s >> 24);
+   *d = s + MUL_256(m, *d);
 }
 
 static void
 _op_blend_pt_mas_can_dp(DATA32 s __UNUSED__, DATA8 m, DATA32 c, DATA32 *d) {
-       *d = INTERP_256(m + 1, c, *d);
+   *d = INTERP_256(m + 1, c, *d);
 }
 
 #define _op_blend_pt_mas_cn_dp _op_blend_pt_mas_can_dp
@@ -114,14 +117,16 @@ init_blend_mask_color_pt_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_blend_rel_mas_c_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       DATA32 mc = MUL_SYM(*m, c);
-       l = 256 - (mc >> 24);
-       *d = MUL_SYM(*d >> 24, mc) + MUL_256(l, *d);
-       d++;
-       m++;
-     }
+   DATA32 *e;
+   int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        DATA32 mc = MUL_SYM(*m, c);
+                        alpha = 256 - (mc >> 24);
+                        *d = MUL_SYM(*d >> 24, mc) + MUL_256(alpha, *d);
+                        d++;
+                        m++;
+                     });
 }
 
 #define _op_blend_rel_mas_cn_dp _op_blend_rel_mas_c_dp
@@ -151,9 +156,9 @@ init_blend_rel_mask_color_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_blend_rel_pt_mas_c_dp(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) {
-       s = MUL_SYM(m, c);
-       c = 256 - (s >> 24);
-       *d = MUL_SYM(*d >> 24, s) + MUL_256(c, *d);
+   s = MUL_SYM(m, c);
+   c = 256 - (s >> 24);
+   *d = MUL_SYM(*d >> 24, s) + MUL_256(c, *d);
 }
 
 #define _op_blend_rel_pt_mas_cn_dp _op_blend_rel_pt_mas_c_dp
index 7384c25..a5db1c0 100644 (file)
@@ -4,33 +4,36 @@
 #ifdef BUILD_C
 static void
 _op_blend_p_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       l = 256 - (*s >> 24);
-       *d = *s++ + MUL_256(l, *d);
-       d++;
-     }
+   DATA32 *e;
+   int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        alpha = 256 - (*s >> 24);
+                        *d = *s++ + MUL_256(alpha, *d);
+                        d++;
+                     });
 }
 
 static void
 _op_blend_pas_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e)
-     {
-       switch (*s & 0xff000000)
-         {
-           case 0:
-             break;
-           case 0xff000000:
-               *d = *s;
-             break;
-           default :
-               l = 256 - (*s >> 24);
-               *d = *s + MUL_256(l, *d);
-             break;
-         }
-       s++;  d++;
-     }
+   DATA32 *e;
+   int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        switch (*s & 0xff000000)
+                          {
+                          case 0:
+                             break;
+                          case 0xff000000:
+                             *d = *s;
+                             break;
+                          default:
+                             alpha = 256 - (*s >> 24);
+                             *d = *s + MUL_256(alpha, *d);
+                             break;
+                          }
+                        s++;  d++;
+                     });
 }
 
 #define _op_blend_pan_dp NULL
@@ -55,8 +58,8 @@ init_blend_pixel_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_blend_pt_p_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
-       c = 256 - (s >> 24);
-       *d = s + MUL_256(c, *d);
+   c = 256 - (s >> 24);
+   *d = s + MUL_256(c, *d);
 }
 
 #define _op_blend_pt_pas_dp _op_blend_pt_p_dp
@@ -86,24 +89,27 @@ init_blend_pixel_pt_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_blend_rel_p_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       l = 256 - (*s >> 24);
-       c = 1 + (*d >> 24);
-       *d = MUL_256(c, *s) + MUL_256(l, *d);
-       d++;
-       s++;
-     }
+   DATA32 *e;
+   int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        alpha = 256 - (*s >> 24);
+                        c = 1 + (*d >> 24);
+                        *d = MUL_256(c, *s) + MUL_256(alpha, *d);
+                        d++;
+                        s++;
+                     });
 }
 
 static void
 _op_blend_rel_pan_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       c = 1 + (*d >> 24);
-       *d++ = MUL_256(c, *s);
-       s++;
-     }
+   DATA32 *e;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        c = 1 + (*d >> 24);
+                        *d++ = MUL_256(c, *s);
+                        s++;
+                     });
 }
 
 #define _op_blend_rel_pas_dp _op_blend_rel_p_dp
@@ -128,13 +135,13 @@ init_blend_rel_pixel_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_blend_rel_pt_p_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
-       c = 256 - (s >> 24);
-       *d = MUL_SYM(*d >> 24, s) + MUL_256(c, *d);
+   c = 256 - (s >> 24);
+   *d = MUL_SYM(*d >> 24, s) + MUL_256(c, *d);
 }
 
 static void
 _op_blend_rel_pt_pan_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d) {
-       *d = MUL_SYM(*d >> 24, s);
+   *d = MUL_SYM(*d >> 24, s);
 }
 
 #define _op_blend_rel_pt_pas_dp _op_blend_rel_pt_p_dp
index 0af49bf..ee30321 100644 (file)
@@ -4,69 +4,78 @@
 #ifdef BUILD_C
 static void
 _op_blend_p_c_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       DATA32 sc = MUL4_SYM(c, *s);
-       l = 256 - (sc >> 24);
-       *d = sc + MUL_256(l, *d);
-       d++;
-       s++;
-     }
+   DATA32 *e;
+   int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        DATA32 sc = MUL4_SYM(c, *s);
+                        alpha = 256 - (sc >> 24);
+                        *d = sc + MUL_256(alpha, *d);
+                        d++;
+                        s++;
+                     });
 }
 
 static void
 _op_blend_pan_c_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   l = 256 - (c >> 24);
-   while (d < e) {
-       *d = ((c & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(l, *d);
-       d++;
-       s++;
-     }
+   DATA32 *e;
+   int alpha = 256 - (c >> 24);
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        *d = ((c & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(alpha, *d);
+                        d++;
+                        s++;
+                     });
 }
 
 static void
 _op_blend_p_can_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       l = 256 - (*s >> 24);
-       *d = ((*s & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(l, *d);
-       d++;
-       s++;
-     }
+   DATA32 *e;
+   int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        alpha = 256 - (*s >> 24);
+                        *d = ((*s & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(alpha, *d);
+                        d++;
+                        s++;
+                     });
 }
 
 static void
 _op_blend_pan_can_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       *d++ = 0xff000000 + MUL3_SYM(c, *s);
-       s++;
-     }
+   DATA32 *e;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        *d++ = 0xff000000 + MUL3_SYM(c, *s);
+                        s++;
+                     });
 }
 
 static void
 _op_blend_p_caa_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
+   DATA32 *e;
+   int alpha;
    c = 1 + (c & 0xff);
-  while (d < e) {
-       DATA32 sc = MUL_256(c, *s);
-       l = 256 - (sc >> 24);
-       *d = sc + MUL_256(l, *d);
-       d++;
-       s++;
-     }
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        DATA32 sc = MUL_256(c, *s);
+                        alpha = 256 - (sc >> 24);
+                        *d = sc + MUL_256(alpha, *d);
+                        d++;
+                        s++;
+                     });
 }
 
 static void
 _op_blend_pan_caa_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
+   DATA32 *e;
    c = 1 + (c & 0xff);
-   while (d < e) {
-       *d = INTERP_256(c, *s, *d);
-       d++;
-       s++;
-     }
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        *d = INTERP_256(c, *s, *d);
+                        d++;
+                        s++;
+                     });
 }
 
 #define _op_blend_pas_c_dp _op_blend_p_c_dp
@@ -111,9 +120,9 @@ init_blend_pixel_color_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_blend_pt_p_c_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
-       s = MUL4_SYM(c, s);
-       c = 256 - (s >> 24);
-       *d = s + MUL_256(c, *d);
+   s = MUL4_SYM(c, s);
+   c = 256 - (s >> 24);
+   *d = s + MUL_256(c, *d);
 }
 
 #define _op_blend_pt_pas_c_dp _op_blend_pt_p_c_dp
@@ -167,14 +176,16 @@ init_blend_pixel_color_pt_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_blend_rel_p_c_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       DATA32 sc = MUL4_SYM(c, *s);
-       l = 256 - (sc >> 24);
-       *d = MUL_SYM(*d >> 24, sc) + MUL_256(l, *d);
-       d++;
-       s++;
-     }
+   DATA32 *e;
+   int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        DATA32 sc = MUL4_SYM(c, *s);
+                        alpha = 256 - (sc >> 24);
+                        *d = MUL_SYM(*d >> 24, sc) + MUL_256(alpha, *d);
+                        d++;
+                        s++;
+                     });
 }
 
 #define _op_blend_rel_pas_c_dp _op_blend_rel_p_c_dp
@@ -224,9 +235,9 @@ init_blend_rel_pixel_color_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_blend_rel_pt_p_c_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
-       s = MUL4_SYM(c, s);
-       c = 256 - (s >> 24);
-       *d = MUL_SYM(*d >> 24, s) + MUL_256(c, *d);
+   s = MUL4_SYM(c, s);
+   c = 256 - (s >> 24);
+   *d = MUL_SYM(*d >> 24, s) + MUL_256(c, *d);
 }
 
 #define _op_blend_rel_pt_pas_c_dp _op_blend_rel_pt_p_c_dp
index ec9df82..6f029ca 100644 (file)
@@ -4,68 +4,74 @@
 #ifdef BUILD_C
 static void
 _op_blend_p_mas_dp(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       l = *m;
-       switch(l)
-         {
-           case 0:
-               break;
-           case 255:
-               l = 256 - (*s >> 24);
-               *d = *s + MUL_256(l, *d);
-               break;
-           default:
-               c = MUL_SYM(l, *s);
-               l = 256 - (c >> 24);
-               *d = c + MUL_256(l, *d);
-               break;
-         }
-       m++;  s++;  d++;
-     }
+   DATA32 *e;
+   int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        alpha = *m;
+                        switch(alpha)
+                          {
+                          case 0:
+                             break;
+                          case 255:
+                             alpha = 256 - (*s >> 24);
+                             *d = *s + MUL_256(alpha, *d);
+                             break;
+                          default:
+                             c = MUL_SYM(alpha, *s);
+                             alpha = 256 - (c >> 24);
+                             *d = c + MUL_256(alpha, *d);
+                             break;
+                          }
+                        m++;  s++;  d++;
+                     });
 }
 
 static void
 _op_blend_pas_mas_dp(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       l = (*s >> 24);
-       switch(*m & l)
-         {
-           case 0:
-               break;
-           case 255:
-               *d = *s;
-               break;
-           default:
-               c = MUL_SYM(l, *s);
-               l = 256 - (c >> 24);
-               *d = c + MUL_256(l, *d);
-               break;
-         }
-       m++;  s++;  d++;
-     }
+   DATA32 *e;
+   int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        alpha = (*s >> 24);
+                        switch(*m & alpha) /* mask AND source alpha - same test as the pre-unroll loop */
+                          {
+                          case 0:
+                             break;
+                          case 255:
+                             *d = *s;
+                             break;
+                          default:
+                             c = MUL_SYM(alpha, *s);
+                             alpha = 256 - (c >> 24);
+                             *d = c + MUL_256(alpha, *d);
+                             break;
+                          }
+                        m++;  s++;  d++; /* the body advances the pointers itself - as the other spans do */
+                     });
 }
 
 static void
 _op_blend_pan_mas_dp(DATA32 *s, DATA8 *m, DATA32 c __UNUSED__, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       l = *m;
-       switch(l)
-         {
-           case 0:
-               break;
-           case 255:
-               *d = *s;
-               break;
-           default:
-               l++;
-               *d = INTERP_256(l, *s, *d);
-               break;
-         }
-       m++;  s++;  d++;
-     }
+   DATA32 *e;
+   int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        alpha = *m;
+                        switch(alpha)
+                          {
+                          case 0:
+                             break;
+                          case 255:
+                             *d = *s;
+                             break;
+                          default:
+                             alpha++;
+                             *d = INTERP_256(alpha, *s, *d);
+                             break;
+                          }
+                        m++;  s++;  d++;
+                     });
 }
 
 
@@ -89,14 +94,14 @@ init_blend_pixel_mask_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_blend_pt_p_mas_dp(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) {
-       s = MUL_SYM(m, s);
-       c = 256 - (s >> 24);
-       *d = s + MUL_256(c, *d);
+   s = MUL_SYM(m, s);
+   c = 256 - (s >> 24);
+   *d = s + MUL_256(c, *d);
 }
 
 static void
 _op_blend_pt_pan_mas_dp(DATA32 s, DATA8 m, DATA32 c __UNUSED__, DATA32 *d) {
-       *d = INTERP_256(m + 1, s, *d);
+   *d = INTERP_256(m + 1, s, *d);
 }
 
 #define _op_blend_pt_pas_mas_dp _op_blend_pt_p_mas_dp
@@ -125,15 +130,15 @@ init_blend_pixel_mask_pt_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_blend_rel_p_mas_dp(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       c = MUL_SYM(*m, *s);
-       l = 256 - (c >> 24);
-       *d = MUL_SYM(*d >> 24, c) + MUL_256(l, *d);
-       d++;
-       m++;
-       s++;
-     }
+   DATA32 *e;
+   int alpha;
+   UNROLL8_PLD_WHILE(d, l, e,
+           {
+            c = MUL_SYM(*m, *s);
+            alpha = 256 - (c >> 24);
+            *d = MUL_SYM(*d >> 24, c) + MUL_256(alpha, *d);
+            d++; m++; s++;
+           });
 }
 
 #define _op_blend_rel_pas_mas_dp _op_blend_rel_p_mas_dp
@@ -159,9 +164,9 @@ init_blend_rel_pixel_mask_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_blend_rel_pt_p_mas_dp(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) {
-       s = MUL_SYM(m, s);
-       c = 256 - (s >> 24);
-       *d = MUL_SYM(*d >> 24, s) + MUL_256(c, *d);
+   s = MUL_SYM(m, s);
+   c = 256 - (s >> 24);
+   *d = MUL_SYM(*d >> 24, s) + MUL_256(c, *d);
 }
 
 #define _op_blend_rel_pt_pas_mas_dp _op_blend_rel_pt_p_mas_dp
index 1d077a8..771232d 100644 (file)
@@ -4,10 +4,12 @@
 #ifdef BUILD_C
 static void
 _op_copy_c_dp(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   for (; d < e; d++) {
-      *d = c;
-   }
+   DATA32 *e;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        *d = c;
+                        d++;
+                     });
 }
 
 #define _op_copy_cn_dp _op_copy_c_dp
@@ -37,7 +39,7 @@ init_copy_color_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_copy_pt_c_dp(DATA32 s __UNUSED__, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
-      *d = c;
+   *d = c;
 }
 
 #define _op_copy_pt_cn_dp _op_copy_pt_c_dp
@@ -71,10 +73,12 @@ init_copy_color_pt_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_copy_rel_c_dp(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   for (; d < e; d++) {
-       *d = MUL_SYM(*d >> 24, c);
-   }
+   DATA32 *e;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        *d = MUL_SYM(*d >> 24, c);
+                        d++;
+                     });
 }
 
 
@@ -105,8 +109,8 @@ init_copy_rel_color_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_copy_rel_pt_c_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
-       s = 1 + (*d >> 24);
-       *d = MUL_256(s, c);
+   s = 1 + (*d >> 24);
+   *d = MUL_256(s, c);
 }
 
 
index 505b4ba..c623322 100644 (file)
@@ -4,23 +4,25 @@
 #ifdef BUILD_C
 static void
 _op_copy_mas_c_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       l = *m;
-       switch(l)
-         {
-           case 0:
-               break;
-           case 255:
-               *d = c;
-               break;
-           default:
-               l++;
-               *d = INTERP_256(l, c, *d);
-               break;
-         }
-       m++;  d++;
-     }
+   DATA32 *e;
+   int color;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        color = *m;
+                        switch(color)
+                          {
+                          case 0:
+                             break;
+                          case 255:
+                             *d = c;
+                             break;
+                          default:
+                             color++;
+                             *d = INTERP_256(color, c, *d);
+                             break;
+                          }
+                        m++;  d++;
+                     });
 }
 
 #define _op_copy_mas_cn_dp _op_copy_mas_c_dp
@@ -50,7 +52,7 @@ init_copy_mask_color_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_copy_pt_mas_c_dp(DATA32 s __UNUSED__, DATA8 m, DATA32 c, DATA32 *d) {
-       *d = INTERP_256(m + 1, c, *d);
+   *d = INTERP_256(m + 1, c, *d);
 }
 
 
@@ -85,28 +87,30 @@ init_copy_mask_color_pt_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_copy_rel_mas_c_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       l = *m;
-       switch(l)
-         {
-           case 0:
-               break;
-           case 255:
-               l = 1 + (*d >> 24);
-               *d = MUL_256(l, c);
-               break;
-           default:
-             {
-               DATA32 da = 1 + (*d >> 24);
-               da = MUL_256(da, c);
-               l++;
-               *d = INTERP_256(l, da, *d);
-             }
-               break;
-         }
-       m++;  d++;
-     }
+   DATA32 *e;
+   int color;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        color = *m;
+                        switch(color)
+                          {
+                          case 0:
+                             break;
+                          case 255:
+                             color = 1 + (*d >> 24);
+                             *d = MUL_256(color, c);
+                             break;
+                          default:
+                               {
+                                  DATA32 da = 1 + (*d >> 24);
+                                  da = MUL_256(da, c);
+                                  color++;
+                                  *d = INTERP_256(color, da, *d);
+                               }
+                             break;
+                          }
+                        m++;  d++;
+                     });
 }
 
 
@@ -137,9 +141,9 @@ init_copy_rel_mask_color_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_copy_rel_pt_mas_c_dp(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) {
-       s = 1 + (*d >> 24);
-       s = MUL_256(s, c);
-       *d = INTERP_256(m + 1, s, *d);
+   s = 1 + (*d >> 24);
+   s = MUL_256(s, c);
+   *d = INTERP_256(m + 1, s, *d);
 }
 
 #define _op_copy_rel_pt_mas_cn_dp _op_copy_rel_pt_mas_c_dp
index aacda83..51cbafc 100644 (file)
@@ -4,10 +4,7 @@
 #ifdef BUILD_C
 static void
 _op_copy_p_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   for (; d < e; d++, s++) {
-      *d = *s;
-   }
+   memcpy(d, s, l * sizeof(DATA32));
 }
 
 #define _op_copy_pan_dp _op_copy_p_dp
@@ -33,7 +30,7 @@ init_copy_pixel_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_copy_pt_p_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d) {
-      *d = s;
+   *d = s;
 }
 
 #define _op_copy_pt_pan_dp _op_copy_pt_p_dp
@@ -63,10 +60,12 @@ init_copy_pixel_pt_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_copy_rel_p_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   for (; d < e; d++, s++) {
-       *d = MUL_SYM(*d >> 24, *s);
-   }
+   DATA32 *e;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        *d = MUL_SYM(*d >> 24, *s);
+                        d++; s++;
+                     });
 }
 
 
@@ -93,8 +92,8 @@ init_copy_rel_pixel_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_copy_rel_pt_p_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
-       c = 1 + (*d >> 24);
-       *d = MUL_256(c, s);
+   c = 1 + (*d >> 24);
+   *d = MUL_256(c, s);
 }
 
 
index 068958b..7925fdd 100644 (file)
@@ -4,23 +4,25 @@
 #ifdef BUILD_C
 static void
 _op_copy_p_c_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       *d = MUL4_SYM(c, *s);
-       d++;
-       s++;
-     }
+   DATA32 *e;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        *d = MUL4_SYM(c, *s);
+                        d++;
+                        s++;
+                     });
 }
 
 static void
 _op_copy_p_caa_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
+   DATA32 *e;
    c = 1 + (c >> 24);
-   while (d < e) {
-       *d = MUL_256(c, *s);
-       d++;
-       s++;
-     }
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        *d = MUL_256(c, *s);
+                        d++;
+                        s++;
+                     });
 }
 
 
@@ -70,12 +72,12 @@ init_copy_pixel_color_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_copy_pt_p_c_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
-       *d = MUL4_SYM(c, s);
+   *d = MUL4_SYM(c, s);
 }
 
 static void
 _op_copy_pt_p_caa_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
-       *d = MUL_SYM(c >> 24, s);
+   *d = MUL_SYM(c >> 24, s);
 }
 
 
@@ -129,13 +131,14 @@ init_copy_pixel_color_pt_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_copy_rel_p_c_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       DATA32 cs = MUL4_SYM(c, *s);
-       *d = MUL_SYM(*d >> 24, cs);
-       d++;
-       s++;
-     }
+   DATA32 *e;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        DATA32 cs = MUL4_SYM(c, *s);
+                        *d = MUL_SYM(*d >> 24, cs);
+                        d++;
+                        s++;
+                     });
 }
 
 #define _op_copy_rel_pas_c_dp _op_copy_rel_p_c_dp
@@ -185,8 +188,8 @@ init_copy_rel_pixel_color_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_copy_rel_pt_p_c_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
-       s = MUL4_SYM(c, s);
-       *d = MUL_SYM(*d >> 24, s);
+   s = MUL4_SYM(c, s);
+   *d = MUL_SYM(*d >> 24, s);
 }
 
 #define _op_copy_rel_pt_pas_c_dp _op_copy_rel_pt_p_c_dp
index 1f988cf..5d90118 100644 (file)
@@ -4,23 +4,25 @@
 #ifdef BUILD_C
 static void 
 _op_copy_p_mas_dp(DATA32 *s, DATA8 *m, DATA32 c __UNUSED__, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       l = *m;
-       switch(l)
-         {
-           case 0:
-               break;
-           case 255:
-               *d = *s;
-               break;
-           default:
-               l++;
-               *d = INTERP_256(l, *s, *d);
-               break;
-         }
-       m++;  s++;  d++;
-     }
+   DATA32 *e;
+   int color;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        color = *m;
+                        switch(color)
+                          {
+                          case 0:
+                             break;
+                          case 255:
+                             *d = *s;
+                             break;
+                          default:
+                             color++;
+                             *d = INTERP_256(color, *s, *d);
+                             break;
+                          }
+                        m++;  s++;  d++;
+                     });
 }
 
 
@@ -47,7 +49,7 @@ init_copy_pixel_mask_span_funcs_c(void)
 #ifdef BUILD_C
 static void
 _op_copy_pt_p_mas_dp(DATA32 s, DATA8 m, DATA32 c __UNUSED__, DATA32 *d) {
-       *d = INTERP_256(m + 1, s, *d);
+   *d = INTERP_256(m + 1, s, *d);
 }
 
 #define _op_copy_pt_pan_mas_dp _op_copy_pt_p_mas_dp
@@ -75,26 +77,28 @@ init_copy_pixel_mask_pt_funcs_c(void)
 /* copy_rel pixel x mask --> dst */
 
 #ifdef BUILD_C
-static void 
+static void
 _op_copy_rel_p_mas_dp(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
-   DATA32 *e = d + l;
-   while (d < e) {
-       l = *m;
-       switch(l)
-         {
-           case 0:
-               break;
-           case 255:
-               *d = MUL_SYM(*d >> 24, *s);
-               break;
-           default:
-               c = MUL_SYM(*d >> 24, *s);
-               l++;
-               *d = INTERP_256(l, c, *d);
-               break;
-         }
-       m++;  s++;  d++;
-     }
+   DATA32 *e;
+   int color;
+   UNROLL8_PLD_WHILE(d, l, e,
+                     {
+                        color = *m;
+                        switch(color)
+                          {
+                          case 0:
+                             break;
+                          case 255:
+                             *d = MUL_SYM(*d >> 24, *s);
+                             break;
+                          default:
+                             c = MUL_SYM(*d >> 24, *s);
+                             color++; /* scale the mask value - not the span length l */
+                             *d = INTERP_256(color, c, *d);
+                             break;
+                          }
+                        m++;  s++;  d++;
+                     });
 }
 
 
@@ -119,10 +123,10 @@ init_copy_rel_pixel_mask_span_funcs_c(void)
 #endif
 
 #ifdef BUILD_C
-static void 
+static void
 _op_copy_rel_pt_p_mas_dp(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) {
-       c = MUL_SYM(*d >> 24, s);
-       *d = INTERP_256(m + 1, c, *d);
+   c = MUL_SYM(*d >> 24, s); /* incoming c is overwritten: source combined with the top byte of *d */
+   *d = INTERP_256(m + 1, c, *d); /* single-pixel form: mask m is passed as m + 1 */
 }
 
 
index 636c454..9bbc030 100644 (file)
@@ -127,6 +127,95 @@ void *alloca (size_t);
 
 /*****************************************************************************/
 
+#if defined(__ARM_ARCH_3M__) /* fold the per-variant __ARM_ARCH_*__ macros into one number; ARMv3M shares 40 with ARMv4 */
+# define __ARM_ARCH__ 40
+#endif
+#if defined(__ARM_ARCH_4__)
+# define __ARM_ARCH__ 40
+#endif
+#if defined(__ARM_ARCH_4T__)
+# define __ARM_ARCH__ 41
+#endif
+
+#if defined(__ARM_ARCH_5__) /* 50..54: ARMv5 variants */
+# define __ARM_ARCH__ 50
+#endif
+#if defined(__ARM_ARCH_5T__)
+# define __ARM_ARCH__ 51
+#endif
+#if defined(__ARM_ARCH_5E__) /* 52 is the lowest value for which pld() expands to an instruction */
+# define __ARM_ARCH__ 52
+#endif
+#if defined(__ARM_ARCH_5TE__)
+# define __ARM_ARCH__ 53
+#endif
+#if defined(__ARM_ARCH_5TEJ__)
+# define __ARM_ARCH__ 54
+#endif
+
+#if defined(__ARM_ARCH_6__) /* 60..65: ARMv6 variants */
+# define __ARM_ARCH__ 60
+#endif
+#if defined(__ARM_ARCH_6J__)
+# define __ARM_ARCH__ 61
+#endif
+#if defined(__ARM_ARCH_6K__)
+# define __ARM_ARCH__ 62
+#endif
+#if defined(__ARM_ARCH_6Z__)
+# define __ARM_ARCH__ 63
+#endif
+#if defined(__ARM_ARCH_6ZK__)
+# define __ARM_ARCH__ 64
+#endif
+#if defined(__ARM_ARCH_6T2__)
+# define __ARM_ARCH__ 65
+#endif
+
+#if defined(__ARM_ARCH_7__) /* 70..73: ARMv7 variants */
+# define __ARM_ARCH__ 70
+#endif
+#if defined(__ARM_ARCH_7A__)
+# define __ARM_ARCH__ 71
+#endif
+#if defined(__ARM_ARCH_7R__)
+# define __ARM_ARCH__ 72
+#endif
+#if defined(__ARM_ARCH_7M__)
+# define __ARM_ARCH__ 73
+#endif
+
+#if defined(__ARM_ARCH__) && (__ARM_ARCH__ >= 52)
+/* pld(addr, off): emits the ARM "pld" instruction for addr + off; off has an "i" constraint, so it must be a constant. Tested on ARMv6 (arm1136j-s), Nokia N800 CPU */
+#define pld(addr, off)                                                  \
+   __asm__("pld [%[address], %[offset]]"::                              \
+           [address] "r" (addr), [offset] "i" (off))
+#else
+#define pld(addr, off) /* expands to nothing on every other target */
+#endif /* __ARM_ARCH__ >= 52 */
+
+/*****************************************************************************/
+
+#define UNROLL2(op...) op op /* GNU named-variadic macro: pastes op twice, commas included */
+#define UNROLL4(op...) UNROLL2(op) UNROLL2(op)
+#define UNROLL8(op...) UNROLL4(op) UNROLL4(op)
+#define UNROLL16(op...) UNROLL8(op) UNROLL8(op)
+/* UNROLL8_PLD_WHILE: run op until start reaches start + size; op itself must advance start by one element per run. */
+#define UNROLL8_PLD_WHILE(start, size, end, op)         \
+    pld(start, 0);                                      \
+    end = (start) + ((size) & ~7); /* whole groups of 8 */ \
+    while ((start) < (end))                             \
+        {                                               \
+            pld(start, 32);                             \
+            UNROLL8(op);                                \
+        }                                               \
+    end += ((size) & 7); /* size is read a second time: op must not modify it */ \
+    pld(start, 32);                                     \
+    while ((start) < (end)) /* remaining 0..7 elements; expansion is several statements, not one */ \
+        {                                               \
+        op;                                             \
+        }
+
 /*****************************************************************************/
 
 typedef unsigned long long             DATA64;
@@ -277,46 +366,46 @@ struct _Evas_Cache_Target
 
 struct _Image_Entry
 {
-  EINA_INLIST;
-
-  Evas_Cache_Image      *cache;
-
-  const char            *cache_key;
-
-  const char            *file;
-  const char            *key;
-
-  Evas_Cache_Target     *targets;
-
-  time_t                 timestamp;
-  time_t                 laststat;
-
-  int                    references;
-
-  unsigned char          scale;
-
-  RGBA_Image_Loadopts    load_opts;
-  int                    space;
-  int                    w;
-  int                    h;
-
-  struct
-  {
-     int                w;
-     int                h;
-  } allocated;
-
-  struct
-  {
-     void              *module;
-     void              *loader;
-  } info;
+   EINA_INLIST;
 
+   Evas_Cache_Image      *cache;
+   
+   const char            *cache_key;
+   
+   const char            *file;
+   const char            *key;
+   
+   Evas_Cache_Target     *targets;
+   
+   time_t                 timestamp;
+   time_t                 laststat;
+   
+   int                    references;
+   
+   unsigned char          scale;
+   
+   RGBA_Image_Loadopts    load_opts;
+   int                    space;
+   int                    w;
+   int                    h;
+   
+   struct
+     {
+        int             w;
+        int             h;
+     } allocated; /* NOTE(review): presumably the dimensions actually allocated, as opposed to w/h above - confirm */
+   
+   struct
+     {
+        void           *module;
+        void           *loader;
+     } info; /* NOTE(review): opaque handles; presumably the loader module that produced this entry - confirm */
+   
 #ifdef BUILD_ASYNC_PRELOAD
-   pthread_mutex_t lock;
+   LK(lock); /* lock member now declared through the LK() macro (defined outside this hunk) instead of a raw pthread_mutex_t */
 #endif
 
-  Image_Entry_Flags      flags;
+   Image_Entry_Flags      flags;
 };
 
 struct _Engine_Image_Entry
@@ -642,9 +731,8 @@ struct _RGBA_Font_Int
 
    Eina_Hash       *glyphs;
 
-#ifdef HAVE_PTHREAD
-   pthread_mutex_t  ft_mutex;
-#endif
+   LK(ft_mutex);
+   
    Eina_Hash       *kerning;
    Eina_Hash       *indexes;
 
index 422e090..3e08910 100644 (file)
@@ -32,78 +32,6 @@ extern "C" {
    ((((g) >> 2) & 0x3f) << 5) |                                         \
    (((b) >> 3) & 0x1f))
 
-#define UNROLL2(op...) op op
-#define UNROLL4(op...) UNROLL2(op) UNROLL2(op)
-#define UNROLL8(op...) UNROLL4(op) UNROLL4(op)
-#define UNROLL16(op...) UNROLL8(op) UNROLL8(op)
-
-#if defined(__ARM_ARCH_3M__)
-# define __ARM_ARCH__ 40
-#endif
-#if defined(__ARM_ARCH_4__)
-# define __ARM_ARCH__ 40
-#endif
-#if defined(__ARM_ARCH_4T__)
-# define __ARM_ARCH__ 41
-#endif
-
-#if defined(__ARM_ARCH_5__)
-# define __ARM_ARCH__ 50
-#endif
-#if defined(__ARM_ARCH_5T__)
-# define __ARM_ARCH__ 51
-#endif
-#if defined(__ARM_ARCH_5E__)
-# define __ARM_ARCH__ 52
-#endif
-#if defined(__ARM_ARCH_5TE__)
-# define __ARM_ARCH__ 53
-#endif
-#if defined(__ARM_ARCH_5TEJ__)
-# define __ARM_ARCH__ 54
-#endif
-
-#if defined(__ARM_ARCH_6__)
-# define __ARM_ARCH__ 60
-#endif
-#if defined(__ARM_ARCH_6J__)
-# define __ARM_ARCH__ 61
-#endif
-#if defined(__ARM_ARCH_6K__)
-# define __ARM_ARCH__ 62
-#endif
-#if defined(__ARM_ARCH_6Z__)
-# define __ARM_ARCH__ 63
-#endif
-#if defined(__ARM_ARCH_6ZK__)
-# define __ARM_ARCH__ 64
-#endif
-#if defined(__ARM_ARCH_6T2__)
-# define __ARM_ARCH__ 65
-#endif
-
-#if defined(__ARM_ARCH_7__)
-# define __ARM_ARCH__ 70
-#endif
-#if defined(__ARM_ARCH_7A__)
-# define __ARM_ARCH__ 71
-#endif
-#if defined(__ARM_ARCH_7R__)
-# define __ARM_ARCH__ 72
-#endif
-#if defined(__ARM_ARCH_7M__)
-# define __ARM_ARCH__ 73
-#endif
-
-#if defined(__ARM_ARCH__) && (__ARM_ARCH__ >= 52)
-/* tested on ARMv6 (arm1136j-s), Nokia N800 CPU */
-#define pld(addr, off)                                                  \
-   __asm__("pld [%[address], %[offset]]"::                              \
-           [address] "r" (addr), [offset] "i" (off))
-#else
-#define pld(addr, off)
-#endif /* __ARMEL__ */
-
 static inline int
 _calc_stride(int w)
 {