smooth map works (down to 1/2 size its good. its pretty much gl_linear, but
authorraster <raster@7cbeb6ba-43b4-40fd-8cce-4c39aea84d33>
Fri, 23 Oct 2009 13:17:22 +0000 (13:17 +0000)
committerraster <raster@7cbeb6ba-43b4-40fd-8cce-4c39aea84d33>
Fri, 23 Oct 2009 13:17:22 +0000 (13:17 +0000)
in software. but it looks gorgeous. rotations are groovey.

git-svn-id: http://svn.enlightenment.org/svn/e/trunk/evas@43226 7cbeb6ba-43b4-40fd-8cce-4c39aea84d33

src/lib/engines/common/evas_convert_main.c
src/lib/engines/common/evas_map_image.c

index 9d36900..18727ae 100644 (file)
@@ -10,7 +10,6 @@
 #include "evas_convert_grypal_6.h"
 #include "evas_convert_yuv.h"
 
-#ifdef USE_DITHER_44
 const DATA8 _evas_dither_44[4][4] =
 {
      { 0,  8,  2, 10},
@@ -18,9 +17,7 @@ const DATA8 _evas_dither_44[4][4] =
      { 3, 11,  1,  9},
      {15,  7, 13,  5}
 };
-#endif
 
-#ifdef USE_DITHER_128128
 const DATA8 _evas_dither_128128[128][128] =
 {
      { 0, 41, 23, 5, 17, 39, 7, 15, 62, 23, 40, 51, 31, 47, 9, 32, 52, 27, 57, 25, 6, 61, 27, 52, 37, 7, 40, 63, 18, 36, 10, 42, 25, 62, 45, 34, 20, 42, 37, 14, 35, 29, 50, 10, 61, 2, 40, 8, 37, 12, 58, 22, 5, 41, 10, 39, 0, 60, 11, 46, 2, 55, 38, 17, 36, 59, 13, 54, 37, 56, 8, 29, 16, 13, 63, 22, 41, 55, 7, 20, 49, 14, 23, 55, 37, 23, 19, 36, 15, 49, 23, 63, 30, 14, 38, 27, 53, 13, 22, 41, 19, 31, 7, 19, 50, 30, 49, 16, 3, 32, 56, 40, 29, 34, 8, 48, 19, 45, 4, 51, 12, 46, 35, 49, 16, 42, 12, 62 },
@@ -152,7 +149,6 @@ const DATA8 _evas_dither_128128[128][128] =
      { 19, 42, 9, 48, 2, 44, 11, 37, 48, 20, 33, 16, 55, 35, 49, 15, 37, 20, 59, 16, 53, 22, 56, 31, 50, 11, 34, 54, 16, 51, 4, 49, 33, 53, 21, 28, 56, 24, 31, 9, 52, 16, 48, 24, 44, 13, 51, 20, 31, 49, 18, 6, 34, 2, 44, 14, 47, 8, 15, 43, 13, 41, 33, 52, 20, 61, 7, 51, 34, 62, 4, 20, 36, 33, 43, 8, 46, 13, 53, 17, 45, 42, 9, 31, 52, 11, 30, 56, 13, 59, 17, 44, 27, 6, 62, 11, 43, 17, 49, 38, 26, 2, 16, 27, 58, 21, 54, 18, 26, 5, 35, 61, 43, 27, 7, 39, 14, 58, 37, 55, 20, 33, 13, 40, 62, 10, 55, 5 },
      { 51, 14, 61, 29, 59, 20, 55, 31, 0, 49, 11, 60, 3, 26, 22, 56, 0, 40, 12, 43, 41, 8, 36, 0, 17, 57, 24, 2, 46, 26, 61, 18, 0, 38, 12, 59, 6, 49, 3, 57, 19, 63, 5, 33, 18, 54, 28, 56, 0, 43, 26, 46, 63, 27, 56, 22, 27, 54, 38, 28, 63, 24, 10, 45, 0, 31, 42, 21, 12, 25, 44, 49, 59, 6, 26, 50, 3, 34, 27, 59, 0, 35, 62, 16, 4, 58, 47, 0, 43, 24, 37, 2, 54, 20, 46, 31, 0, 56, 34, 5, 55, 45, 60, 37, 0, 40, 10, 38, 63, 46, 15, 20, 0, 53, 21, 62, 30, 11, 24, 27, 40, 0, 57, 26, 3, 45, 27, 35 }
 };
-#endif
 
 EAPI void
 evas_common_convert_init(void)
index 16e597c..e0f9ad8 100644 (file)
@@ -17,6 +17,7 @@ typedef struct _Span Span;
 struct _Span
 {
    int x1, x2;
+   FPc o1, o2;
    FPc  u[2], v[2];
 };
 
@@ -31,6 +32,9 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
                                RGBA_Map_Point *p, 
                                int smooth, int level);
 
+//extern const DATA8 _evas_dither_44[4][4];
+//extern const DATA8 _evas_dither_128128[128][128];
+  
 static FPc
 _interp(int x1, int x2, int p, FPc u1, FPc u2)
 {
@@ -84,75 +88,22 @@ evas_common_map4_rgba(RGBA_Image *src, RGBA_Image *dst,
    dc->clip.use = c; dc->clip.x = cx; dc->clip.y = cy; dc->clip.w = cw; dc->clip.h = ch;
 }
 
+// 12.63 % of time - this can improve
 static void
-evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
-                               RGBA_Draw_Context *dc,
-                               RGBA_Map_Point *p, 
-                               int smooth, int level)
+_calc_spans(RGBA_Map_Point *p, Line *spans, int ystart, int yend, int cx, int cy, int cw, int ch)
 {
-   int i;
-   int c, cx, cy, cw, ch;
-   int ytop, ybottom, ystart, yend, y, yp, sw, shp, swp;
+   int i, y, yp;
    int py[4];
-   Line *spans = NULL;
-   DATA32 *buf, *sp;
-   RGBA_Gfx_Func func;
-   
-   c = dc->clip.use; cx = dc->clip.x; cy = dc->clip.y; cw = dc->clip.w; ch = dc->clip.h;
-   if (!c)
-     {
-        cx = 0;
-        cy = 0;
-        cw = dst->cache_entry.w;
-        ch = dst->cache_entry.h;
-     }
-   
-   ytop = p[0].y;
-   for (i = 1; i < 4; i++)
-     {
-        if (p[i].y < ytop) ytop = p[i].y;
-     }
-   ybottom = p[0].y;
-   for (i = 1; i < 4; i++)
-     {
-        if (p[i].y > ybottom) ybottom = p[i].y;
-     }
-   
-   ytop = ytop >> FP;
-   ybottom = ybottom >> FP;
-   
-   if ((ytop >= (cy + ch)) || (ybottom < cy)) return;
-   
-   if (ytop < cy) ystart = cy;
-   else ystart = ytop;
-   if (ybottom >= (cy + ch)) yend = (cy + ch) - 1;
-   else yend = ybottom;
-
-   sp = src->image.data;
-   sw = src->cache_entry.w;
-   swp = sw << FP;
-   shp = src->cache_entry.h << FP;
+   int edge[4][4], edge_num, swapped, order[4];
+   FPc uv[4][2], u, v, x, h, t;
    
-#if 1
-   // maybe faster on x86?
+#if 1 // maybe faster on x86?
    for (i = 0; i < 4; i++) py[i] = p[i].y >> FP;
 # define PY(x) (py[x])   
 #else
 # define PY(x) (p[x].y >> FP) 
 #endif
-   spans = alloca((yend - ystart + 1) * sizeof(Line));
-   memset(spans, 0, (yend - ystart + 1) * sizeof(Line));
    
-   for (i = 0; i < 4; i++)
-     {
-        if (p[i].u < 0) p[i].u = 0;
-        else if (p[i].u > (src->cache_entry.w << FP))
-          p[i].u = src->cache_entry.w << FP;
-        
-        if (p[i].v < 0) p[i].v = 0;
-        else if (p[i].v > (src->cache_entry.h << FP))
-          p[i].v = src->cache_entry.h << FP;
-     }
    if ((PY(0) == PY(1)) && (PY(0) == PY(2)) && (PY(0) == PY(3)))
      {
         // all on one line. eg:
@@ -165,8 +116,6 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
      {
         for (y = ystart; y <= yend; y++)
           {
-             int edge[4][3], edge_num, swapped, order[4];
-             FPc uv[4][2], u, v, x, h, t;
         
              yp = y - ystart;
              edge_num = 0;
@@ -225,13 +174,12 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
              // calculate line x points for each edge
              for (i = 0; i < edge_num; i++)
                {
-                  int e1 = edge[i][0], e2 = edge[i][1];
+                  int e1 = edge[i][0];
+                  int e2 = edge[i][1];
 
                   h = (p[e2].y - p[e1].y) >> FP; // height of edge
                   t = (((y << FP) + (FP1 - 1)) - p[e1].y) >> FP;
                   x = p[e2].x - p[e1].x;
-//                  printf("   x = %i, p1 = %i, p2 = %i\n", x, p[e1].x, p[e2].x);
-//                  printf("   + %i\n", ((x * t) / h));
                   x = p[e1].x + ((x * t) / h);
                   
                   u = p[e2].u - p[e1].u;
@@ -243,10 +191,7 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
                   uv[i][1] = v;
                   uv[i][0] = u;
                   edge[i][2] = x >> FP;
-//                  printf("%i | %3i: %3i - %i / %i [%i->%i]\n", 
-//                         y, i, x >> FP, 
-//                         t, h >> FP, 
-//                         p[e1].x >> FP, p[e2].x >> FP);
+                  edge[i][3] = x;
                   // also fill in order
                   order[i] = i;
                }
@@ -270,19 +215,21 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
                {
                   i = 0;
                   spans[yp].span[i].x1 = edge[order[0]][2];
+                  spans[yp].span[i].o1 = edge[order[0]][3];
                   spans[yp].span[i].u[0] = uv[order[0]][0];
                   spans[yp].span[i].v[0] = uv[order[0]][1];
                   spans[yp].span[i].x2 = edge[order[1]][2];
+                  spans[yp].span[i].o2 = edge[order[1]][3];
                   spans[yp].span[i].u[1] = uv[order[1]][0];
                   spans[yp].span[i].v[1] = uv[order[1]][1];
                   if ((spans[yp].span[i].x1 >= (cx + cw)) ||
-                      (spans[yp].span[i].x2 < 0))
+                      (spans[yp].span[i].x2 < cx))
                     {
                        spans[yp].span[i].x1 = -1;
                     }
                   else
                     {
-                       if (spans[yp].span[i].x1 < 0)
+                       if (spans[yp].span[i].x1 < cx)
                          {
                             spans[yp].span[i].u[0] = 
                               _interp(spans[yp].span[i].x1,
@@ -294,7 +241,8 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
                                       spans[yp].span[i].x2, 0, 
                                       spans[yp].span[i].v[0],
                                       spans[yp].span[i].v[1]);
-                            spans[yp].span[i].x1 = 0;
+                            spans[yp].span[i].x1 = cx;
+                            spans[yp].span[i].o1 = cx << FP;
                          }
                        if (spans[yp].span[i].x2 >= (cx + cw))
                          {
@@ -309,6 +257,7 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
                                       spans[yp].span[i].v[0],
                                       spans[yp].span[i].v[1]);
                             spans[yp].span[i].x2 = (cx + cw);
+                            spans[yp].span[i].o2 = (cx + cw) << FP;
                          }
                        i++;
                        spans[yp].span[i].x1 = -1;
@@ -324,13 +273,13 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
                   spans[yp].span[i].u[1] = uv[order[1]][0];
                   spans[yp].span[i].v[1] = uv[order[1]][1];
                   if ((spans[yp].span[i].x1 >= (cx + cw)) ||
-                      (spans[yp].span[i].x2 < 0))
+                      (spans[yp].span[i].x2 < cx))
                     {
                        spans[yp].span[i].x1 = -1;
                     }
                   else
                     {
-                       if (spans[yp].span[i].x1 < 0)
+                       if (spans[yp].span[i].x1 < cx)
                          {
                             spans[yp].span[i].u[0] = 
                               _interp(spans[yp].span[i].x1,
@@ -342,7 +291,8 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
                                       spans[yp].span[i].x2, 0, 
                                       spans[yp].span[i].v[0],
                                       spans[yp].span[i].v[1]);
-                            spans[yp].span[i].x1 = 0;
+                            spans[yp].span[i].x1 = cx;
+                            spans[yp].span[i].o1 = cx << FP;
                          }
                        if (spans[yp].span[i].x2 >= (cx + cw))
                          {
@@ -357,6 +307,7 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
                                       spans[yp].span[i].v[0],
                                       spans[yp].span[i].v[1]);
                             spans[yp].span[i].x2 = (cx + cw);
+                            spans[yp].span[i].o2 = (cx + cw) << FP;
                          }
                        i++;
                     }
@@ -367,13 +318,13 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
                   spans[yp].span[i].u[1] = uv[order[3]][0];
                   spans[yp].span[i].v[1] = uv[order[3]][1];
                   if ((spans[yp].span[i].x1 >= (cx + cw)) ||
-                      (spans[yp].span[i].x2 < 0))
+                      (spans[yp].span[i].x2 < cx))
                     {
                        spans[yp].span[i].x1 = -1;
                     }
                   else
                     {
-                       int l = 0;
+                       int l = cx;
                        
                        if (i > 0) l = spans[yp].span[i - 1].x2;
                        if (spans[yp].span[i].x1 < l)
@@ -388,7 +339,8 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
                                       spans[yp].span[i].x2, l, 
                                       spans[yp].span[i].v[0],
                                       spans[yp].span[i].v[1]);
-                            spans[yp].span[i].x1 = 0;
+                            spans[yp].span[i].x1 = l;
+                            spans[yp].span[i].o1 = l << FP;
                          }
                        if (spans[yp].span[i].x2 >= (cx + cw))
                          {
@@ -403,6 +355,7 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
                                       spans[yp].span[i].v[0],
                                       spans[yp].span[i].v[1]);
                             spans[yp].span[i].x2 = (cx + cw);
+                            spans[yp].span[i].o2 = (cx + cw) << FP;
                          }
                     }
                }
@@ -412,43 +365,197 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
                }
           }
      }
+}
+
+// 66.74 % of time
+static void
+evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
+                               RGBA_Draw_Context *dc,
+                               RGBA_Map_Point *p, 
+                               int smooth, int level)
+{
+   int i;
+   int c, cx, cy, cw, ch;
+   int ytop, ybottom, ystart, yend, y, yp, sw, sh, shp, swp, direct;
+   Line *spans;
+   DATA32 *buf, *sp;
+   RGBA_Gfx_Func func;
+
+   // get the clip
+   c = dc->clip.use; cx = dc->clip.x; cy = dc->clip.y; cw = dc->clip.w; ch = dc->clip.h;
+   if (!c)
+     {
+        cx = 0;
+        cy = 0;
+        cw = dst->cache_entry.w;
+        ch = dst->cache_entry.h;
+     }
+   
+   // find y yop line and y bottom line
+   ytop = p[0].y;
+   for (i = 1; i < 4; i++)
+     {
+        if (p[i].y < ytop) ytop = p[i].y;
+     }
+   ybottom = p[0].y;
+   for (i = 1; i < 4; i++)
+     {
+        if (p[i].y > ybottom) ybottom = p[i].y;
+     }
+   
+   // convert to screen space from fixed point
+   ytop = ytop >> FP;
+   ybottom = ybottom >> FP;
+   
+   // if its outside the clip vertical bounds - don't bother
+   if ((ytop >= (cy + ch)) || (ybottom < cy)) return;
+   
+   // limit to the clip vertical bounds
+   if (ytop < cy) ystart = cy;
+   else ystart = ytop;
+   if (ybottom >= (cy + ch)) yend = (cy + ch) - 1;
+   else yend = ybottom;
+
+   // get some source image information
+   sp = src->image.data;
+   sw = src->cache_entry.w;
+   swp = sw << (FP + FPI);
+   shp = src->cache_entry.h << (FP + FPI);
+
+   // limit u,v coords of points to be within the source image
+   for (i = 0; i < 4; i++)
+     {
+        if (p[i].u < 0) p[i].u = 0;
+        else if (p[i].u > (src->cache_entry.w << FP))
+          p[i].u = src->cache_entry.w << FP;
+        
+        if (p[i].v < 0) p[i].v = 0;
+        else if (p[i].v > (src->cache_entry.h << FP))
+          p[i].v = src->cache_entry.h << FP;
+     }
+   
+   // allocate some spans to hold out span list
+   spans = alloca((yend - ystart + 1) * sizeof(Line));
+   if (!spans) return;
+   memset(spans, 0, (yend - ystart + 1) * sizeof(Line));
 
-   if ((!src->cache_entry.flags.alpha) &&
-       (!dst->cache_entry.flags.alpha) &&
+   // calculate the spans list
+   _calc_spans(p, spans, ystart, yend, cx, cy, cw, ch);
+   
+   // walk through spans and render
+   
+   // if operation is solid, bypass buf and draw func and draw direct to dst
+   direct = 0;
+   if ((!src->cache_entry.flags.alpha) && (!dst->cache_entry.flags.alpha) &&
        (!dc->mul.use))
+     direct = 1;
+   else
+     {
+        buf = alloca(cw * sizeof(DATA32));
+        if (!buf) return;
+        
+        if (dc->mul.use)
+          func = evas_common_gfx_func_composite_pixel_color_span_get(src, dc->mul.col, dst, cw, dc->render_op);
+        else
+          func = evas_common_gfx_func_composite_pixel_span_get(src, dst, cw, dc->render_op);
+     }
+   if (smooth)
      {
         for (y = ystart; y <= yend; y++)
           {
-             int x, w, ww, dx, dy, sx, sy;
-             FPc u, v, ud, vd, up, vp;
-             DATA32 *d, *dptr, *s, *so[4], val1, val2;
+             int x, w, ww;
+             FPc u, v, ud, vd, dv;
+             DATA32 *d, *s, *so[4], val1, val2;
              yp = y - ystart;
              
              for (i = 0; i < 2; i++)
                {
                   if (spans[yp].span[i].x1 >= 0)
                     {
+                       long long tl;
+                       
                        x = spans[yp].span[i].x1;
                        w = (spans[yp].span[i].x2 - x);
                        
                        if (w <= 0) continue;
+                       
+                       dv = (spans[yp].span[i].o2 - spans[yp].span[i].o1);
+                       
                        ww = w;
-                       d = dst->image.data + (y * dst->cache_entry.w) + x;
                        u = spans[yp].span[i].u[0] << FPI;
                        v = spans[yp].span[i].v[0] << FPI;
                        ud = ((spans[yp].span[i].u[1] << FPI) - u) / w;
                        vd = ((spans[yp].span[i].v[1] << FPI) - v) / w;
+                       tl = (long long)ud * (w << FP);
+                       tl = tl / dv;
+                       ud = tl;
+                       u -= (ud * (spans[yp].span[i].o1 - (spans[yp].span[i].x1 << FP))) / FP1;
+                       
+                       tl = (long long)vd * (w << FP);
+                       tl = tl / dv;
+                       vd = tl;
+                       v -= (vd * (spans[yp].span[i].o1 - (spans[yp].span[i].x1 << FP))) / FP1;
+                       
                        if (ud < 0) u -= 1;
                        if (vd < 0) v -= 1;
+                       
+                       if (direct)
+                         d = dst->image.data + (y * dst->cache_entry.w) + x;
+                       else
+                         d = buf;
+                       
                        while (ww > 0)
                          {
-                            s = sp + ((v >> (FP + FPI)) * sw) + 
-                              (u >> (FP + FPI));
-                            *d++ = *s;
+                            FPc u1, v1, u2, v2;
+                            FPc rv, ru;
+                            DATA32 vala, valb, val1, val2;
+                            
+                            u1 = u;
+                            if (u1 < 0) u1 = 0;
+                            else if (u1 >= swp) u1 = swp - 1;
+                            
+                            v1 = v;
+                            if (v1 < 0) v1 = 0;
+                            else if (v1 >= shp) v1 = shp - 1;
+                            
+                            u2 = u1 + FPFPI1;
+                            if (u2 >= swp) u2 = swp - 1;
+                            
+                            v2 = v1 + FPFPI1;
+                            if (v2 >= shp) v2 = shp - 1;
+                            
+                            ru = (u >> (FP + FPI - 8)) & 0xff;
+                            rv = (v >> (FP + FPI - 8)) & 0xff;
+                            
+                            s = sp + ((v1 >> (FP + FPI)) * sw) + 
+                              (u1 >> (FP + FPI));
+                            val1 = *s;
+                            s = sp + ((v1 >> (FP + FPI)) * sw) + 
+                              (u2 >> (FP + FPI));
+                            val2 = *s;
+                            vala = INTERP_256(ru, val2, val1);
+                            
+                            s = sp + ((v2 >> (FP + FPI)) * sw) + 
+                              (u1 >> (FP + FPI));
+                            val1 = *s;
+                            s = sp + ((v2 >> (FP + FPI)) * sw) + 
+                              (u2 >> (FP + FPI));
+                            val2 = *s;
+                            valb = INTERP_256(ru, val2, val1);
+                            
+                            *d++ = INTERP_256(rv, valb, vala);
+                            
                             u += ud;
                             v += vd;
                             ww--;
                          }
+                       
+                       if (!direct)
+                         {
+                            d = dst->image.data;
+                            d += (y * dst->cache_entry.w) + x;
+                            func(buf, NULL, dc->mul.col, d, w);
+                         }
                     }
                   else break;
                }
@@ -456,18 +563,11 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
      }
    else
      {
-        buf = alloca(cw * sizeof(DATA32));
-        
-        if (dc->mul.use)
-          func = evas_common_gfx_func_composite_pixel_color_span_get(src, dc->mul.col, dst, cw, dc->render_op);
-        else
-          func = evas_common_gfx_func_composite_pixel_span_get(src, dst, cw, dc->render_op);
-   
         for (y = ystart; y <= yend; y++)
           {
-             int x, w, ww, dx, dy, sx, sy;
-             FPc u, v, ud, vd, up, vp;
-             DATA32 *d, *dptr, *s, *so[4], val1, val2;
+             int x, w, ww;
+             FPc u, v, ud, vd;
+             DATA32 *d, *s;
              yp = y - ystart;
              
              for (i = 0; i < 2; i++)
@@ -479,13 +579,18 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
                        
                        if (w <= 0) continue;
                        ww = w;
-                       d = buf;
                        u = spans[yp].span[i].u[0] << FPI;
                        v = spans[yp].span[i].v[0] << FPI;
                        ud = ((spans[yp].span[i].u[1] << FPI) - u) / w;
                        vd = ((spans[yp].span[i].v[1] << FPI) - v) / w;
                        if (ud < 0) u -= 1;
                        if (vd < 0) v -= 1;
+                       
+                       if (direct)
+                         d = dst->image.data + (y * dst->cache_entry.w) + x;
+                       else
+                         d = buf;
+                       
                        while (ww > 0)
                          {
                             s = sp + ((v >> (FP + FPI)) * sw) + 
@@ -495,9 +600,13 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
                             v += vd;
                             ww--;
                          }
-                       dptr = dst->image.data;
-                       dptr += (y * dst->cache_entry.w) + x;
-                       func(buf, NULL, dc->mul.col, dptr, w);
+                       
+                       if (!direct)
+                         {
+                            d = dst->image.data;
+                            d += (y * dst->cache_entry.w) + x;
+                            func(buf, NULL, dc->mul.col, d, w);
+                         }
                     }
                   else break;
                }