#include "evas_convert_grypal_6.h"
#include "evas_convert_yuv.h"
-#ifdef USE_DITHER_44
const DATA8 _evas_dither_44[4][4] =
{
{ 0, 8, 2, 10},
{ 3, 11, 1, 9},
{15, 7, 13, 5}
};
-#endif
-#ifdef USE_DITHER_128128
const DATA8 _evas_dither_128128[128][128] =
{
{ 0, 41, 23, 5, 17, 39, 7, 15, 62, 23, 40, 51, 31, 47, 9, 32, 52, 27, 57, 25, 6, 61, 27, 52, 37, 7, 40, 63, 18, 36, 10, 42, 25, 62, 45, 34, 20, 42, 37, 14, 35, 29, 50, 10, 61, 2, 40, 8, 37, 12, 58, 22, 5, 41, 10, 39, 0, 60, 11, 46, 2, 55, 38, 17, 36, 59, 13, 54, 37, 56, 8, 29, 16, 13, 63, 22, 41, 55, 7, 20, 49, 14, 23, 55, 37, 23, 19, 36, 15, 49, 23, 63, 30, 14, 38, 27, 53, 13, 22, 41, 19, 31, 7, 19, 50, 30, 49, 16, 3, 32, 56, 40, 29, 34, 8, 48, 19, 45, 4, 51, 12, 46, 35, 49, 16, 42, 12, 62 },
{ 19, 42, 9, 48, 2, 44, 11, 37, 48, 20, 33, 16, 55, 35, 49, 15, 37, 20, 59, 16, 53, 22, 56, 31, 50, 11, 34, 54, 16, 51, 4, 49, 33, 53, 21, 28, 56, 24, 31, 9, 52, 16, 48, 24, 44, 13, 51, 20, 31, 49, 18, 6, 34, 2, 44, 14, 47, 8, 15, 43, 13, 41, 33, 52, 20, 61, 7, 51, 34, 62, 4, 20, 36, 33, 43, 8, 46, 13, 53, 17, 45, 42, 9, 31, 52, 11, 30, 56, 13, 59, 17, 44, 27, 6, 62, 11, 43, 17, 49, 38, 26, 2, 16, 27, 58, 21, 54, 18, 26, 5, 35, 61, 43, 27, 7, 39, 14, 58, 37, 55, 20, 33, 13, 40, 62, 10, 55, 5 },
{ 51, 14, 61, 29, 59, 20, 55, 31, 0, 49, 11, 60, 3, 26, 22, 56, 0, 40, 12, 43, 41, 8, 36, 0, 17, 57, 24, 2, 46, 26, 61, 18, 0, 38, 12, 59, 6, 49, 3, 57, 19, 63, 5, 33, 18, 54, 28, 56, 0, 43, 26, 46, 63, 27, 56, 22, 27, 54, 38, 28, 63, 24, 10, 45, 0, 31, 42, 21, 12, 25, 44, 49, 59, 6, 26, 50, 3, 34, 27, 59, 0, 35, 62, 16, 4, 58, 47, 0, 43, 24, 37, 2, 54, 20, 46, 31, 0, 56, 34, 5, 55, 45, 60, 37, 0, 40, 10, 38, 63, 46, 15, 20, 0, 53, 21, 62, 30, 11, 24, 27, 40, 0, 57, 26, 3, 45, 27, 35 }
};
-#endif
EAPI void
evas_common_convert_init(void)
struct _Span
{
int x1, x2;
+ FPc o1, o2; // span end x values before the >> FP that yields x1/x2; reset to (clip edge << FP) when x1/x2 are clipped
FPc u[2], v[2];
};
RGBA_Map_Point *p,
int smooth, int level);
+//extern const DATA8 _evas_dither_44[4][4];
+//extern const DATA8 _evas_dither_128128[128][128];
+
static FPc
_interp(int x1, int x2, int p, FPc u1, FPc u2)
{
dc->clip.use = c; dc->clip.x = cx; dc->clip.y = cy; dc->clip.w = cw; dc->clip.h = ch;
}
+// 12.63 % of time - this can improve
static void
-evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
- RGBA_Draw_Context *dc,
- RGBA_Map_Point *p,
- int smooth, int level)
+_calc_spans(RGBA_Map_Point *p, Line *spans, int ystart, int yend, int cx, int cy, int cw, int ch)
{
- int i;
- int c, cx, cy, cw, ch;
- int ytop, ybottom, ystart, yend, y, yp, sw, shp, swp;
+ int i, y, yp;
int py[4];
- Line *spans = NULL;
- DATA32 *buf, *sp;
- RGBA_Gfx_Func func;
-
- c = dc->clip.use; cx = dc->clip.x; cy = dc->clip.y; cw = dc->clip.w; ch = dc->clip.h;
- if (!c)
- {
- cx = 0;
- cy = 0;
- cw = dst->cache_entry.w;
- ch = dst->cache_entry.h;
- }
-
- ytop = p[0].y;
- for (i = 1; i < 4; i++)
- {
- if (p[i].y < ytop) ytop = p[i].y;
- }
- ybottom = p[0].y;
- for (i = 1; i < 4; i++)
- {
- if (p[i].y > ybottom) ybottom = p[i].y;
- }
-
- ytop = ytop >> FP;
- ybottom = ybottom >> FP;
-
- if ((ytop >= (cy + ch)) || (ybottom < cy)) return;
-
- if (ytop < cy) ystart = cy;
- else ystart = ytop;
- if (ybottom >= (cy + ch)) yend = (cy + ch) - 1;
- else yend = ybottom;
-
- sp = src->image.data;
- sw = src->cache_entry.w;
- swp = sw << FP;
- shp = src->cache_entry.h << FP;
+ int edge[4][4], edge_num, swapped, order[4];
+ FPc uv[4][2], u, v, x, h, t;
-#if 1
- // maybe faster on x86?
+#if 1 // maybe faster on x86?
for (i = 0; i < 4; i++) py[i] = p[i].y >> FP;
# define PY(x) (py[x])
#else
# define PY(x) (p[x].y >> FP)
#endif
- spans = alloca((yend - ystart + 1) * sizeof(Line));
- memset(spans, 0, (yend - ystart + 1) * sizeof(Line));
- for (i = 0; i < 4; i++)
- {
- if (p[i].u < 0) p[i].u = 0;
- else if (p[i].u > (src->cache_entry.w << FP))
- p[i].u = src->cache_entry.w << FP;
-
- if (p[i].v < 0) p[i].v = 0;
- else if (p[i].v > (src->cache_entry.h << FP))
- p[i].v = src->cache_entry.h << FP;
- }
if ((PY(0) == PY(1)) && (PY(0) == PY(2)) && (PY(0) == PY(3)))
{
// all on one line. eg:
{
for (y = ystart; y <= yend; y++)
{
- int edge[4][3], edge_num, swapped, order[4];
- FPc uv[4][2], u, v, x, h, t;
yp = y - ystart;
edge_num = 0;
// calculate line x points for each edge
for (i = 0; i < edge_num; i++)
{
- int e1 = edge[i][0], e2 = edge[i][1];
+ int e1 = edge[i][0];
+ int e2 = edge[i][1];
h = (p[e2].y - p[e1].y) >> FP; // height of edge
t = (((y << FP) + (FP1 - 1)) - p[e1].y) >> FP;
x = p[e2].x - p[e1].x;
-// printf(" x = %i, p1 = %i, p2 = %i\n", x, p[e1].x, p[e2].x);
-// printf(" + %i\n", ((x * t) / h));
x = p[e1].x + ((x * t) / h);
u = p[e2].u - p[e1].u;
uv[i][1] = v;
uv[i][0] = u;
edge[i][2] = x >> FP;
-// printf("%i | %3i: %3i - %i / %i [%i->%i]\n",
-// y, i, x >> FP,
-// t, h >> FP,
-// p[e1].x >> FP, p[e2].x >> FP);
+ edge[i][3] = x;
// also fill in order
order[i] = i;
}
{
i = 0;
spans[yp].span[i].x1 = edge[order[0]][2];
+ spans[yp].span[i].o1 = edge[order[0]][3];
spans[yp].span[i].u[0] = uv[order[0]][0];
spans[yp].span[i].v[0] = uv[order[0]][1];
spans[yp].span[i].x2 = edge[order[1]][2];
+ spans[yp].span[i].o2 = edge[order[1]][3];
spans[yp].span[i].u[1] = uv[order[1]][0];
spans[yp].span[i].v[1] = uv[order[1]][1];
if ((spans[yp].span[i].x1 >= (cx + cw)) ||
- (spans[yp].span[i].x2 < 0))
+ (spans[yp].span[i].x2 < cx))
{
spans[yp].span[i].x1 = -1;
}
else
{
- if (spans[yp].span[i].x1 < 0)
+ if (spans[yp].span[i].x1 < cx)
{
spans[yp].span[i].u[0] =
_interp(spans[yp].span[i].x1,
spans[yp].span[i].x2, 0,
spans[yp].span[i].v[0],
spans[yp].span[i].v[1]);
- spans[yp].span[i].x1 = 0;
+ spans[yp].span[i].x1 = cx;
+ spans[yp].span[i].o1 = cx << FP;
}
if (spans[yp].span[i].x2 >= (cx + cw))
{
spans[yp].span[i].v[0],
spans[yp].span[i].v[1]);
spans[yp].span[i].x2 = (cx + cw);
+ spans[yp].span[i].o2 = (cx + cw) << FP;
}
i++;
spans[yp].span[i].x1 = -1;
spans[yp].span[i].u[1] = uv[order[1]][0];
spans[yp].span[i].v[1] = uv[order[1]][1];
if ((spans[yp].span[i].x1 >= (cx + cw)) ||
- (spans[yp].span[i].x2 < 0))
+ (spans[yp].span[i].x2 < cx))
{
spans[yp].span[i].x1 = -1;
}
else
{
- if (spans[yp].span[i].x1 < 0)
+ if (spans[yp].span[i].x1 < cx)
{
spans[yp].span[i].u[0] =
_interp(spans[yp].span[i].x1,
spans[yp].span[i].x2, 0,
spans[yp].span[i].v[0],
spans[yp].span[i].v[1]);
- spans[yp].span[i].x1 = 0;
+ spans[yp].span[i].x1 = cx;
+ spans[yp].span[i].o1 = cx << FP;
}
if (spans[yp].span[i].x2 >= (cx + cw))
{
spans[yp].span[i].v[0],
spans[yp].span[i].v[1]);
spans[yp].span[i].x2 = (cx + cw);
+ spans[yp].span[i].o2 = (cx + cw) << FP;
}
i++;
}
spans[yp].span[i].u[1] = uv[order[3]][0];
spans[yp].span[i].v[1] = uv[order[3]][1];
if ((spans[yp].span[i].x1 >= (cx + cw)) ||
- (spans[yp].span[i].x2 < 0))
+ (spans[yp].span[i].x2 < cx))
{
spans[yp].span[i].x1 = -1;
}
else
{
- int l = 0;
+ int l = cx;
if (i > 0) l = spans[yp].span[i - 1].x2;
if (spans[yp].span[i].x1 < l)
spans[yp].span[i].x2, l,
spans[yp].span[i].v[0],
spans[yp].span[i].v[1]);
- spans[yp].span[i].x1 = 0;
+ spans[yp].span[i].x1 = l;
+ spans[yp].span[i].o1 = l << FP;
}
if (spans[yp].span[i].x2 >= (cx + cw))
{
spans[yp].span[i].v[0],
spans[yp].span[i].v[1]);
spans[yp].span[i].x2 = (cx + cw);
+ spans[yp].span[i].o2 = (cx + cw) << FP;
}
}
}
}
}
}
+}
+
+// 66.74 % of time
+static void
+evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
+ RGBA_Draw_Context *dc,
+ RGBA_Map_Point *p,
+ int smooth, int level)
+{
+ int i;
+ int c, cx, cy, cw, ch;
+ int ytop, ybottom, ystart, yend, y, yp, sw, sh, shp, swp, direct;
+ Line *spans;
+ DATA32 *buf, *sp;
+ RGBA_Gfx_Func func;
+
+ // get the clip
+ c = dc->clip.use; cx = dc->clip.x; cy = dc->clip.y; cw = dc->clip.w; ch = dc->clip.h;
+ if (!c)
+ {
+ cx = 0;
+ cy = 0;
+ cw = dst->cache_entry.w;
+ ch = dst->cache_entry.h;
+ }
+
+ // find y top line and y bottom line
+ ytop = p[0].y;
+ for (i = 1; i < 4; i++)
+ {
+ if (p[i].y < ytop) ytop = p[i].y;
+ }
+ ybottom = p[0].y;
+ for (i = 1; i < 4; i++)
+ {
+ if (p[i].y > ybottom) ybottom = p[i].y;
+ }
+
+ // convert to screen space from fixed point
+ ytop = ytop >> FP;
+ ybottom = ybottom >> FP;
+
+ // if it's outside the clip vertical bounds - don't bother
+ if ((ytop >= (cy + ch)) || (ybottom < cy)) return;
+
+ // limit to the clip vertical bounds
+ if (ytop < cy) ystart = cy;
+ else ystart = ytop;
+ if (ybottom >= (cy + ch)) yend = (cy + ch) - 1;
+ else yend = ybottom;
+
+ // get some source image information
+ sp = src->image.data;
+ sw = src->cache_entry.w;
+ swp = sw << (FP + FPI);
+ shp = src->cache_entry.h << (FP + FPI);
+
+ // limit u,v coords of points to be within the source image
+ for (i = 0; i < 4; i++)
+ {
+ if (p[i].u < 0) p[i].u = 0;
+ else if (p[i].u > (src->cache_entry.w << FP))
+ p[i].u = src->cache_entry.w << FP;
+
+ if (p[i].v < 0) p[i].v = 0;
+ else if (p[i].v > (src->cache_entry.h << FP))
+ p[i].v = src->cache_entry.h << FP;
+ }
+
+ // allocate some spans to hold our span list
+ spans = alloca((yend - ystart + 1) * sizeof(Line));
+ if (!spans) return;
+ memset(spans, 0, (yend - ystart + 1) * sizeof(Line));
- if ((!src->cache_entry.flags.alpha) &&
- (!dst->cache_entry.flags.alpha) &&
+ // calculate the spans list
+ _calc_spans(p, spans, ystart, yend, cx, cy, cw, ch);
+
+ // walk through spans and render
+
+ // if operation is solid, bypass buf and draw func and draw direct to dst
+ direct = 0;
+ if ((!src->cache_entry.flags.alpha) && (!dst->cache_entry.flags.alpha) &&
(!dc->mul.use))
+ direct = 1;
+ else
+ {
+ buf = alloca(cw * sizeof(DATA32));
+ if (!buf) return;
+
+ if (dc->mul.use)
+ func = evas_common_gfx_func_composite_pixel_color_span_get(src, dc->mul.col, dst, cw, dc->render_op);
+ else
+ func = evas_common_gfx_func_composite_pixel_span_get(src, dst, cw, dc->render_op);
+ }
+ if (smooth)
{
for (y = ystart; y <= yend; y++)
{
- int x, w, ww, dx, dy, sx, sy;
- FPc u, v, ud, vd, up, vp;
- DATA32 *d, *dptr, *s, *so[4], val1, val2;
+ int x, w, ww;
+ FPc u, v, ud, vd, dv;
+ DATA32 *d, *s, *so[4], val1, val2;
yp = y - ystart;
for (i = 0; i < 2; i++)
{
if (spans[yp].span[i].x1 >= 0)
{
+ long long tl;
+
x = spans[yp].span[i].x1;
w = (spans[yp].span[i].x2 - x);
if (w <= 0) continue;
+
+ dv = (spans[yp].span[i].o2 - spans[yp].span[i].o1);
+
ww = w;
- d = dst->image.data + (y * dst->cache_entry.w) + x;
u = spans[yp].span[i].u[0] << FPI;
v = spans[yp].span[i].v[0] << FPI;
ud = ((spans[yp].span[i].u[1] << FPI) - u) / w;
vd = ((spans[yp].span[i].v[1] << FPI) - v) / w;
+ tl = (long long)ud * (w << FP);
+ tl = tl / dv;
+ ud = tl;
+ u -= (ud * (spans[yp].span[i].o1 - (spans[yp].span[i].x1 << FP))) / FP1;
+
+ tl = (long long)vd * (w << FP);
+ tl = tl / dv;
+ vd = tl;
+ v -= (vd * (spans[yp].span[i].o1 - (spans[yp].span[i].x1 << FP))) / FP1;
+
if (ud < 0) u -= 1;
if (vd < 0) v -= 1;
+
+ if (direct)
+ d = dst->image.data + (y * dst->cache_entry.w) + x;
+ else
+ d = buf;
+
while (ww > 0)
{
- s = sp + ((v >> (FP + FPI)) * sw) +
- (u >> (FP + FPI));
- *d++ = *s;
+ FPc u1, v1, u2, v2;
+ FPc rv, ru;
+ DATA32 vala, valb, val1, val2;
+
+ u1 = u;
+ if (u1 < 0) u1 = 0;
+ else if (u1 >= swp) u1 = swp - 1;
+
+ v1 = v;
+ if (v1 < 0) v1 = 0;
+ else if (v1 >= shp) v1 = shp - 1;
+
+ u2 = u1 + FPFPI1;
+ if (u2 >= swp) u2 = swp - 1;
+
+ v2 = v1 + FPFPI1;
+ if (v2 >= shp) v2 = shp - 1;
+
+ ru = (u >> (FP + FPI - 8)) & 0xff;
+ rv = (v >> (FP + FPI - 8)) & 0xff;
+
+ s = sp + ((v1 >> (FP + FPI)) * sw) +
+ (u1 >> (FP + FPI));
+ val1 = *s;
+ s = sp + ((v1 >> (FP + FPI)) * sw) +
+ (u2 >> (FP + FPI));
+ val2 = *s;
+ vala = INTERP_256(ru, val2, val1);
+
+ s = sp + ((v2 >> (FP + FPI)) * sw) +
+ (u1 >> (FP + FPI));
+ val1 = *s;
+ s = sp + ((v2 >> (FP + FPI)) * sw) +
+ (u2 >> (FP + FPI));
+ val2 = *s;
+ valb = INTERP_256(ru, val2, val1);
+
+ *d++ = INTERP_256(rv, valb, vala);
+
u += ud;
v += vd;
ww--;
}
+
+ if (!direct)
+ {
+ d = dst->image.data;
+ d += (y * dst->cache_entry.w) + x;
+ func(buf, NULL, dc->mul.col, d, w);
+ }
}
else break;
}
}
else
{
- buf = alloca(cw * sizeof(DATA32));
-
- if (dc->mul.use)
- func = evas_common_gfx_func_composite_pixel_color_span_get(src, dc->mul.col, dst, cw, dc->render_op);
- else
- func = evas_common_gfx_func_composite_pixel_span_get(src, dst, cw, dc->render_op);
-
for (y = ystart; y <= yend; y++)
{
- int x, w, ww, dx, dy, sx, sy;
- FPc u, v, ud, vd, up, vp;
- DATA32 *d, *dptr, *s, *so[4], val1, val2;
+ int x, w, ww;
+ FPc u, v, ud, vd;
+ DATA32 *d, *s;
yp = y - ystart;
for (i = 0; i < 2; i++)
if (w <= 0) continue;
ww = w;
- d = buf;
u = spans[yp].span[i].u[0] << FPI;
v = spans[yp].span[i].v[0] << FPI;
ud = ((spans[yp].span[i].u[1] << FPI) - u) / w;
vd = ((spans[yp].span[i].v[1] << FPI) - v) / w;
if (ud < 0) u -= 1;
if (vd < 0) v -= 1;
+
+ if (direct)
+ d = dst->image.data + (y * dst->cache_entry.w) + x;
+ else
+ d = buf;
+
while (ww > 0)
{
s = sp + ((v >> (FP + FPI)) * sw) +
v += vd;
ww--;
}
- dptr = dst->image.data;
- dptr += (y * dst->cache_entry.w) + x;
- func(buf, NULL, dc->mul.col, dptr, w);
+
+ if (!direct)
+ {
+ d = dst->image.data;
+ d += (y * dst->cache_entry.w) + x;
+ func(buf, NULL, dc->mul.col, d, w);
+ }
}
else break;
}