return _mm_packs_pu16 (lo, hi);
}
-static force_inline uint32_t
-store8888 (__m64 v)
+static force_inline void
+store8888 (uint32_t *dest, __m64 v)
{
- return _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ()));
+ v = pack8888 (v, _mm_setzero_si64());
+ *dest = _mm_cvtsi64_si32 (v);
}
/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
m = expand_alpha (m);
s = pix_multiply (s, m);
- ssrc = store8888 (s);
+ store8888 (&ssrc, s);
}
return ssrc;
__m64 s, sa;
s = load8888 (ssrc);
sa = expand_alpha (s);
- *dest = store8888 (over (s, sa, load8888 (*dest)));
+ store8888 (dest, over (s, sa, load8888 (*dest)));
}
++dest;
d = load8888 (*dest);
da = expand_alpha (d);
- *dest = store8888 (over (d, da, load8888 (s)));
+ store8888 (dest, over (d, da, load8888 (s)));
++dest;
++src;
a = expand_alpha (a);
x = pix_multiply (x, a);
- *dest = store8888 (x);
+ store8888 (dest, x);
++dest;
++src;
a = load8888 (combine (src, mask));
a = expand_alpha (a);
x = pix_multiply (x, a);
- *dest = store8888 (x);
+ store8888 (dest, x);
++dest;
++src;
a = expand_alpha (a);
a = negate (a);
x = pix_multiply (x, a);
- *dest = store8888 (x);
+ store8888 (dest, x);
++dest;
++src;
a = negate (a);
x = pix_multiply (x, a);
- *dest = store8888 (x);
+ store8888 (dest, x);
++dest;
++src;
sia = negate (sia);
da = expand_alpha (d);
s = pix_add_mul (s, da, d, sia);
- *dest = store8888 (s);
+ store8888 (dest, s);
++dest;
++src;
dia = expand_alpha (d);
dia = negate (dia);
s = pix_add_mul (s, dia, d, sa);
- *dest = store8888 (s);
+ store8888 (dest, s);
++dest;
++src;
sia = negate (sia);
dia = negate (dia);
s = pix_add_mul (s, dia, d, sia);
- *dest = store8888 (s);
+ store8888 (dest, s);
++dest;
++src;
s = load8888 (combine (src, mask));
d = load8888 (*dest);
s = pix_add (s, d);
- *dest = store8888 (s);
+ store8888 (dest, s);
++dest;
++src;
}
md = pix_add (md, ms);
- *dest = store8888 (md);
+ store8888 (dest, md);
++src;
++dest;
__m64 s = load8888 (*src);
s = pix_multiply (s, a);
- *dest = store8888 (s);
+ store8888 (dest, s);
++src;
++mask;
__m64 d = load8888 (*dest);
__m64 sa = expand_alpha (s);
- *dest = store8888 (in_over (s, sa, a, d));
+ store8888 (dest, in_over (s, sa, a, d));
++src;
++dest;
__m64 d = load8888 (*dest);
__m64 da = expand_alpha (d);
- *dest = store8888 (over (d, da, in (s, a)));
+ store8888 (dest, over (d, da, in (s, a)));
++src;
++dest;
s = pix_multiply (s, a);
s = pix_multiply (s, da);
- *dest = store8888 (s);
+ store8888 (dest, s);
++src;
++dest;
a = pix_multiply (a, sa);
d = pix_multiply (d, a);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
da = negate (da);
s = pix_multiply (s, a);
s = pix_multiply (s, da);
- *dest = store8888 (s);
+ store8888 (dest, s);
++src;
++dest;
a = pix_multiply (a, sa);
a = negate (a);
d = pix_multiply (d, a);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
a = pix_multiply (a, sa);
a = negate (a);
d = pix_add_mul (d, a, s, da);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
a = pix_multiply (a, sa);
da = negate (da);
d = pix_add_mul (d, a, s, da);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
da = negate (da);
a = negate (a);
d = pix_add_mul (d, a, s, da);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
s = pix_multiply (s, a);
d = pix_add (s, d);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
while (w && (unsigned long)dst & 7)
{
- *dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
+ store8888 (dst, over (vsrc, vsrca, load8888 (*dst)));
w--;
dst++;
if (w)
{
- *dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
+ store8888 (dst, over (vsrc, vsrca, load8888 (*dst)));
}
}
{
__m64 vdest = load8888 (*q);
vdest = in_over (vsrc, vsrca, load8888 (m), vdest);
- *q = store8888 (vdest);
+ store8888 (q, vdest);
}
twidth--;
{
__m64 vdest = load8888 (*q);
vdest = in_over (vsrc, vsrca, load8888 (m), vdest);
- *q = store8888 (vdest);
+ store8888 (q, vdest);
}
twidth--;
__m64 s = load8888 (*src);
__m64 d = load8888 (*dst);
- *dst = store8888 (in_over (s, expand_alpha (s), vmask, d));
+ store8888 (dst, in_over (s, expand_alpha (s), vmask, d));
w--;
dst++;
__m64 s = load8888 (*src);
__m64 d = load8888 (*dst);
- *dst = store8888 (in_over (s, expand_alpha (s), vmask, d));
+ store8888 (dst, in_over (s, expand_alpha (s), vmask, d));
}
}
__m64 s = load8888 (*src | 0xff000000);
__m64 d = load8888 (*dst);
- *dst = store8888 (in_over (s, srca, vmask, d));
+ store8888 (dst, in_over (s, srca, vmask, d));
w--;
dst++;
__m64 s = load8888 (*src | 0xff000000);
__m64 d = load8888 (*dst);
- *dst = store8888 (in_over (s, srca, vmask, d));
+ store8888 (dst, in_over (s, srca, vmask, d));
w--;
dst++;
__m64 ms, sa;
ms = load8888 (s);
sa = expand_alpha (ms);
- *dst = store8888 (over (ms, sa, load8888 (*dst)));
+ store8888 (dst, over (ms, sa, load8888 (*dst)));
}
dst++;
expand_alpha_rev (to_m64 (m)),
load8888 (*dst));
- *dst = store8888 (vdest);
+ store8888 (dst, vdest);
}
w--;
vdest = in_over (
vsrc, vsrca, expand_alpha_rev (to_m64 (m)), vdest);
- *dst = store8888 (vdest);
+ store8888 (dst, vdest);
}
}
}
{
__m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
- *dst = store8888 (vdest);
+ store8888 (dst, vdest);
}
else
{
__m64 vdest = load8888 (*dst);
vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
- *dst = store8888 (vdest);
+ store8888 (dst, vdest);
}
else
{
__m64 s = load8888 (*src);
__m64 d = load8888 (*dst);
- *dst = store8888 (over_rev_non_pre (s, d));
+ store8888 (dst, over_rev_non_pre (s, d));
w--;
dst++;
__m64 s = load8888 (*src);
__m64 d = load8888 (*dst);
- *dst = store8888 (over_rev_non_pre (s, d));
+ store8888 (dst, over_rev_non_pre (s, d));
}
}
vmask = load8888 (ldl_u((uint32_t *)mask));
vdest = load8888 (*(uint32_t *)dst);
- *(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest));
+ store8888 ((uint32_t *)dst, in (in (vsrca, vmask), vdest));
dst += 4;
mask += 4;
uint32_t *s = (uint32_t *)src;
uint32_t *d = (uint32_t *)dst;
- *d = store8888 (in (load8888 (ldl_u((uint32_t *)s)), load8888 (*d)));
+ store8888 (d, in (load8888 (ldl_u((uint32_t *)s)), load8888 (*d)));
w -= 4;
dst += 4;
vmask = load8888 (ldl_u((uint32_t *)mask));
vdest = load8888 (*(uint32_t *)dst);
- *(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest));
+ store8888 ((uint32_t *)dst, _mm_adds_pu8 (in (vsrca, vmask), vdest));
dst += 4;
mask += 4;
if (m == 0xff)
{
- *dst = store8888 (s);
+ store8888 (dst, s);
}
else
{
__m64 vm = expand_alpha_rev (to_m64 (m));
__m64 vdest = in_over (s, sa, vm, load8888 (*dst));
- *dst = store8888 (vdest);
+ store8888 (dst, vdest);
}
}