vector unsigned int vdest, vsrc, vmask;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf("%s\n",__PRETTY_FUNCTION__); */
int width)
{
int i;
- vector unsigned int vdest, vsrc, vmask;
+ vector unsigned int vdest, vsrc, vmask, vsrca;
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
-
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
-
+
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
{
-
LOAD_VECTORSC (dest, src, mask);
-
- vdest = pix_add_mul (pix_multiply (vsrc, vmask), splat_alpha (vdest),
- vdest,
- negate (pix_multiply (vmask,
- splat_alpha (vmask))));
-
+
+ vsrca = splat_alpha (vsrc);
+
+ vsrc = pix_multiply (vsrc, vmask);
+ vmask = pix_multiply (vmask, vsrca);
+
+ vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
+ negate (vmask), vdest);
+
STORE_VECTOR (dest);
-
+
src += 4;
dest += 4;
mask += 4;
}
-
+
for (i = width % 4; --i >= 0;)
{
uint32_t a = mask[i];
uint32_t d = dest[i];
uint32_t sa = ALPHA_8 (s);
uint32_t da = ALPHA_8 (d);
-
+
UN8x4_MUL_UN8x4 (s, a);
UN8x4_MUL_UN8 (a, sa);
UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);