#include "pixman-vmx.h"
#include <altivec.h>
+/*
+ x_c = (x_c * a) / 255
+*/
+#define FbByteMul(x, a) do { \
+ uint32_t t = ((x & 0xff00ff) * a) + 0x800080; \
+ t = (t + ((t >> 8) & 0xff00ff)) >> 8; \
+ t &= 0xff00ff; \
+ \
+ x = (((x >> 8) & 0xff00ff) * a) + 0x800080; \
+ x = (x + ((x >> 8) & 0xff00ff)); \
+ x &= 0xff00ff00; \
+ x += t; \
+ } while (0)
+
+/*
+ x_c = (x_c * a) / 255 + y
+*/
+#define FbByteMulAdd(x, a, y) do { \
+ uint32_t t = ((x & 0xff00ff) * a) + 0x800080; \
+ t = (t + ((t >> 8) & 0xff00ff)) >> 8; \
+ t &= 0xff00ff; \
+ t += y & 0xff00ff; \
+ t |= 0x1000100 - ((t >> 8) & 0xff00ff); \
+ t &= 0xff00ff; \
+ \
+ x = (((x >> 8) & 0xff00ff) * a) + 0x800080; \
+ x = (x + ((x >> 8) & 0xff00ff)) >> 8; \
+ x &= 0xff00ff; \
+ x += (y >> 8) & 0xff00ff; \
+ x |= 0x1000100 - ((x >> 8) & 0xff00ff); \
+ x &= 0xff00ff; \
+ x <<= 8; \
+ x += t; \
+ } while (0)
+
+/*
+ x_c = (x_c * a + y_c * b) / 255
+*/
+#define FbByteAddMul(x, a, y, b) do { \
+ uint32_t t; \
+ uint32_t r = (x >> 24) * a + (y >> 24) * b + 0x80; \
+ r += (r >> 8); \
+ r >>= 8; \
+ \
+ t = (x & 0xff00) * a + (y & 0xff00) * b; \
+ t += (t >> 8) + 0x8000; \
+ t >>= 16; \
+ \
+ t |= r << 16; \
+ t |= 0x1000100 - ((t >> 8) & 0xff00ff); \
+ t &= 0xff00ff; \
+ t <<= 8; \
+ \
+ r = ((x >> 16) & 0xff) * a + ((y >> 16) & 0xff) * b + 0x80; \
+ r += (r >> 8); \
+ r >>= 8; \
+ \
+ x = (x & 0xff) * a + (y & 0xff) * b + 0x80; \
+ x += (x >> 8); \
+ x >>= 8; \
+ x |= r << 16; \
+ x |= 0x1000100 - ((x >> 8) & 0xff00ff); \
+ x &= 0xff00ff; \
+ x |= t; \
+ } while (0)
+
+/*
+ x_c = (x_c * a_c) / 255
+*/
+#define FbByteMulC(x, a) do { \
+ uint32_t t; \
+ uint32_t r = (x & 0xff) * (a & 0xff); \
+ r |= (x & 0xff0000) * ((a >> 16) & 0xff); \
+ r += 0x800080; \
+ r = (r + ((r >> 8) & 0xff00ff)) >> 8; \
+ r &= 0xff00ff; \
+ \
+ x >>= 8; \
+ t = (x & 0xff) * ((a >> 8) & 0xff); \
+ t |= (x & 0xff0000) * (a >> 24); \
+ t += 0x800080; \
+ t = t + ((t >> 8) & 0xff00ff); \
+ x = r | (t & 0xff00ff00); \
+ \
+ } while (0)
+
+/*
+ x_c = (x_c * a) / 255 + y
+*/
+#define FbByteMulAddC(x, a, y) do { \
+ uint32_t t; \
+ uint32_t r = (x & 0xff) * (a & 0xff); \
+ r |= (x & 0xff0000) * ((a >> 16) & 0xff); \
+ r += 0x800080; \
+ r = (r + ((r >> 8) & 0xff00ff)) >> 8; \
+ r &= 0xff00ff; \
+ r += y & 0xff00ff; \
+ r |= 0x1000100 - ((r >> 8) & 0xff00ff); \
+ r &= 0xff00ff; \
+ \
+ x >>= 8; \
+ t = (x & 0xff) * ((a >> 8) & 0xff); \
+ t |= (x & 0xff0000) * (a >> 24); \
+ t += 0x800080; \
+ t = (t + ((t >> 8) & 0xff00ff)) >> 8; \
+ t &= 0xff00ff; \
+ t += (y >> 8) & 0xff00ff; \
+ t |= 0x1000100 - ((t >> 8) & 0xff00ff); \
+ t &= 0xff00ff; \
+ x = r | (t << 8); \
+ } while (0)
+
+/*
+ x_c = (x_c * a_c + y_c * b) / 255
+*/
+#define FbByteAddMulC(x, a, y, b) do { \
+ uint32_t t; \
+ uint32_t r = (x >> 24) * (a >> 24) + (y >> 24) * b; \
+ r += (r >> 8) + 0x80; \
+ r >>= 8; \
+ \
+ t = (x & 0xff00) * ((a >> 8) & 0xff) + (y & 0xff00) * b; \
+ t += (t >> 8) + 0x8000; \
+ t >>= 16; \
+ \
+ t |= r << 16; \
+ t |= 0x1000100 - ((t >> 8) & 0xff00ff); \
+ t &= 0xff00ff; \
+ t <<= 8; \
+ \
+ r = ((x >> 16) & 0xff) * ((a >> 16) & 0xff) + ((y >> 16) & 0xff) * b + 0x80; \
+ r += (r >> 8); \
+ r >>= 8; \
+ \
+ x = (x & 0xff) * (a & 0xff) + (y & 0xff) * b + 0x80; \
+ x += (x >> 8); \
+ x >>= 8; \
+ x |= r << 16; \
+ x |= 0x1000100 - ((x >> 8) & 0xff00ff); \
+ x &= 0xff00ff; \
+ x |= t; \
+ } while (0)
+
+/*
+ x_c = min(x_c + y_c, 255)
+*/
+#define FbByteAdd(x, y) do { \
+ uint32_t t; \
+ uint32_t r = (x & 0xff00ff) + (y & 0xff00ff); \
+ r |= 0x1000100 - ((r >> 8) & 0xff00ff); \
+ r &= 0xff00ff; \
+ \
+ t = ((x >> 8) & 0xff00ff) + ((y >> 8) & 0xff00ff); \
+ t |= 0x1000100 - ((t >> 8) & 0xff00ff); \
+ r |= (t & 0xff00ff) << 8; \
+ x = r; \
+ } while (0)
+
static inline vector unsigned int
splat_alpha (vector unsigned int pix) {
return vec_perm (pix, pix,