X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=pixman%2Fpixman-fast-path.c;h=bbdc8e8b0fa932b20947140a9e8ae1c36bd378e0;hb=c82c2c38538f5c3f25cf81ad697040d2332d64de;hp=1bdb323ef8d47a8401c0f294d44e2ddafe8736b2;hpb=006f21b02b23e1865c0e35d0f9b97af63f52a469;p=profile%2Fivi%2Fpixman.git diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c index 1bdb323..bbdc8e8 100644 --- a/pixman/pixman-fast-path.c +++ b/pixman/pixman-fast-path.c @@ -23,10 +23,14 @@ * Author: Keith Packard, SuSE, Inc. */ +#ifdef HAVE_CONFIG_H #include +#endif #include +#include #include "pixman-private.h" #include "pixman-combine32.h" +#include "pixman-inlines.h" static force_inline uint32_t fetch_24 (uint8_t *a) @@ -50,7 +54,8 @@ fetch_24 (uint8_t *a) } static force_inline void -store_24 (uint8_t *a, uint32_t v) +store_24 (uint8_t *a, + uint32_t v) { if (((unsigned long)a) & 1) { @@ -60,7 +65,7 @@ store_24 (uint8_t *a, uint32_t v) #else *a = (uint8_t) (v); *(uint16_t *)(a + 1) = (uint16_t) (v >> 8); -#endif +#endif } else { @@ -70,26 +75,28 @@ store_24 (uint8_t *a, uint32_t v) #else *(uint16_t *)a = (uint16_t)v; *(a + 2) = (uint8_t)(v >> 16); -#endif +#endif } } static force_inline uint32_t -fbOver (uint32_t src, uint32_t dest) +over (uint32_t src, + uint32_t dest) { - uint32_t a = ~src >> 24; + uint32_t a = ~src >> 24; - FbByteMulAdd(dest, a, src); + UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src); return dest; } static uint32_t -fbIn (uint32_t x, uint8_t y) +in (uint32_t x, + uint8_t y) { - uint16_t a = y; + uint16_t a = y; - FbByteMul (x, a); + UN8x4_MUL_UN8 (x, a); return x; } @@ -97,43 +104,33 @@ fbIn (uint32_t x, uint8_t y) /* * Naming convention: * - * opSRCxMASKxDST + * op_src_mask_dest */ static void -fast_CompositeOver_x888_8_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_over_x888_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint32_t *src, *srcLine; - uint32_t *dst, *dstLine; - uint8_t *mask, *maskLine; - int srcStride, maskStride, dstStride; + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *src, *src_line; + uint32_t *dst, *dst_line; + uint8_t *mask, *mask_line; + int src_stride, mask_stride, dst_stride; uint8_t m; uint32_t s, d; - uint16_t w; + int32_t w; - fbComposeGetStart (dst_image, dest_x, dest_y, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (mask_image, mask_x, mask_y, uint8_t, maskStride, maskLine, 1); - fbComposeGetStart (src_image, src_x, src_y, uint32_t, srcStride, srcLine, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { - src = srcLine; - srcLine += srcStride; - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) @@ -144,11 +141,13 @@ fast_CompositeOver_x888_8_8888 (pixman_implementation_t *imp, s = *src | 0xff000000; if (m == 0xff) + { *dst = s; + } else { - d = fbIn (s, m); - *dst = fbOver (d, *dst); + d = in (s, m); + *dst = over (d, *dst); } } src++; @@ -158,54 +157,43 @@ fast_CompositeOver_x888_8_8888 (pixman_implementation_t *imp, } static void -fast_CompositeIn_n_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t *iSrc, - pixman_image_t *iMask, - pixman_image_t *iDst, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_in_n_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint32_t src, srca; - uint8_t *dstLine, *dst; - uint8_t *maskLine, *mask, m; - int dstStride, maskStride; - uint16_t w; - uint16_t t; + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + uint16_t t; - src = _pixman_image_get_solid(iSrc, iDst->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; - fbComposeGetStart (iDst, dest_x, dest_y, uint8_t, dstStride, dstLine, 1); - fbComposeGetStart (iMask, mask_x, mask_y, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - if (srca == 0xff) { + if (srca == 0xff) + { while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) { m = *mask++; + if (m == 0) - { *dst = 0; - } else if (m != 0xff) - { - *dst = IntMult(m, *dst, t); - } + *dst = MUL_UN8 (m, *dst, t); + dst++; } } @@ -214,116 +202,91 @@ fast_CompositeIn_n_8_8 (pixman_implementation_t *imp, { while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) { m = *mask++; - m = IntMult(m, srca, t); + m = MUL_UN8 (m, srca, t); + if (m == 0) - { *dst = 0; - } else if (m != 0xff) - { - *dst = IntMult(m, *dst, t); - } + *dst = MUL_UN8 (m, *dst, t); + dst++; } } } } - static void -fast_CompositeIn_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t *iSrc, - pixman_image_t *iMask, - pixman_image_t *iDst, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_in_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint8_t *dstLine, *dst; - uint8_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; - uint8_t s; - uint16_t t; + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint8_t s; + uint16_t t; - fbComposeGetStart (iSrc, src_x, src_y, uint8_t, srcStride, srcLine, 1); - fbComposeGetStart (iDst, dest_x, dest_y, uint8_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w--) { s = *src++; + if (s == 0) - { *dst = 0; - } else if (s != 0xff) - { - *dst = IntMult(s, *dst, t); - } + *dst = MUL_UN8 (s, *dst, t); + dst++; } } } static void -fast_CompositeOver_n_8_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_over_n_8_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint32_t src, srca; - uint32_t *dstLine, *dst, d; - uint8_t *maskLine, *mask, m; - int dstStride, maskStride; - uint16_t w; + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint32_t *dst_line, *dst, d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; - src = _pixman_image_get_solid(src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - fbComposeGetStart (dst_image, dest_x, dest_y, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (mask_image, mask_x, mask_y, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) @@ -334,12 +297,12 @@ fast_CompositeOver_n_8_8888 (pixman_implementation_t *imp, if (srca == 0xff) *dst = src; else - *dst = fbOver (src, *dst); + *dst = over (src, *dst); } else if (m) { - d = fbIn (src, m); - *dst = fbOver (d, *dst); + d = in (src, m); + *dst = over (d, *dst); } dst++; } @@ -347,41 +310,77 @@ fast_CompositeOver_n_8_8888 (pixman_implementation_t *imp, } static void -fast_CompositeOver_n_8888_8888_ca (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint32_t src, srca; - uint32_t *dstLine, *dst, d; - uint32_t *maskLine, *mask, ma; - int dstStride, maskStride; - uint16_t w; + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, s; + uint32_t *dst_line, *dst, d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + if (src == 0) + return; - src = _pixman_image_get_solid(src_image, dst_image->bits.format); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + ma = *mask++; + + if (ma) + { + d = *dst; + s = src; + + UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d); + + *dst = s; + } + + dst++; + } + } +} + +static void +fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca, s; + uint32_t *dst_line, *dst, d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - fbComposeGetStart (dst_image, dest_x, dest_y, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (mask_image, mask_x, mask_y, uint32_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) @@ -392,16 +391,17 @@ fast_CompositeOver_n_8888_8888_ca (pixman_implementation_t *imp, if (srca == 0xff) *dst = src; else - *dst = fbOver (src, *dst); + *dst = over (src, *dst); } else if (ma) { d = *dst; + s = src; - FbByteMulC (src, ma); - FbByteMul (ma, srca); + UN8x4_MUL_UN8x4 (s, ma); + UN8x4_MUL_UN8 (ma, srca); ma = ~ma; - FbByteMulAddC (d, ma, src); + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); *dst = d; } @@ -412,42 +412,32 @@ fast_CompositeOver_n_8888_8888_ca (pixman_implementation_t *imp, } static void -fast_CompositeOver_n_8_0888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_over_n_8_0888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint32_t src, srca; - uint8_t *dstLine, *dst; - uint32_t d; - uint8_t *maskLine, *mask, m; - int dstStride, maskStride; - uint16_t w; + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint8_t *dst_line, *dst; + uint32_t d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; - src = _pixman_image_get_solid(src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - fbComposeGetStart (dst_image, dest_x, dest_y, uint8_t, dstStride, dstLine, 3); - fbComposeGetStart (mask_image, mask_x, mask_y, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) @@ -456,18 +446,20 @@ fast_CompositeOver_n_8_0888 (pixman_implementation_t *imp, if (m == 0xff) { if (srca == 0xff) + { d = src; + } else { - d = fetch_24(dst); - d = fbOver (src, d); + d = fetch_24 (dst); + d = over (src, d); } - store_24(dst, d); + store_24 (dst, d); } else if (m) { - d = fbOver (fbIn(src,m), fetch_24(dst)); - store_24(dst, d); + d = over (in (src, m), fetch_24 (dst)); + store_24 (dst, d); } dst += 3; } @@ -475,42 +467,32 @@ fast_CompositeOver_n_8_0888 (pixman_implementation_t *imp, } static void -fast_CompositeOver_n_8_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_over_n_8_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint32_t src, srca; - uint16_t *dstLine, *dst; - uint32_t d; - uint8_t *maskLine, *mask, m; - int dstStride, maskStride; - uint16_t w; + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint16_t *dst_line, *dst; + uint32_t d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; - src = _pixman_image_get_solid(src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - fbComposeGetStart (dst_image, dest_x, dest_y, uint16_t, dstStride, dstLine, 1); - fbComposeGetStart (mask_image, mask_x, mask_y, uint8_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) @@ -519,19 +501,21 @@ fast_CompositeOver_n_8_0565 (pixman_implementation_t *imp, if (m == 0xff) { if (srca == 0xff) + { d = src; + } else { d = *dst; - d = fbOver (src, CONVERT_0565_TO_0888(d)); + d = over (src, CONVERT_0565_TO_0888 (d)); } - *dst = CONVERT_8888_TO_0565(d); + *dst = CONVERT_8888_TO_0565 (d); } else if (m) { d = *dst; - d = fbOver (fbIn(src,m), CONVERT_0565_TO_0888(d)); - *dst = CONVERT_8888_TO_0565(d); + d = over (in (src, m), CONVERT_0565_TO_0888 (d)); + *dst = CONVERT_8888_TO_0565 (d); } dst++; } @@ -539,45 +523,35 @@ fast_CompositeOver_n_8_0565 (pixman_implementation_t *imp, } static void -fast_CompositeOver_n_8888_0565_ca (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint32_t src, srca; - uint16_t src16; - uint16_t *dstLine, *dst; - uint32_t d; - uint32_t *maskLine, *mask, ma; - int dstStride, maskStride; - uint16_t w; + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca, s; + uint16_t src16; + uint16_t *dst_line, *dst; + uint32_t d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + int32_t w; - src = _pixman_image_get_solid(src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - src16 = CONVERT_8888_TO_0565(src); + src16 = CONVERT_8888_TO_0565 (src); - fbComposeGetStart (dst_image, dest_x, dest_y, uint16_t, dstStride, dstLine, 1); - fbComposeGetStart (mask_image, mask_x, mask_y, uint32_t, maskStride, maskLine, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) @@ -592,21 +566,23 @@ fast_CompositeOver_n_8888_0565_ca (pixman_implementation_t *imp, else { d = *dst; - d = fbOver (src, CONVERT_0565_TO_0888(d)); - *dst = CONVERT_8888_TO_0565(d); + d = over (src, CONVERT_0565_TO_0888 (d)); + *dst = CONVERT_8888_TO_0565 (d); } } else if (ma) { d = *dst; - d = CONVERT_0565_TO_0888(d); + d = CONVERT_0565_TO_0888 (d); - FbByteMulC (src, ma); - FbByteMul (ma, srca); + s = src; + + UN8x4_MUL_UN8x4 (s, ma); + UN8x4_MUL_UN8 (ma, srca); ma = ~ma; - FbByteMulAddC (d, ma, src); - - *dst = CONVERT_8888_TO_0565(d); + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); + + *dst = CONVERT_8888_TO_0565 (d); } dst++; } @@ -615,34 +591,24 @@ fast_CompositeOver_n_8888_0565_ca (pixman_implementation_t *imp, static void fast_composite_over_8888_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src, s; - int dstStride, srcStride; - uint8_t a; - uint16_t w; + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + uint8_t a; + int32_t w; - fbComposeGetStart (dst_image, dest_x, dest_y, uint32_t, dstStride, dstLine, 1); - fbComposeGetStart (src_image, src_x, src_y, uint32_t, srcStride, srcLine, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w--) @@ -652,43 +618,60 @@ fast_composite_over_8888_8888 (pixman_implementation_t *imp, if (a == 0xff) *dst = s; else if (s) - *dst = fbOver (s, *dst); + *dst = over (s, *dst); dst++; } } } static void -fast_CompositeSrc_8888_0888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_src_x888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + *dst++ = (*src++) | 0xff000000; + } +} + +#if 0 +static void +fast_composite_over_8888_0888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint8_t *dstLine, *dst; - uint32_t d; - uint32_t *srcLine, *src, s; - uint8_t a; - int dstStride, srcStride; - uint16_t w; + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint32_t d; + uint32_t *src_line, *src, s; + uint8_t a; + int dst_stride, src_stride; + int32_t w; - fbComposeGetStart (dst_image, dest_x, dest_y, uint8_t, dstStride, dstLine, 3); - fbComposeGetStart (src_image, src_x, src_y, uint32_t, srcStride, srcLine, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w--) @@ -700,46 +683,37 @@ fast_CompositeSrc_8888_0888 (pixman_implementation_t *imp, if (a == 0xff) d = s; else - d = fbOver (s, fetch_24(dst)); + d = over (s, fetch_24 (dst)); - store_24(dst, d); + store_24 (dst, d); } dst += 3; } } } +#endif static void fast_composite_over_8888_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { - uint16_t *dstLine, *dst; - uint32_t d; - uint32_t *srcLine, *src, s; - uint8_t a; - int dstStride, srcStride; - uint16_t w; + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst; + uint32_t d; + uint32_t *src_line, *src, s; + uint8_t a; + int dst_stride, src_stride; + int32_t w; - fbComposeGetStart (src_image, src_x, src_y, uint32_t, srcStride, srcLine, 1); - fbComposeGetStart (dst_image, dest_x, dest_y, uint16_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w--) @@ -749,13 +723,15 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp, if (s) { if (a == 0xff) + { d = s; + } else { d = *dst; - d = fbOver (s, CONVERT_0565_TO_0888(d)); + d = over (s, CONVERT_0565_TO_0888 (d)); } - *dst = CONVERT_8888_TO_0565(d); + *dst = CONVERT_8888_TO_0565 (d); } dst++; } @@ -763,76 +739,56 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp, } static void -fast_CompositeSrc_x888_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_src_x888_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint16_t *dstLine, *dst; - uint32_t *srcLine, *src, s; - int dstStride, srcStride; - uint16_t w; + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + int32_t w; - fbComposeGetStart (src_image, src_x, src_y, uint32_t, srcStride, srcLine, 1); - fbComposeGetStart (dst_image, dest_x, dest_y, uint16_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w--) { s = *src++; - *dst = CONVERT_8888_TO_0565(s); + *dst = CONVERT_8888_TO_0565 (s); dst++; } } } static void -fast_CompositeAdd_8000_8000 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_add_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint8_t *dstLine, *dst; - uint8_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; - uint8_t s, d; - uint16_t t; + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint8_t s, d; + uint16_t t; - fbComposeGetStart (src_image, src_x, src_y, uint8_t, srcStride, srcLine, 1); - fbComposeGetStart (dst_image, dest_x, dest_y, uint8_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w--) @@ -854,35 +810,25 @@ fast_CompositeAdd_8000_8000 (pixman_implementation_t *imp, } static void -fast_CompositeAdd_8888_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_add_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint32_t *dstLine, *dst; - uint32_t *srcLine, *src; - int dstStride, srcStride; - uint16_t w; - uint32_t s, d; + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint32_t s, d; - fbComposeGetStart (src_image, src_x, src_y, uint32_t, srcStride, srcLine, 1); - fbComposeGetStart (dst_image, dest_x, dest_y, uint32_t, dstStride, dstLine, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); while (height--) { - dst = dstLine; - dstLine += dstStride; - src = srcLine; - srcLine += srcStride; + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; w = width; while (w--) @@ -894,7 +840,7 @@ fast_CompositeAdd_8888_8888 (pixman_implementation_t *imp, { d = *dst; if (d) - FbByteAdd(s,d); + UN8x4_ADD_UN8x4 (s, d); } *dst = s; } @@ -904,342 +850,1030 @@ fast_CompositeAdd_8888_8888 (pixman_implementation_t *imp, } static void -fast_CompositeAdd_8888_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_add_n_8_8 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint8_t *dstLine, *dst; - uint8_t *maskLine, *mask; - int dstStride, maskStride; - uint16_t w; - uint32_t src; - uint8_t sa; - - fbComposeGetStart (dst_image, dest_x, dest_y, uint8_t, dstStride, dstLine, 1); - fbComposeGetStart (mask_image, mask_x, mask_y, uint8_t, maskStride, maskLine, 1); - src = _pixman_image_get_solid (src_image, dst_image->bits.format); + PIXMAN_COMPOSITE_ARGS (info); + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t src; + uint8_t sa; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); sa = (src >> 24); while (height--) { - dst = dstLine; - dstLine += dstStride; - mask = maskLine; - maskLine += maskStride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; w = width; while (w--) { - uint16_t tmp; - uint16_t a; - uint32_t m, d; - uint32_t r; + uint16_t tmp; + uint16_t a; + uint32_t m, d; + uint32_t r; a = *mask++; d = *dst; - m = IntMult (sa, a, tmp); - r = IntAdd (m, d, tmp); + m = MUL_UN8 (sa, a, tmp); + r = ADD_UN8 (m, d, tmp); *dst++ = r; } } } +#ifdef WORDS_BIGENDIAN +#define CREATE_BITMASK(n) (0x80000000 >> (n)) +#define UPDATE_BITMASK(n) ((n) >> 1) +#else +#define CREATE_BITMASK(n) (1 << (n)) +#define UPDATE_BITMASK(n) ((n) << 1) +#endif + +#define TEST_BIT(p, n) \ + (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31)) +#define SET_BIT(p, n) \ + do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0); + +static void +fast_composite_add_1000_1000 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t, + src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t, + dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + /* + * TODO: improve performance by processing uint32_t data instead + * of individual bits + */ + if (TEST_BIT (src, src_x + w)) + SET_BIT (dst, dest_x + w); + } + } +} + +static void +fast_composite_over_n_1_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint32_t *dst, *dst_line; + uint32_t *mask, *mask_line; + int mask_stride, dst_stride; + uint32_t bitcache, bitmask; + int32_t w; + + if (width <= 0) + return; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, + dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, + mask_stride, mask_line, 1); + mask_line += mask_x >> 5; + + if (srca == 0xff) + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = src; + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } + else + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = over (src, *dst); + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } +} + +static void +fast_composite_over_n_1_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src, srca; + uint16_t *dst, *dst_line; + uint32_t *mask, *mask_line; + int mask_stride, dst_stride; + uint32_t bitcache, bitmask; + int32_t w; + uint32_t d; + uint16_t src565; + + if (width <= 0) + return; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, + dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, + mask_stride, mask_line, 1); + mask_line += mask_x >> 5; + + if (srca == 0xff) + { + src565 = CONVERT_8888_TO_0565 (src); + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = src565; + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } + else + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + { + d = over (src, CONVERT_0565_TO_0888 (*dst)); + *dst = CONVERT_8888_TO_0565 (d); + } + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } +} + /* * Simple bitblt */ static void -fast_CompositeSolidFill (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_solid_fill (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint32_t src; + PIXMAN_COMPOSITE_ARGS (info); + uint32_t src; - src = _pixman_image_get_solid(src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - if (dst_image->bits.format == PIXMAN_a8) + if (dest_image->bits.format == PIXMAN_a1) + { + src = src >> 31; + } + else if (dest_image->bits.format == PIXMAN_a8) + { src = src >> 24; - else if (dst_image->bits.format == PIXMAN_r5g6b5 || - dst_image->bits.format == PIXMAN_b5g6r5) + } + else if (dest_image->bits.format == PIXMAN_r5g6b5 || + dest_image->bits.format == PIXMAN_b5g6r5) + { src = CONVERT_8888_TO_0565 (src); + } - pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride, - PIXMAN_FORMAT_BPP (dst_image->bits.format), - dest_x, dest_y, - width, height, - src); + pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, + PIXMAN_FORMAT_BPP (dest_image->bits.format), + dest_x, dest_y, + width, height, + src); } static void -fast_CompositeSrc_8888_x888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_src_memcpy (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint32_t *dst; - uint32_t *src; - int dstStride, srcStride; - uint32_t n_bytes = width * sizeof (uint32_t); + PIXMAN_COMPOSITE_ARGS (info); + int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8; + uint32_t n_bytes = width * bpp; + int dst_stride, src_stride; + uint8_t *dst; + uint8_t *src; - fbComposeGetStart (src_image, src_x, src_y, uint32_t, srcStride, src, 1); - fbComposeGetStart (dst_image, dest_x, dest_y, uint32_t, dstStride, dst, 1); + src_stride = src_image->bits.rowstride * 4; + dst_stride = dest_image->bits.rowstride * 4; + + src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp; + dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp; while (height--) { memcpy (dst, src, n_bytes); - dst += dstStride; - src += srcStride; + dst += dst_stride; + src += src_stride; } } -static const pixman_fast_path_t c_fast_paths[] = +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER) +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE) +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD) +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL) +FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER) +FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD) +FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL) +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER) +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE) +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD) +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL) +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER) +FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE) +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD) +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL) +FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL) +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER) +FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE) +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD) +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL) + +/* Use more unrolling for src_0565_0565 because it is typically CPU bound */ +static force_inline void +scaled_nearest_scanline_565_565_SRC (uint16_t * dst, + const uint16_t * src, + int32_t w, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t fully_transparent_src) { - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, fast_CompositeOver_n_8_0565, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, fast_CompositeOver_n_8_0565, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r8g8b8, fast_CompositeOver_n_8_0888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b8g8r8, fast_CompositeOver_n_8_0888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fast_CompositeOver_n_8_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fast_CompositeOver_n_8_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fast_CompositeOver_n_8_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fast_CompositeOver_n_8_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fast_CompositeOver_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fast_CompositeOver_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, fast_CompositeOver_n_8888_0565_ca, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fast_CompositeOver_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, fast_CompositeOver_n_8888_8888_ca, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, fast_CompositeOver_n_8888_0565_ca, NEED_COMPONENT_ALPHA }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fast_CompositeOver_x888_8_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fast_CompositeOver_x888_8_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, fast_CompositeOver_x888_8_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, fast_CompositeOver_x888_8_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fast_composite_over_8888_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fast_composite_over_8888_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fast_composite_over_8888_0565, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fast_composite_over_8888_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fast_composite_over_8888_8888, 0 }, - { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fast_composite_over_8888_0565, 0 }, - { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fast_CompositeAdd_8888_8888, 0 }, - { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fast_CompositeAdd_8888_8888, 0 }, - { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fast_CompositeAdd_8000_8000, 0 }, - { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fast_CompositeAdd_8888_8_8, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, fast_CompositeSolidFill, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, fast_CompositeSolidFill, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_a8b8g8r8, fast_CompositeSolidFill, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_x8b8g8r8, fast_CompositeSolidFill, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_a8, fast_CompositeSolidFill, 0 }, - { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, fast_CompositeSolidFill, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fast_CompositeSrc_8888_x888, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fast_CompositeSrc_8888_x888, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fast_CompositeSrc_8888_x888, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fast_CompositeSrc_8888_x888, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fast_CompositeSrc_x888_0565, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fast_CompositeSrc_x888_0565, 0 }, - { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fast_CompositeSrc_x888_0565, 0 }, - { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fast_CompositeSrc_x888_0565, 0 }, - { PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fast_CompositeIn_8_8, 0 }, - { PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fast_CompositeIn_n_8_8, 0 }, - { PIXMAN_OP_NONE }, -}; + uint16_t tmp1, tmp2, tmp3, tmp4; + while ((w -= 4) >= 0) + { + tmp1 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp2 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp3 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp4 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + *dst++ = tmp1; + *dst++ = tmp2; + *dst++ = tmp3; + *dst++ = tmp4; + } + if (w & 2) + { + tmp1 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp2 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + *dst++ = tmp1; + *dst++ = tmp2; + } + if (w & 1) + *dst++ = src[pixman_fixed_to_int (vx)]; +} + +FAST_NEAREST_MAINLOOP (565_565_cover_SRC, + scaled_nearest_scanline_565_565_SRC, + uint16_t, uint16_t, COVER) +FAST_NEAREST_MAINLOOP (565_565_none_SRC, + scaled_nearest_scanline_565_565_SRC, + uint16_t, uint16_t, NONE) +FAST_NEAREST_MAINLOOP (565_565_pad_SRC, + scaled_nearest_scanline_565_565_SRC, + uint16_t, uint16_t, PAD) + +static force_inline uint32_t +fetch_nearest (pixman_repeat_t src_repeat, + pixman_format_code_t format, + uint32_t *src, int x, int src_width) +{ + if (repeat (src_repeat, &x, src_width)) + { + if (format == PIXMAN_x8r8g8b8) + return *(src + x) | 0xff000000; + else + return *(src + x); + } + else + { + return 0; + } +} + +static force_inline void +combine_over (uint32_t s, uint32_t *dst) +{ + if (s) + { + uint8_t ia = 0xff - (s >> 24); + + if (ia) + UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s); + else + *dst = s; + } +} + +static force_inline void +combine_src (uint32_t s, uint32_t *dst) +{ + *dst = s; +} static void -fast_CompositeSrcScaleNearest (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t *src_image, - pixman_image_t *mask_image, - pixman_image_t *dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_scaled_nearest (pixman_implementation_t *imp, + pixman_composite_info_t *info) { - uint32_t *dst; - uint32_t *src; - int dstStride, srcStride; - int i, j; + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line; + uint32_t *src_line; + int dst_stride, src_stride; + int src_width, src_height; + pixman_repeat_t src_repeat; + pixman_fixed_t unit_x, unit_y; + pixman_format_code_t src_format; pixman_vector_t v; - - fbComposeGetStart (dst_image, dest_x, dest_y, uint32_t, dstStride, dst, 1); + pixman_fixed_t vy; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); /* pass in 0 instead of src_x and src_y because src_x and src_y need to be - * transformed from destination space to source space */ - fbComposeGetStart (src_image, 0, 0, uint32_t, srcStride, src, 1); - + * transformed from destination space to source space + */ + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1); + /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed(src_x) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed(src_y) + pixman_fixed_1 / 2; + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; v.vector[2] = pixman_fixed_1; - + if (!pixman_transform_point_3d (src_image->common.transform, &v)) - return; - + return; + + unit_x = src_image->common.transform->matrix[0][0]; + unit_y = src_image->common.transform->matrix[1][1]; + /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ v.vector[0] -= pixman_fixed_e; v.vector[1] -= pixman_fixed_e; - - for (j = 0; j < height; j++) { + + src_height = src_image->bits.height; + src_width = src_image->bits.width; + src_repeat = src_image->common.repeat; + src_format = src_image->bits.format; + + vy = v.vector[1]; + while (height--) + { pixman_fixed_t vx = v.vector[0]; - pixman_fixed_t vy = v.vector[1]; - for (i = 0; i < width; ++i) { - pixman_bool_t inside_bounds; - uint32_t result; - int x, y; - x = vx >> 16; - y = vy >> 16; - - /* apply the repeat function */ - switch (src_image->common.repeat) { - case PIXMAN_REPEAT_NORMAL: - x = MOD (x, src_image->bits.width); - y = MOD (y, src_image->bits.height); - inside_bounds = TRUE; - break; - - case PIXMAN_REPEAT_PAD: - x = CLIP (x, 0, src_image->bits.width-1); - y = CLIP (y, 0, src_image->bits.height-1); - inside_bounds = TRUE; - break; - - case PIXMAN_REPEAT_REFLECT: - x = MOD (x, src_image->bits.width * 2); - if (x >= src_image->bits.width) - x = src_image->bits.width * 2 - x - 1; - y = MOD (y, src_image->bits.height * 2); - if (y >= src_image->bits.height) - y = src_image->bits.height * 2 - y - 1; - inside_bounds = TRUE; - break; - - case PIXMAN_REPEAT_NONE: - default: - inside_bounds = (x >= 0 && x < src_image->bits.width && y >= 0 && y < src_image->bits.height); - break; - } - - if (inside_bounds) { - //XXX: we should move this multiplication out of the loop - result = *(src + y * srcStride + x); - } else { - result = 0; - } - *(dst + i) = result; - - /* adjust the x location by a unit vector in the x direction: - * this is equivalent to transforming x+1 of the destination point to source space */ - vx += src_image->common.transform->matrix[0][0]; - } + int y = pixman_fixed_to_int (vy); + uint32_t *dst = dst_line; + + dst_line += dst_stride; + /* adjust the y location by a unit vector in the y direction * this is equivalent to transforming y+1 of the destination point to source space */ - v.vector[1] += src_image->common.transform->matrix[1][1]; - dst += dstStride; + vy += unit_y; + + if (!repeat (src_repeat, &y, src_height)) + { + if (op == PIXMAN_OP_SRC) + memset (dst, 0, sizeof (*dst) * width); + } + else + { + int w = width; + + uint32_t *src = src_line + y * src_stride; + + while (w >= 2) + { + uint32_t s1, s2; + int x1, x2; + + x1 = pixman_fixed_to_int (vx); + vx += unit_x; + + x2 = pixman_fixed_to_int (vx); + vx += unit_x; + + w -= 2; + + s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width); + s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width); + + if (op == PIXMAN_OP_OVER) + { + combine_over (s1, dst++); + combine_over (s2, dst++); + } + else + { + combine_src (s1, dst++); + combine_src (s2, dst++); + } + } + + while (w--) + { + uint32_t s; + int x; + + x = pixman_fixed_to_int (vx); + vx += unit_x; + + s = fetch_nearest (src_repeat, src_format, src, x, src_width); + + if (op == PIXMAN_OP_OVER) + combine_over (s, dst++); + else + combine_src (s, dst++); + } + } } } -static void -fast_path_composite (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t *src, - pixman_image_t *mask, - pixman_image_t *dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +#define CACHE_LINE_SIZE 64 + +#define FAST_SIMPLE_ROTATE(suffix, pix_type) \ + \ +static void \ +blt_rotated_90_trivial_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int w, \ + int h) \ +{ \ + int x, y; \ + for (y = 0; y < h; y++) \ + { \ + const pix_type *s = src + (h - y - 1); \ + pix_type *d = dst + dst_stride * y; \ + for (x = 0; x < w; x++) \ + { \ + *d++ = *s; \ + s += src_stride; \ + } \ + } \ +} \ + \ +static void \ +blt_rotated_270_trivial_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int w, \ + int h) \ +{ \ + int x, y; \ + for (y = 0; y < h; y++) \ + { \ + const pix_type *s = src + src_stride * (w - 1) + y; \ + pix_type *d = dst + dst_stride * y; \ + for (x = 0; x < w; x++) \ + { \ + *d++ = *s; \ + s -= src_stride; \ + } \ + } \ +} \ + \ +static void \ +blt_rotated_90_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int W, \ + int H) \ +{ \ + int x; \ + int leading_pixels = 0, trailing_pixels = 0; \ + const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ + \ + /* \ + * split processing into handling destination as TILE_SIZExH cache line \ + * aligned vertical stripes (optimistically assuming that destination \ + * stride is a multiple of cache line, if not - it will be just a bit \ + * slower) \ + */ \ + \ + if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ + { \ + leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (leading_pixels > W) \ + leading_pixels = W; \ + \ + /* unaligned leading part NxH (where N < TILE_SIZE) */ \ + blt_rotated_90_trivial_##suffix ( \ + dst, \ + dst_stride, \ + src, \ + src_stride, \ + leading_pixels, \ + H); \ + \ + dst += leading_pixels; \ + src += leading_pixels * src_stride; \ + W -= leading_pixels; \ + } \ + \ + if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ + { \ + trailing_pixels = (((uintptr_t)(dst + W) & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (trailing_pixels > W) \ + trailing_pixels = W; \ + W -= trailing_pixels; \ + } \ + \ + for (x = 0; x < W; x += TILE_SIZE) \ + { \ + /* aligned middle part TILE_SIZExH */ \ + blt_rotated_90_trivial_##suffix ( \ + dst + x, \ + dst_stride, \ + src + src_stride * x, \ + src_stride, \ + TILE_SIZE, \ + H); \ + } \ + \ + if (trailing_pixels) \ + { \ + /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ + blt_rotated_90_trivial_##suffix ( \ + dst + W, \ + dst_stride, \ + src + W * src_stride, \ + src_stride, \ + trailing_pixels, \ + H); \ + } \ +} \ + \ +static void \ +blt_rotated_270_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int W, \ + int H) \ +{ \ + int x; \ + int leading_pixels = 0, trailing_pixels = 0; \ + const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ + \ + /* \ + * split processing into handling destination as TILE_SIZExH cache line \ + * aligned vertical stripes (optimistically assuming that destination \ + * stride is a multiple of cache line, if not - it will be just a bit \ + * slower) \ + */ \ + \ + if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ + { \ + leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (leading_pixels > W) \ + leading_pixels = W; \ + \ + /* unaligned leading part NxH (where N < TILE_SIZE) */ \ + blt_rotated_270_trivial_##suffix ( \ + dst, \ + dst_stride, \ + src + src_stride * (W - leading_pixels), \ + src_stride, \ + leading_pixels, \ + H); \ + \ + dst += leading_pixels; \ + W -= leading_pixels; \ + } \ + \ + if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ + { \ + trailing_pixels = (((uintptr_t)(dst + W) & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (trailing_pixels > W) \ + trailing_pixels = W; \ + W -= trailing_pixels; \ + src += trailing_pixels * src_stride; \ + } \ + \ + for (x = 0; x < W; x += TILE_SIZE) \ + { \ + /* aligned middle part TILE_SIZExH */ \ + blt_rotated_270_trivial_##suffix ( \ + dst + x, \ + dst_stride, \ + src + src_stride * (W - x - TILE_SIZE), \ + src_stride, \ + TILE_SIZE, \ + H); \ + } \ + \ + if (trailing_pixels) \ + { \ + /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ + blt_rotated_270_trivial_##suffix ( \ + dst + W, \ + dst_stride, \ + src - trailing_pixels * src_stride, \ + src_stride, \ + trailing_pixels, \ + H); \ + } \ +} \ + \ +static void \ +fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + pix_type *dst_line; \ + pix_type *src_line; \ + int dst_stride, src_stride; \ + int src_x_t, src_y_t; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \ + dst_stride, dst_line, 1); \ + src_x_t = -src_y + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[0][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e) - height;\ + src_y_t = src_x + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[1][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ + src_stride, src_line, 1); \ + blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \ + width, height); \ +} \ + \ +static void \ +fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + pix_type *dst_line; \ + pix_type *src_line; \ + int dst_stride, src_stride; \ + int src_x_t, src_y_t; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \ + dst_stride, dst_line, 1); \ + src_x_t = src_y + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[0][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e); \ + src_y_t = -src_x + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[1][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e) - width; \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ + src_stride, src_line, 1); \ + blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \ + width, height); \ +} + +FAST_SIMPLE_ROTATE (8, uint8_t) +FAST_SIMPLE_ROTATE (565, uint16_t) +FAST_SIMPLE_ROTATE (8888, uint32_t) + +static const pixman_fast_path_t c_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000), + PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8), + PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8), + + SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888), + + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888), + + SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565), + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565), + + SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565), + + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888), + + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888), + + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565), + +#define NEAREST_FAST_PATH(op,s,d) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest, \ + } + + NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8), + NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8), + + NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8), + NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8), + + NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8), + NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8), + + NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8), + NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8), + +#define SIMPLE_ROTATE_FLAGS(angle) \ + (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \ + FAST_PATH_NEAREST_FILTER | \ + FAST_PATH_SAMPLES_COVER_CLIP | \ + FAST_PATH_STANDARD_FLAGS) + +#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_rotate_90_##suffix, \ + }, \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_rotate_270_##suffix, \ + } + + SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888), + SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888), + SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888), + SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565), + SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8), + + { PIXMAN_OP_NONE }, +}; + +#ifdef WORDS_BIGENDIAN +#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (32 - (offs) - (n))) +#else +#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (offs)) +#endif + +static force_inline void +pixman_fill1_line (uint32_t *dst, int offs, int width, int v) { - if (src->type == BITS - && src->common.transform - && !mask - && op == PIXMAN_OP_SRC - && !src->common.alpha_map && !dest->common.alpha_map - && (src->common.filter == PIXMAN_FILTER_NEAREST) - && PIXMAN_FORMAT_BPP(dest->bits.format) == 32 - && src->bits.format == dest->bits.format - && !src->common.read_func && !src->common.write_func - && !dest->common.read_func && !dest->common.write_func) + if (offs) { - /* ensure that the transform matrix only has a scale */ - if (src->common.transform->matrix[0][1] == 0 && - src->common.transform->matrix[1][0] == 0 && - src->common.transform->matrix[2][0] == 0 && - src->common.transform->matrix[2][1] == 0 && - src->common.transform->matrix[2][2] == pixman_fixed_1) + int leading_pixels = 32 - offs; + if (leading_pixels >= width) { - _pixman_walk_composite_region (imp, op, - src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height, - fast_CompositeSrcScaleNearest); + if (v) + *dst |= A1_FILL_MASK (width, offs); + else + *dst &= ~A1_FILL_MASK (width, offs); return; } + else + { + if (v) + *dst++ |= A1_FILL_MASK (leading_pixels, offs); + else + *dst++ &= ~A1_FILL_MASK (leading_pixels, offs); + width -= leading_pixels; + } } - - if (_pixman_run_fast_path (c_fast_paths, imp, - op, src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height)) + while (width >= 32) { - return; + if (v) + *dst++ = 0xFFFFFFFF; + else + *dst++ = 0; + width -= 32; + } + if (width > 0) + { + if (v) + *dst |= A1_FILL_MASK (width, 0); + else + *dst &= ~A1_FILL_MASK (width, 0); } +} + +static void +pixman_fill1 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + uint32_t *dst = bits + y * stride + (x >> 5); + int offs = x & 31; - _pixman_implementation_composite (imp->delegate, op, - src, mask, dest, - src_x, src_y, - mask_x, mask_y, - dest_x, dest_y, - width, height); + if (xor & 1) + { + while (height--) + { + pixman_fill1_line (dst, offs, width, 1); + dst += stride; + } + } + else + { + while (height--) + { + pixman_fill1_line (dst, offs, width, 0); + dst += stride; + } + } } static void -pixman_fill8 (uint32_t *bits, - int stride, - int x, - int y, - int width, - int height, - uint32_t xor) +pixman_fill8 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) { int byte_stride = stride * (int) sizeof (uint32_t); uint8_t *dst = (uint8_t *) bits; @@ -1259,14 +1893,15 @@ pixman_fill8 (uint32_t *bits, static void pixman_fill16 (uint32_t *bits, - int stride, - int x, - int y, - int width, - int height, - uint32_t xor) + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) { - int short_stride = (stride * (int) sizeof (uint32_t)) / (int) sizeof (uint16_t); + int short_stride = + (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t); uint16_t *dst = (uint16_t *)bits; uint16_t v = xor & 0xffff; int i; @@ -1284,12 +1919,12 @@ pixman_fill16 (uint32_t *bits, static void pixman_fill32 (uint32_t *bits, - int stride, - int x, - int y, - int width, - int height, - uint32_t xor) + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) { int i; @@ -1306,46 +1941,48 @@ pixman_fill32 (uint32_t *bits, static pixman_bool_t fast_path_fill (pixman_implementation_t *imp, - uint32_t *bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) { switch (bpp) { + case 1: + pixman_fill1 (bits, stride, x, y, width, height, xor); + break; + case 8: pixman_fill8 (bits, stride, x, y, width, height, xor); break; - + case 16: pixman_fill16 (bits, stride, x, y, width, height, xor); break; - + case 32: pixman_fill32 (bits, stride, x, y, width, height, xor); break; - + default: return _pixman_implementation_fill ( imp->delegate, bits, stride, bpp, x, y, width, height, xor); break; } - + return TRUE; } pixman_implementation_t * -_pixman_implementation_create_fast_path (void) +_pixman_implementation_create_fast_path (pixman_implementation_t *fallback) { - pixman_implementation_t *general = _pixman_implementation_create_general (); - pixman_implementation_t *imp = _pixman_implementation_create (general); + pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); - imp->composite = fast_path_composite; imp->fill = fast_path_fill; - + return imp; }