From e9b0740af76853f58df72cd40cd7cb4e2ac7261b Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Thu, 16 Sep 2010 13:02:18 +0300 Subject: [PATCH] Nearest scaling fast path macro split into two parts Scanline processing is now split into a separate function. This provides an easy way of overriding it with a platform specific implementation, which may use SIMD optimizations. Only basic C data types are used as the arguments for this function, so it may be implemented entirely in assembly or be generated by some JIT engine. Also as a result of this split, the complexity of code is reduced a bit and now it should be easier to introduce support for the currently missing NONE, PAD and REFLECT repeat types. --- pixman/pixman-fast-path.h | 209 ++++++++++++++++++++++++++-------------------- 1 file changed, 118 insertions(+), 91 deletions(-) diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h index f289288..7babd66 100644 --- a/pixman/pixman-fast-path.h +++ b/pixman/pixman-fast-path.h @@ -76,98 +76,24 @@ repeat (pixman_repeat_t repeat, int *c, int size) 565 source, but it is needed to build. */ #define GET_0565_ALPHA(s) 0xff -#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \ - src_type_t, dst_type_t, OP, repeat_mode) \ -static void \ -fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dst_x, \ - int32_t dst_y, \ - int32_t width, \ - int32_t height) \ +#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \ + src_type_t, dst_type_t, OP, repeat_mode) \ +static force_inline void \ +scanline_func_name (dst_type_t *dst, \ + src_type_t *src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx) \ { \ - dst_type_t *dst_line; \ - src_type_t *src_first_line; \ - uint32_t d; \ - src_type_t s1, s2; \ - uint8_t a1, a2; \ - int w; \ - int x1, x2, y; \ - pixman_fixed_t orig_vx; \ - pixman_fixed_t max_vx, max_vy; \ - pixman_vector_t v; \ - pixman_fixed_t vx, vy; \ - pixman_fixed_t unit_x, unit_y; \ - \ - src_type_t *src; \ - dst_type_t *dst; \ - int src_stride, dst_stride; \ - \ - if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \ - abort(); \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode != PIXMAN_REPEAT_NORMAL && \ - PIXMAN_REPEAT_ ## repeat_mode != PIXMAN_REPEAT_NONE) \ - { \ - abort(); \ - } \ - \ - PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \ - /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ - * transformed from destination space to source space */ \ - PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ - \ - /* reference point is the center of the pixel */ \ - v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ - v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ - v.vector[2] = pixman_fixed_1; \ - \ - if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ - return; \ - \ - unit_x = src_image->common.transform->matrix[0][0]; \ - unit_y = src_image->common.transform->matrix[1][1]; \ + uint32_t d; \ + src_type_t s1, s2; \ + uint8_t a1, a2; \ + int x1, x2; \ \ - /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \ - v.vector[0] -= pixman_fixed_e; \ - v.vector[1] -= pixman_fixed_e; \ - \ - vx = v.vector[0]; \ - vy = v.vector[1]; \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - /* Clamp repeating positions inside the actual samples */ \ - max_vx = src_image->bits.width << 16; \ - max_vy = src_image->bits.height << 16; \ - \ - repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \ - repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ - } \ - \ - orig_vx = vx; \ + if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \ + abort(); \ \ - while (--height >= 0) \ - { \ - dst = dst_line; \ - dst_line += dst_stride; \ - \ - y = vy >> 16; \ - vy += unit_y; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ - \ - src = src_first_line + src_stride * y; \ - \ - w = width; \ - vx = orig_vx; \ while ((w -= 2) >= 0) \ { \ x1 = vx >> 16; \ @@ -258,9 +184,103 @@ fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementat *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ } \ } \ +} + +#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \ + repeat_mode) \ +static void \ +fast_composite_scaled_nearest_ ## scale_func_name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dst_x, \ + int32_t dst_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type_t *dst_line; \ + src_type_t *src_first_line; \ + int y; \ + pixman_fixed_t max_vx = max_vx; /* suppress uninitialized variable warning */ \ + pixman_fixed_t max_vy; \ + pixman_vector_t v; \ + pixman_fixed_t vx, vy; \ + pixman_fixed_t unit_x, unit_y; \ + \ + src_type_t *src; \ + dst_type_t *dst; \ + int src_stride, dst_stride; \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode != PIXMAN_REPEAT_NORMAL && \ + PIXMAN_REPEAT_ ## repeat_mode != PIXMAN_REPEAT_NONE) \ + { \ + abort(); \ + } \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \ + /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ + * transformed from destination space to source space */ \ + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ + \ + /* reference point is the center of the pixel */ \ + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ + v.vector[2] = pixman_fixed_1; \ + \ + if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ + return; \ + \ + unit_x = src_image->common.transform->matrix[0][0]; \ + unit_y = src_image->common.transform->matrix[1][1]; \ + \ + /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \ + v.vector[0] -= pixman_fixed_e; \ + v.vector[1] -= pixman_fixed_e; \ + \ + vx = v.vector[0]; \ + vy = v.vector[1]; \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* Clamp repeating positions inside the actual samples */ \ + max_vx = src_image->bits.width << 16; \ + max_vy = src_image->bits.height << 16; \ + \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \ + repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ + } \ + \ + while (--height >= 0) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + \ + y = vy >> 16; \ + vy += unit_y; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ + \ + src = src_first_line + src_stride * y; \ + \ + scanline_func (dst, src, width, vx, unit_x, max_vx); \ } \ } +#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \ + src_type_t, dst_type_t, OP, repeat_mode) \ + FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ + SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \ + OP, repeat_mode) \ + FAST_NEAREST_MAINLOOP(scale_func_name##_##OP, \ + scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ + src_type_t, dst_type_t, repeat_mode) + + #define SCALED_NEAREST_FLAGS \ (FAST_PATH_SCALE_TRANSFORM | \ FAST_PATH_NO_ALPHA_MAP | \ @@ -268,7 +288,7 @@ fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementat FAST_PATH_NO_ACCESSORS | \ FAST_PATH_NO_WIDE_FORMAT) -#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ +#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \ { PIXMAN_OP_ ## op, \ PIXMAN_ ## s, \ (SCALED_NEAREST_FLAGS | \ @@ -277,7 +297,9 @@ fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementat PIXMAN_null, 0, \ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ - }, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \ { PIXMAN_OP_ ## op, \ PIXMAN_ ## s, \ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ @@ -286,4 +308,9 @@ fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementat fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ } +/* Prefer the use of 'cover' variant, because it is faster */ +#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) + #endif -- 2.7.4