1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
3 * Copyright © 2000 SuSE, Inc.
4 * Copyright © 2007 Red Hat, Inc.
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of SuSE not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. SuSE makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
16 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 * Author: Keith Packard, SuSE, Inc.
 */
26 #ifndef PIXMAN_FAST_PATH_H__
27 #define PIXMAN_FAST_PATH_H__
29 #include "pixman-private.h"
/* Sentinel repeat value meaning the source samples fully cover the
 * destination region, so no repeat handling is needed at all. */
#define PIXMAN_REPEAT_COVER -1

/* Flags describing input parameters to fast path macro template.
 * Turning on some flag values may indicate that
 * "some property X is available so template can use this" or
 * "some property X should be handled by template".
 *
 * FLAG_HAVE_SOLID_MASK
 *	Input mask is solid so template should handle this.
 *
 * FLAG_HAVE_NON_SOLID_MASK
 *	Input mask is bits mask so template should handle this.
 *
 * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
 * exclusive. (It's not allowed to turn both flags on)
 */
#define FLAG_HAVE_SOLID_MASK (1 << 1)
#define FLAG_HAVE_NON_SOLID_MASK (1 << 2)

/* To avoid too short repeated scanline function calls, extend source
 * scanlines having width less than below constant value.
 */
#define REPEAT_NORMAL_MIN_WIDTH 64
56 static force_inline pixman_bool_t
57 repeat (pixman_repeat_t repeat, int *c, int size)
59 if (repeat == PIXMAN_REPEAT_NONE)
61 if (*c < 0 || *c >= size)
64 else if (repeat == PIXMAN_REPEAT_NORMAL)
71 else if (repeat == PIXMAN_REPEAT_PAD)
73 *c = CLIP (*c, 0, size - 1);
77 *c = MOD (*c, size * 2);
79 *c = size * 2 - *c - 1;
85 * For each scanline fetched from source image with PAD repeat:
86 * - calculate how many pixels need to be padded on the left side
87 * - calculate how many pixels need to be padded on the right side
88 * - update width to only count pixels which are fetched from the image
89 * All this information is returned via 'width', 'left_pad', 'right_pad'
90 * arguments. The code is assuming that 'unit_x' is positive.
92 * Note: 64-bit math is used in order to avoid potential overflows, which
93 * is probably excessive in many cases. This particular function
94 * may need its own correctness test and performance tuning.
96 static force_inline void
97 pad_repeat_get_scanline_bounds (int32_t source_image_width,
99 pixman_fixed_t unit_x,
104 int64_t max_vx = (int64_t) source_image_width << 16;
108 tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
116 *left_pad = (int32_t) tmp;
117 *width -= (int32_t) tmp;
124 tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
130 else if (tmp >= *width)
136 *right_pad = *width - (int32_t) tmp;
137 *width = (int32_t) tmp;
/* A macroified version of specialized nearest scalers for some
 * common 8888 and 565 formats. It supports SRC and OVER ops.
 *
 * There are two repeat versions, one that handles repeat normal,
 * and one without repeat handling that only works if the src region
 * used is completely covered by the pre-repeated source samples.
 *
 * The loops are unrolled to process two pixels per iteration for better
 * performance on most CPU architectures (superscalar processors
 * can issue several operations simultaneously, other processors can hide
 * instructions latencies by pipelining operations). Unrolling more
 * does not make much sense because the compiler will start running out
 * of spare registers soon.
 */

/* Alpha extraction helpers, selected by token pasting on SRC_FORMAT. */
#define GET_8888_ALPHA(s) ((s) >> 24)
/* This is not actually used since we don't have an OVER with
   565 source, but it is needed to build. */
#define GET_0565_ALPHA(s) 0xff
#define GET_x888_ALPHA(s) 0xff
/* NOTE(review): this macro body is visibly truncated in this copy of the
 * file -- the function braces, local declarations (w, vx, s1, s2, a1, a2, d),
 * the source-pixel fetches and the odd-width tail prologue are missing.
 * The surviving lines are kept byte-for-byte; restore from a pristine
 * pixman-fast-path.h before relying on this macro.
 */
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
                              src_type_t, dst_type_t, OP, repeat_mode) \
static force_inline void \
scanline_func_name (dst_type_t *dst, \
                    const src_type_t *src, \
                    pixman_fixed_t unit_x, \
                    pixman_fixed_t max_vx, \
                    pixman_bool_t fully_transparent_src) \
    /* OVER with a fully transparent source leaves dst unchanged */ \
    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
    if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
    /* unrolled main loop: two pixels per iteration */ \
    while ((w -= 2) >= 0) \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
            /* This works because we know that unit_x is positive */ \
            while (vx >= max_vx) \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
            /* This works because we know that unit_x is positive */ \
            while (vx >= max_vx) \
        if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
            a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
            a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
                *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
                d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
                s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
                UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
                *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
                *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
                d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
                s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \
                UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
                *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
        else /* PIXMAN_OP_SRC */ \
            *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
            *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
    /* tail: single remaining pixel when width is odd */ \
    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
        a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
            *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
            d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
            s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
            UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
            *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
    else /* PIXMAN_OP_SRC */ \
        *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
/* NOTE(review): main loop template for nearest scaling.  This copy appears \
 * to be missing many continuation lines (braces, several locals such as \
 * v/y/src/width/height, and block structure); bytes kept as-is -- verify \
 * against a pristine pixman-fast-path.h. */ \
#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
                                  dst_type_t, repeat_mode, have_mask, mask_is_solid) \
fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
                                                  pixman_composite_info_t *info) \
    PIXMAN_COMPOSITE_ARGS (info); \
    dst_type_t *dst_line; \
    mask_type_t *mask_line; \
    src_type_t *src_first_line; \
    pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
    pixman_fixed_t max_vy; \
    pixman_fixed_t vx, vy; \
    pixman_fixed_t unit_x, unit_y; \
    int32_t left_pad, right_pad; \
    mask_type_t solid_mask; \
    const mask_type_t *mask = &solid_mask; \
    int src_stride, mask_stride, dst_stride; \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
    solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
                           mask_stride, mask_line, 1); \
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
     * transformed from destination space to source space */ \
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
    /* reference point is the center of the pixel */ \
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
    v.vector[2] = pixman_fixed_1; \
    if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
    unit_x = src_image->common.transform->matrix[0][0]; \
    unit_y = src_image->common.transform->matrix[1][1]; \
    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
    v.vector[0] -= pixman_fixed_e; \
    v.vector[1] -= pixman_fixed_e; \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
        /* Clamp repeating positions inside the actual samples */ \
        max_vx = src_image->bits.width << 16; \
        max_vy = src_image->bits.height << 16; \
        repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
        repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
        PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
        pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
                                        &width, &left_pad, &right_pad); \
        vx += left_pad * unit_x; \
    while (--height >= 0) \
        dst_line += dst_stride; \
        if (have_mask && !mask_is_solid) \
            mask_line += mask_stride; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
            repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
            repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
            src = src_first_line + src_stride * y; \
            scanline_func (mask, dst, src, left_pad, 0, 0, 0, FALSE); \
            scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
                           dst + left_pad, src, width, vx, unit_x, 0, FALSE); \
            scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
                           dst + left_pad + width, src + src_image->bits.width - 1, \
                           right_pad, 0, 0, 0, FALSE); \
        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
            static const src_type_t zero[1] = { 0 }; \
            if (y < 0 || y >= src_image->bits.height) \
                scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0, TRUE); \
            src = src_first_line + src_stride * y; \
            scanline_func (mask, dst, zero, left_pad, 0, 0, 0, TRUE); \
            scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
                           dst + left_pad, src, width, vx, unit_x, 0, FALSE); \
            scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
                           dst + left_pad + width, zero, right_pad, 0, 0, 0, TRUE); \
            src = src_first_line + src_stride * y; \
            scanline_func (mask, dst, src, width, vx, unit_x, max_vx, FALSE); \
/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
/* Adds a leading-underscore level of indirection so 'scale_func_name'
 * is expanded before token pasting onto the function-name prefix. */
#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
                                     dst_type_t, repeat_mode, have_mask, mask_is_solid) \
    FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
                              dst_type_t, repeat_mode, have_mask, mask_is_solid)
/* Instantiate the no-mask nearest main loop: wrap 'scanline_func' (which
 * takes no mask argument) in a shim matching the masked scanline signature
 * expected by FAST_NEAREST_MAINLOOP_INT, then expand the main loop with a
 * dummy uint8_t mask type and have_mask/mask_is_solid = FALSE.
 *
 * NOTE(review): this copy was missing the 'repeat_mode' parameter line, the
 * wrapper's dst/w/vx parameter lines and its braces; restored to match the
 * canonical pixman implementation. */
#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
                                     repeat_mode) \
    static force_inline void \
    scanline_func##scale_func_name##_wrapper ( \
                    const uint8_t    *mask, \
                    dst_type_t       *dst, \
                    const src_type_t *src, \
                    int32_t          w, \
                    pixman_fixed_t   vx, \
                    pixman_fixed_t   unit_x, \
                    pixman_fixed_t   max_vx, \
                    pixman_bool_t    fully_transparent_src) \
    { \
        scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \
    } \
    FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
                               src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
/* Convenience entry point for nearest scalers without a mask; forwards to
 * FAST_NEAREST_MAINLOOP_NOMASK with the sun-studio underscore indirection.
 *
 * NOTE(review): the continuation line carrying the 'repeat_mode' parameter
 * was missing in this copy (the body referenced it); restored. */
#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
                              repeat_mode) \
    FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \
                                 dst_type_t, repeat_mode)
/* Instantiate both the scanline function and the no-mask main loop for a
 * given source/destination format pair, operator and repeat mode.
 *
 * NOTE(review): the 'OP, repeat_mode)' continuation line closing the
 * FAST_NEAREST_SCANLINE argument list was missing in this copy, leaving an
 * unbalanced parenthesis; restored. */
#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
                     src_type_t, dst_type_t, OP, repeat_mode) \
    FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
                          SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
                          OP, repeat_mode) \
    FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \
                                 scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
                                 src_type_t, dst_type_t, repeat_mode)
/* Fast path flags shared by every scaled-nearest table entry below. */
#define SCALED_NEAREST_FLAGS \
    (FAST_PATH_SCALE_TRANSFORM | \
     FAST_PATH_NO_ALPHA_MAP | \
     FAST_PATH_NEAREST_FILTER | \
     FAST_PATH_NO_ACCESSORS | \
     FAST_PATH_NARROW_FORMAT)
/* Fast path table entries for nearest scaling without a mask.
 * NOTE(review): each entry macro below appears to be missing its
 * 'PIXMAN_ ## s' source-format line, the 'PIXMAN_null, 0' mask line and
 * the closing '}' of the initializer -- tokens kept byte-for-byte;
 * verify against a pristine pixman-fast-path.h. */
#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NORMAL_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_PAD_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NONE_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
/* Fast path table entries for nearest scaling with an a8 bits mask. \
 * NOTE(review): each entry appears to be missing its 'PIXMAN_ ## s' line \
 * and the closing '}' -- tokens kept byte-for-byte; verify upstream. */ \
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NORMAL_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_PAD_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NONE_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
/* Fast path table entries for nearest scaling with a solid mask. \
 * NOTE(review): each entry appears to be missing its 'PIXMAN_ ## s' line \
 * and the closing '}' -- tokens kept byte-for-byte; verify upstream. */ \
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NORMAL_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_PAD_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NONE_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
/* Prefer the use of 'cover' variant, because it is faster */
#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)

/* NOTE(review): unlike the no-mask list above, no NORMAL-repeat entry is
 * registered here -- confirm against upstream whether that is intentional
 * in this version. */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
604 /*****************************************************************************/
607 * Identify 5 zones in each scanline for bilinear scaling. Depending on
608 * whether 2 pixels to be interpolated are fetched from the image itself,
609 * from the padding area around it or from both image and padding area.
611 static force_inline void
612 bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
614 pixman_fixed_t unit_x,
621 int width1 = *width, left_pad1, right_pad1;
622 int width2 = *width, left_pad2, right_pad2;
624 pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
625 &width1, &left_pad1, &right_pad1);
626 pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
627 unit_x, &width2, &left_pad2, &right_pad2);
629 *left_pad = left_pad2;
630 *left_tz = left_pad1 - left_pad2;
631 *right_tz = right_pad2 - right_pad1;
632 *right_pad = right_pad1;
633 *width -= *left_pad + *left_tz + *right_tz + *right_pad;
/*
 * Main loop template for single pass bilinear scaling. It needs to be
 * provided with 'scanline_func' which should do the compositing operation.
 * The needed function has the following prototype:
 *
 *	scanline_func (dst_type_t *       dst,
 *	               const mask_type_ * mask,
 *	               const src_type_t * src_top,
 *	               const src_type_t * src_bottom,
 *	               pixman_fixed_t     unit_x,
 *	               pixman_fixed_t     max_vx,
 *	               pixman_bool_t      zero_src)
 *
 * Where:
 *  dst - destination scanline buffer for storing results
 *  mask - mask buffer (or single value for solid mask)
 *  src_top, src_bottom - two source scanlines
 *  width - number of pixels to process
 *  weight_top - weight of the top row for interpolation
 *  weight_bottom - weight of the bottom row for interpolation
 *  vx - initial position for fetching the first pair of
 *       pixels from the source buffer
 *  unit_x - position increment needed to move to the next pair
 *           of pixels
 *  max_vx - image size as a fixed point value, can be used for
 *           implementing NORMAL repeat (when it is supported)
 *  zero_src - boolean hint variable, which is set to TRUE when
 *             all source pixels are fetched from zero padding
 *             zone for NONE repeat
 *
 * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256,
 *       but sometimes it may be less than that for NONE repeat when handling
 *       fuzzy antialiased top or bottom image edges. Also both top and
 *       bottom weight variables are guaranteed to have value in 0-255
 *       range and can fit into unsigned byte or be used with 8-bit SIMD
 *       multiplication instructions.
 */
/* NOTE(review): single-pass bilinear scaling main loop template.  This copy
 * appears to be missing many continuation lines (function braces, locals
 * such as v/y1/y2/src/width/height/max_x/src_width/i/j, block structure and
 * some statement tails).  Tokens kept byte-for-byte; restore from a
 * pristine pixman-fast-path.h before relying on this macro.
 */
#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
                                   dst_type_t, repeat_mode, flags) \
fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
                                                   pixman_composite_info_t *info) \
    PIXMAN_COMPOSITE_ARGS (info); \
    dst_type_t *dst_line; \
    mask_type_t *mask_line; \
    src_type_t *src_first_line; \
    pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
    pixman_fixed_t vx, vy; \
    pixman_fixed_t unit_x, unit_y; \
    int32_t left_pad, left_tz, right_tz, right_pad; \
    mask_type_t solid_mask; \
    const mask_type_t *mask = &solid_mask; \
    int src_stride, mask_stride, dst_stride; \
    pixman_fixed_t src_width_fixed; \
    pixman_bool_t need_src_extension; \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
    if (flags & FLAG_HAVE_SOLID_MASK) \
        solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
    else if (flags & FLAG_HAVE_NON_SOLID_MASK) \
        PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
                               mask_stride, mask_line, 1); \
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
     * transformed from destination space to source space */ \
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
    /* reference point is the center of the pixel */ \
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
    v.vector[2] = pixman_fixed_1; \
    if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
    unit_x = src_image->common.transform->matrix[0][0]; \
    unit_y = src_image->common.transform->matrix[1][1]; \
    v.vector[0] -= pixman_fixed_1 / 2; \
    v.vector[1] -= pixman_fixed_1 / 2; \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
        PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
        bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
                                                 &left_pad, &left_tz, &width, &right_tz, &right_pad); \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
            /* PAD repeat does not need special handling for 'transition zones' and */ \
            /* they can be combined with 'padding zones' safely */ \
            left_pad += left_tz; \
            right_pad += right_tz; \
            left_tz = right_tz = 0; \
        v.vector[0] += left_pad * unit_x; \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
        repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \
        max_x = pixman_fixed_to_int (vx + (width - 1) * unit_x) + 1; \
        if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \
            while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \
                src_width += src_image->bits.width; \
            need_src_extension = TRUE; \
            src_width = src_image->bits.width; \
            need_src_extension = FALSE; \
        src_width_fixed = pixman_int_to_fixed (src_width); \
    while (--height >= 0) \
        int weight1, weight2; \
        dst_line += dst_stride; \
        if (flags & FLAG_HAVE_NON_SOLID_MASK) \
            mask_line += mask_stride; \
        y1 = pixman_fixed_to_int (vy); \
        weight2 = (vy >> 8) & 0xff; \
            /* normal case, both row weights are in 0-255 range and fit unsigned byte */ \
            weight1 = 256 - weight2; \
            /* set both top and bottom row to the same scanline, and weights to 128+128 */ \
            weight1 = weight2 = 128; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
            src_type_t *src1, *src2; \
            src_type_t buf1[2]; \
            src_type_t buf2[2]; \
            repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
            repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
            src1 = src_first_line + src_stride * y1; \
            src2 = src_first_line + src_stride * y2; \
                buf1[0] = buf1[1] = src1[0]; \
                buf2[0] = buf2[1] = src2[0]; \
                scanline_func (dst, mask, \
                               buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                scanline_func (dst, mask, \
                               src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
                buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
                scanline_func (dst, mask, \
                               buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \
        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
            src_type_t *src1, *src2; \
            src_type_t buf1[2]; \
            src_type_t buf2[2]; \
            /* handle top/bottom zero padding by just setting weights to 0 if needed */ \
            if (y1 >= src_image->bits.height) \
                y1 = src_image->bits.height - 1; \
            if (y2 >= src_image->bits.height) \
                y2 = src_image->bits.height - 1; \
            src1 = src_first_line + src_stride * y1; \
            src2 = src_first_line + src_stride * y2; \
                buf1[0] = buf1[1] = 0; \
                buf2[0] = buf2[1] = 0; \
                scanline_func (dst, mask, \
                               buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                scanline_func (dst, mask, \
                               buf1, buf2, left_tz, weight1, weight2, \
                               pixman_fixed_frac (vx), unit_x, 0, FALSE); \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                vx += left_tz * unit_x; \
                scanline_func (dst, mask, \
                               src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                vx += width * unit_x; \
                buf1[0] = src1[src_image->bits.width - 1]; \
                buf2[0] = src2[src_image->bits.width - 1]; \
                scanline_func (dst, mask, \
                               buf1, buf2, right_tz, weight1, weight2, \
                               pixman_fixed_frac (vx), unit_x, 0, FALSE); \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                buf1[0] = buf1[1] = 0; \
                buf2[0] = buf2[1] = 0; \
                scanline_func (dst, mask, \
                               buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
            int32_t num_pixels; \
            int32_t width_remain; \
            src_type_t * src_line_top; \
            src_type_t * src_line_bottom; \
            src_type_t buf1[2]; \
            src_type_t buf2[2]; \
            src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \
            src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \
            repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \
            repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \
            src_line_top = src_first_line + src_stride * y1; \
            src_line_bottom = src_first_line + src_stride * y2; \
            if (need_src_extension) \
                for (i=0; i<src_width;) \
                    for (j=0; j<src_image->bits.width; j++, i++) \
                        extended_src_line0[i] = src_line_top[j]; \
                        extended_src_line1[i] = src_line_bottom[j]; \
                src_line_top = &extended_src_line0[0]; \
                src_line_bottom = &extended_src_line1[0]; \
            /* Top & Bottom wrap around buffer */ \
            buf1[0] = src_line_top[src_width - 1]; \
            buf1[1] = src_line_top[0]; \
            buf2[0] = src_line_bottom[src_width - 1]; \
            buf2[1] = src_line_bottom[0]; \
            width_remain = width; \
            while (width_remain > 0) \
                /* We use src_width_fixed because it can make vx in original source range */ \
                repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
                /* Wrap around part */ \
                if (pixman_fixed_to_int (vx) == src_width - 1) \
                    /* for positive unit_x \
                     * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \
                     * vx is in range [0, src_width_fixed - pixman_fixed_e] \
                     * So we are safe from overflow. \
                    num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \
                    if (num_pixels > width_remain) \
                        num_pixels = width_remain; \
                    scanline_func (dst, mask, buf1, buf2, num_pixels, \
                                   weight1, weight2, pixman_fixed_frac(vx), \
                                   unit_x, src_width_fixed, FALSE); \
                    width_remain -= num_pixels; \
                    vx += num_pixels * unit_x; \
                    if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                        mask += num_pixels; \
                    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
                /* Normal scanline composite */ \
                if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \
                    /* for positive unit_x \
                     * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \
                     * vx is in range [0, src_width_fixed - pixman_fixed_e] \
                     * So we are safe from overflow here. \
                    num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \
                    if (num_pixels > width_remain) \
                        num_pixels = width_remain; \
                    scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \
                                   weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \
                    width_remain -= num_pixels; \
                    vx += num_pixels * unit_x; \
                    dst += num_pixels; \
                    if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                        mask += num_pixels; \
            scanline_func (dst, mask, src_first_line + src_stride * y1, \
                           src_first_line + src_stride * y2, width, \
                           weight1, weight2, vx, unit_x, max_vx, FALSE); \
/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
/* Adds a leading-underscore level of indirection so 'scale_func_name'
 * is expanded before token pasting onto the function-name prefix. */
#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
                                      dst_type_t, repeat_mode, flags) \
    FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
                               dst_type_t, repeat_mode, flags)
/* Fast path flags shared by every scaled-bilinear table entry below. */
#define SCALED_BILINEAR_FLAGS \
    (FAST_PATH_SCALE_TRANSFORM | \
     FAST_PATH_NO_ALPHA_MAP | \
     FAST_PATH_BILINEAR_FILTER | \
     FAST_PATH_NO_ACCESSORS | \
     FAST_PATH_NARROW_FORMAT)
/* Fast path table entries for bilinear scaling (no mask, a8 bits mask, and
 * solid mask variants).  NOTE(review): each entry macro below appears to be
 * missing its 'PIXMAN_ ## s' source-format line (and, for the no-mask
 * variants, the 'PIXMAN_null, 0' mask line) plus the closing '}' of the
 * initializer -- tokens kept byte-for-byte; verify against a pristine
 * pixman-fast-path.h. */
#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_PAD_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_NONE_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_NORMAL_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_PAD_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_NONE_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_NORMAL_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_PAD_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_NONE_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_NORMAL_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
/* Prefer the use of 'cover' variant, because it is faster */
#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \
    SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)

/* Same preference ordering, with an a8 bits mask. */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)

/* Same preference ordering, with a solid mask. */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)