1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
3 * Copyright © 2000 SuSE, Inc.
4 * Copyright © 2007 Red Hat, Inc.
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of SuSE not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. SuSE makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
16 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23 * Author: Keith Packard, SuSE, Inc.
26 #ifndef PIXMAN_FAST_PATH_H__
27 #define PIXMAN_FAST_PATH_H__
29 #include "pixman-private.h"
/*
 * Additional constant in the PIXMAN_REPEAT_ namespace. The scaler macros
 * in this header build their mode tests by token pasting
 * PIXMAN_REPEAT_ ## repeat_mode, so this name is what repeat_mode == COVER
 * resolves to.
 * NOTE(review): presumably -1 was chosen so that it differs from every
 * real pixman_repeat_t value -- confirm against the enum declaration.
 */
31 #define PIXMAN_REPEAT_COVER -1
/*
 * Adjust the coordinate *c for an axis of length size according to the
 * repeat mode given in the first argument. What the visible branches do:
 *   - PIXMAN_REPEAT_NONE: test whether *c lies outside 0 .. size - 1
 *   - PIXMAN_REPEAT_PAD:  pass *c through CLIP with bounds 0 and size - 1
 *   - remaining branch:   pass *c through MOD with 2 * size, then mirror
 *                         it with size * 2 - *c - 1
 * NOTE(review): the body of the PIXMAN_REPEAT_NORMAL branch, the guard
 * around the mirroring step and all return statements are absent from
 * this copy. Presumably the result is FALSE only for an out-of-range
 * coordinate in NONE mode -- confirm against the upstream file.
 */
33 static force_inline pixman_bool_t
34 repeat (pixman_repeat_t repeat, int *c, int size)
36 if (repeat == PIXMAN_REPEAT_NONE)
/* Valid coordinates are 0 .. size - 1; anything else is out of range. */
38 if (*c < 0 || *c >= size)
41 else if (repeat == PIXMAN_REPEAT_NORMAL)
48 else if (repeat == PIXMAN_REPEAT_PAD)
/* CLIP presumably clamps to the nearest valid coordinate -- TODO confirm. */
50 *c = CLIP (*c, 0, size - 1);
/* MOD presumably yields a non-negative remainder below 2 * size -- TODO confirm. */
54 *c = MOD (*c, size * 2);
/* Maps values size .. 2 * size - 1 onto size - 1 .. 0 (the mirrored half). */
56 *c = size * 2 - *c - 1;
62 * For each scanline fetched from source image with PAD repeat:
63 * - calculate how many pixels need to be padded on the left side
64 * - calculate how many pixels need to be padded on the right side
65 * - update width to only count pixels which are fetched from the image
66 * All this information is returned via 'width', 'left_pad', 'right_pad'
67 * arguments. The code assumes that 'unit_x' is positive.
69 * Note: 64-bit math is used in order to avoid potential overflows, which
70 * is probably excessive in many cases. This particular function
71 * may need its own correctness test and performance tuning.
/*
 * Inputs visible in this copy: source_image_width and unit_x by value.
 * The body additionally reads vx and reads and writes *width, *left_pad
 * and *right_pad.
 * NOTE(review): the declarations of those parameters, the declaration of
 * tmp and the guards of most branches are absent from this copy; the
 * inline notes below describe only the arithmetic that is visible.
 */
73 static force_inline void
74 pad_repeat_get_scanline_bounds (int32_t source_image_width,
76 pixman_fixed_t unit_x,
/* Widened to 64 bits before shifting so that the shift cannot overflow.
 * The shift by 16 presumably puts the width on the same scale as vx. */
81 int64_t max_vx = (int64_t) source_image_width << 16;
/* For negative vx and positive unit_x this is (-vx) / unit_x rounded up:
 * the number of unit_x steps needed before vx stops being negative. */
85 tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
93 *left_pad = (int32_t) tmp;
94 *width -= (int32_t) tmp;
/* (max_vx - vx) / unit_x rounded up when that difference is non-negative,
 * i.e. the number of steps until vx reaches max_vx, less the left pad. */
101 tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
107 else if (tmp >= *width)
/* Pixels beyond tmp become right padding; width shrinks to tmp. */
113 *right_pad = *width - (int32_t) tmp;
114 *width = (int32_t) tmp;
118 /* A macroified version of specialized nearest scalers for some
119 * common 8888 and 565 formats. It supports SRC and OVER ops.
121 * There are several repeat versions: ones that handle NORMAL, PAD and
122 * NONE repeat, and a COVER one without repeat handling that only works if
123 * the src region used is completely covered by the pre-repeated source samples.
125 * The loops are unrolled to process two pixels per iteration for better
126 * performance on most CPU architectures (superscalar processors
127 * can issue several operations simultaneously, other processors can hide
128 * instruction latencies by pipelining operations). Unrolling more
129 * does not make much sense because the compiler will start running out
130 * of spare registers soon.
/*
 * Alpha accessors selected by token pasting GET_ ## SRC_FORMAT ## _ALPHA
 * in FAST_NEAREST_SCANLINE.
 * GET_8888_ALPHA yields the bits of s above bit 23, which is the top byte
 * when s is a 32-bit unsigned value (assumed -- confirm at the call sites).
 * GET_0565_ALPHA ignores its argument and always yields 0xff.
 */
133 #define GET_8888_ALPHA(s) ((s) >> 24)
134 /* This is not actually used since we don't have an OVER with
135 565 source, but it is needed to build. */
136 #define GET_0565_ALPHA(s) 0xff
/*
 * FAST_NEAREST_SCANLINE expands to a static force_inline function named
 * scanline_func_name that writes one scanline of dst_type_t pixels.
 *
 * Macro parameters:
 *   scanline_func_name      name of the generated function
 *   SRC_FORMAT, DST_FORMAT  pasted into the CONVERT_x_TO_y and
 *                           GET_x_ALPHA helper names
 *   src_type_t, dst_type_t  pixel storage types
 *   OP                      pasted onto PIXMAN_OP_; the body tests for SRC
 *                           and OVER
 *   repeat_mode             pasted onto PIXMAN_REPEAT_; only NORMAL is
 *                           tested here
 *
 * Visible structure of the generated body:
 *   - a loop that subtracts 2 from w per pass, handling pixels s1 and s2
 *   - for NORMAL repeat, a loop on vx >= max_vx ahead of each sample
 *     (relies on unit_x being positive, per the inline comments)
 *   - for OVER, the alpha of each source pixel is taken with
 *     GET_<SRC_FORMAT>_ALPHA; the pixel is then either stored directly
 *     after format conversion, or the destination and source are converted
 *     to 8888, combined with UN8x4_MUL_UN8_ADD_UN8x4 and converted back
 *   - for SRC, each converted source pixel is stored through *dst++
 *   - a final section repeats the same steps for a single pixel s1
 *
 * NOTE(review): the remaining parameters, the local declarations, the
 * statements that fetch s1 and s2 and advance vx, the alpha tests that
 * choose between the direct store and the blend, and the action taken
 * for an unsupported OP are absent from this copy. Presumably the final
 * section handles an odd trailing pixel -- confirm against upstream.
 */
138 #define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
139 src_type_t, dst_type_t, OP, repeat_mode) \
140 static force_inline void \
141 scanline_func_name (dst_type_t *dst, \
145 pixman_fixed_t unit_x, \
146 pixman_fixed_t max_vx) \
153 if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
156 while ((w -= 2) >= 0) \
160 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
162 /* This works because we know that unit_x is positive */ \
163 while (vx >= max_vx) \
170 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
172 /* This works because we know that unit_x is positive */ \
173 while (vx >= max_vx) \
178 if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
180 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
181 a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
185 *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
189 d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
190 s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
192 UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
193 *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
199 *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
203 d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
204 s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \
206 UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
207 *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
211 else /* PIXMAN_OP_SRC */ \
213 *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
214 *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
223 if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
225 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
229 *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
233 d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
234 s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
236 UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
237 *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
241 else /* PIXMAN_OP_SRC */ \
243 *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
/*
 * FAST_NEAREST_MAINLOOP expands to the composite entry point
 * fast_composite_scaled_nearest_<scale_func_name>, which walks the
 * destination rows and calls scanline_func for each of them.
 *
 * Macro parameters:
 *   scale_func_name         suffix of the generated function name
 *   scanline_func           per-scanline worker; called positionally as
 *                           (dst, src, w, vx, unit_x, max_vx)
 *   src_type_t, dst_type_t  pixel storage types
 *   repeat_mode             pasted onto PIXMAN_REPEAT_ to select the
 *                           NORMAL, PAD or NONE handling; any other value
 *                           takes the plain path at the end
 *
 * Visible steps of the generated body:
 *   1. Fetch the destination line pointer at (dst_x, dst_y) and the first
 *      source line at (0, 0) with PIXMAN_IMAGE_GET_LINE.
 *   2. Build v from the centre of pixel (src_x, src_y) and transform it
 *      with src_image->common.transform.
 *   3. Take unit_x and unit_y from matrix[0][0] and matrix[1][1] and
 *      subtract pixman_fixed_e from v so that 0.5 rounds down.
 *   4. NORMAL: set max_vx and max_vy to width << 16 and height << 16 and
 *      pass vx and vy through repeat ().
 *   5. PAD or NONE: split width into left_pad, width and right_pad with
 *      pad_repeat_get_scanline_bounds and advance vx by
 *      left_pad * unit_x.
 *   6. Per row (loop on --height >= 0):
 *      - PAD: pass y through repeat (), then draw the left pad from src
 *        with vx and unit_x of 0, the middle part with the real vx and
 *        unit_x, and the right pad from src + width - 1 with vx and
 *        unit_x of 0.
 *      - NONE: a row with y outside 0 .. height - 1 is drawn entirely
 *        from a static zero pixel; otherwise only the pads use the zero
 *        pixel.
 *      - otherwise: one call covering the whole width, passing max_vx.
 *
 * NOTE(review): the remaining parameters and local declarations, the
 * action taken when pixman_transform_point_3d fails, the assignments to
 * vx, vy, dst and y, and the guards around the pad calls are absent from
 * this copy.
 * NOTE(review): max_vx = max_vx reads an indeterminate value purely to
 * silence a warning; initialising to 0 looks safer -- confirm.
 * NOTE(review): bits.width << 16 and bits.height << 16 are evaluated in
 * the type of the bits fields, unlike the 64-bit computation in
 * pad_repeat_get_scanline_bounds; large dimensions may overflow if that
 * type is int -- confirm.
 */
248 #define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
251 fast_composite_scaled_nearest_ ## scale_func_name (pixman_implementation_t *imp, \
253 pixman_image_t * src_image, \
254 pixman_image_t * mask_image, \
255 pixman_image_t * dst_image, \
265 dst_type_t *dst_line; \
266 src_type_t *src_first_line; \
268 pixman_fixed_t max_vx = max_vx; /* suppress uninitialized variable warning */ \
269 pixman_fixed_t max_vy; \
271 pixman_fixed_t vx, vy; \
272 pixman_fixed_t unit_x, unit_y; \
273 int32_t left_pad, right_pad; \
277 int src_stride, dst_stride; \
279 PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
280 /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
281 * transformed from destination space to source space */ \
282 PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
284 /* reference point is the center of the pixel */ \
285 v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
286 v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
287 v.vector[2] = pixman_fixed_1; \
289 if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
292 unit_x = src_image->common.transform->matrix[0][0]; \
293 unit_y = src_image->common.transform->matrix[1][1]; \
295 /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
296 v.vector[0] -= pixman_fixed_e; \
297 v.vector[1] -= pixman_fixed_e; \
302 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
304 /* Clamp repeating positions inside the actual samples */ \
305 max_vx = src_image->bits.width << 16; \
306 max_vy = src_image->bits.height << 16; \
308 repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
309 repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
312 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
313 PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
315 pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
316 &width, &left_pad, &right_pad); \
317 vx += left_pad * unit_x; \
320 while (--height >= 0) \
323 dst_line += dst_stride; \
327 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
328 repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
329 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
331 repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
332 src = src_first_line + src_stride * y; \
335 scanline_func (dst, src, left_pad, 0, 0, 0); \
339 scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
343 scanline_func (dst + left_pad + width, src + src_image->bits.width - 1, \
344 right_pad, 0, 0, 0); \
347 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
349 static src_type_t zero = 0; \
350 if (y < 0 || y >= src_image->bits.height) \
352 scanline_func (dst, &zero, left_pad + width + right_pad, 0, 0, 0); \
355 src = src_first_line + src_stride * y; \
358 scanline_func (dst, &zero, left_pad, 0, 0, 0); \
362 scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
366 scanline_func (dst + left_pad + width, &zero, right_pad, 0, 0, 0); \
371 src = src_first_line + src_stride * y; \
372 scanline_func (dst, src, width, vx, unit_x, max_vx); \
/*
 * FAST_NEAREST instantiates both halves of one scaler:
 *   - FAST_NEAREST_SCANLINE, producing the worker
 *     scaled_nearest_scanline_<scale_func_name>_<OP>
 *   - FAST_NEAREST_MAINLOOP, producing
 *     fast_composite_scaled_nearest_<scale_func_name>_<OP>, which is given
 *     that worker as its scanline_func
 * The expansion ends in an extern declaration without a semicolon.
 * NOTE(review): presumably that declaration exists so that an invocation
 * can be followed by a semicolon at file scope -- confirm. The tail of
 * the FAST_NEAREST_SCANLINE argument list is absent from this copy.
 */
377 #define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
378 src_type_t, dst_type_t, OP, repeat_mode) \
379 FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
380 SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
382 FAST_NEAREST_MAINLOOP(scale_func_name##_##OP, \
383 scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
384 src_type_t, dst_type_t, repeat_mode) \
386 extern int no_such_variable
/*
 * Flag set shared by every SIMPLE_NEAREST_FAST_PATH_* initializer in this
 * header; each of them ORs further flags onto it.
 */
389 #define SCALED_NEAREST_FLAGS \
390 (FAST_PATH_SCALE_TRANSFORM | \
391 FAST_PATH_NO_ALPHA_MAP | \
392 FAST_PATH_NEAREST_FILTER | \
393 FAST_PATH_NO_ACCESSORS | \
394 FAST_PATH_NARROW_FORMAT)
/*
 * Brace initializer for operator op and destination format d that names
 * fast_composite_scaled_nearest_<func>_normal_<op> as its function.
 * The flags are SCALED_NEAREST_FLAGS plus FAST_PATH_NORMAL_REPEAT and
 * FAST_PATH_X_UNIT_POSITIVE.
 * NOTE(review): the fields that use the s parameter and the end of the
 * initializer are absent from this copy; presumably this is one entry of
 * a fast path table -- confirm.
 */
396 #define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
397 { PIXMAN_OP_ ## op, \
399 (SCALED_NEAREST_FLAGS | \
400 FAST_PATH_NORMAL_REPEAT | \
401 FAST_PATH_X_UNIT_POSITIVE), \
403 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
404 fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
/*
 * Brace initializer for operator op and destination format d that names
 * fast_composite_scaled_nearest_<func>_pad_<op> as its function.
 * The flags are SCALED_NEAREST_FLAGS plus FAST_PATH_PAD_REPEAT and
 * FAST_PATH_X_UNIT_POSITIVE.
 * NOTE(review): the fields that use the s parameter and the end of the
 * initializer are absent from this copy; presumably this is one entry of
 * a fast path table -- confirm.
 */
407 #define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
408 { PIXMAN_OP_ ## op, \
410 (SCALED_NEAREST_FLAGS | \
411 FAST_PATH_PAD_REPEAT | \
412 FAST_PATH_X_UNIT_POSITIVE), \
414 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
415 fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
/*
 * Brace initializer for operator op and destination format d that names
 * fast_composite_scaled_nearest_<func>_none_<op> as its function.
 * The flags are SCALED_NEAREST_FLAGS plus FAST_PATH_NONE_REPEAT and
 * FAST_PATH_X_UNIT_POSITIVE.
 * NOTE(review): the fields that use the s parameter and the end of the
 * initializer are absent from this copy; presumably this is one entry of
 * a fast path table -- confirm.
 */
418 #define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
419 { PIXMAN_OP_ ## op, \
421 (SCALED_NEAREST_FLAGS | \
422 FAST_PATH_NONE_REPEAT | \
423 FAST_PATH_X_UNIT_POSITIVE), \
425 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
426 fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
/*
 * Brace initializer for operator op and destination format d that names
 * fast_composite_scaled_nearest_<func>_cover_<op> as its function.
 * The flags are SCALED_NEAREST_FLAGS plus FAST_PATH_SAMPLES_COVER_CLIP;
 * unlike the NORMAL, PAD and NONE variants, FAST_PATH_X_UNIT_POSITIVE is
 * not part of the flags here.
 * NOTE(review): the fields that use the s parameter and the end of the
 * initializer are absent from this copy; presumably this is one entry of
 * a fast path table -- confirm.
 */
429 #define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
430 { PIXMAN_OP_ ## op, \
432 SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
434 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
435 fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
438 /* Prefer the use of 'cover' variant, because it is faster */
/*
 * Expands to four comma-separated initializers for the same
 * (op, s, d, func), in the order COVER, NONE, PAD, NORMAL.
 * NOTE(review): presumably the table is searched front to back, which is
 * what makes listing COVER first a preference -- confirm at the lookup.
 */
439 #define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
440 SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
441 SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
442 SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
443 SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)