Rename pixman-fast-path.h to pixman-inlines.h
[profile/ivi/pixman.git] / pixman / pixman-inlines.h
1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3  * Copyright © 2000 SuSE, Inc.
4  * Copyright © 2007 Red Hat, Inc.
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of SuSE not be used in advertising or
11  * publicity pertaining to distribution of the software without specific,
12  * written prior permission.  SuSE makes no representations about the
13  * suitability of this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  *
16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  * Author:  Keith Packard, SuSE, Inc.
24  */
25
26 #ifndef PIXMAN_FAST_PATH_H__
27 #define PIXMAN_FAST_PATH_H__
28
29 #include "pixman-private.h"
30
/* Extra repeat_mode value understood by the fast path templates in this
 * file, which paste it as PIXMAN_REPEAT_ ## repeat_mode and compare the
 * result against the real PIXMAN_REPEAT_* modes.
 *
 * The negative literal is parenthesized so that the expansion always
 * behaves as a single operand, whatever expression it is pasted into.
 */
#define PIXMAN_REPEAT_COVER (-1)
32
/* Flags describing the input parameters handed to a fast path macro
 * template. Setting a flag means either "property X is available, so
 * the template may rely on it" or "property X has to be handled by the
 * template".
 *
 * FLAG_HAVE_SOLID_MASK
 *  The input mask is solid and the template has to handle it.
 *
 * FLAG_HAVE_NON_SOLID_MASK
 *  The input mask is a bits mask and the template has to handle it.
 *
 * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
 * exclusive: setting both at the same time is not allowed.
 */
#define FLAG_NONE                               (0x0)
#define FLAG_HAVE_SOLID_MASK                    (0x2)
#define FLAG_HAVE_NON_SOLID_MASK                (0x4)
50
/* Source scanlines narrower than this many pixels are extended, so that
 * the repeated scanline function is not called for very short runs.
 */
#define REPEAT_NORMAL_MIN_WIDTH                 (64)
55
56 static force_inline pixman_bool_t
57 repeat (pixman_repeat_t repeat, int *c, int size)
58 {
59     if (repeat == PIXMAN_REPEAT_NONE)
60     {
61         if (*c < 0 || *c >= size)
62             return FALSE;
63     }
64     else if (repeat == PIXMAN_REPEAT_NORMAL)
65     {
66         while (*c >= size)
67             *c -= size;
68         while (*c < 0)
69             *c += size;
70     }
71     else if (repeat == PIXMAN_REPEAT_PAD)
72     {
73         *c = CLIP (*c, 0, size - 1);
74     }
75     else /* REFLECT */
76     {
77         *c = MOD (*c, size * 2);
78         if (*c >= size)
79             *c = size * 2 - *c - 1;
80     }
81     return TRUE;
82 }
83
84 /*
85  * For each scanline fetched from source image with PAD repeat:
86  * - calculate how many pixels need to be padded on the left side
87  * - calculate how many pixels need to be padded on the right side
88  * - update width to only count pixels which are fetched from the image
89  * All this information is returned via 'width', 'left_pad', 'right_pad'
90  * arguments. The code is assuming that 'unit_x' is positive.
91  *
92  * Note: 64-bit math is used in order to avoid potential overflows, which
93  *       is probably excessive in many cases. This particular function
94  *       may need its own correctness test and performance tuning.
95  */
96 static force_inline void
97 pad_repeat_get_scanline_bounds (int32_t         source_image_width,
98                                 pixman_fixed_t  vx,
99                                 pixman_fixed_t  unit_x,
100                                 int32_t *       width,
101                                 int32_t *       left_pad,
102                                 int32_t *       right_pad)
103 {
104     int64_t max_vx = (int64_t) source_image_width << 16;
105     int64_t tmp;
106     if (vx < 0)
107     {
108         tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
109         if (tmp > *width)
110         {
111             *left_pad = *width;
112             *width = 0;
113         }
114         else
115         {
116             *left_pad = (int32_t) tmp;
117             *width -= (int32_t) tmp;
118         }
119     }
120     else
121     {
122         *left_pad = 0;
123     }
124     tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
125     if (tmp < 0)
126     {
127         *right_pad = *width;
128         *width = 0;
129     }
130     else if (tmp >= *width)
131     {
132         *right_pad = 0;
133     }
134     else
135     {
136         *right_pad = *width - (int32_t) tmp;
137         *width = (int32_t) tmp;
138     }
139 }
140
141 /* A macroified version of specialized nearest scalers for some
142  * common 8888 and 565 formats. It supports SRC and OVER ops.
143  *
144  * There are two repeat versions, one that handles repeat normal,
145  * and one without repeat handling that only works if the src region
146  * used is completely covered by the pre-repeated source samples.
147  *
148  * The loops are unrolled to process two pixels per iteration for better
149  * performance on most CPU architectures (superscalar processors
150  * can issue several operations simultaneously, other processors can hide
151  * instruction latencies by pipelining operations). Unrolling more
152  * does not make much sense because the compiler will start running out
153  * of spare registers soon.
154  */
155
/* Per-format alpha accessors. FAST_NEAREST_SCANLINE selects one of them
 * by pasting GET_ ## SRC_FORMAT ## _ALPHA.
 */

/* 8888: the alpha value is the pixel shifted right by 24 bits */
#define GET_8888_ALPHA(s) ((s) >> 24)

/* x888: the pixel value is ignored, alpha is always 0xff */
#define GET_x888_ALPHA(s) 0xff

/* 0565: this is not actually used since there is no OVER with a 565
 * source, but it is needed to build.
 */
#define GET_0565_ALPHA(s) 0xff
161
/*
 * FAST_NEAREST_SCANLINE generates a static force_inline function called
 * 'scanline_func_name' that writes 'w' destination pixels.
 *
 * Macro parameters:
 *   scanline_func_name     - name of the generated function
 *   SRC_FORMAT, DST_FORMAT - format tags pasted into the CONVERT_*_TO_*
 *                            and GET_*_ALPHA helper names
 *   src_type_t, dst_type_t - C types of one source/destination pixel
 *   OP                     - pasted as PIXMAN_OP_ ## OP; anything other
 *                            than SRC or OVER ends in abort()
 *   repeat_mode            - pasted as PIXMAN_REPEAT_ ## repeat_mode; only
 *                            NORMAL changes how this function behaves
 *
 * Parameters of the generated function:
 *   dst, src               - destination pointer and source scanline
 *   w                      - number of destination pixels to produce
 *   vx, unit_x             - source position and per-pixel step; the source
 *                            index used for a pixel is vx >> 16
 *   max_vx                 - for NORMAL repeat, vx is reduced by max_vx for
 *                            as long as it is >= max_vx after each step
 *   fully_transparent_src  - when set and OP is OVER the function returns
 *                            without touching dst
 *
 * Per pixel, OVER stores the converted source pixel when its alpha is 0xff,
 * leaves dst unchanged when the source pixel is zero, and otherwise
 * converts dst and the source to 8888, combines them with
 * UN8x4_MUL_UN8_ADD_UN8x4 (d, alpha ^ 0xff, s) and converts the result
 * back. SRC always stores the converted source pixel.
 *
 * The main loop handles two pixels per iteration; a trailing odd pixel is
 * handled separately after the loop.
 */
162 #define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT,                       \
163                               src_type_t, dst_type_t, OP, repeat_mode)                          \
164 static force_inline void                                                                        \
165 scanline_func_name (dst_type_t       *dst,                                                      \
166                     const src_type_t *src,                                                      \
167                     int32_t           w,                                                        \
168                     pixman_fixed_t    vx,                                                       \
169                     pixman_fixed_t    unit_x,                                                   \
170                     pixman_fixed_t    max_vx,                                                   \
171                     pixman_bool_t     fully_transparent_src)                                    \
172 {                                                                                               \
173         uint32_t   d;                                                                           \
174         src_type_t s1, s2;                                                                      \
175         uint8_t    a1, a2;                                                                      \
176         int        x1, x2;                                                                      \
177                                                                                                 \
178         if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src)                        \
179             return;                                                                             \
180                                                                                                 \
181         if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER)            \
182             abort();                                                                            \
183                                                                                                 \
184         while ((w -= 2) >= 0)                                                                   \
185         {                                                                                       \
186             x1 = vx >> 16;                                                                      \
187             vx += unit_x;                                                                       \
188             if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                          \
189             {                                                                                   \
190                 /* This works because we know that unit_x is positive */                        \
191                 while (vx >= max_vx)                                                            \
192                     vx -= max_vx;                                                               \
193             }                                                                                   \
194             s1 = src[x1];                                                                       \
195                                                                                                 \
196             x2 = vx >> 16;                                                                      \
197             vx += unit_x;                                                                       \
198             if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                          \
199             {                                                                                   \
200                 /* This works because we know that unit_x is positive */                        \
201                 while (vx >= max_vx)                                                            \
202                     vx -= max_vx;                                                               \
203             }                                                                                   \
204             s2 = src[x2];                                                                       \
205                                                                                                 \
206             if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)                                             \
207             {                                                                                   \
208                 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);                                          \
209                 a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2);                                          \
210                                                                                                 \
211                 if (a1 == 0xff)                                                                 \
212                 {                                                                               \
213                     *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);                   \
214                 }                                                                               \
215                 else if (s1)                                                                    \
216                 {                                                                               \
217                     d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst);                              \
218                     s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1);                               \
219                     a1 ^= 0xff;                                                                 \
220                     UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);                                        \
221                     *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);                                  \
222                 }                                                                               \
223                 dst++;                                                                          \
224                                                                                                 \
225                 if (a2 == 0xff)                                                                 \
226                 {                                                                               \
227                     *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2);                   \
228                 }                                                                               \
229                 else if (s2)                                                                    \
230                 {                                                                               \
231                     d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst);                               \
232                     s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2);                                \
233                     a2 ^= 0xff;                                                                 \
234                     UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2);                                        \
235                     *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);                                  \
236                 }                                                                               \
237                 dst++;                                                                          \
238             }                                                                                   \
239             else /* PIXMAN_OP_SRC */                                                            \
240             {                                                                                   \
241                 *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);                     \
242                 *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2);                     \
243             }                                                                                   \
244         }                                                                                       \
245                                                                                                 \
246         if (w & 1)                                                                              \
247         {                                                                                       \
248             x1 = vx >> 16;                                                                      \
249             s1 = src[x1];                                                                       \
250                                                                                                 \
251             if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)                                             \
252             {                                                                                   \
253                 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);                                          \
254                                                                                                 \
255                 if (a1 == 0xff)                                                                 \
256                 {                                                                               \
257                     *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);                   \
258                 }                                                                               \
259                 else if (s1)                                                                    \
260                 {                                                                               \
261                     d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst);                               \
262                     s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1);                               \
263                     a1 ^= 0xff;                                                                 \
264                     UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);                                        \
265                     *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);                                  \
266                 }                                                                               \
267                 dst++;                                                                          \
268             }                                                                                   \
269             else /* PIXMAN_OP_SRC */                                                            \
270             {                                                                                   \
271                 *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);                     \
272             }                                                                                   \
273         }                                                                                       \
274 }
275
/*
 * FAST_NEAREST_MAINLOOP_INT generates the composite entry point
 * fast_composite_scaled_nearest ## scale_func_name (imp, info), which walks
 * the destination rows and calls 'scanline_func' for each of them.
 *
 * Macro parameters:
 *   scale_func_name  - suffix pasted onto fast_composite_scaled_nearest
 *   scanline_func    - called as scanline_func (mask, dst, src, w, vx,
 *                      unit_x, max_vx, fully_transparent_src)
 *   src_type_t, mask_type_t, dst_type_t
 *                    - C types of one source/mask/destination pixel
 *   repeat_mode      - pasted as PIXMAN_REPEAT_ ## repeat_mode
 *   have_mask        - when false no mask is fetched and 'mask' keeps
 *                      pointing at the local 'solid_mask'
 *   mask_is_solid    - with have_mask: the mask value comes from
 *                      _pixman_image_get_solid () instead of a mask scanline
 *
 * Outline of the generated function:
 *   - The center of pixel (src_x, src_y) is run through the source image
 *     transform; the function returns early if that fails. unit_x/unit_y
 *     are taken from the transform's [0][0] and [1][1] entries.
 *   - NORMAL: vx and vy are wrapped with repeat () against the image
 *     width/height shifted left by 16, and vy is wrapped again after each
 *     row step.
 *   - PAD and NONE: pad_repeat_get_scanline_bounds () splits the row into
 *     left_pad / width / right_pad and vx is advanced past the left padding.
 *     PAD clamps the row index and draws the padding from the first/last
 *     pixel of the source row (vx and unit_x are passed as 0). NONE draws
 *     the padding, and whole rows outside the image, from a static zero
 *     pixel with fully_transparent_src set to TRUE.
 *   - Any other repeat_mode: a single scanline_func call per row, with
 *     max_vx passed through.
 */
276 #define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,      \
277                                   dst_type_t, repeat_mode, have_mask, mask_is_solid)            \
278 static void                                                                                     \
279 fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,                \
280                                                    pixman_composite_info_t *info)               \
281 {                                                                                               \
282     PIXMAN_COMPOSITE_ARGS (info);                                                               \
283     dst_type_t *dst_line;                                                                       \
284     mask_type_t *mask_line;                                                                     \
285     src_type_t *src_first_line;                                                                 \
286     int       y;                                                                                \
287     pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */            \
288     pixman_fixed_t max_vy;                                                                      \
289     pixman_vector_t v;                                                                          \
290     pixman_fixed_t vx, vy;                                                                      \
291     pixman_fixed_t unit_x, unit_y;                                                              \
292     int32_t left_pad, right_pad;                                                                \
293                                                                                                 \
294     src_type_t *src;                                                                            \
295     dst_type_t *dst;                                                                            \
296     mask_type_t solid_mask;                                                                     \
297     const mask_type_t *mask = &solid_mask;                                                      \
298     int src_stride, mask_stride, dst_stride;                                                    \
299                                                                                                 \
300     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);    \
301     if (have_mask)                                                                              \
302     {                                                                                           \
303         if (mask_is_solid)                                                                      \
304             solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);    \
305         else                                                                                    \
306             PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,                     \
307                                    mask_stride, mask_line, 1);                                  \
308     }                                                                                           \
309     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be                  \
310      * transformed from destination space to source space */                                    \
311     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);         \
312                                                                                                 \
313     /* reference point is the center of the pixel */                                            \
314     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;                             \
315     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;                             \
316     v.vector[2] = pixman_fixed_1;                                                               \
317                                                                                                 \
318     if (!pixman_transform_point_3d (src_image->common.transform, &v))                           \
319         return;                                                                                 \
320                                                                                                 \
321     unit_x = src_image->common.transform->matrix[0][0];                                         \
322     unit_y = src_image->common.transform->matrix[1][1];                                         \
323                                                                                                 \
324     /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */                   \
325     v.vector[0] -= pixman_fixed_e;                                                              \
326     v.vector[1] -= pixman_fixed_e;                                                              \
327                                                                                                 \
328     vx = v.vector[0];                                                                           \
329     vy = v.vector[1];                                                                           \
330                                                                                                 \
331     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                                  \
332     {                                                                                           \
333         /* Clamp repeating positions inside the actual samples */                               \
334         max_vx = src_image->bits.width << 16;                                                   \
335         max_vy = src_image->bits.height << 16;                                                  \
336                                                                                                 \
337         repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx);                                             \
338         repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                             \
339     }                                                                                           \
340                                                                                                 \
341     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||                                   \
342         PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                                    \
343     {                                                                                           \
344         pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,                      \
345                                         &width, &left_pad, &right_pad);                         \
346         vx += left_pad * unit_x;                                                                \
347     }                                                                                           \
348                                                                                                 \
349     while (--height >= 0)                                                                       \
350     {                                                                                           \
351         dst = dst_line;                                                                         \
352         dst_line += dst_stride;                                                                 \
353         if (have_mask && !mask_is_solid)                                                        \
354         {                                                                                       \
355             mask = mask_line;                                                                   \
356             mask_line += mask_stride;                                                           \
357         }                                                                                       \
358                                                                                                 \
359         y = vy >> 16;                                                                           \
360         vy += unit_y;                                                                           \
361         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                              \
362             repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                         \
363         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)                                 \
364         {                                                                                       \
365             repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);                             \
366             src = src_first_line + src_stride * y;                                              \
367             if (left_pad > 0)                                                                   \
368             {                                                                                   \
369                 scanline_func (mask, dst, src, left_pad, 0, 0, 0, FALSE);                       \
370             }                                                                                   \
371             if (width > 0)                                                                      \
372             {                                                                                   \
373                 scanline_func (mask + (mask_is_solid ? 0 : left_pad),                           \
374                                dst + left_pad, src, width, vx, unit_x, 0, FALSE);               \
375             }                                                                                   \
376             if (right_pad > 0)                                                                  \
377             {                                                                                   \
378                 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),                   \
379                                dst + left_pad + width, src + src_image->bits.width - 1,         \
380                                right_pad, 0, 0, 0, FALSE);                                      \
381             }                                                                                   \
382         }                                                                                       \
383         else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                           \
384         {                                                                                       \
385             static const src_type_t zero[1] = { 0 };                                            \
386             if (y < 0 || y >= src_image->bits.height)                                           \
387             {                                                                                   \
388                 scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0, TRUE);   \
389                 continue;                                                                       \
390             }                                                                                   \
391             src = src_first_line + src_stride * y;                                              \
392             if (left_pad > 0)                                                                   \
393             {                                                                                   \
394                 scanline_func (mask, dst, zero, left_pad, 0, 0, 0, TRUE);                       \
395             }                                                                                   \
396             if (width > 0)                                                                      \
397             {                                                                                   \
398                 scanline_func (mask + (mask_is_solid ? 0 : left_pad),                           \
399                                dst + left_pad, src, width, vx, unit_x, 0, FALSE);               \
400             }                                                                                   \
401             if (right_pad > 0)                                                                  \
402             {                                                                                   \
403                 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),                   \
404                                dst + left_pad + width, zero, right_pad, 0, 0, 0, TRUE);         \
405             }                                                                                   \
406         }                                                                                       \
407         else                                                                                    \
408         {                                                                                       \
409             src = src_first_line + src_stride * y;                                              \
410             scanline_func (mask, dst, src, width, vx, unit_x, max_vx, FALSE);                   \
411         }                                                                                       \
412     }                                                                                           \
413 }
414
/* Workaround for old Sun Studio compilers, see
 * https://bugs.freedesktop.org/show_bug.cgi?id=32764
 *
 * Forwards every argument to FAST_NEAREST_MAINLOOP_INT unchanged, except
 * that 'scale_func_name' gets an underscore pasted in front of it, so the
 * generated function is fast_composite_scaled_nearest_<scale_func_name>.
 */
#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func,                            \
                                     src_type_t, mask_type_t, dst_type_t,                       \
                                     repeat_mode, have_mask, mask_is_solid)                     \
    FAST_NEAREST_MAINLOOP_INT (_ ## scale_func_name, scanline_func,                             \
                               src_type_t, mask_type_t, dst_type_t,                             \
                               repeat_mode, have_mask, mask_is_solid)
420
/*
 * FAST_NEAREST_MAINLOOP_NOMASK expands FAST_NEAREST_MAINLOOP_INT for a
 * scanline function that does not take a mask argument.
 *
 * It first defines a static force_inline adapter named
 * <scanline_func><scale_func_name>_wrapper.  The adapter accepts a 'mask'
 * pointer as its first parameter, ignores it, and calls
 * scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src).
 *
 * It then expands FAST_NEAREST_MAINLOOP_INT with that adapter as the
 * scanline function, uint8_t as the mask type, and FALSE for both trailing
 * arguments (the positions that FAST_NEAREST_MAINLOOP_COMMON names
 * have_mask and mask_is_solid).  Unlike FAST_NEAREST_MAINLOOP_COMMON, no
 * '_' is pasted onto scale_func_name here.
 */
421 #define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t,    \
422                               repeat_mode)                                                      \
423     static force_inline void                                                                    \
424     scanline_func##scale_func_name##_wrapper (                                                  \
425                     const uint8_t    *mask,                                                     \
426                     dst_type_t       *dst,                                                      \
427                     const src_type_t *src,                                                      \
428                     int32_t          w,                                                         \
429                     pixman_fixed_t   vx,                                                        \
430                     pixman_fixed_t   unit_x,                                                    \
431                     pixman_fixed_t   max_vx,                                                    \
432                     pixman_bool_t    fully_transparent_src)                                     \
433     {                                                                                           \
434         scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src);                 \
435     }                                                                                           \
436     FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper,       \
437                                src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
438
/*
 * FAST_NEAREST_MAINLOOP forwards to FAST_NEAREST_MAINLOOP_NOMASK, pasting a
 * leading '_' onto scale_func_name and passing scanline_func, src_type_t,
 * dst_type_t and repeat_mode through unchanged.
 */
439 #define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t,           \
440                               repeat_mode)                                                      \
441         FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t,           \
442                               dst_type_t, repeat_mode)
443
/*
 * FAST_NEAREST expands to two template instantiations, back to back:
 *
 *  - FAST_NEAREST_SCANLINE, given the generated name
 *    scaled_nearest_scanline_<scale_func_name>_<OP> together with
 *    SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, OP and repeat_mode;
 *
 *  - FAST_NEAREST_MAINLOOP_NOMASK, given the name suffix
 *    _<scale_func_name>_<OP>, that same scanline function name,
 *    src_type_t, dst_type_t and repeat_mode.
 *
 * SRC_FORMAT and DST_FORMAT are passed only to FAST_NEAREST_SCANLINE; OP is
 * passed to it as an argument and is also pasted into both generated names.
 */
444 #define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,                           \
445                      src_type_t, dst_type_t, OP, repeat_mode)                           \
446     FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,       \
447                           SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t,               \
448                           OP, repeat_mode)                                              \
449     FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP,                       \
450                           scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,       \
451                           src_type_t, dst_type_t, repeat_mode)
452
453
/*
 * SCALED_NEAREST_FLAGS is the parenthesized bitwise OR of
 * FAST_PATH_SCALE_TRANSFORM, FAST_PATH_NO_ALPHA_MAP,
 * FAST_PATH_NEAREST_FILTER, FAST_PATH_NO_ACCESSORS and
 * FAST_PATH_NARROW_FORMAT.  The SIMPLE_NEAREST_*_FAST_PATH_* entry macros
 * that follow all include it in the flag word that comes after PIXMAN_<s>.
 */
454 #define SCALED_NEAREST_FLAGS                                            \
455     (FAST_PATH_SCALE_TRANSFORM  |                                       \
456      FAST_PATH_NO_ALPHA_MAP     |                                       \
457      FAST_PATH_NEAREST_FILTER   |                                       \
458      FAST_PATH_NO_ACCESSORS     |                                       \
459      FAST_PATH_NARROW_FORMAT)
460
/*
 * Brace-enclosed table entry for a nearest-scaled fast path with no mask
 * and NORMAL repeat.  The initializer lists, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE,
 *   PIXMAN_null, 0,
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_nearest_<func>_normal_<op>
 * NOTE(review): presumably these map onto operator, source format/flags,
 * mask format/flags, destination format/flags and composite function of a
 * fast path table struct declared elsewhere -- confirm the field order.
 */
461 #define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func)                    \
462     {   PIXMAN_OP_ ## op,                                               \
463         PIXMAN_ ## s,                                                   \
464         (SCALED_NEAREST_FLAGS           |                               \
465          FAST_PATH_NORMAL_REPEAT        |                               \
466          FAST_PATH_X_UNIT_POSITIVE),                                    \
467         PIXMAN_null, 0,                                                 \
468         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
469         fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,   \
470     }
471
/*
 * Brace-enclosed table entry for a nearest-scaled fast path with no mask
 * and PAD repeat.  The initializer lists, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE,
 *   PIXMAN_null, 0,
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_nearest_<func>_pad_<op>
 * NOTE(review): presumably operator, source format/flags, mask
 * format/flags, destination format/flags and composite function -- the
 * struct being initialized is declared elsewhere; confirm the field order.
 */
472 #define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func)                       \
473     {   PIXMAN_OP_ ## op,                                               \
474         PIXMAN_ ## s,                                                   \
475         (SCALED_NEAREST_FLAGS           |                               \
476          FAST_PATH_PAD_REPEAT           |                               \
477          FAST_PATH_X_UNIT_POSITIVE),                                    \
478         PIXMAN_null, 0,                                                 \
479         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
480         fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,      \
481     }
482
/*
 * Brace-enclosed table entry for a nearest-scaled fast path with no mask
 * and NONE repeat.  The initializer lists, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE,
 *   PIXMAN_null, 0,
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_nearest_<func>_none_<op>
 * NOTE(review): presumably operator, source format/flags, mask
 * format/flags, destination format/flags and composite function -- the
 * struct being initialized is declared elsewhere; confirm the field order.
 */
483 #define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func)                      \
484     {   PIXMAN_OP_ ## op,                                               \
485         PIXMAN_ ## s,                                                   \
486         (SCALED_NEAREST_FLAGS           |                               \
487          FAST_PATH_NONE_REPEAT          |                               \
488          FAST_PATH_X_UNIT_POSITIVE),                                    \
489         PIXMAN_null, 0,                                                 \
490         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
491         fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,     \
492     }
493
/*
 * Brace-enclosed table entry for a nearest-scaled fast path with no mask,
 * 'cover' variant.  The initializer lists, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,
 *   PIXMAN_null, 0,
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_nearest_<func>_cover_<op>
 * Unlike the NORMAL/PAD/NONE entries, the flag word carries no repeat flag
 * and no FAST_PATH_X_UNIT_POSITIVE.
 * NOTE(review): presumably operator, source format/flags, mask
 * format/flags, destination format/flags and composite function -- the
 * struct being initialized is declared elsewhere; confirm the field order.
 */
494 #define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func)                     \
495     {   PIXMAN_OP_ ## op,                                               \
496         PIXMAN_ ## s,                                                   \
497         SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,            \
498         PIXMAN_null, 0,                                                 \
499         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
500         fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,    \
501     }
502
/*
 * Brace-enclosed table entry for a nearest-scaled fast path with an a8
 * mask and NORMAL repeat.  The initializer lists, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE,
 *   PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_nearest_<func>_normal_<op>
 * NOTE(review): presumably operator, source format/flags, mask
 * format/flags, destination format/flags and composite function -- the
 * struct being initialized is declared elsewhere; confirm the field order.
 */
503 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)            \
504     {   PIXMAN_OP_ ## op,                                               \
505         PIXMAN_ ## s,                                                   \
506         (SCALED_NEAREST_FLAGS           |                               \
507          FAST_PATH_NORMAL_REPEAT        |                               \
508          FAST_PATH_X_UNIT_POSITIVE),                                    \
509         PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
510         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
511         fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,   \
512     }
513
/*
 * Brace-enclosed table entry for a nearest-scaled fast path with an a8
 * mask and PAD repeat.  The initializer lists, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE,
 *   PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_nearest_<func>_pad_<op>
 * NOTE(review): presumably operator, source format/flags, mask
 * format/flags, destination format/flags and composite function -- the
 * struct being initialized is declared elsewhere; confirm the field order.
 */
514 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func)               \
515     {   PIXMAN_OP_ ## op,                                               \
516         PIXMAN_ ## s,                                                   \
517         (SCALED_NEAREST_FLAGS           |                               \
518          FAST_PATH_PAD_REPEAT           |                               \
519          FAST_PATH_X_UNIT_POSITIVE),                                    \
520         PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
521         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
522         fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,      \
523     }
524
/*
 * Brace-enclosed table entry for a nearest-scaled fast path with an a8
 * mask and NONE repeat.  The initializer lists, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE,
 *   PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_nearest_<func>_none_<op>
 * NOTE(review): presumably operator, source format/flags, mask
 * format/flags, destination format/flags and composite function -- the
 * struct being initialized is declared elsewhere; confirm the field order.
 */
525 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func)              \
526     {   PIXMAN_OP_ ## op,                                               \
527         PIXMAN_ ## s,                                                   \
528         (SCALED_NEAREST_FLAGS           |                               \
529          FAST_PATH_NONE_REPEAT          |                               \
530          FAST_PATH_X_UNIT_POSITIVE),                                    \
531         PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
532         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
533         fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,     \
534     }
535
/*
 * Brace-enclosed table entry for a nearest-scaled fast path with an a8
 * mask, 'cover' variant.  The initializer lists, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,
 *   PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_nearest_<func>_cover_<op>
 * Unlike the NORMAL/PAD/NONE entries, the flag word carries no repeat flag
 * and no FAST_PATH_X_UNIT_POSITIVE.
 * NOTE(review): presumably operator, source format/flags, mask
 * format/flags, destination format/flags and composite function -- the
 * struct being initialized is declared elsewhere; confirm the field order.
 */
536 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func)             \
537     {   PIXMAN_OP_ ## op,                                               \
538         PIXMAN_ ## s,                                                   \
539         SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,            \
540         PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
541         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
542         fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,    \
543     }
544
/*
 * Brace-enclosed table entry for a nearest-scaled fast path with a solid
 * mask and NORMAL repeat.  The initializer lists, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE,
 *   PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_nearest_<func>_normal_<op>
 * NOTE(review): presumably operator, source format/flags, mask
 * format/flags, destination format/flags and composite function -- the
 * struct being initialized is declared elsewhere; confirm the field order.
 */
545 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)         \
546     {   PIXMAN_OP_ ## op,                                               \
547         PIXMAN_ ## s,                                                   \
548         (SCALED_NEAREST_FLAGS           |                               \
549          FAST_PATH_NORMAL_REPEAT        |                               \
550          FAST_PATH_X_UNIT_POSITIVE),                                    \
551         PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
552         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
553         fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,   \
554     }
555
/*
 * Brace-enclosed table entry for a nearest-scaled fast path with a solid
 * mask and PAD repeat.  The initializer lists, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE,
 *   PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_nearest_<func>_pad_<op>
 * NOTE(review): presumably operator, source format/flags, mask
 * format/flags, destination format/flags and composite function -- the
 * struct being initialized is declared elsewhere; confirm the field order.
 */
556 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)            \
557     {   PIXMAN_OP_ ## op,                                               \
558         PIXMAN_ ## s,                                                   \
559         (SCALED_NEAREST_FLAGS           |                               \
560          FAST_PATH_PAD_REPEAT           |                               \
561          FAST_PATH_X_UNIT_POSITIVE),                                    \
562         PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
563         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
564         fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,      \
565     }
566
/*
 * Brace-enclosed table entry for a nearest-scaled fast path with a solid
 * mask and NONE repeat.  The initializer lists, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE,
 *   PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_nearest_<func>_none_<op>
 * NOTE(review): presumably operator, source format/flags, mask
 * format/flags, destination format/flags and composite function -- the
 * struct being initialized is declared elsewhere; confirm the field order.
 */
567 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)           \
568     {   PIXMAN_OP_ ## op,                                               \
569         PIXMAN_ ## s,                                                   \
570         (SCALED_NEAREST_FLAGS           |                               \
571          FAST_PATH_NONE_REPEAT          |                               \
572          FAST_PATH_X_UNIT_POSITIVE),                                    \
573         PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
574         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
575         fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,     \
576     }
577
/*
 * Brace-enclosed table entry for a nearest-scaled fast path with a solid
 * mask, 'cover' variant.  The initializer lists, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,
 *   PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_nearest_<func>_cover_<op>
 * Unlike the NORMAL/PAD/NONE entries, the flag word carries no repeat flag
 * and no FAST_PATH_X_UNIT_POSITIVE.
 * NOTE(review): presumably operator, source format/flags, mask
 * format/flags, destination format/flags and composite function -- the
 * struct being initialized is declared elsewhere; confirm the field order.
 */
578 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)          \
579     {   PIXMAN_OP_ ## op,                                               \
580         PIXMAN_ ## s,                                                   \
581         SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,            \
582         PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
583         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
584         fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,    \
585     }
586
/*
 * SIMPLE_NEAREST_FAST_PATH expands to four comma-separated unmasked table
 * entries for the same (op, s, d, func), in the order COVER, NONE, PAD,
 * NORMAL.  There is no comma after the last entry, so the caller supplies
 * the separator.
 * NOTE(review): listing COVER first presumably makes it the first match
 * when the table is searched in order -- confirm against the lookup code.
 */
587 /* Prefer the use of 'cover' variant, because it is faster */
588 #define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)                           \
589     SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),                       \
590     SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),                        \
591     SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func),                         \
592     SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
593
/*
 * SIMPLE_NEAREST_A8_MASK_FAST_PATH expands to three comma-separated a8-mask
 * table entries for the same (op, s, d, func), in the order COVER, NONE,
 * PAD.  No NORMAL-repeat entry is emitted here, even though
 * SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL exists; callers that want it must
 * list it themselves.  There is no comma after the last entry.
 */
594 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)                   \
595     SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),               \
596     SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),                \
597     SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
598
/*
 * SIMPLE_NEAREST_SOLID_MASK_FAST_PATH expands to three comma-separated
 * solid-mask table entries for the same (op, s, d, func), in the order
 * COVER, NONE, PAD.  No NORMAL-repeat entry is emitted here, even though
 * SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL exists; callers that want it
 * must list it themselves.  There is no comma after the last entry.
 */
599 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func)                \
600     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),            \
601     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),             \
602     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
603
604 /*****************************************************************************/
605
606 /*
607  * Identify 5 zones in each scanline for bilinear scaling, depending on
608  * whether the 2 pixels to be interpolated are fetched from the image itself,
609  * from the padding area around it, or from both the image and padding area.
610  */
/*
 * bilinear_pad_repeat_get_scanline_bounds:
 *
 * Splits a scanline of *width pixels into left padding, left transition
 * zone, main part, right transition zone and right padding.
 *
 *  source_image_width - forwarded to pad_repeat_get_scanline_bounds
 *  vx                 - starting fixed point position
 *  unit_x             - fixed point position increment, forwarded unchanged
 *  left_pad, left_tz, right_tz, right_pad
 *                     - outputs: pixel counts of the four edge zones
 *  width              - in: total pixel count; out: the count left over
 *                       once the four edge zones have been subtracted
 *
 * pad_repeat_get_scanline_bounds is run twice on private copies of *width,
 * once at vx and once at vx + pixman_fixed_1.
 * NOTE(review): presumably the two runs correspond to the left and right
 * pixel of each interpolated pair -- confirm against
 * pad_repeat_get_scanline_bounds.
 */
611 static force_inline void
612 bilinear_pad_repeat_get_scanline_bounds (int32_t         source_image_width,
613                                          pixman_fixed_t  vx,
614                                          pixman_fixed_t  unit_x,
615                                          int32_t *       left_pad,
616                                          int32_t *       left_tz,
617                                          int32_t *       width,
618                                          int32_t *       right_tz,
619                                          int32_t *       right_pad)
620 {
            /* Set 1 is evaluated at vx, set 2 at vx + pixman_fixed_1. */
621         int width1 = *width, left_pad1, right_pad1;
622         int width2 = *width, left_pad2, right_pad2;
623
624         pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
625                                         &width1, &left_pad1, &right_pad1);
626         pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
627                                         unit_x, &width2, &left_pad2, &right_pad2);
628
            /* width1 and width2 are scratch only: their updated values are
             * never read.  The pads come from one run each (left from set 2,
             * right from set 1); each transition zone is the difference
             * between the two runs' pad counts on that side. */
629         *left_pad = left_pad2;
630         *left_tz = left_pad1 - left_pad2;
631         *right_tz = right_pad2 - right_pad1;
632         *right_pad = right_pad1;
            /* Whatever is not in one of the four edge zones stays in *width. */
633         *width -= *left_pad + *left_tz + *right_tz + *right_pad;
634 }
635
636 /*
637  * Main loop template for single pass bilinear scaling. It needs to be
638  * provided with 'scanline_func' which should do the compositing operation.
639  * The needed function has the following prototype:
640  *
641  *      scanline_func (dst_type_t *       dst,
642  *                     const mask_type_t * mask,
643  *                     const src_type_t * src_top,
644  *                     const src_type_t * src_bottom,
645  *                     int32_t            width,
646  *                     int                weight_top,
647  *                     int                weight_bottom,
648  *                     pixman_fixed_t     vx,
649  *                     pixman_fixed_t     unit_x,
650  *                     pixman_fixed_t     max_vx,
651  *                     pixman_bool_t      zero_src)
652  *
653  * Where:
654  *  dst                 - destination scanline buffer for storing results
655  *  mask                - mask buffer (or single value for solid mask)
656  *  src_top, src_bottom - two source scanlines
657  *  width               - number of pixels to process
658  *  weight_top          - weight of the top row for interpolation
659  *  weight_bottom       - weight of the bottom row for interpolation
660  *  vx                  - initial position for fetching the first pair of
661  *                        pixels from the source buffer
662  *  unit_x              - position increment needed to move to the next pair
663  *                        of pixels
664  *  max_vx              - image size as a fixed point value, can be used for
665  *                        implementing NORMAL repeat (when it is supported)
666  *  zero_src            - boolean hint variable, which is set to TRUE when
667  *                        all source pixels are fetched from zero padding
668  *                        zone for NONE repeat
669  *
670  * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256,
671  *       but sometimes it may be less than that for NONE repeat when handling
672  *       fuzzy antialiased top or bottom image edges. Also both top and
673  *       bottom weight variables are guaranteed to have value in 0-255
674  *       range and can fit into unsigned byte or be used with 8-bit SIMD
675  *       multiplication instructions.
676  */
677 #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,     \
678                                   dst_type_t, repeat_mode, flags)                               \
679 static void                                                                                     \
680 fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,                \
681                                                    pixman_composite_info_t *info)               \
682 {                                                                                               \
683     PIXMAN_COMPOSITE_ARGS (info);                                                               \
684     dst_type_t *dst_line;                                                                       \
685     mask_type_t *mask_line;                                                                     \
686     src_type_t *src_first_line;                                                                 \
687     int       y1, y2;                                                                           \
688     pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */            \
689     pixman_vector_t v;                                                                          \
690     pixman_fixed_t vx, vy;                                                                      \
691     pixman_fixed_t unit_x, unit_y;                                                              \
692     int32_t left_pad, left_tz, right_tz, right_pad;                                             \
693                                                                                                 \
694     dst_type_t *dst;                                                                            \
695     mask_type_t solid_mask;                                                                     \
696     const mask_type_t *mask = &solid_mask;                                                      \
697     int src_stride, mask_stride, dst_stride;                                                    \
698                                                                                                 \
699     int src_width;                                                                              \
700     pixman_fixed_t src_width_fixed;                                                             \
701     int max_x;                                                                                  \
702     pixman_bool_t need_src_extension;                                                           \
703                                                                                                 \
704     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);    \
705     if (flags & FLAG_HAVE_SOLID_MASK)                                                           \
706     {                                                                                           \
707         solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);        \
708         mask_stride = 0;                                                                        \
709     }                                                                                           \
710     else if (flags & FLAG_HAVE_NON_SOLID_MASK)                                                  \
711     {                                                                                           \
712         PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,                         \
713                                mask_stride, mask_line, 1);                                      \
714     }                                                                                           \
715                                                                                                 \
716     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be                  \
717      * transformed from destination space to source space */                                    \
718     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);         \
719                                                                                                 \
720     /* reference point is the center of the pixel */                                            \
721     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;                             \
722     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;                             \
723     v.vector[2] = pixman_fixed_1;                                                               \
724                                                                                                 \
725     if (!pixman_transform_point_3d (src_image->common.transform, &v))                           \
726         return;                                                                                 \
727                                                                                                 \
728     unit_x = src_image->common.transform->matrix[0][0];                                         \
729     unit_y = src_image->common.transform->matrix[1][1];                                         \
730                                                                                                 \
731     v.vector[0] -= pixman_fixed_1 / 2;                                                          \
732     v.vector[1] -= pixman_fixed_1 / 2;                                                          \
733                                                                                                 \
734     vy = v.vector[1];                                                                           \
735                                                                                                 \
736     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||                                   \
737         PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                                    \
738     {                                                                                           \
739         bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,    \
740                                         &left_pad, &left_tz, &width, &right_tz, &right_pad);    \
741         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)                                 \
742         {                                                                                       \
743             /* PAD repeat does not need special handling for 'transition zones' and */          \
744             /* they can be combined with 'padding zones' safely */                              \
745             left_pad += left_tz;                                                                \
746             right_pad += right_tz;                                                              \
747             left_tz = right_tz = 0;                                                             \
748         }                                                                                       \
749         v.vector[0] += left_pad * unit_x;                                                       \
750     }                                                                                           \
751                                                                                                 \
752     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                                  \
753     {                                                                                           \
754         vx = v.vector[0];                                                                       \
755         repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width));         \
756         max_x = pixman_fixed_to_int (vx + (width - 1) * unit_x) + 1;                            \
757                                                                                                 \
758         if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH)                                    \
759         {                                                                                       \
760             src_width = 0;                                                                      \
761                                                                                                 \
762             while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x)                   \
763                 src_width += src_image->bits.width;                                             \
764                                                                                                 \
765             need_src_extension = TRUE;                                                          \
766         }                                                                                       \
767         else                                                                                    \
768         {                                                                                       \
769             src_width = src_image->bits.width;                                                  \
770             need_src_extension = FALSE;                                                         \
771         }                                                                                       \
772                                                                                                 \
773         src_width_fixed = pixman_int_to_fixed (src_width);                                      \
774     }                                                                                           \
775                                                                                                 \
776     while (--height >= 0)                                                                       \
777     {                                                                                           \
778         int weight1, weight2;                                                                   \
779         dst = dst_line;                                                                         \
780         dst_line += dst_stride;                                                                 \
781         vx = v.vector[0];                                                                       \
782         if (flags & FLAG_HAVE_NON_SOLID_MASK)                                                   \
783         {                                                                                       \
784             mask = mask_line;                                                                   \
785             mask_line += mask_stride;                                                           \
786         }                                                                                       \
787                                                                                                 \
788         y1 = pixman_fixed_to_int (vy);                                                          \
789         weight2 = (vy >> 8) & 0xff;                                                             \
790         if (weight2)                                                                            \
791         {                                                                                       \
792             /* normal case, both row weights are in 0-255 range and fit unsigned byte */        \
793             y2 = y1 + 1;                                                                        \
794             weight1 = 256 - weight2;                                                            \
795         }                                                                                       \
796         else                                                                                    \
797         {                                                                                       \
798             /* set both top and bottom row to the same scanline, and weights to 128+128 */      \
799             y2 = y1;                                                                            \
800             weight1 = weight2 = 128;                                                            \
801         }                                                                                       \
802         vy += unit_y;                                                                           \
803         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)                                 \
804         {                                                                                       \
805             src_type_t *src1, *src2;                                                            \
806             src_type_t buf1[2];                                                                 \
807             src_type_t buf2[2];                                                                 \
808             repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);                            \
809             repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);                            \
810             src1 = src_first_line + src_stride * y1;                                            \
811             src2 = src_first_line + src_stride * y2;                                            \
812                                                                                                 \
813             if (left_pad > 0)                                                                   \
814             {                                                                                   \
815                 buf1[0] = buf1[1] = src1[0];                                                    \
816                 buf2[0] = buf2[1] = src2[0];                                                    \
817                 scanline_func (dst, mask,                                                       \
818                                buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE);         \
819                 dst += left_pad;                                                                \
820                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
821                     mask += left_pad;                                                           \
822             }                                                                                   \
823             if (width > 0)                                                                      \
824             {                                                                                   \
825                 scanline_func (dst, mask,                                                       \
826                                src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);      \
827                 dst += width;                                                                   \
828                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
829                     mask += width;                                                              \
830             }                                                                                   \
831             if (right_pad > 0)                                                                  \
832             {                                                                                   \
833                 buf1[0] = buf1[1] = src1[src_image->bits.width - 1];                            \
834                 buf2[0] = buf2[1] = src2[src_image->bits.width - 1];                            \
835                 scanline_func (dst, mask,                                                       \
836                                buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE);        \
837             }                                                                                   \
838         }                                                                                       \
839         else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                           \
840         {                                                                                       \
841             src_type_t *src1, *src2;                                                            \
842             src_type_t buf1[2];                                                                 \
843             src_type_t buf2[2];                                                                 \
844             /* handle top/bottom zero padding by just setting weights to 0 if needed */         \
845             if (y1 < 0)                                                                         \
846             {                                                                                   \
847                 weight1 = 0;                                                                    \
848                 y1 = 0;                                                                         \
849             }                                                                                   \
850             if (y1 >= src_image->bits.height)                                                   \
851             {                                                                                   \
852                 weight1 = 0;                                                                    \
853                 y1 = src_image->bits.height - 1;                                                \
854             }                                                                                   \
855             if (y2 < 0)                                                                         \
856             {                                                                                   \
857                 weight2 = 0;                                                                    \
858                 y2 = 0;                                                                         \
859             }                                                                                   \
860             if (y2 >= src_image->bits.height)                                                   \
861             {                                                                                   \
862                 weight2 = 0;                                                                    \
863                 y2 = src_image->bits.height - 1;                                                \
864             }                                                                                   \
865             src1 = src_first_line + src_stride * y1;                                            \
866             src2 = src_first_line + src_stride * y2;                                            \
867                                                                                                 \
868             if (left_pad > 0)                                                                   \
869             {                                                                                   \
870                 buf1[0] = buf1[1] = 0;                                                          \
871                 buf2[0] = buf2[1] = 0;                                                          \
872                 scanline_func (dst, mask,                                                       \
873                                buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE);          \
874                 dst += left_pad;                                                                \
875                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
876                     mask += left_pad;                                                           \
877             }                                                                                   \
878             if (left_tz > 0)                                                                    \
879             {                                                                                   \
880                 buf1[0] = 0;                                                                    \
881                 buf1[1] = src1[0];                                                              \
882                 buf2[0] = 0;                                                                    \
883                 buf2[1] = src2[0];                                                              \
884                 scanline_func (dst, mask,                                                       \
885                                buf1, buf2, left_tz, weight1, weight2,                           \
886                                pixman_fixed_frac (vx), unit_x, 0, FALSE);                       \
887                 dst += left_tz;                                                                 \
888                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
889                     mask += left_tz;                                                            \
890                 vx += left_tz * unit_x;                                                         \
891             }                                                                                   \
892             if (width > 0)                                                                      \
893             {                                                                                   \
894                 scanline_func (dst, mask,                                                       \
895                                src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);      \
896                 dst += width;                                                                   \
897                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
898                     mask += width;                                                              \
899                 vx += width * unit_x;                                                           \
900             }                                                                                   \
901             if (right_tz > 0)                                                                   \
902             {                                                                                   \
903                 buf1[0] = src1[src_image->bits.width - 1];                                      \
904                 buf1[1] = 0;                                                                    \
905                 buf2[0] = src2[src_image->bits.width - 1];                                      \
906                 buf2[1] = 0;                                                                    \
907                 scanline_func (dst, mask,                                                       \
908                                buf1, buf2, right_tz, weight1, weight2,                          \
909                                pixman_fixed_frac (vx), unit_x, 0, FALSE);                       \
910                 dst += right_tz;                                                                \
911                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
912                     mask += right_tz;                                                           \
913             }                                                                                   \
914             if (right_pad > 0)                                                                  \
915             {                                                                                   \
916                 buf1[0] = buf1[1] = 0;                                                          \
917                 buf2[0] = buf2[1] = 0;                                                          \
918                 scanline_func (dst, mask,                                                       \
919                                buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE);         \
920             }                                                                                   \
921         }                                                                                       \
922         else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                         \
923         {                                                                                       \
924             int32_t         num_pixels;                                                         \
925             int32_t         width_remain;                                                       \
926             src_type_t *    src_line_top;                                                       \
927             src_type_t *    src_line_bottom;                                                    \
928             src_type_t      buf1[2];                                                            \
929             src_type_t      buf2[2];                                                            \
930             src_type_t      extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2];                      \
931             src_type_t      extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2];                      \
932             int             i, j;                                                               \
933                                                                                                 \
934             repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);                         \
935             repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);                         \
936             src_line_top = src_first_line + src_stride * y1;                                    \
937             src_line_bottom = src_first_line + src_stride * y2;                                 \
938                                                                                                 \
939             if (need_src_extension)                                                             \
940             {                                                                                   \
941                 for (i=0; i<src_width;)                                                         \
942                 {                                                                               \
943                     for (j=0; j<src_image->bits.width; j++, i++)                                \
944                     {                                                                           \
945                         extended_src_line0[i] = src_line_top[j];                                \
946                         extended_src_line1[i] = src_line_bottom[j];                             \
947                     }                                                                           \
948                 }                                                                               \
949                                                                                                 \
950                 src_line_top = &extended_src_line0[0];                                          \
951                 src_line_bottom = &extended_src_line1[0];                                       \
952             }                                                                                   \
953                                                                                                 \
954             /* Top & Bottom wrap around buffer */                                               \
955             buf1[0] = src_line_top[src_width - 1];                                              \
956             buf1[1] = src_line_top[0];                                                          \
957             buf2[0] = src_line_bottom[src_width - 1];                                           \
958             buf2[1] = src_line_bottom[0];                                                       \
959                                                                                                 \
960             width_remain = width;                                                               \
961                                                                                                 \
962             while (width_remain > 0)                                                            \
963             {                                                                                   \
964                 /* We use src_width_fixed because it can make vx in original source range */    \
965                 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);                            \
966                                                                                                 \
967                 /* Wrap around part */                                                          \
968                 if (pixman_fixed_to_int (vx) == src_width - 1)                                  \
969                 {                                                                               \
970                     /* for positive unit_x                                                      \
971                      * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed           \
972                      *                                                                          \
973                      * vx is in range [0, src_width_fixed - pixman_fixed_e]                     \
974                      * So we are safe from overflow.                                            \
975                      */                                                                         \
976                     num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1;        \
977                                                                                                 \
978                     if (num_pixels > width_remain)                                              \
979                         num_pixels = width_remain;                                              \
980                                                                                                 \
981                     scanline_func (dst, mask, buf1, buf2, num_pixels,                           \
982                                    weight1, weight2, pixman_fixed_frac(vx),                     \
983                                    unit_x, src_width_fixed, FALSE);                             \
984                                                                                                 \
985                     width_remain -= num_pixels;                                                 \
986                     vx += num_pixels * unit_x;                                                  \
987                     dst += num_pixels;                                                          \
988                                                                                                 \
989                     if (flags & FLAG_HAVE_NON_SOLID_MASK)                                       \
990                         mask += num_pixels;                                                     \
991                                                                                                 \
992                     repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);                        \
993                 }                                                                               \
994                                                                                                 \
995                 /* Normal scanline composite */                                                 \
996                 if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0)              \
997                 {                                                                               \
998                     /* for positive unit_x                                                      \
999                      * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1)     \
1000                      *                                                                          \
1001                      * vx is in range [0, src_width_fixed - pixman_fixed_e]                     \
1002                      * So we are safe from overflow here.                                       \
1003                      */                                                                         \
1004                     num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e)      \
1005                                   / unit_x) + 1;                                                \
1006                                                                                                 \
1007                     if (num_pixels > width_remain)                                              \
1008                         num_pixels = width_remain;                                              \
1009                                                                                                 \
1010                     scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels,        \
1011                                    weight1, weight2, vx, unit_x, src_width_fixed, FALSE);       \
1012                                                                                                 \
1013                     width_remain -= num_pixels;                                                 \
1014                     vx += num_pixels * unit_x;                                                  \
1015                     dst += num_pixels;                                                          \
1016                                                                                                 \
1017                     if (flags & FLAG_HAVE_NON_SOLID_MASK)                                       \
1018                         mask += num_pixels;                                                     \
1019                 }                                                                               \
1020             }                                                                                   \
1021         }                                                                                       \
1022         else                                                                                    \
1023         {                                                                                       \
1024             scanline_func (dst, mask, src_first_line + src_stride * y1,                         \
1025                            src_first_line + src_stride * y2, width,                             \
1026                            weight1, weight2, vx, unit_x, max_vx, FALSE);                        \
1027         }                                                                                       \
1028     }                                                                                           \
1029 }
1030
/*
 * Wrapper around FAST_BILINEAR_MAINLOOP_INT.
 *
 * Every argument is forwarded unchanged except scale_func_name, which is
 * passed on with a leading underscore pasted in front of it
 * (scale_func_name -> _scale_func_name).
 *
 * The extra level of macro indirection is a workaround for old Sun Studio
 * compilers, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764
 */
#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,  \
                                  dst_type_t, repeat_mode, flags)                               \
        FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
                                  dst_type_t, repeat_mode, flags)
1036
/*
 * Flag set shared by all SIMPLE_BILINEAR_* fast path entries in this header:
 * the bitwise OR of FAST_PATH_SCALE_TRANSFORM, FAST_PATH_NO_ALPHA_MAP,
 * FAST_PATH_BILINEAR_FILTER, FAST_PATH_NO_ACCESSORS and
 * FAST_PATH_NARROW_FORMAT.  The expansion is fully parenthesized so it can
 * be combined with further flags at the point of use.
 */
#define SCALED_BILINEAR_FLAGS                                           \
    (FAST_PATH_SCALE_TRANSFORM  |                                       \
     FAST_PATH_NO_ALPHA_MAP     |                                       \
     FAST_PATH_BILINEAR_FILTER  |                                       \
     FAST_PATH_NO_ACCESSORS     |                                       \
     FAST_PATH_NARROW_FORMAT)
1043
/*
 * Table entry for an unmasked bilinear fast path, PAD variant.
 *
 * Expands to a brace-enclosed initializer with eight members, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   (SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE),
 *   PIXMAN_null, 0,
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_pad_<op>
 *
 * NOTE(review): presumably operator / source format+flags / mask format+flags /
 * destination format+flags / composite function of a fast path table element;
 * the element type is declared outside this header -- confirm there.
 */
#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func)                      \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_PAD_REPEAT           |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_null, 0,                                                 \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,     \
    }
1054
/*
 * Table entry for an unmasked bilinear fast path, NONE variant.
 *
 * Expands to a brace-enclosed initializer with eight members, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   (SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE),
 *   PIXMAN_null, 0,
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_none_<op>
 *
 * NOTE(review): presumably operator / source format+flags / mask format+flags /
 * destination format+flags / composite function of a fast path table element;
 * the element type is declared outside this header -- confirm there.
 */
#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func)                     \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_NONE_REPEAT          |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_null, 0,                                                 \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,    \
    }
1065
/*
 * Table entry for an unmasked bilinear fast path, COVER variant.
 *
 * Expands to a brace-enclosed initializer with eight members, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   (SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP),
 *   PIXMAN_null, 0,
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_cover_<op>
 *
 * Unlike the PAD/NONE/NORMAL entries, no repeat flag and no
 * FAST_PATH_X_UNIT_POSITIVE flag is part of the flag set here.
 *
 * NOTE(review): presumably operator / source format+flags / mask format+flags /
 * destination format+flags / composite function of a fast path table element;
 * the element type is declared outside this header -- confirm there.
 */
#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func)                    \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP),         \
        PIXMAN_null, 0,                                                 \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,   \
    }
1074
/*
 * Table entry for an unmasked bilinear fast path, NORMAL variant.
 *
 * Expands to a brace-enclosed initializer with eight members, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   (SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE),
 *   PIXMAN_null, 0,
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_normal_<op>
 *
 * NOTE(review): presumably operator / source format+flags / mask format+flags /
 * destination format+flags / composite function of a fast path table element;
 * the element type is declared outside this header -- confirm there.
 */
#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func)                   \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_NORMAL_REPEAT        |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_null, 0,                                                 \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,  \
    }
1085
/*
 * Table entry for a bilinear fast path with an a8 mask, PAD variant.
 *
 * Expands to a brace-enclosed initializer with eight members, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   (SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE),
 *   PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_pad_<op>
 *
 * NOTE(review): presumably operator / source format+flags / mask format+flags /
 * destination format+flags / composite function of a fast path table element;
 * the element type and MASK_FLAGS are defined outside this header -- confirm
 * there.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func)              \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_PAD_REPEAT           |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,     \
    }
1096
/*
 * Table entry for a bilinear fast path with an a8 mask, NONE variant.
 *
 * Expands to a brace-enclosed initializer with eight members, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   (SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE),
 *   PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_none_<op>
 *
 * NOTE(review): presumably operator / source format+flags / mask format+flags /
 * destination format+flags / composite function of a fast path table element;
 * the element type and MASK_FLAGS are defined outside this header -- confirm
 * there.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func)             \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_NONE_REPEAT          |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,    \
    }
1107
/*
 * Table entry for a bilinear fast path with an a8 mask, COVER variant.
 *
 * Expands to a brace-enclosed initializer with eight members, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   (SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP),
 *   PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_cover_<op>
 *
 * Unlike the PAD/NONE/NORMAL entries, no repeat flag and no
 * FAST_PATH_X_UNIT_POSITIVE flag is part of the flag set here.
 *
 * NOTE(review): presumably operator / source format+flags / mask format+flags /
 * destination format+flags / composite function of a fast path table element;
 * the element type and MASK_FLAGS are defined outside this header -- confirm
 * there.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func)            \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP),         \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,   \
    }
1116
/*
 * Table entry for a bilinear fast path with an a8 mask, NORMAL variant.
 *
 * Expands to a brace-enclosed initializer with eight members, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   (SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE),
 *   PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_normal_<op>
 *
 * NOTE(review): presumably operator / source format+flags / mask format+flags /
 * destination format+flags / composite function of a fast path table element;
 * the element type and MASK_FLAGS are defined outside this header -- confirm
 * there.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)           \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_NORMAL_REPEAT        |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,  \
    }
1127
/*
 * Table entry for a bilinear fast path with a solid mask, PAD variant.
 *
 * Expands to a brace-enclosed initializer with eight members, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   (SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE),
 *   PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_pad_<op>
 *
 * NOTE(review): presumably operator / source format+flags / mask format+flags /
 * destination format+flags / composite function of a fast path table element;
 * the element type and MASK_FLAGS are defined outside this header -- confirm
 * there.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)           \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_PAD_REPEAT           |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,     \
    }
1138
/*
 * Table entry for a bilinear fast path with a solid mask, NONE variant.
 *
 * Expands to a brace-enclosed initializer with eight members, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   (SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE),
 *   PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_none_<op>
 *
 * NOTE(review): presumably operator / source format+flags / mask format+flags /
 * destination format+flags / composite function of a fast path table element;
 * the element type and MASK_FLAGS are defined outside this header -- confirm
 * there.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)          \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_NONE_REPEAT          |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,    \
    }
1149
/*
 * Table entry for a bilinear fast path with a solid mask, COVER variant.
 *
 * Expands to a brace-enclosed initializer with eight members, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   (SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP),
 *   PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_cover_<op>
 *
 * Unlike the PAD/NONE/NORMAL entries, no repeat flag and no
 * FAST_PATH_X_UNIT_POSITIVE flag is part of the flag set here.
 *
 * NOTE(review): presumably operator / source format+flags / mask format+flags /
 * destination format+flags / composite function of a fast path table element;
 * the element type and MASK_FLAGS are defined outside this header -- confirm
 * there.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)         \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP),         \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,   \
    }
1158
/*
 * Table entry for a bilinear fast path with a solid mask, NORMAL variant.
 *
 * Expands to a brace-enclosed initializer with eight members, in order:
 *   PIXMAN_OP_<op>, PIXMAN_<s>,
 *   (SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE),
 *   PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),
 *   PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS,
 *   fast_composite_scaled_bilinear_<func>_normal_<op>
 *
 * NOTE(review): presumably operator / source format+flags / mask format+flags /
 * destination format+flags / composite function of a fast path table element;
 * the element type and MASK_FLAGS are defined outside this header -- confirm
 * there.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)        \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_NORMAL_REPEAT        |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,  \
    }
1169
/*
 * Expands to four comma-separated unmasked fast path entries for the same
 * (op, s, d, func), in the order COVER, NONE, PAD, NORMAL.
 *
 * Prefer the use of 'cover' variant, because it is faster; this is why
 * the COVER entry is listed first.
 */
#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func)                          \
    SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func),                      \
    SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func),                       \
    SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func),                        \
    SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
1176
/*
 * Expands to four comma-separated a8-mask fast path entries for the same
 * (op, s, d, func), in the order COVER, NONE, PAD, NORMAL.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func)                  \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func),              \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func),               \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func),                \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
1182
/*
 * Expands to four comma-separated solid-mask fast path entries for the same
 * (op, s, d, func), in the order COVER, NONE, PAD, NORMAL.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func)               \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),           \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),            \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func),             \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
1188
1189 #endif