Packaging: version up to 0.38.4
[platform/upstream/pixman.git] / pixman / pixman-inlines.h
1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3  * Copyright © 2000 SuSE, Inc.
4  * Copyright © 2007 Red Hat, Inc.
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of SuSE not be used in advertising or
11  * publicity pertaining to distribution of the software without specific,
12  * written prior permission.  SuSE makes no representations about the
13  * suitability of this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  *
16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  * Author:  Keith Packard, SuSE, Inc.
24  */
25
26 #ifndef PIXMAN_FAST_PATH_H__
27 #define PIXMAN_FAST_PATH_H__
28
29 #include "pixman-private.h"
30
31 #define PIXMAN_REPEAT_COVER -1
32
33 /* Flags describing input parameters to fast path macro template.
34  * Turning on some flag values may indicate that
35  * "some property X is available so template can use this" or
36  * "some property X should be handled by template".
37  *
38  * FLAG_HAVE_SOLID_MASK
39  *  Input mask is solid so template should handle this.
40  *
41  * FLAG_HAVE_NON_SOLID_MASK
42  *  Input mask is bits mask so template should handle this.
43  *
44  * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
45  * exclusive. (It's not allowed to turn both flags on)
46  */
47 #define FLAG_NONE                               (0)
48 #define FLAG_HAVE_SOLID_MASK                    (1 <<   1)
49 #define FLAG_HAVE_NON_SOLID_MASK                (1 <<   2)
50
51 /* To avoid many scanline function calls that are each very short, source
52  * scanlines narrower than the constant below are extended first.
53  */
54 #define REPEAT_NORMAL_MIN_WIDTH                 64
55
/*
 * Remap the coordinate *c into the sample range [0, size) according to the
 * requested repeat mode.
 *
 *   NONE    - *c is left untouched; FALSE is returned when it lies outside
 *             [0, size).
 *   NORMAL  - *c is wrapped around with period 'size'.
 *   PAD     - *c is clamped to the nearest edge, 0 or size - 1.
 *   other   - handled as REFLECT: *c is reduced with period 2 * size and
 *             the upper half of that period is mirrored back onto
 *             [0, size).
 *
 * Returns TRUE in every case except the out-of-range NONE case.
 */
56 static force_inline pixman_bool_t
57 repeat (pixman_repeat_t repeat, int *c, int size)
58 {
59     int pos = *c;
60
61     switch (repeat)
62     {
63     case PIXMAN_REPEAT_NONE:
64         return (pos < 0 || pos >= size) ? FALSE : TRUE;
65     case PIXMAN_REPEAT_NORMAL:
66         while (pos >= size)
67             pos -= size;
68         while (pos < 0)
69             pos += size;
70         break;
71     case PIXMAN_REPEAT_PAD:
72         pos = CLIP (pos, 0, size - 1);
73         break;
74     default: /* REFLECT */
75         pos = MOD (pos, size * 2);
76         if (pos >= size)
77             pos = size * 2 - pos - 1;
78         break;
79     }
80     *c = pos;
81     return TRUE;
82 }
83
/*
 * Extract the bilinear blend weight from a 16.16 fixed point coordinate:
 * the BILINEAR_INTERPOLATION_BITS most significant bits of the 16-bit
 * fractional part, returned as an integer in
 * [0, (1 << BILINEAR_INTERPOLATION_BITS) - 1].
 */
84 static force_inline int
85 pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
86 {
87     const int weight_mask = (1 << BILINEAR_INTERPOLATION_BITS) - 1;
88     return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) & weight_mask;
89 }
90
91 #if BILINEAR_INTERPOLATION_BITS <= 4
92 /* Inspired by Filter_32_opaque from Skia */
/*
 * Bilinear interpolation of four packed 32-bit pixels using 4-bit weights.
 *
 * tl, tr, bl, br are the four neighbouring samples (presumably top-left,
 * top-right, bottom-left and bottom-right). distx and disty are the blend
 * weights; they are first scaled up to a 0..16 range.
 *
 * Weights applied to each sample:
 *   tl: (16 - distx) * (16 - disty)     tr: distx * (16 - disty)
 *   bl: (16 - distx) * disty            br: distx * disty
 * The four weights always add up to 16 * 16 = 256.
 *
 * Two channels are processed per multiplication: 'lo' carries the bytes at
 * bits 0-7 and 16-23 (mask 0xff00ff), 'hi' carries the bytes at bits 8-15
 * and 24-31 after shifting them down by 8. Because the weights sum to 256,
 * every 16-bit lane holds at most 255 * 256, so lanes never carry into each
 * other. The result is the upper byte of each lane: 'lo' is shifted down by
 * 8 and masked, 'hi' is already in place and only masked.
 */
93 static force_inline uint32_t
94 bilinear_interpolation (uint32_t tl, uint32_t tr,
95                         uint32_t bl, uint32_t br,
96                         int distx, int disty)
97 {
98     int distxy, distxiy, distixy, distixiy;
99     uint32_t lo, hi;
100
101     distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
102     disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
103
104     distxy = distx * disty;
105     distxiy = (distx << 4) - distxy;    /* distx * (16 - disty) */
106     distixy = (disty << 4) - distxy;    /* disty * (16 - distx) */
107     distixiy =
108         16 * 16 - (disty << 4) -
109         (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
110
111     lo = (tl & 0xff00ff) * distixiy;
112     hi = ((tl >> 8) & 0xff00ff) * distixiy;
113
114     lo += (tr & 0xff00ff) * distxiy;
115     hi += ((tr >> 8) & 0xff00ff) * distxiy;
116
117     lo += (bl & 0xff00ff) * distixy;
118     hi += ((bl >> 8) & 0xff00ff) * distixy;
119
120     lo += (br & 0xff00ff) * distxy;
121     hi += ((br >> 8) & 0xff00ff) * distxy;
122
123     return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
124 }
125
126 #else
127 #if SIZEOF_LONG > 4
128
/*
 * Bilinear interpolation of four packed 32-bit pixels using 8-bit weights
 * and 64-bit arithmetic, two channels per multiplication.
 *
 * tl, tr, bl, br are the four neighbouring samples (presumably top-left,
 * top-right, bottom-left and bottom-right). distx and disty are the blend
 * weights; they are first scaled up to a 0..256 range.
 *
 * Weights applied to each sample:
 *   tl: (256 - distx) * (256 - disty)   tr: distx * (256 - disty)
 *   bl: (256 - distx) * disty           br: distx * disty
 * The four weights always add up to 256 * 256 = 65536, so each weighted
 * channel sum occupies 24 bits and its result is the top byte of those 24.
 *
 * First pass: the bytes at bits 0-7 and 24-31 are kept in place; after the
 * multiplication their results sit at bits 16-23 and 40-47.
 * Second pass: the byte at bits 16-23 is moved up to bits 32-39 so that it
 * cannot collide with the byte at bits 8-15; the results land at bits 48-55
 * (shifted back down by 16) and 24-31.
 * The combined value is finally shifted right by 16 to return every
 * channel to its original byte position.
 */
129 static force_inline uint32_t
130 bilinear_interpolation (uint32_t tl, uint32_t tr,
131                         uint32_t bl, uint32_t br,
132                         int distx, int disty)
133 {
134     uint64_t distxy, distxiy, distixy, distixiy;
135     uint64_t tl64, tr64, bl64, br64;
136     uint64_t f, r;
137
138     distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
139     disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
140
141     distxy = distx * disty;
142     distxiy = distx * (256 - disty);
143     distixy = (256 - distx) * disty;
144     distixiy = (256 - distx) * (256 - disty);
145
146     /* Alpha and Blue */
147     tl64 = tl & 0xff0000ff;
148     tr64 = tr & 0xff0000ff;
149     bl64 = bl & 0xff0000ff;
150     br64 = br & 0xff0000ff;
151
152     f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
153     r = f & 0x0000ff0000ff0000ull;
154
155     /* Red and Green */
156     tl64 = tl;
157     tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
158
159     tr64 = tr;
160     tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
161
162     bl64 = bl;
163     bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
164
165     br64 = br;
166     br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
167
168     f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
169     r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
170
171     return (uint32_t)(r >> 16);
172 }
173
174 #else
175
/*
 * Bilinear interpolation of four packed 32-bit pixels using 8-bit weights
 * and only 32-bit arithmetic, one channel per weighted sum.
 *
 * tl, tr, bl, br are the four neighbouring samples (presumably top-left,
 * top-right, bottom-left and bottom-right). distx and disty are the blend
 * weights; they are first scaled up to a 0..256 range.
 *
 * Weights applied to each sample:
 *   tl: (256 - distx) * (256 - disty)   tr: distx * (256 - disty)
 *   bl: (256 - distx) * disty           br: distx * disty
 * The four weights always add up to 256 * 256 = 65536, so a weighted sum of
 * a byte at bits 0-7 leaves its result at bits 16-23, and a weighted sum of
 * a byte at bits 8-15 leaves its result at bits 24-31 without overflowing
 * 32 bits.
 *
 * The two low channels are computed first and 'r' is shifted right by 16
 * to put them at bits 0-15. The inputs are then shifted right by 16 so the
 * two high channels can be computed with the same masks; their results
 * already sit at bits 16-23 and 24-31 and are simply OR-ed in.
 */
176 static force_inline uint32_t
177 bilinear_interpolation (uint32_t tl, uint32_t tr,
178                         uint32_t bl, uint32_t br,
179                         int distx, int disty)
180 {
181     int distxy, distxiy, distixy, distixiy;
182     uint32_t f, r;
183
184     distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
185     disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
186
187     distxy = distx * disty;
188     distxiy = (distx << 8) - distxy;    /* distx * (256 - disty) */
189     distixy = (disty << 8) - distxy;    /* disty * (256 - distx) */
190     distixiy =
191         256 * 256 - (disty << 8) -
192         (distx << 8) + distxy;          /* (256 - distx) * (256 - disty) */
193
194     /* Blue */
195     r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
196       + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
197
198     /* Green */
199     f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
200       + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
201     r |= f & 0xff000000;
202
203     tl >>= 16;
204     tr >>= 16;
205     bl >>= 16;
206     br >>= 16;
207     r >>= 16;
208
209     /* Red */
210     f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
211       + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
212     r |= f & 0x00ff0000;
213
214     /* Alpha */
215     f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
216       + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
217     r |= f & 0xff000000;
218
219     return r;
220 }
221
222 #endif
223 #endif // BILINEAR_INTERPOLATION_BITS <= 4
224
/*
 * Floating point bilinear interpolation of four argb_t samples.
 *
 * distx and disty are the horizontal and vertical blend factors
 * (NOTE(review): presumably in [0, 1] - confirm against callers). The
 * samples are weighted as
 *
 *     tl: (1 - distx) * (1 - disty)      tr: distx * (1 - disty)
 *     bl: (1 - distx) * disty            br: distx * disty
 *
 * which matches the integer bilinear_interpolation() variants in this file.
 * The four weights add up to exactly 1 in real arithmetic, so a uniform
 * input is reproduced unchanged.
 *
 * Returns the weighted sum, computed independently for a, r, g and b.
 */
225 static force_inline argb_t
226 bilinear_interpolation_float (argb_t tl, argb_t tr,
227                               argb_t bl, argb_t br,
228                               float distx, float disty)
229 {
230     float distxy, distxiy, distixy, distixiy;
231     argb_t r;
232
233     distxy = distx * disty;
234     distxiy = distx * (1.f - disty);
235     distixy = (1.f - distx) * disty;
236     distixiy = (1.f - distx) * (1.f - disty);
237
238     r.a = tl.a * distixiy + tr.a * distxiy +
239           bl.a * distixy  + br.a * distxy;
240     r.r = tl.r * distixiy + tr.r * distxiy +
241           bl.r * distixy  + br.r * distxy;
242     r.g = tl.g * distixiy + tr.g * distxiy +
243           bl.g * distixy  + br.g * distxy;
244     r.b = tl.b * distixiy + tr.b * distxiy +
245           bl.b * distixy  + br.b * distxy;
246
247     return r;
248 }
249
250 /*
251  * Split a scanline fetched from a PAD-repeated source into three runs:
252  * - 'left_pad':  leading pixels that sample to the left of the image
253  * - 'width':     pixels that are actually fetched from the image
254  * - 'right_pad': trailing pixels that sample to the right of the image
255  * On entry 'width' holds the full scanline length; on return it is reduced
256  * to the middle run. The code assumes that 'unit_x' is positive.
257  *
258  * Note: all intermediate math is done in 64 bits to rule out overflow,
259  *       which is probably more than many callers need. This function
260  *       may still deserve its own correctness test and performance tuning.
261  */
262 static force_inline void
263 pad_repeat_get_scanline_bounds (int32_t         source_image_width,
264                                 pixman_fixed_t  vx,
265                                 pixman_fixed_t  unit_x,
266                                 int32_t *       width,
267                                 int32_t *       left_pad,
268                                 int32_t *       right_pad)
269 {
270     const int64_t src_span = (int64_t) source_image_width << 16;
271     int64_t steps;
272
273     /* Left edge: number of steps needed before vx stops being negative,
274      * capped at the scanline width.
275      */
276     *left_pad = 0;
277     if (vx < 0)
278     {
279         steps = ((int64_t) unit_x - 1 - vx) / unit_x;
280         if (steps > *width)
281             steps = *width;
282
283         *left_pad = (int32_t) steps;
284         *width -= (int32_t) steps;
285     }
286
287     /* Right edge: number of the remaining steps that still sample inside
288      * the image, i.e. before vx reaches src_span.
289      */
290     steps = ((int64_t) unit_x - 1 - vx + src_span) / unit_x - *left_pad;
291     /* Clamp to what is left of the scanline; the rest is right padding. */
292     if (steps < 0)
293     {
294         steps = 0;
295     }
296     else if (steps > *width)
297     {
298         steps = *width;
299     }
300
301     /* Everything past the in-image run belongs to the right pad. */
302
303     *right_pad = *width - (int32_t) steps;
304     *width = (int32_t) steps;
305 }
306
307 /* A macroified version of specialized nearest scalers for some
308  * common 8888 and 565 formats. It supports SRC and OVER ops.
309  *
310  * There are two repeat versions, one that handles repeat normal,
311  * and one without repeat handling that only works if the src region
312  * used is completely covered by the pre-repeated source samples.
313  *
314  * The loops are unrolled to process two pixels per iteration for better
315  * performance on most CPU architectures (superscalar processors
316  * can issue several operations simultaneously, other processors can hide
317  * instruction latencies by pipelining operations). Unrolling more
318  * does not make much sense because the compiler will start running out
319  * of spare registers soon.
320  */
321
/*
 * Per-format alpha extractors. FAST_NEAREST_SCANLINE picks one of them by
 * token pasting (GET_ ## SRC_FORMAT ## _ALPHA).
 * GET_8888_ALPHA returns everything above bit 23, i.e. the top byte of a
 * 32-bit pixel; the 0565 and x888 variants ignore their argument and
 * report a constant 0xff.
 */
322 #define GET_8888_ALPHA(s) ((s) >> 24)
323  /* This is not actually used since we don't have an OVER with
324     565 source, but it is needed to build. */
325 #define GET_0565_ALPHA(s) 0xff
326 #define GET_x888_ALPHA(s) 0xff
327
/*
 * FAST_NEAREST_SCANLINE expands to a force_inline function named
 * 'scanline_func_name' that writes 'w' destination pixels, sampling the
 * source at fixed point position 'vx' and advancing by 'unit_x' per pixel.
 *
 * Macro parameters:
 *   SRC_FORMAT, DST_FORMAT  - pasted into the convert_X_to_Y and
 *                             GET_X_ALPHA helper names
 *   src_type_t, dst_type_t  - pixel storage types
 *   OP                      - SRC or OVER; the generated function calls
 *                             abort() for anything else
 *   repeat_mode             - only NORMAL changes the generated code: vx is
 *                             wrapped after every step
 *
 * Generated function:
 *   - For OVER with 'fully_transparent_src' set it returns without
 *     touching dst.
 *   - With NORMAL repeat, after each step vx is reduced by
 *     'src_width_fixed' until it is negative, so the next source index is
 *     negative relative to 'src' (NOTE(review): callers presumably pass a
 *     pointer just past the end of the source scanline together with a vx
 *     biased by -src_width_fixed; confirm against the main loop).
 *   - OVER: a source pixel whose alpha is 0xff is converted and stored
 *     directly; any other non-zero source pixel is combined with the
 *     destination through UN8x4_MUL_UN8_ADD_UN8x4 using the inverted alpha
 *     (a ^ 0xff); an all-zero source pixel leaves dst unchanged.
 *   - SRC: every pixel is converted and stored.
 *   - The main loop handles two pixels per iteration; the trailing
 *     'if (w & 1)' block handles the single pixel that may be left over.
 */
328 #define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT,                       \
329                               src_type_t, dst_type_t, OP, repeat_mode)                          \
330 static force_inline void                                                                        \
331 scanline_func_name (dst_type_t       *dst,                                                      \
332                     const src_type_t *src,                                                      \
333                     int32_t           w,                                                        \
334                     pixman_fixed_t    vx,                                                       \
335                     pixman_fixed_t    unit_x,                                                   \
336                     pixman_fixed_t    src_width_fixed,                                          \
337                     pixman_bool_t     fully_transparent_src)                                    \
338 {                                                                                               \
339         uint32_t   d;                                                                           \
340         src_type_t s1, s2;                                                                      \
341         uint8_t    a1, a2;                                                                      \
342         int        x1, x2;                                                                      \
343                                                                                                 \
344         if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src)                        \
345             return;                                                                             \
346                                                                                                 \
347         if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER)            \
348             abort();                                                                            \
349                                                                                                 \
350         while ((w -= 2) >= 0)                                                                   \
351         {                                                                                       \
352             x1 = pixman_fixed_to_int (vx);                                                      \
353             vx += unit_x;                                                                       \
354             if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                          \
355             {                                                                                   \
356                 /* This works because we know that unit_x is positive */                        \
357                 while (vx >= 0)                                                                 \
358                     vx -= src_width_fixed;                                                      \
359             }                                                                                   \
360             s1 = *(src + x1);                                                                   \
361                                                                                                 \
362             x2 = pixman_fixed_to_int (vx);                                                      \
363             vx += unit_x;                                                                       \
364             if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                          \
365             {                                                                                   \
366                 /* This works because we know that unit_x is positive */                        \
367                 while (vx >= 0)                                                                 \
368                     vx -= src_width_fixed;                                                      \
369             }                                                                                   \
370             s2 = *(src + x2);                                                                   \
371                                                                                                 \
372             if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)                                             \
373             {                                                                                   \
374                 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);                                          \
375                 a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2);                                          \
376                                                                                                 \
377                 if (a1 == 0xff)                                                                 \
378                 {                                                                               \
379                     *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);                   \
380                 }                                                                               \
381                 else if (s1)                                                                    \
382                 {                                                                               \
383                     d = convert_ ## DST_FORMAT ## _to_8888 (*dst);                              \
384                     s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);                               \
385                     a1 ^= 0xff;                                                                 \
386                     UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);                                        \
387                     *dst = convert_8888_to_ ## DST_FORMAT (d);                                  \
388                 }                                                                               \
389                 dst++;                                                                          \
390                                                                                                 \
391                 if (a2 == 0xff)                                                                 \
392                 {                                                                               \
393                     *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);                   \
394                 }                                                                               \
395                 else if (s2)                                                                    \
396                 {                                                                               \
397                     d = convert_## DST_FORMAT ## _to_8888 (*dst);                               \
398                     s2 = convert_## SRC_FORMAT ## _to_8888 (s2);                                \
399                     a2 ^= 0xff;                                                                 \
400                     UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2);                                        \
401                     *dst = convert_8888_to_ ## DST_FORMAT (d);                                  \
402                 }                                                                               \
403                 dst++;                                                                          \
404             }                                                                                   \
405             else /* PIXMAN_OP_SRC */                                                            \
406             {                                                                                   \
407                 *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);                     \
408                 *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);                     \
409             }                                                                                   \
410         }                                                                                       \
411                                                                                                 \
412         if (w & 1)                                                                              \
413         {                                                                                       \
414             x1 = pixman_fixed_to_int (vx);                                                      \
415             s1 = *(src + x1);                                                                   \
416                                                                                                 \
417             if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)                                             \
418             {                                                                                   \
419                 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);                                          \
420                                                                                                 \
421                 if (a1 == 0xff)                                                                 \
422                 {                                                                               \
423                     *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);                   \
424                 }                                                                               \
425                 else if (s1)                                                                    \
426                 {                                                                               \
427                     d = convert_## DST_FORMAT ## _to_8888 (*dst);                               \
428                     s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);                               \
429                     a1 ^= 0xff;                                                                 \
430                     UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);                                        \
431                     *dst = convert_8888_to_ ## DST_FORMAT (d);                                  \
432                 }                                                                               \
433                 dst++;                                                                          \
434             }                                                                                   \
435             else /* PIXMAN_OP_SRC */                                                            \
436             {                                                                                   \
437                 *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);                     \
438             }                                                                                   \
439         }                                                                                       \
440 }
441
442 #define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,      \
443                                   dst_type_t, repeat_mode, have_mask, mask_is_solid)            \
444 static void                                                                                     \
445 fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,                \
446                                                    pixman_composite_info_t *info)               \
447 {                                                                                               \
448     PIXMAN_COMPOSITE_ARGS (info);                                                               \
449     dst_type_t *dst_line;                                                                       \
450     mask_type_t *mask_line;                                                                     \
451     src_type_t *src_first_line;                                                                 \
452     int       y;                                                                                \
453     pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width);               \
454     pixman_fixed_t max_vy;                                                                      \
455     pixman_vector_t v;                                                                          \
456     pixman_fixed_t vx, vy;                                                                      \
457     pixman_fixed_t unit_x, unit_y;                                                              \
458     int32_t left_pad, right_pad;                                                                \
459                                                                                                 \
460     src_type_t *src;                                                                            \
461     dst_type_t *dst;                                                                            \
462     mask_type_t solid_mask;                                                                     \
463     const mask_type_t *mask = &solid_mask;                                                      \
464     int src_stride, mask_stride, dst_stride;                                                    \
465                                                                                                 \
466     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);    \
467     if (have_mask)                                                                              \
468     {                                                                                           \
469         if (mask_is_solid)                                                                      \
470             solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);    \
471         else                                                                                    \
472             PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,                     \
473                                    mask_stride, mask_line, 1);                                  \
474     }                                                                                           \
475     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be                  \
476      * transformed from destination space to source space */                                    \
477     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);         \
478                                                                                                 \
479     /* reference point is the center of the pixel */                                            \
480     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;                             \
481     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;                             \
482     v.vector[2] = pixman_fixed_1;                                                               \
483                                                                                                 \
484     if (!pixman_transform_point_3d (src_image->common.transform, &v))                           \
485         return;                                                                                 \
486                                                                                                 \
487     unit_x = src_image->common.transform->matrix[0][0];                                         \
488     unit_y = src_image->common.transform->matrix[1][1];                                         \
489                                                                                                 \
490     /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */                   \
491     v.vector[0] -= pixman_fixed_e;                                                              \
492     v.vector[1] -= pixman_fixed_e;                                                              \
493                                                                                                 \
494     vx = v.vector[0];                                                                           \
495     vy = v.vector[1];                                                                           \
496                                                                                                 \
497     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                                  \
498     {                                                                                           \
499         max_vy = pixman_int_to_fixed (src_image->bits.height);                                  \
500                                                                                                 \
501         /* Clamp repeating positions inside the actual samples */                               \
502         repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);                                    \
503         repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                             \
504     }                                                                                           \
505                                                                                                 \
506     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||                                   \
507         PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                                    \
508     {                                                                                           \
509         pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,                      \
510                                         &width, &left_pad, &right_pad);                         \
511         vx += left_pad * unit_x;                                                                \
512     }                                                                                           \
513                                                                                                 \
514     while (--height >= 0)                                                                       \
515     {                                                                                           \
516         dst = dst_line;                                                                         \
517         dst_line += dst_stride;                                                                 \
518         if (have_mask && !mask_is_solid)                                                        \
519         {                                                                                       \
520             mask = mask_line;                                                                   \
521             mask_line += mask_stride;                                                           \
522         }                                                                                       \
523                                                                                                 \
524         y = pixman_fixed_to_int (vy);                                                           \
525         vy += unit_y;                                                                           \
526         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                              \
527             repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                         \
528         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)                                 \
529         {                                                                                       \
530             repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);                             \
531             src = src_first_line + src_stride * y;                                              \
532             if (left_pad > 0)                                                                   \
533             {                                                                                   \
534                 scanline_func (mask, dst,                                                       \
535                                src + src_image->bits.width - src_image->bits.width + 1,         \
536                                left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);           \
537             }                                                                                   \
538             if (width > 0)                                                                      \
539             {                                                                                   \
540                 scanline_func (mask + (mask_is_solid ? 0 : left_pad),                           \
541                                dst + left_pad, src + src_image->bits.width, width,              \
542                                vx - src_width_fixed, unit_x, src_width_fixed, FALSE);           \
543             }                                                                                   \
544             if (right_pad > 0)                                                                  \
545             {                                                                                   \
546                 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),                   \
547                                dst + left_pad + width, src + src_image->bits.width,             \
548                                right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);          \
549             }                                                                                   \
550         }                                                                                       \
551         else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                           \
552         {                                                                                       \
553             static const src_type_t zero[1] = { 0 };                                            \
554             if (y < 0 || y >= src_image->bits.height)                                           \
555             {                                                                                   \
556                 scanline_func (mask, dst, zero + 1, left_pad + width + right_pad,               \
557                                -pixman_fixed_e, 0, src_width_fixed, TRUE);                      \
558                 continue;                                                                       \
559             }                                                                                   \
560             src = src_first_line + src_stride * y;                                              \
561             if (left_pad > 0)                                                                   \
562             {                                                                                   \
563                 scanline_func (mask, dst, zero + 1, left_pad,                                   \
564                                -pixman_fixed_e, 0, src_width_fixed, TRUE);                      \
565             }                                                                                   \
566             if (width > 0)                                                                      \
567             {                                                                                   \
568                 scanline_func (mask + (mask_is_solid ? 0 : left_pad),                           \
569                                dst + left_pad, src + src_image->bits.width, width,              \
570                                vx - src_width_fixed, unit_x, src_width_fixed, FALSE);           \
571             }                                                                                   \
572             if (right_pad > 0)                                                                  \
573             {                                                                                   \
574                 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),                   \
575                                dst + left_pad + width, zero + 1, right_pad,                     \
576                                -pixman_fixed_e, 0, src_width_fixed, TRUE);                      \
577             }                                                                                   \
578         }                                                                                       \
579         else                                                                                    \
580         {                                                                                       \
581             src = src_first_line + src_stride * y;                                              \
582             scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed, \
583                            unit_x, src_width_fixed, FALSE);                                     \
584         }                                                                                       \
585     }                                                                                           \
586 }
587
/*
 * Instantiate the nearest scaling main loop with an explicit mask
 * configuration (have_mask / mask_is_solid).
 *
 * The leading underscore is pasted onto scale_func_name in this separate
 * macro layer before everything is forwarded to FAST_NEAREST_MAINLOOP_INT.
 * This is a workaround for old Sun Studio compilers, see:
 * https://bugs.freedesktop.org/show_bug.cgi?id=32764
 */
#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func,            \
                                     src_type_t, mask_type_t, dst_type_t,       \
                                     repeat_mode, have_mask, mask_is_solid)     \
    FAST_NEAREST_MAINLOOP_INT (_ ## scale_func_name,                            \
                               scanline_func,                                   \
                               src_type_t, mask_type_t, dst_type_t,             \
                               repeat_mode, have_mask, mask_is_solid)
593
/*
 * Instantiate the nearest scaling main loop for a scanline function that
 * has no mask argument.
 *
 * Two things are emitted:
 *  1. A force_inline adapter named <scanline_func><scale_func_name>_wrapper.
 *     It has the mask-aware argument list, ignores the mask pointer and
 *     forwards every other argument to scanline_func unchanged.
 *  2. An expansion of FAST_NEAREST_MAINLOOP_INT that uses the adapter as its
 *     scanline function, uint8_t as the mask type, and FALSE for both
 *     have_mask and mask_is_solid.
 */
#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func,            \
                                     src_type_t, dst_type_t, repeat_mode)       \
    static force_inline void                                                    \
    scanline_func##scale_func_name##_wrapper (const uint8_t    *mask,           \
                                              dst_type_t       *dst,            \
                                              const src_type_t *src,            \
                                              int32_t           n_pixels,       \
                                              pixman_fixed_t    vx,             \
                                              pixman_fixed_t    unit_x,         \
                                              pixman_fixed_t    max_vx,         \
                                              pixman_bool_t     zero_src)       \
    {                                                                           \
        scanline_func (dst, src, n_pixels, vx, unit_x, max_vx, zero_src);       \
    }                                                                           \
    FAST_NEAREST_MAINLOOP_INT (scale_func_name,                                 \
                               scanline_func##scale_func_name##_wrapper,        \
                               src_type_t, uint8_t, dst_type_t,                 \
                               repeat_mode, FALSE, FALSE)
611
/*
 * Convenience form of FAST_NEAREST_MAINLOOP_NOMASK: pastes a leading
 * underscore onto scale_func_name and forwards all other arguments as-is.
 */
#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func,                   \
                              src_type_t, dst_type_t, repeat_mode)              \
    FAST_NEAREST_MAINLOOP_NOMASK (_ ## scale_func_name,                         \
                                  scanline_func,                                \
                                  src_type_t, dst_type_t,                       \
                                  repeat_mode)
616
/*
 * Generate a complete unmasked nearest scaling fast path for one
 * (format pair, operator, repeat mode) combination:
 *
 *  - FAST_NEAREST_SCANLINE defines the scanline function
 *    scaled_nearest_scanline_<scale_func_name>_<OP>;
 *  - FAST_NEAREST_MAINLOOP_NOMASK wraps that scanline function in a main
 *    loop, using _<scale_func_name>_<OP> as the name suffix.
 */
#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,                   \
                     src_type_t, dst_type_t, OP, repeat_mode)                   \
    FAST_NEAREST_SCANLINE (                                                     \
        scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,                 \
        SRC_FORMAT, DST_FORMAT,                                                 \
        src_type_t, dst_type_t,                                                 \
        OP, repeat_mode)                                                        \
    FAST_NEAREST_MAINLOOP_NOMASK (                                              \
        _ ## scale_func_name ## _ ## OP,                                        \
        scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,                 \
        src_type_t, dst_type_t,                                                 \
        repeat_mode)
625
626
/*
 * Flag set shared by all of the SIMPLE_NEAREST_* fast path table entries:
 * scale transform, no alpha map, nearest filter, no accessors and a narrow
 * format.
 */
#define SCALED_NEAREST_FLAGS                                            \
    (  FAST_PATH_SCALE_TRANSFORM                                        \
     | FAST_PATH_NO_ALPHA_MAP                                           \
     | FAST_PATH_NEAREST_FILTER                                         \
     | FAST_PATH_NO_ACCESSORS                                           \
     | FAST_PATH_NARROW_FORMAT)
633
/*
 * Fast path table entry: unmasked nearest scaling, NORMAL repeat.
 * Expands to operator PIXMAN_OP_<op>, source PIXMAN_<s>, destination
 * PIXMAN_<d>, a PIXMAN_null mask with no flags, and the handler
 * fast_composite_scaled_nearest_<func>_normal_<op>.  The source must also
 * satisfy FAST_PATH_X_UNIT_POSITIVE.
 */
#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op, s, d, func)                 \
    {                                                                   \
        PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (  SCALED_NEAREST_FLAGS                                         \
         | FAST_PATH_NORMAL_REPEAT                                      \
         | FAST_PATH_X_UNIT_POSITIVE),                                  \
        PIXMAN_null,                                                    \
        0,                                                              \
        PIXMAN_ ## d,                                                   \
        FAST_PATH_STD_DEST_FLAGS,                                       \
        fast_composite_scaled_nearest_ ## func ## _normal_ ## op,       \
    }
644
/*
 * Fast path table entry: unmasked nearest scaling, PAD repeat.
 * Expands to operator PIXMAN_OP_<op>, source PIXMAN_<s>, destination
 * PIXMAN_<d>, a PIXMAN_null mask with no flags, and the handler
 * fast_composite_scaled_nearest_<func>_pad_<op>.  The source must also
 * satisfy FAST_PATH_X_UNIT_POSITIVE.
 */
#define SIMPLE_NEAREST_FAST_PATH_PAD(op, s, d, func)                    \
    {                                                                   \
        PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (  SCALED_NEAREST_FLAGS                                         \
         | FAST_PATH_PAD_REPEAT                                         \
         | FAST_PATH_X_UNIT_POSITIVE),                                  \
        PIXMAN_null,                                                    \
        0,                                                              \
        PIXMAN_ ## d,                                                   \
        FAST_PATH_STD_DEST_FLAGS,                                       \
        fast_composite_scaled_nearest_ ## func ## _pad_ ## op,          \
    }
655
/*
 * Fast path table entry: unmasked nearest scaling, NONE repeat.
 * Expands to operator PIXMAN_OP_<op>, source PIXMAN_<s>, destination
 * PIXMAN_<d>, a PIXMAN_null mask with no flags, and the handler
 * fast_composite_scaled_nearest_<func>_none_<op>.  The source must also
 * satisfy FAST_PATH_X_UNIT_POSITIVE.
 */
#define SIMPLE_NEAREST_FAST_PATH_NONE(op, s, d, func)                   \
    {                                                                   \
        PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (  SCALED_NEAREST_FLAGS                                         \
         | FAST_PATH_NONE_REPEAT                                        \
         | FAST_PATH_X_UNIT_POSITIVE),                                  \
        PIXMAN_null,                                                    \
        0,                                                              \
        PIXMAN_ ## d,                                                   \
        FAST_PATH_STD_DEST_FLAGS,                                       \
        fast_composite_scaled_nearest_ ## func ## _none_ ## op,         \
    }
666
/*
 * Fast path table entry: unmasked nearest scaling, COVER variant.
 * Instead of a repeat flag the source must satisfy
 * FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; FAST_PATH_X_UNIT_POSITIVE is not
 * required here.  The handler is
 * fast_composite_scaled_nearest_<func>_cover_<op>.
 */
#define SIMPLE_NEAREST_FAST_PATH_COVER(op, s, d, func)                  \
    {                                                                   \
        PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (  SCALED_NEAREST_FLAGS                                         \
         | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST),                       \
        PIXMAN_null,                                                    \
        0,                                                              \
        PIXMAN_ ## d,                                                   \
        FAST_PATH_STD_DEST_FLAGS,                                       \
        fast_composite_scaled_nearest_ ## func ## _cover_ ## op,        \
    }
675
/*
 * Fast path table entry: nearest scaling with a PIXMAN_a8 mask, NORMAL
 * repeat.  Mask flags are MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA); the
 * source must also satisfy FAST_PATH_X_UNIT_POSITIVE.  The handler is
 * fast_composite_scaled_nearest_<func>_normal_<op>.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op, s, d, func)         \
    {                                                                   \
        PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (  SCALED_NEAREST_FLAGS                                         \
         | FAST_PATH_NORMAL_REPEAT                                      \
         | FAST_PATH_X_UNIT_POSITIVE),                                  \
        PIXMAN_a8,                                                      \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),                       \
        PIXMAN_ ## d,                                                   \
        FAST_PATH_STD_DEST_FLAGS,                                       \
        fast_composite_scaled_nearest_ ## func ## _normal_ ## op,       \
    }
686
/*
 * Fast path table entry: nearest scaling with a PIXMAN_a8 mask, PAD
 * repeat.  Mask flags are MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA); the
 * source must also satisfy FAST_PATH_X_UNIT_POSITIVE.  The handler is
 * fast_composite_scaled_nearest_<func>_pad_<op>.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op, s, d, func)            \
    {                                                                   \
        PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (  SCALED_NEAREST_FLAGS                                         \
         | FAST_PATH_PAD_REPEAT                                         \
         | FAST_PATH_X_UNIT_POSITIVE),                                  \
        PIXMAN_a8,                                                      \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),                       \
        PIXMAN_ ## d,                                                   \
        FAST_PATH_STD_DEST_FLAGS,                                       \
        fast_composite_scaled_nearest_ ## func ## _pad_ ## op,          \
    }
697
/*
 * Fast path table entry: nearest scaling with a PIXMAN_a8 mask, NONE
 * repeat.  Mask flags are MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA); the
 * source must also satisfy FAST_PATH_X_UNIT_POSITIVE.  The handler is
 * fast_composite_scaled_nearest_<func>_none_<op>.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op, s, d, func)           \
    {                                                                   \
        PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (  SCALED_NEAREST_FLAGS                                         \
         | FAST_PATH_NONE_REPEAT                                        \
         | FAST_PATH_X_UNIT_POSITIVE),                                  \
        PIXMAN_a8,                                                      \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),                       \
        PIXMAN_ ## d,                                                   \
        FAST_PATH_STD_DEST_FLAGS,                                       \
        fast_composite_scaled_nearest_ ## func ## _none_ ## op,         \
    }
708
/*
 * Fast path table entry: nearest scaling with a PIXMAN_a8 mask, COVER
 * variant.  The source must satisfy FAST_PATH_SAMPLES_COVER_CLIP_NEAREST
 * rather than a repeat flag; mask flags are
 * MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA).  The handler is
 * fast_composite_scaled_nearest_<func>_cover_<op>.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op, s, d, func)          \
    {                                                                   \
        PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (  SCALED_NEAREST_FLAGS                                         \
         | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST),                       \
        PIXMAN_a8,                                                      \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),                       \
        PIXMAN_ ## d,                                                   \
        FAST_PATH_STD_DEST_FLAGS,                                       \
        fast_composite_scaled_nearest_ ## func ## _cover_ ## op,        \
    }
717
/*
 * Fast path table entry: nearest scaling with a PIXMAN_solid mask, NORMAL
 * repeat.  Mask flags are MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA); the
 * source must also satisfy FAST_PATH_X_UNIT_POSITIVE.  The handler is
 * fast_composite_scaled_nearest_<func>_normal_<op>.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op, s, d, func)      \
    {                                                                   \
        PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (  SCALED_NEAREST_FLAGS                                         \
         | FAST_PATH_NORMAL_REPEAT                                      \
         | FAST_PATH_X_UNIT_POSITIVE),                                  \
        PIXMAN_solid,                                                   \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),                    \
        PIXMAN_ ## d,                                                   \
        FAST_PATH_STD_DEST_FLAGS,                                       \
        fast_composite_scaled_nearest_ ## func ## _normal_ ## op,       \
    }
728
/*
 * Fast path table entry: nearest scaling with a PIXMAN_solid mask, PAD
 * repeat.  Mask flags are MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA); the
 * source must also satisfy FAST_PATH_X_UNIT_POSITIVE.  The handler is
 * fast_composite_scaled_nearest_<func>_pad_<op>.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op, s, d, func)         \
    {                                                                   \
        PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (  SCALED_NEAREST_FLAGS                                         \
         | FAST_PATH_PAD_REPEAT                                         \
         | FAST_PATH_X_UNIT_POSITIVE),                                  \
        PIXMAN_solid,                                                   \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),                    \
        PIXMAN_ ## d,                                                   \
        FAST_PATH_STD_DEST_FLAGS,                                       \
        fast_composite_scaled_nearest_ ## func ## _pad_ ## op,          \
    }
739
/*
 * Fast path table entry: nearest scaling with a PIXMAN_solid mask, NONE
 * repeat.  Mask flags are MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA); the
 * source must also satisfy FAST_PATH_X_UNIT_POSITIVE.  The handler is
 * fast_composite_scaled_nearest_<func>_none_<op>.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op, s, d, func)        \
    {                                                                   \
        PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (  SCALED_NEAREST_FLAGS                                         \
         | FAST_PATH_NONE_REPEAT                                        \
         | FAST_PATH_X_UNIT_POSITIVE),                                  \
        PIXMAN_solid,                                                   \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),                    \
        PIXMAN_ ## d,                                                   \
        FAST_PATH_STD_DEST_FLAGS,                                       \
        fast_composite_scaled_nearest_ ## func ## _none_ ## op,         \
    }
750
/*
 * Fast path table entry: nearest scaling with a PIXMAN_solid mask, COVER
 * variant.  The source must satisfy FAST_PATH_SAMPLES_COVER_CLIP_NEAREST
 * rather than a repeat flag; mask flags are
 * MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA).  The handler is
 * fast_composite_scaled_nearest_<func>_cover_<op>.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op, s, d, func)       \
    {                                                                   \
        PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (  SCALED_NEAREST_FLAGS                                         \
         | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST),                       \
        PIXMAN_solid,                                                   \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),                    \
        PIXMAN_ ## d,                                                   \
        FAST_PATH_STD_DEST_FLAGS,                                       \
        fast_composite_scaled_nearest_ ## func ## _cover_ ## op,        \
    }
759
/*
 * All four unmasked nearest scaling table entries for one (op, s, d, func)
 * combination, as a comma-separated list without a trailing comma.
 * The 'cover' variant is listed first so that it is preferred, because it
 * is faster.
 */
#define SIMPLE_NEAREST_FAST_PATH(op, s, d, func)                        \
    SIMPLE_NEAREST_FAST_PATH_COVER  (op, s, d, func),                   \
    SIMPLE_NEAREST_FAST_PATH_NONE   (op, s, d, func),                   \
    SIMPLE_NEAREST_FAST_PATH_PAD    (op, s, d, func),                   \
    SIMPLE_NEAREST_FAST_PATH_NORMAL (op, s, d, func)
766
/*
 * The a8-masked nearest scaling table entries for one (op, s, d, func)
 * combination, in COVER, NONE, PAD order, as a comma-separated list without
 * a trailing comma.  No NORMAL repeat entry is included in this group.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op, s, d, func)                \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op, s, d, func),            \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE  (op, s, d, func),            \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD   (op, s, d, func)
771
/*
 * All four solid-masked nearest scaling table entries for one
 * (op, s, d, func) combination, in COVER, NONE, PAD, NORMAL order, as a
 * comma-separated list without a trailing comma.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op, s, d, func)             \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER  (op, s, d, func),        \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE   (op, s, d, func),        \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD    (op, s, d, func),        \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (op, s, d, func)
777
778 /*****************************************************************************/
779
780 /*
781  * Identify 5 zones in each scanline for bilinear scaling. Depending on
782  * whether 2 pixels to be interpolated are fetched from the image itself,
783  * from the padding area around it or from both image and padding area.
784  */
785 static force_inline void
786 bilinear_pad_repeat_get_scanline_bounds (int32_t         source_image_width,
787                                          pixman_fixed_t  vx,
788                                          pixman_fixed_t  unit_x,
789                                          int32_t *       left_pad,
790                                          int32_t *       left_tz,
791                                          int32_t *       width,
792                                          int32_t *       right_tz,
793                                          int32_t *       right_pad)
794 {
795         int width1 = *width, left_pad1, right_pad1;
796         int width2 = *width, left_pad2, right_pad2;
797
798         pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
799                                         &width1, &left_pad1, &right_pad1);
800         pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
801                                         unit_x, &width2, &left_pad2, &right_pad2);
802
803         *left_pad = left_pad2;
804         *left_tz = left_pad1 - left_pad2;
805         *right_tz = right_pad2 - right_pad1;
806         *right_pad = right_pad1;
807         *width -= *left_pad + *left_tz + *right_tz + *right_pad;
808 }
809
810 /*
811  * Main loop template for single pass bilinear scaling. It needs to be
812  * provided with 'scanline_func' which should do the compositing operation.
813  * The needed function has the following prototype:
814  *
815  *      scanline_func (dst_type_t *       dst,
816  *                     const mask_type_t * mask,
817  *                     const src_type_t * src_top,
818  *                     const src_type_t * src_bottom,
819  *                     int32_t            width,
820  *                     int                weight_top,
821  *                     int                weight_bottom,
822  *                     pixman_fixed_t     vx,
823  *                     pixman_fixed_t     unit_x,
824  *                     pixman_fixed_t     max_vx,
825  *                     pixman_bool_t      zero_src)
826  *
827  * Where:
828  *  dst                 - destination scanline buffer for storing results
829  *  mask                - mask buffer (or single value for solid mask)
830  *  src_top, src_bottom - two source scanlines
831  *  width               - number of pixels to process
832  *  weight_top          - weight of the top row for interpolation
833  *  weight_bottom       - weight of the bottom row for interpolation
834  *  vx                  - initial position for fetching the first pair of
835  *                        pixels from the source buffer
836  *  unit_x              - position increment needed to move to the next pair
837  *                        of pixels
838  *  max_vx              - image size as a fixed point value, can be used for
839  *                        implementing NORMAL repeat (when it is supported)
840  *  zero_src            - boolean hint variable, which is set to TRUE when
841  *                        all source pixels are fetched from zero padding
842  *                        zone for NONE repeat
843  *
844  * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
845  *       BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that
846  *       for NONE repeat when handling fuzzy antialiased top or bottom image
847  *       edges. Also, both the top and bottom weights are guaranteed to
848  *       be less than BILINEAR_INTERPOLATION_RANGE.
849  *       For example, the weights can fit into an unsigned byte or be used
850  *       with 8-bit SIMD multiplication instructions for 8-bit interpolation
851  *       precision.
852  */
853 #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,     \
854                                   dst_type_t, repeat_mode, flags)                               \
855 static void                                                                                     \
856 fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,                \
857                                                    pixman_composite_info_t *info)               \
858 {                                                                                               \
859     PIXMAN_COMPOSITE_ARGS (info);                                                               \
860     dst_type_t *dst_line;                                                                       \
861     mask_type_t *mask_line;                                                                     \
862     src_type_t *src_first_line;                                                                 \
863     int       y1, y2;                                                                           \
864     pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */            \
865     pixman_vector_t v;                                                                          \
866     pixman_fixed_t vx, vy;                                                                      \
867     pixman_fixed_t unit_x, unit_y;                                                              \
868     int32_t left_pad, left_tz, right_tz, right_pad;                                             \
869                                                                                                 \
870     dst_type_t *dst;                                                                            \
871     mask_type_t solid_mask;                                                                     \
872     const mask_type_t *mask = &solid_mask;                                                      \
873     int src_stride, mask_stride, dst_stride;                                                    \
874                                                                                                 \
875     int src_width;                                                                              \
876     pixman_fixed_t src_width_fixed;                                                             \
877     int max_x;                                                                                  \
878     pixman_bool_t need_src_extension;                                                           \
879                                                                                                 \
880     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);    \
881     if (flags & FLAG_HAVE_SOLID_MASK)                                                           \
882     {                                                                                           \
883         solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);        \
884         mask_stride = 0;                                                                        \
885     }                                                                                           \
886     else if (flags & FLAG_HAVE_NON_SOLID_MASK)                                                  \
887     {                                                                                           \
888         PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,                         \
889                                mask_stride, mask_line, 1);                                      \
890     }                                                                                           \
891                                                                                                 \
892     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be                  \
893      * transformed from destination space to source space */                                    \
894     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);         \
895                                                                                                 \
896     /* reference point is the center of the pixel */                                            \
897     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;                             \
898     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;                             \
899     v.vector[2] = pixman_fixed_1;                                                               \
900                                                                                                 \
901     if (!pixman_transform_point_3d (src_image->common.transform, &v))                           \
902         return;                                                                                 \
903                                                                                                 \
904     unit_x = src_image->common.transform->matrix[0][0];                                         \
905     unit_y = src_image->common.transform->matrix[1][1];                                         \
906                                                                                                 \
907     v.vector[0] -= pixman_fixed_1 / 2;                                                          \
908     v.vector[1] -= pixman_fixed_1 / 2;                                                          \
909                                                                                                 \
910     vy = v.vector[1];                                                                           \
911                                                                                                 \
912     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||                                   \
913         PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                                    \
914     {                                                                                           \
915         bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,    \
916                                         &left_pad, &left_tz, &width, &right_tz, &right_pad);    \
917         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)                                 \
918         {                                                                                       \
919             /* PAD repeat does not need special handling for 'transition zones' and */          \
920             /* they can be combined with 'padding zones' safely */                              \
921             left_pad += left_tz;                                                                \
922             right_pad += right_tz;                                                              \
923             left_tz = right_tz = 0;                                                             \
924         }                                                                                       \
925         v.vector[0] += left_pad * unit_x;                                                       \
926     }                                                                                           \
927                                                                                                 \
928     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                                  \
929     {                                                                                           \
930         vx = v.vector[0];                                                                       \
931         repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width));         \
932         max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1;                   \
933                                                                                                 \
934         if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH)                                    \
935         {                                                                                       \
936             src_width = 0;                                                                      \
937                                                                                                 \
938             while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x)                   \
939                 src_width += src_image->bits.width;                                             \
940                                                                                                 \
941             need_src_extension = TRUE;                                                          \
942         }                                                                                       \
943         else                                                                                    \
944         {                                                                                       \
945             src_width = src_image->bits.width;                                                  \
946             need_src_extension = FALSE;                                                         \
947         }                                                                                       \
948                                                                                                 \
949         src_width_fixed = pixman_int_to_fixed (src_width);                                      \
950     }                                                                                           \
951                                                                                                 \
952     while (--height >= 0)                                                                       \
953     {                                                                                           \
954         int weight1, weight2;                                                                   \
955         dst = dst_line;                                                                         \
956         dst_line += dst_stride;                                                                 \
957         vx = v.vector[0];                                                                       \
958         if (flags & FLAG_HAVE_NON_SOLID_MASK)                                                   \
959         {                                                                                       \
960             mask = mask_line;                                                                   \
961             mask_line += mask_stride;                                                           \
962         }                                                                                       \
963                                                                                                 \
964         y1 = pixman_fixed_to_int (vy);                                                          \
965         weight2 = pixman_fixed_to_bilinear_weight (vy);                                         \
966         if (weight2)                                                                            \
967         {                                                                                       \
968             /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */        \
969             y2 = y1 + 1;                                                                        \
970             weight1 = BILINEAR_INTERPOLATION_RANGE - weight2;                                   \
971         }                                                                                       \
972         else                                                                                    \
973         {                                                                                       \
974             /* set both top and bottom row to the same scanline and tweak weights */            \
975             y2 = y1;                                                                            \
976             weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2;                               \
977         }                                                                                       \
978         vy += unit_y;                                                                           \
979         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)                                 \
980         {                                                                                       \
981             src_type_t *src1, *src2;                                                            \
982             src_type_t buf1[2];                                                                 \
983             src_type_t buf2[2];                                                                 \
984             repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);                            \
985             repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);                            \
986             src1 = src_first_line + src_stride * y1;                                            \
987             src2 = src_first_line + src_stride * y2;                                            \
988                                                                                                 \
989             if (left_pad > 0)                                                                   \
990             {                                                                                   \
991                 buf1[0] = buf1[1] = src1[0];                                                    \
992                 buf2[0] = buf2[1] = src2[0];                                                    \
993                 scanline_func (dst, mask,                                                       \
994                                buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE);         \
995                 dst += left_pad;                                                                \
996                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
997                     mask += left_pad;                                                           \
998             }                                                                                   \
999             if (width > 0)                                                                      \
1000             {                                                                                   \
1001                 scanline_func (dst, mask,                                                       \
1002                                src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);      \
1003                 dst += width;                                                                   \
1004                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
1005                     mask += width;                                                              \
1006             }                                                                                   \
1007             if (right_pad > 0)                                                                  \
1008             {                                                                                   \
1009                 buf1[0] = buf1[1] = src1[src_image->bits.width - 1];                            \
1010                 buf2[0] = buf2[1] = src2[src_image->bits.width - 1];                            \
1011                 scanline_func (dst, mask,                                                       \
1012                                buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE);        \
1013             }                                                                                   \
1014         }                                                                                       \
1015         else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                           \
1016         {                                                                                       \
1017             src_type_t *src1, *src2;                                                            \
1018             src_type_t buf1[2];                                                                 \
1019             src_type_t buf2[2];                                                                 \
1020             /* handle top/bottom zero padding by just setting weights to 0 if needed */         \
1021             if (y1 < 0)                                                                         \
1022             {                                                                                   \
1023                 weight1 = 0;                                                                    \
1024                 y1 = 0;                                                                         \
1025             }                                                                                   \
1026             if (y1 >= src_image->bits.height)                                                   \
1027             {                                                                                   \
1028                 weight1 = 0;                                                                    \
1029                 y1 = src_image->bits.height - 1;                                                \
1030             }                                                                                   \
1031             if (y2 < 0)                                                                         \
1032             {                                                                                   \
1033                 weight2 = 0;                                                                    \
1034                 y2 = 0;                                                                         \
1035             }                                                                                   \
1036             if (y2 >= src_image->bits.height)                                                   \
1037             {                                                                                   \
1038                 weight2 = 0;                                                                    \
1039                 y2 = src_image->bits.height - 1;                                                \
1040             }                                                                                   \
1041             src1 = src_first_line + src_stride * y1;                                            \
1042             src2 = src_first_line + src_stride * y2;                                            \
1043                                                                                                 \
1044             if (left_pad > 0)                                                                   \
1045             {                                                                                   \
1046                 buf1[0] = buf1[1] = 0;                                                          \
1047                 buf2[0] = buf2[1] = 0;                                                          \
1048                 scanline_func (dst, mask,                                                       \
1049                                buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE);          \
1050                 dst += left_pad;                                                                \
1051                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
1052                     mask += left_pad;                                                           \
1053             }                                                                                   \
1054             if (left_tz > 0)                                                                    \
1055             {                                                                                   \
1056                 buf1[0] = 0;                                                                    \
1057                 buf1[1] = src1[0];                                                              \
1058                 buf2[0] = 0;                                                                    \
1059                 buf2[1] = src2[0];                                                              \
1060                 scanline_func (dst, mask,                                                       \
1061                                buf1, buf2, left_tz, weight1, weight2,                           \
1062                                pixman_fixed_frac (vx), unit_x, 0, FALSE);                       \
1063                 dst += left_tz;                                                                 \
1064                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
1065                     mask += left_tz;                                                            \
1066                 vx += left_tz * unit_x;                                                         \
1067             }                                                                                   \
1068             if (width > 0)                                                                      \
1069             {                                                                                   \
1070                 scanline_func (dst, mask,                                                       \
1071                                src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);      \
1072                 dst += width;                                                                   \
1073                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
1074                     mask += width;                                                              \
1075                 vx += width * unit_x;                                                           \
1076             }                                                                                   \
1077             if (right_tz > 0)                                                                   \
1078             {                                                                                   \
1079                 buf1[0] = src1[src_image->bits.width - 1];                                      \
1080                 buf1[1] = 0;                                                                    \
1081                 buf2[0] = src2[src_image->bits.width - 1];                                      \
1082                 buf2[1] = 0;                                                                    \
1083                 scanline_func (dst, mask,                                                       \
1084                                buf1, buf2, right_tz, weight1, weight2,                          \
1085                                pixman_fixed_frac (vx), unit_x, 0, FALSE);                       \
1086                 dst += right_tz;                                                                \
1087                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
1088                     mask += right_tz;                                                           \
1089             }                                                                                   \
1090             if (right_pad > 0)                                                                  \
1091             {                                                                                   \
1092                 buf1[0] = buf1[1] = 0;                                                          \
1093                 buf2[0] = buf2[1] = 0;                                                          \
1094                 scanline_func (dst, mask,                                                       \
1095                                buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE);         \
1096             }                                                                                   \
1097         }                                                                                       \
1098         else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                         \
1099         {                                                                                       \
1100             int32_t         num_pixels;                                                         \
1101             int32_t         width_remain;                                                       \
1102             src_type_t *    src_line_top;                                                       \
1103             src_type_t *    src_line_bottom;                                                    \
1104             src_type_t      buf1[2];                                                            \
1105             src_type_t      buf2[2];                                                            \
1106             src_type_t      extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2];                      \
1107             src_type_t      extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2];                      \
1108             int             i, j;                                                               \
1109                                                                                                 \
1110             repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);                         \
1111             repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);                         \
1112             src_line_top = src_first_line + src_stride * y1;                                    \
1113             src_line_bottom = src_first_line + src_stride * y2;                                 \
1114                                                                                                 \
1115             if (need_src_extension)                                                             \
1116             {                                                                                   \
1117                 for (i=0; i<src_width;)                                                         \
1118                 {                                                                               \
1119                     for (j=0; j<src_image->bits.width; j++, i++)                                \
1120                     {                                                                           \
1121                         extended_src_line0[i] = src_line_top[j];                                \
1122                         extended_src_line1[i] = src_line_bottom[j];                             \
1123                     }                                                                           \
1124                 }                                                                               \
1125                                                                                                 \
1126                 src_line_top = &extended_src_line0[0];                                          \
1127                 src_line_bottom = &extended_src_line1[0];                                       \
1128             }                                                                                   \
1129                                                                                                 \
1130             /* Top & Bottom wrap around buffer */                                               \
1131             buf1[0] = src_line_top[src_width - 1];                                              \
1132             buf1[1] = src_line_top[0];                                                          \
1133             buf2[0] = src_line_bottom[src_width - 1];                                           \
1134             buf2[1] = src_line_bottom[0];                                                       \
1135                                                                                                 \
1136             width_remain = width;                                                               \
1137                                                                                                 \
1138             while (width_remain > 0)                                                            \
1139             {                                                                                   \
1140                 /* We use src_width_fixed because it can make vx in original source range */    \
1141                 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);                            \
1142                                                                                                 \
1143                 /* Wrap around part */                                                          \
1144                 if (pixman_fixed_to_int (vx) == src_width - 1)                                  \
1145                 {                                                                               \
1146                     /* for positive unit_x                                                      \
1147                      * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed           \
1148                      *                                                                          \
1149                      * vx is in range [0, src_width_fixed - pixman_fixed_e]                     \
1150                      * So we are safe from overflow.                                            \
1151                      */                                                                         \
1152                     num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1;        \
1153                                                                                                 \
1154                     if (num_pixels > width_remain)                                              \
1155                         num_pixels = width_remain;                                              \
1156                                                                                                 \
1157                     scanline_func (dst, mask, buf1, buf2, num_pixels,                           \
1158                                    weight1, weight2, pixman_fixed_frac(vx),                     \
1159                                    unit_x, src_width_fixed, FALSE);                             \
1160                                                                                                 \
1161                     width_remain -= num_pixels;                                                 \
1162                     vx += num_pixels * unit_x;                                                  \
1163                     dst += num_pixels;                                                          \
1164                                                                                                 \
1165                     if (flags & FLAG_HAVE_NON_SOLID_MASK)                                       \
1166                         mask += num_pixels;                                                     \
1167                                                                                                 \
1168                     repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);                        \
1169                 }                                                                               \
1170                                                                                                 \
1171                 /* Normal scanline composite */                                                 \
1172                 if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0)              \
1173                 {                                                                               \
1174                     /* for positive unit_x                                                      \
1175                      * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1)     \
1176                      *                                                                          \
1177                      * vx is in range [0, src_width_fixed - pixman_fixed_e]                     \
1178                      * So we are safe from overflow here.                                       \
1179                      */                                                                         \
1180                     num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e)      \
1181                                   / unit_x) + 1;                                                \
1182                                                                                                 \
1183                     if (num_pixels > width_remain)                                              \
1184                         num_pixels = width_remain;                                              \
1185                                                                                                 \
1186                     scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels,        \
1187                                    weight1, weight2, vx, unit_x, src_width_fixed, FALSE);       \
1188                                                                                                 \
1189                     width_remain -= num_pixels;                                                 \
1190                     vx += num_pixels * unit_x;                                                  \
1191                     dst += num_pixels;                                                          \
1192                                                                                                 \
1193                     if (flags & FLAG_HAVE_NON_SOLID_MASK)                                       \
1194                         mask += num_pixels;                                                     \
1195                 }                                                                               \
1196             }                                                                                   \
1197         }                                                                                       \
1198         else                                                                                    \
1199         {                                                                                       \
1200             scanline_func (dst, mask, src_first_line + src_stride * y1,                         \
1201                            src_first_line + src_stride * y2, width,                             \
1202                            weight1, weight2, vx, unit_x, max_vx, FALSE);                        \
1203         }                                                                                       \
1204     }                                                                                           \
1205 }
1206
1207 /* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
/*
 * FAST_BILINEAR_MAINLOOP_COMMON is a thin forwarding layer over
 * FAST_BILINEAR_MAINLOOP_INT: every argument is passed through unchanged,
 * except scale_func_name, which is token-pasted behind a leading underscore
 * ("_ ## scale_func_name") before being handed on.
 *
 * Arguments, as they are used by the main-loop body this macro expands into:
 *   scale_func_name - name fragment for the generated function (how it is
 *                     combined into the final identifier is decided at the
 *                     top of FAST_BILINEAR_MAINLOOP_INT).
 *   scanline_func   - per-scanline worker called for each output row segment.
 *   src_type_t      - source pixel type; also used for the small temporary
 *                     buffers built for padding and wrap-around.
 *   mask_type_t,
 *   dst_type_t      - presumably the mask and destination pixel types
 *                     (their use sits in the part of the main loop that
 *                     declares the line pointers -- confirm there).
 *   repeat_mode     - suffix pasted onto PIXMAN_REPEAT_ to select the
 *                     PAD / NONE / NORMAL / other code path.
 *   flags           - FLAG_HAVE_* bits; e.g. FLAG_HAVE_NON_SOLID_MASK makes
 *                     the loop advance a per-row mask pointer.
 */
1208 #define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,  \
1209                                   dst_type_t, repeat_mode, flags)                               \
1210         FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
1211                                   dst_type_t, repeat_mode, flags)
1212
/*
 * SCALED_BILINEAR_FLAGS is the set of source-image flags common to all the
 * SIMPLE_BILINEAR_*FAST_PATH_* table-entry macros in this file: each of them
 * ORs this value into the source-flags slot of the entry it builds and then
 * adds its own repeat / coverage requirement on top.
 *
 * Going by the flag names, the combination asks for a scale-only transform,
 * no alpha map, bilinear filtering, no accessor functions and a narrow pixel
 * format.  The FAST_PATH_* bits are defined outside this part of the file;
 * confirm their exact meaning there.
 *
 * The whole expression is parenthesized, so it can be combined with further
 * "|" terms safely.
 */
1213 #define SCALED_BILINEAR_FLAGS                                           \
1214     (FAST_PATH_SCALE_TRANSFORM  |                                       \
1215      FAST_PATH_NO_ALPHA_MAP     |                                       \
1216      FAST_PATH_BILINEAR_FILTER  |                                       \
1217      FAST_PATH_NO_ACCESSORS     |                                       \
1218      FAST_PATH_NARROW_FORMAT)
1219
/*
 * SIMPLE_BILINEAR_FAST_PATH_PAD (op, s, d, func)
 *
 * Expands to one brace-enclosed initializer for a fast-path table entry
 * (presumably a pixman_fast_path_t -- confirm against its declaration)
 * describing an unmasked, bilinear-scaled composite with a PAD-repeat source.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_.
 *   s    - source format suffix, pasted onto PIXMAN_.
 *   d    - destination format suffix, pasted onto PIXMAN_.
 *   func - middle part of the handler name.
 *
 * Fields, in order: operator; source format; source flags
 * (SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE);
 * PIXMAN_null and 0 in the two slots where the masked variants place their
 * mask format and mask flags; destination format; FAST_PATH_STD_DEST_FLAGS;
 * and the handler fast_composite_scaled_bilinear_<func>_pad_<op>, which must
 * already be declared where the macro is used.
 */
1220 #define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func)                      \
1221     {   PIXMAN_OP_ ## op,                                               \
1222         PIXMAN_ ## s,                                                   \
1223         (SCALED_BILINEAR_FLAGS          |                               \
1224          FAST_PATH_PAD_REPEAT           |                               \
1225          FAST_PATH_X_UNIT_POSITIVE),                                    \
1226         PIXMAN_null, 0,                                                 \
1227         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
1228         fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,     \
1229     }
1230
/*
 * SIMPLE_BILINEAR_FAST_PATH_NONE (op, s, d, func)
 *
 * Expands to one brace-enclosed initializer for a fast-path table entry
 * (presumably a pixman_fast_path_t -- confirm against its declaration)
 * describing an unmasked, bilinear-scaled composite with a NONE-repeat source.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_.
 *   s    - source format suffix, pasted onto PIXMAN_.
 *   d    - destination format suffix, pasted onto PIXMAN_.
 *   func - middle part of the handler name.
 *
 * Fields, in order: operator; source format; source flags
 * (SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE);
 * PIXMAN_null and 0 in the two slots where the masked variants place their
 * mask format and mask flags; destination format; FAST_PATH_STD_DEST_FLAGS;
 * and the handler fast_composite_scaled_bilinear_<func>_none_<op>, which must
 * already be declared where the macro is used.
 */
1231 #define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func)                     \
1232     {   PIXMAN_OP_ ## op,                                               \
1233         PIXMAN_ ## s,                                                   \
1234         (SCALED_BILINEAR_FLAGS          |                               \
1235          FAST_PATH_NONE_REPEAT          |                               \
1236          FAST_PATH_X_UNIT_POSITIVE),                                    \
1237         PIXMAN_null, 0,                                                 \
1238         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
1239         fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,    \
1240     }
1241
/*
 * SIMPLE_BILINEAR_FAST_PATH_COVER (op, s, d, func)
 *
 * Expands to one brace-enclosed initializer for a fast-path table entry
 * (presumably a pixman_fast_path_t -- confirm against its declaration)
 * describing an unmasked, bilinear-scaled composite for the "cover" case.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_.
 *   s    - source format suffix, pasted onto PIXMAN_.
 *   d    - destination format suffix, pasted onto PIXMAN_.
 *   func - middle part of the handler name.
 *
 * Unlike the PAD / NONE / NORMAL variants, the source flags carry no repeat
 * flag and no FAST_PATH_X_UNIT_POSITIVE; they are
 * SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR (by its name,
 * a guarantee that all bilinear samples fall inside the source -- confirm at
 * the flag's definition).
 *
 * Remaining fields, in order: PIXMAN_null and 0 in the two slots where the
 * masked variants place their mask format and mask flags; destination
 * format; FAST_PATH_STD_DEST_FLAGS; and the handler
 * fast_composite_scaled_bilinear_<func>_cover_<op>.
 */
1242 #define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func)                    \
1243     {   PIXMAN_OP_ ## op,                                               \
1244         PIXMAN_ ## s,                                                   \
1245         SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,  \
1246         PIXMAN_null, 0,                                                 \
1247         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
1248         fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,   \
1249     }
1250
/*
 * SIMPLE_BILINEAR_FAST_PATH_NORMAL (op, s, d, func)
 *
 * Expands to one brace-enclosed initializer for a fast-path table entry
 * (presumably a pixman_fast_path_t -- confirm against its declaration)
 * describing an unmasked, bilinear-scaled composite with a NORMAL-repeat
 * source.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_.
 *   s    - source format suffix, pasted onto PIXMAN_.
 *   d    - destination format suffix, pasted onto PIXMAN_.
 *   func - middle part of the handler name.
 *
 * Fields, in order: operator; source format; source flags
 * (SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT |
 * FAST_PATH_X_UNIT_POSITIVE); PIXMAN_null and 0 in the two slots where the
 * masked variants place their mask format and mask flags; destination
 * format; FAST_PATH_STD_DEST_FLAGS; and the handler
 * fast_composite_scaled_bilinear_<func>_normal_<op>.
 *
 * The NORMAL-repeat branch of the bilinear main loop computes its pixel
 * counts "for positive unit_x"; FAST_PATH_X_UNIT_POSITIVE is presumably what
 * guarantees that precondition for entries built here.
 */
1251 #define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func)                   \
1252     {   PIXMAN_OP_ ## op,                                               \
1253         PIXMAN_ ## s,                                                   \
1254         (SCALED_BILINEAR_FLAGS          |                               \
1255          FAST_PATH_NORMAL_REPEAT        |                               \
1256          FAST_PATH_X_UNIT_POSITIVE),                                    \
1257         PIXMAN_null, 0,                                                 \
1258         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
1259         fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,  \
1260     }
1261
/*
 * SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op, s, d, func)
 *
 * Same entry layout as SIMPLE_BILINEAR_FAST_PATH_PAD, but for a composite
 * that takes an a8 mask: the two slots that hold "PIXMAN_null, 0" in the
 * unmasked macro hold PIXMAN_a8 and MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA)
 * here (MASK_FLAGS is defined elsewhere).
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_.
 *   s    - source format suffix, pasted onto PIXMAN_.
 *   d    - destination format suffix, pasted onto PIXMAN_.
 *   func - middle part of the handler name.
 *
 * Source flags are SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT |
 * FAST_PATH_X_UNIT_POSITIVE.  The handler named is
 * fast_composite_scaled_bilinear_<func>_pad_<op> -- the same name pattern
 * the unmasked macro produces, so callers distinguish the two through the
 * value they pass as func.
 */
1262 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func)              \
1263     {   PIXMAN_OP_ ## op,                                               \
1264         PIXMAN_ ## s,                                                   \
1265         (SCALED_BILINEAR_FLAGS          |                               \
1266          FAST_PATH_PAD_REPEAT           |                               \
1267          FAST_PATH_X_UNIT_POSITIVE),                                    \
1268         PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
1269         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
1270         fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,     \
1271     }
1272
/*
 * SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op, s, d, func)
 *
 * Same entry layout as SIMPLE_BILINEAR_FAST_PATH_NONE, but for a composite
 * that takes an a8 mask: the two slots that hold "PIXMAN_null, 0" in the
 * unmasked macro hold PIXMAN_a8 and MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA)
 * here (MASK_FLAGS is defined elsewhere).
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_.
 *   s    - source format suffix, pasted onto PIXMAN_.
 *   d    - destination format suffix, pasted onto PIXMAN_.
 *   func - middle part of the handler name.
 *
 * Source flags are SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT |
 * FAST_PATH_X_UNIT_POSITIVE.  The handler named is
 * fast_composite_scaled_bilinear_<func>_none_<op>.
 */
1273 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func)             \
1274     {   PIXMAN_OP_ ## op,                                               \
1275         PIXMAN_ ## s,                                                   \
1276         (SCALED_BILINEAR_FLAGS          |                               \
1277          FAST_PATH_NONE_REPEAT          |                               \
1278          FAST_PATH_X_UNIT_POSITIVE),                                    \
1279         PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
1280         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
1281         fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,    \
1282     }
1283
/*
 * SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op, s, d, func)
 *
 * Same entry layout as SIMPLE_BILINEAR_FAST_PATH_COVER, but for a composite
 * that takes an a8 mask: the two slots that hold "PIXMAN_null, 0" in the
 * unmasked macro hold PIXMAN_a8 and MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA)
 * here (MASK_FLAGS is defined elsewhere).
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_.
 *   s    - source format suffix, pasted onto PIXMAN_.
 *   d    - destination format suffix, pasted onto PIXMAN_.
 *   func - middle part of the handler name.
 *
 * Source flags are SCALED_BILINEAR_FLAGS |
 * FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, with no repeat flag and no
 * FAST_PATH_X_UNIT_POSITIVE.  The handler named is
 * fast_composite_scaled_bilinear_<func>_cover_<op>.
 */
1284 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func)            \
1285     {   PIXMAN_OP_ ## op,                                               \
1286         PIXMAN_ ## s,                                                   \
1287         SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,  \
1288         PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
1289         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
1290         fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,   \
1291     }
1292
/*
 * Initializer for one bilinear-scaling fast path entry with an a8 mask and
 * the NORMAL repeat flag (presumably a pixman_fast_path_t element; the type
 * is declared outside this part of the file).
 *
 * Fields of the brace-enclosed expansion, in order:
 *   - PIXMAN_OP_<op>
 *   - PIXMAN_<s>, followed by the source flags
 *     SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT |
 *     FAST_PATH_X_UNIT_POSITIVE
 *   - PIXMAN_a8, followed by MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA)
 *   - PIXMAN_<d>, followed by FAST_PATH_STD_DEST_FLAGS
 *   - fast_composite_scaled_bilinear_<func>_normal_<op>
 *
 * op, s, d and func are pasted into identifiers with ##, so they must be
 * bare tokens, not expressions.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)           \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_NORMAL_REPEAT        |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,  \
    }
1303
/*
 * Initializer for one bilinear-scaling fast path entry with a solid mask and
 * the PAD repeat flag (presumably a pixman_fast_path_t element; the type is
 * declared outside this part of the file).
 *
 * Fields of the brace-enclosed expansion, in order:
 *   - PIXMAN_OP_<op>
 *   - PIXMAN_<s>, followed by the source flags
 *     SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE
 *   - PIXMAN_solid, followed by MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)
 *   - PIXMAN_<d>, followed by FAST_PATH_STD_DEST_FLAGS
 *   - fast_composite_scaled_bilinear_<func>_pad_<op>
 *
 * op, s, d and func are pasted into identifiers with ##, so they must be
 * bare tokens, not expressions.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)           \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_PAD_REPEAT           |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,     \
    }
1314
/*
 * Initializer for one bilinear-scaling fast path entry with a solid mask and
 * the NONE repeat flag (presumably a pixman_fast_path_t element; the type is
 * declared outside this part of the file).
 *
 * Fields of the brace-enclosed expansion, in order:
 *   - PIXMAN_OP_<op>
 *   - PIXMAN_<s>, followed by the source flags
 *     SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE
 *   - PIXMAN_solid, followed by MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)
 *   - PIXMAN_<d>, followed by FAST_PATH_STD_DEST_FLAGS
 *   - fast_composite_scaled_bilinear_<func>_none_<op>
 *
 * op, s, d and func are pasted into identifiers with ##, so they must be
 * bare tokens, not expressions.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)          \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_NONE_REPEAT          |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,    \
    }
1325
/*
 * Initializer for one bilinear-scaling fast path entry with a solid mask for
 * the "cover" case (presumably a pixman_fast_path_t element; the type is
 * declared outside this part of the file).
 *
 * Fields of the brace-enclosed expansion, in order:
 *   - PIXMAN_OP_<op>
 *   - PIXMAN_<s>, followed by the source flags
 *     SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR
 *   - PIXMAN_solid, followed by MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)
 *   - PIXMAN_<d>, followed by FAST_PATH_STD_DEST_FLAGS
 *   - fast_composite_scaled_bilinear_<func>_cover_<op>
 *
 * Unlike the PAD/NONE/NORMAL variants, no repeat flag and no
 * FAST_PATH_X_UNIT_POSITIVE appear in the source flags.  The flags
 * expression is parenthesized like in those variants.
 *
 * op, s, d and func are pasted into identifiers with ##, so they must be
 * bare tokens, not expressions.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)         \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),                        \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,   \
    }
1334
/*
 * Initializer for one bilinear-scaling fast path entry with a solid mask and
 * the NORMAL repeat flag (presumably a pixman_fast_path_t element; the type
 * is declared outside this part of the file).
 *
 * Fields of the brace-enclosed expansion, in order:
 *   - PIXMAN_OP_<op>
 *   - PIXMAN_<s>, followed by the source flags
 *     SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT |
 *     FAST_PATH_X_UNIT_POSITIVE
 *   - PIXMAN_solid, followed by MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)
 *   - PIXMAN_<d>, followed by FAST_PATH_STD_DEST_FLAGS
 *   - fast_composite_scaled_bilinear_<func>_normal_<op>
 *
 * op, s, d and func are pasted into identifiers with ##, so they must be
 * bare tokens, not expressions.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)        \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS          |                               \
         FAST_PATH_NORMAL_REPEAT        |                               \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,  \
    }
1345
/*
 * Expands to four comma-separated entries, one per variant, in the order
 * COVER, NONE, PAD, NORMAL.  Each entry receives the same (op, s, d, func)
 * arguments.  There is no trailing comma after the last entry.
 *
 * Prefer the use of 'cover' variant, because it is faster: it is emitted
 * first (presumably so that an in-order table lookup reaches it before the
 * repeat-specific variants -- confirm against the lookup code).
 */
#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func)                          \
    SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func),                      \
    SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func),                       \
    SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func),                        \
    SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
1352
/*
 * Expands to the four a8-mask entries, comma-separated, in the order
 * COVER, NONE, PAD, NORMAL.  Each entry receives the same (op, s, d, func)
 * arguments.  There is no trailing comma after the last entry.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func)                  \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func),              \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func),               \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func),                \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
1358
/*
 * Expands to the four solid-mask entries, comma-separated, in the order
 * COVER, NONE, PAD, NORMAL.  Each entry receives the same (op, s, d, func)
 * arguments.  There is no trailing comma after the last entry.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func)               \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),           \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),            \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func),             \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
1364
1365 #endif