Fix rounding for DIV_UNc()
[profile/ivi/pixman.git] / pixman / pixman-inlines.h
1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3  * Copyright © 2000 SuSE, Inc.
4  * Copyright © 2007 Red Hat, Inc.
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of SuSE not be used in advertising or
11  * publicity pertaining to distribution of the software without specific,
12  * written prior permission.  SuSE makes no representations about the
13  * suitability of this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  *
16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  * Author:  Keith Packard, SuSE, Inc.
24  */
25
26 #ifndef PIXMAN_FAST_PATH_H__
27 #define PIXMAN_FAST_PATH_H__
28
29 #include "pixman-private.h"
30
31 #define PIXMAN_REPEAT_COVER -1
32
33 /* Flags describing input parameters to fast path macro template.
34  * Turning on some flag values may indicate that
35  * "some property X is available so template can use this" or
36  * "some property X should be handled by template".
37  *
38  * FLAG_HAVE_SOLID_MASK
39  *  Input mask is solid so template should handle this.
40  *
41  * FLAG_HAVE_NON_SOLID_MASK
42  *  Input mask is bits mask so template should handle this.
43  *
44  * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
45  * exclusive. (It's not allowed to turn both flags on)
46  */
47 #define FLAG_NONE                               (0)
48 #define FLAG_HAVE_SOLID_MASK                    (1 <<   1)
49 #define FLAG_HAVE_NON_SOLID_MASK                (1 <<   2)
50
51 /* To avoid repeated scanline function calls that are too short, extend
52  * source scanlines whose width is less than the constant below.
53  */
54 #define REPEAT_NORMAL_MIN_WIDTH                 64
55
56 static force_inline pixman_bool_t
57 repeat (pixman_repeat_t repeat, int *c, int size)
58 {
59     if (repeat == PIXMAN_REPEAT_NONE)
60     {
61         if (*c < 0 || *c >= size)
62             return FALSE;
63     }
64     else if (repeat == PIXMAN_REPEAT_NORMAL)
65     {
66         while (*c >= size)
67             *c -= size;
68         while (*c < 0)
69             *c += size;
70     }
71     else if (repeat == PIXMAN_REPEAT_PAD)
72     {
73         *c = CLIP (*c, 0, size - 1);
74     }
75     else /* REFLECT */
76     {
77         *c = MOD (*c, size * 2);
78         if (*c >= size)
79             *c = size * 2 - *c - 1;
80     }
81     return TRUE;
82 }
83
84 #if SIZEOF_LONG > 4
85
86 static force_inline uint32_t
87 bilinear_interpolation (uint32_t tl, uint32_t tr,
88                         uint32_t bl, uint32_t br,
89                         int distx, int disty)
90 {
91     uint64_t distxy, distxiy, distixy, distixiy;
92     uint64_t tl64, tr64, bl64, br64;
93     uint64_t f, r;
94
95     distxy = distx * disty;
96     distxiy = distx * (256 - disty);
97     distixy = (256 - distx) * disty;
98     distixiy = (256 - distx) * (256 - disty);
99
100     /* Alpha and Blue */
101     tl64 = tl & 0xff0000ff;
102     tr64 = tr & 0xff0000ff;
103     bl64 = bl & 0xff0000ff;
104     br64 = br & 0xff0000ff;
105
106     f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
107     r = f & 0x0000ff0000ff0000ull;
108
109     /* Red and Green */
110     tl64 = tl;
111     tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
112
113     tr64 = tr;
114     tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
115
116     bl64 = bl;
117     bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
118
119     br64 = br;
120     br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
121
122     f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
123     r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
124
125     return (uint32_t)(r >> 16);
126 }
127
128 #else
129
130 static force_inline uint32_t
131 bilinear_interpolation (uint32_t tl, uint32_t tr,
132                         uint32_t bl, uint32_t br,
133                         int distx, int disty)
134 {
135     int distxy, distxiy, distixy, distixiy;
136     uint32_t f, r;
137
138     distxy = distx * disty;
139     distxiy = (distx << 8) - distxy;    /* distx * (256 - disty) */
140     distixy = (disty << 8) - distxy;    /* disty * (256 - distx) */
141     distixiy =
142         256 * 256 - (disty << 8) -
143         (distx << 8) + distxy;          /* (256 - distx) * (256 - disty) */
144
145     /* Blue */
146     r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
147       + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
148
149     /* Green */
150     f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
151       + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
152     r |= f & 0xff000000;
153
154     tl >>= 16;
155     tr >>= 16;
156     bl >>= 16;
157     br >>= 16;
158     r >>= 16;
159
160     /* Red */
161     f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
162       + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
163     r |= f & 0x00ff0000;
164
165     /* Alpha */
166     f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
167       + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
168     r |= f & 0xff000000;
169
170     return r;
171 }
172
173 #endif
174
175 /*
176  * For each scanline fetched from source image with PAD repeat:
177  * - calculate how many pixels need to be padded on the left side
178  * - calculate how many pixels need to be padded on the right side
179  * - update width to only count pixels which are fetched from the image
180  * All this information is returned via 'width', 'left_pad', 'right_pad'
181  * arguments. The code is assuming that 'unit_x' is positive.
182  *
183  * Note: 64-bit math is used in order to avoid potential overflows, which
184  *       is probably excessive in many cases. This particular function
185  *       may need its own correctness test and performance tuning.
186  */
187 static force_inline void
188 pad_repeat_get_scanline_bounds (int32_t         source_image_width,
189                                 pixman_fixed_t  vx,
190                                 pixman_fixed_t  unit_x,
191                                 int32_t *       width,
192                                 int32_t *       left_pad,
193                                 int32_t *       right_pad)
194 {
195     int64_t max_vx = (int64_t) source_image_width << 16;
196     int64_t tmp;
197     if (vx < 0)
198     {
199         tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
200         if (tmp > *width)
201         {
202             *left_pad = *width;
203             *width = 0;
204         }
205         else
206         {
207             *left_pad = (int32_t) tmp;
208             *width -= (int32_t) tmp;
209         }
210     }
211     else
212     {
213         *left_pad = 0;
214     }
215     tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
216     if (tmp < 0)
217     {
218         *right_pad = *width;
219         *width = 0;
220     }
221     else if (tmp >= *width)
222     {
223         *right_pad = 0;
224     }
225     else
226     {
227         *right_pad = *width - (int32_t) tmp;
228         *width = (int32_t) tmp;
229     }
230 }
231
/* Macro templates for specialized nearest scalers covering some common
 * 8888 and 565 formats. The SRC and OVER operators are supported.
 *
 * There are two repeat versions: one that handles NORMAL repeat, and one
 * without any repeat handling, which only works if the part of the source
 * that is used is completely covered by the pre-repeated source samples.
 *
 * The loops are unrolled to process two pixels per iteration, which gives
 * better performance on most CPU architectures (superscalar processors
 * can issue several operations simultaneously, other processors can hide
 * instruction latencies by pipelining operations). Unrolling any further
 * does not make much sense because the compiler soon starts running out
 * of spare registers.
 */

#define GET_8888_ALPHA(s) ((s) >> 24)
 /* There is no OVER with a 565 source, so this one is never actually
    used; it only has to exist for the templates to build. */
#define GET_0565_ALPHA(s) 0xff
#define GET_x888_ALPHA(s) 0xff

/*
 * FAST_NEAREST_SCANLINE defines a static scanline function named
 * 'scanline_func_name' that writes 'w' pixels to 'dst'. For every pixel the
 * source sample src[vx >> 16] is taken and vx is advanced by 'unit_x'; when
 * 'repeat_mode' is NORMAL, vx is wrapped back below 'max_vx' after each
 * step. With OP == SRC the sample is converted and stored; with OP == OVER
 * it is composited onto the destination pixel (opaque samples are stored
 * directly, all-zero samples are skipped). Any other OP calls abort().
 * With OVER and 'fully_transparent_src' set the function returns at once.
 */
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
                              src_type_t, dst_type_t, OP, repeat_mode) \
static force_inline void \
scanline_func_name (dst_type_t       *dst, \
                    const src_type_t *src, \
                    int32_t           w, \
                    pixman_fixed_t    vx, \
                    pixman_fixed_t    unit_x, \
                    pixman_fixed_t    max_vx, \
                    pixman_bool_t     fully_transparent_src) \
{ \
    uint32_t   blend; \
    src_type_t px0, px1; \
    uint8_t    alpha0, alpha1; \
    \
    /* OVER with a fully transparent source never touches the destination */ \
    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
        return; \
    \
    /* Only the SRC and OVER operators are implemented here */ \
    if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
        abort(); \
    \
    /* Main loop: two destination pixels per iteration */ \
    for (; w >= 2; w -= 2) \
    { \
        px0 = src[vx >> 16]; \
        vx += unit_x; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
        { \
            /* Wrapping downwards is enough: unit_x is known to be positive */ \
            while (vx >= max_vx) \
                vx -= max_vx; \
        } \
        \
        px1 = src[vx >> 16]; \
        vx += unit_x; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
        { \
            /* Wrapping downwards is enough: unit_x is known to be positive */ \
            while (vx >= max_vx) \
                vx -= max_vx; \
        } \
        \
        if (PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
        { \
            /* PIXMAN_OP_SRC: plain format conversion */ \
            *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (px0); \
            *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (px1); \
        } \
        else \
        { \
            alpha0 = GET_ ## SRC_FORMAT ## _ALPHA(px0); \
            alpha1 = GET_ ## SRC_FORMAT ## _ALPHA(px1); \
            \
            /* First pixel: opaque -> store, zero -> skip, else blend */ \
            if (alpha0 == 0xff) \
            { \
                *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (px0); \
            } \
            else if (px0) \
            { \
                blend = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
                px0 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (px0); \
                alpha0 ^= 0xff; \
                UN8x4_MUL_UN8_ADD_UN8x4 (blend, alpha0, px0); \
                *dst = CONVERT_8888_TO_ ## DST_FORMAT (blend); \
            } \
            dst++; \
            \
            /* Second pixel, same treatment */ \
            if (alpha1 == 0xff) \
            { \
                *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (px1); \
            } \
            else if (px1) \
            { \
                blend = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
                px1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (px1); \
                alpha1 ^= 0xff; \
                UN8x4_MUL_UN8_ADD_UN8x4 (blend, alpha1, px1); \
                *dst = CONVERT_8888_TO_ ## DST_FORMAT (blend); \
            } \
            dst++; \
        } \
    } \
    \
    /* An odd width leaves exactly one trailing pixel */ \
    if (w & 1) \
    { \
        px0 = src[vx >> 16]; \
        \
        if (PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
        { \
            /* PIXMAN_OP_SRC */ \
            *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (px0); \
        } \
        else \
        { \
            alpha0 = GET_ ## SRC_FORMAT ## _ALPHA(px0); \
            \
            if (alpha0 == 0xff) \
            { \
                *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (px0); \
            } \
            else if (px0) \
            { \
                blend = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
                px0 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (px0); \
                alpha0 ^= 0xff; \
                UN8x4_MUL_UN8_ADD_UN8x4 (blend, alpha0, px0); \
                *dst = CONVERT_8888_TO_ ## DST_FORMAT (blend); \
            } \
            dst++; \
        } \
    } \
}
366
367 #define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,      \
368                                   dst_type_t, repeat_mode, have_mask, mask_is_solid)            \
369 static void                                                                                     \
370 fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,                \
371                                                    pixman_composite_info_t *info)               \
372 {                                                                                               \
373     PIXMAN_COMPOSITE_ARGS (info);                                                               \
374     dst_type_t *dst_line;                                                                       \
375     mask_type_t *mask_line;                                                                     \
376     src_type_t *src_first_line;                                                                 \
377     int       y;                                                                                \
378     pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */            \
379     pixman_fixed_t max_vy;                                                                      \
380     pixman_vector_t v;                                                                          \
381     pixman_fixed_t vx, vy;                                                                      \
382     pixman_fixed_t unit_x, unit_y;                                                              \
383     int32_t left_pad, right_pad;                                                                \
384                                                                                                 \
385     src_type_t *src;                                                                            \
386     dst_type_t *dst;                                                                            \
387     mask_type_t solid_mask;                                                                     \
388     const mask_type_t *mask = &solid_mask;                                                      \
389     int src_stride, mask_stride, dst_stride;                                                    \
390                                                                                                 \
391     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);    \
392     if (have_mask)                                                                              \
393     {                                                                                           \
394         if (mask_is_solid)                                                                      \
395             solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);    \
396         else                                                                                    \
397             PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,                     \
398                                    mask_stride, mask_line, 1);                                  \
399     }                                                                                           \
400     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be                  \
401      * transformed from destination space to source space */                                    \
402     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);         \
403                                                                                                 \
404     /* reference point is the center of the pixel */                                            \
405     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;                             \
406     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;                             \
407     v.vector[2] = pixman_fixed_1;                                                               \
408                                                                                                 \
409     if (!pixman_transform_point_3d (src_image->common.transform, &v))                           \
410         return;                                                                                 \
411                                                                                                 \
412     unit_x = src_image->common.transform->matrix[0][0];                                         \
413     unit_y = src_image->common.transform->matrix[1][1];                                         \
414                                                                                                 \
415     /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */                   \
416     v.vector[0] -= pixman_fixed_e;                                                              \
417     v.vector[1] -= pixman_fixed_e;                                                              \
418                                                                                                 \
419     vx = v.vector[0];                                                                           \
420     vy = v.vector[1];                                                                           \
421                                                                                                 \
422     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                                  \
423     {                                                                                           \
424         /* Clamp repeating positions inside the actual samples */                               \
425         max_vx = src_image->bits.width << 16;                                                   \
426         max_vy = src_image->bits.height << 16;                                                  \
427                                                                                                 \
428         repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx);                                             \
429         repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                             \
430     }                                                                                           \
431                                                                                                 \
432     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||                                   \
433         PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                                    \
434     {                                                                                           \
435         pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,                      \
436                                         &width, &left_pad, &right_pad);                         \
437         vx += left_pad * unit_x;                                                                \
438     }                                                                                           \
439                                                                                                 \
440     while (--height >= 0)                                                                       \
441     {                                                                                           \
442         dst = dst_line;                                                                         \
443         dst_line += dst_stride;                                                                 \
444         if (have_mask && !mask_is_solid)                                                        \
445         {                                                                                       \
446             mask = mask_line;                                                                   \
447             mask_line += mask_stride;                                                           \
448         }                                                                                       \
449                                                                                                 \
450         y = vy >> 16;                                                                           \
451         vy += unit_y;                                                                           \
452         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                              \
453             repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                         \
454         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)                                 \
455         {                                                                                       \
456             repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);                             \
457             src = src_first_line + src_stride * y;                                              \
458             if (left_pad > 0)                                                                   \
459             {                                                                                   \
460                 scanline_func (mask, dst, src, left_pad, 0, 0, 0, FALSE);                       \
461             }                                                                                   \
462             if (width > 0)                                                                      \
463             {                                                                                   \
464                 scanline_func (mask + (mask_is_solid ? 0 : left_pad),                           \
465                                dst + left_pad, src, width, vx, unit_x, 0, FALSE);               \
466             }                                                                                   \
467             if (right_pad > 0)                                                                  \
468             {                                                                                   \
469                 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),                   \
470                                dst + left_pad + width, src + src_image->bits.width - 1,         \
471                                right_pad, 0, 0, 0, FALSE);                                      \
472             }                                                                                   \
473         }                                                                                       \
474         else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                           \
475         {                                                                                       \
476             static const src_type_t zero[1] = { 0 };                                            \
477             if (y < 0 || y >= src_image->bits.height)                                           \
478             {                                                                                   \
479                 scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0, TRUE);   \
480                 continue;                                                                       \
481             }                                                                                   \
482             src = src_first_line + src_stride * y;                                              \
483             if (left_pad > 0)                                                                   \
484             {                                                                                   \
485                 scanline_func (mask, dst, zero, left_pad, 0, 0, 0, TRUE);                       \
486             }                                                                                   \
487             if (width > 0)                                                                      \
488             {                                                                                   \
489                 scanline_func (mask + (mask_is_solid ? 0 : left_pad),                           \
490                                dst + left_pad, src, width, vx, unit_x, 0, FALSE);               \
491             }                                                                                   \
492             if (right_pad > 0)                                                                  \
493             {                                                                                   \
494                 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),                   \
495                                dst + left_pad + width, zero, right_pad, 0, 0, 0, TRUE);         \
496             }                                                                                   \
497         }                                                                                       \
498         else                                                                                    \
499         {                                                                                       \
500             src = src_first_line + src_stride * y;                                              \
501             scanline_func (mask, dst, src, width, vx, unit_x, max_vx, FALSE);                   \
502         }                                                                                       \
503     }                                                                                           \
504 }
505
506 /* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764
 *
 * Forwards all eight arguments to FAST_NEAREST_MAINLOOP_INT in the same
 * order; the only difference is that an underscore is token-pasted in front
 * of scale_func_name before it is passed on.
 */
507 #define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,   \
508                                   dst_type_t, repeat_mode, have_mask, mask_is_solid)            \
509         FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
510                                   dst_type_t, repeat_mode, have_mask, mask_is_solid)
511
/*
 * Instantiate the nearest-scaling main loop for a scanline function that
 * takes no mask argument.
 *
 * Two things are emitted:
 *  1. A static force_inline adapter named
 *     <scanline_func><scale_func_name>_wrapper. It has the mask-taking
 *     signature (mask, dst, src, w, vx, unit_x, max_vx,
 *     fully_transparent_src), ignores 'mask' and passes the remaining
 *     arguments on to scanline_func unchanged.
 *  2. An expansion of FAST_NEAREST_MAINLOOP_INT that uses this adapter as
 *     its scanline function, uint8_t as the mask type, and FALSE for both
 *     of the two trailing arguments (named have_mask and mask_is_solid in
 *     FAST_NEAREST_MAINLOOP_COMMON).
 *
 * scale_func_name is passed on as is (no underscore is prepended here).
 */
512 #define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t,    \
513                               repeat_mode)                                                      \
514     static force_inline void                                                                    \
515     scanline_func##scale_func_name##_wrapper (                                                  \
516                     const uint8_t    *mask,                                                     \
517                     dst_type_t       *dst,                                                      \
518                     const src_type_t *src,                                                      \
519                     int32_t          w,                                                         \
520                     pixman_fixed_t   vx,                                                        \
521                     pixman_fixed_t   unit_x,                                                    \
522                     pixman_fixed_t   max_vx,                                                    \
523                     pixman_bool_t    fully_transparent_src)                                     \
524     {                                                                                           \
525         scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src);                 \
526     }                                                                                           \
527     FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper,       \
528                                src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
529
/*
 * Same as FAST_NEAREST_MAINLOOP_NOMASK, except that an underscore is
 * token-pasted in front of scale_func_name before forwarding. All other
 * arguments are passed through unchanged.
 */
530 #define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t,           \
531                               repeat_mode)                                                      \
532         FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t,           \
533                               dst_type_t, repeat_mode)
534
/*
 * One-shot generator for an unmasked nearest-scaling fast path.
 *
 * Expands, in this order, to:
 *  - FAST_NEAREST_SCANLINE (defined elsewhere in this file), asked to
 *    produce a scanline function named
 *    scaled_nearest_scanline_<scale_func_name>_<OP> for the given
 *    SRC_FORMAT/DST_FORMAT, pixel types, OP and repeat_mode;
 *  - FAST_NEAREST_MAINLOOP_NOMASK, given the name suffix
 *    _<scale_func_name>_<OP> and that same scanline function.
 */
535 #define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,                           \
536                      src_type_t, dst_type_t, OP, repeat_mode)                           \
537     FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,       \
538                           SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t,               \
539                           OP, repeat_mode)                                              \
540     FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP,                       \
541                           scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,       \
542                           src_type_t, dst_type_t, repeat_mode)
543
544
/*
 * Bitwise OR of the five FAST_PATH_* flags shared by every
 * SIMPLE_NEAREST_*_FAST_PATH_* entry macro in this file. Those macros place
 * it, combined with a repeat-specific flag, in the field that follows the
 * source format.
 */
545 #define SCALED_NEAREST_FLAGS                                            \
546     (FAST_PATH_SCALE_TRANSFORM  |                                       \
547      FAST_PATH_NO_ALPHA_MAP     |                                       \
548      FAST_PATH_NEAREST_FILTER   |                                       \
549      FAST_PATH_NO_ACCESSORS     |                                       \
550      FAST_PATH_NARROW_FORMAT)
551
/*
 * Expands to one brace-enclosed initializer (presumably a fast path table
 * entry; the struct itself is not declared in this part of the file) for
 * unmasked nearest scaling with NORMAL repeat:
 *   operator     PIXMAN_OP_<op>
 *   source       PIXMAN_<s>, flags SCALED_NEAREST_FLAGS |
 *                FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE
 *   mask         PIXMAN_null, flags 0
 *   destination  PIXMAN_<d>, flags FAST_PATH_STD_DEST_FLAGS
 *   function     fast_composite_scaled_nearest_<func>_normal_<op>
 */
552 #define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func)                    \
553     {   PIXMAN_OP_ ## op,                                               \
554         PIXMAN_ ## s,                                                   \
555         (SCALED_NEAREST_FLAGS           |                               \
556          FAST_PATH_NORMAL_REPEAT        |                               \
557          FAST_PATH_X_UNIT_POSITIVE),                                    \
558         PIXMAN_null, 0,                                                 \
559         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
560         fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,   \
561     }
562
/*
 * Expands to one brace-enclosed initializer (presumably a fast path table
 * entry; the struct itself is not declared in this part of the file) for
 * unmasked nearest scaling with PAD repeat:
 *   operator     PIXMAN_OP_<op>
 *   source       PIXMAN_<s>, flags SCALED_NEAREST_FLAGS |
 *                FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE
 *   mask         PIXMAN_null, flags 0
 *   destination  PIXMAN_<d>, flags FAST_PATH_STD_DEST_FLAGS
 *   function     fast_composite_scaled_nearest_<func>_pad_<op>
 */
563 #define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func)                       \
564     {   PIXMAN_OP_ ## op,                                               \
565         PIXMAN_ ## s,                                                   \
566         (SCALED_NEAREST_FLAGS           |                               \
567          FAST_PATH_PAD_REPEAT           |                               \
568          FAST_PATH_X_UNIT_POSITIVE),                                    \
569         PIXMAN_null, 0,                                                 \
570         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
571         fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,      \
572     }
573
/*
 * Expands to one brace-enclosed initializer (presumably a fast path table
 * entry; the struct itself is not declared in this part of the file) for
 * unmasked nearest scaling with NONE repeat:
 *   operator     PIXMAN_OP_<op>
 *   source       PIXMAN_<s>, flags SCALED_NEAREST_FLAGS |
 *                FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE
 *   mask         PIXMAN_null, flags 0
 *   destination  PIXMAN_<d>, flags FAST_PATH_STD_DEST_FLAGS
 *   function     fast_composite_scaled_nearest_<func>_none_<op>
 */
574 #define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func)                      \
575     {   PIXMAN_OP_ ## op,                                               \
576         PIXMAN_ ## s,                                                   \
577         (SCALED_NEAREST_FLAGS           |                               \
578          FAST_PATH_NONE_REPEAT          |                               \
579          FAST_PATH_X_UNIT_POSITIVE),                                    \
580         PIXMAN_null, 0,                                                 \
581         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
582         fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,     \
583     }
584
/*
 * Expands to one brace-enclosed initializer (presumably a fast path table
 * entry; the struct itself is not declared in this part of the file) for
 * the unmasked 'cover' variant of nearest scaling:
 *   operator     PIXMAN_OP_<op>
 *   source       PIXMAN_<s>, flags SCALED_NEAREST_FLAGS |
 *                FAST_PATH_SAMPLES_COVER_CLIP_NEAREST
 *   mask         PIXMAN_null, flags 0
 *   destination  PIXMAN_<d>, flags FAST_PATH_STD_DEST_FLAGS
 *   function     fast_composite_scaled_nearest_<func>_cover_<op>
 *
 * Unlike the NORMAL/PAD/NONE variants, no repeat flag and no
 * FAST_PATH_X_UNIT_POSITIVE flag is included.
 */
585 #define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func)                     \
586     {   PIXMAN_OP_ ## op,                                               \
587         PIXMAN_ ## s,                                                   \
588         SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,    \
589         PIXMAN_null, 0,                                                 \
590         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
591         fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,    \
592     }
593
/*
 * Expands to one brace-enclosed initializer (presumably a fast path table
 * entry; the struct itself is not declared in this part of the file) for
 * nearest scaling with an a8 mask and NORMAL repeat:
 *   operator     PIXMAN_OP_<op>
 *   source       PIXMAN_<s>, flags SCALED_NEAREST_FLAGS |
 *                FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE
 *   mask         PIXMAN_a8, flags MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA)
 *   destination  PIXMAN_<d>, flags FAST_PATH_STD_DEST_FLAGS
 *   function     fast_composite_scaled_nearest_<func>_normal_<op>
 */
594 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)            \
595     {   PIXMAN_OP_ ## op,                                               \
596         PIXMAN_ ## s,                                                   \
597         (SCALED_NEAREST_FLAGS           |                               \
598          FAST_PATH_NORMAL_REPEAT        |                               \
599          FAST_PATH_X_UNIT_POSITIVE),                                    \
600         PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
601         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
602         fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,   \
603     }
604
/*
 * Expands to one brace-enclosed initializer (presumably a fast path table
 * entry; the struct itself is not declared in this part of the file) for
 * nearest scaling with an a8 mask and PAD repeat:
 *   operator     PIXMAN_OP_<op>
 *   source       PIXMAN_<s>, flags SCALED_NEAREST_FLAGS |
 *                FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE
 *   mask         PIXMAN_a8, flags MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA)
 *   destination  PIXMAN_<d>, flags FAST_PATH_STD_DEST_FLAGS
 *   function     fast_composite_scaled_nearest_<func>_pad_<op>
 */
605 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func)               \
606     {   PIXMAN_OP_ ## op,                                               \
607         PIXMAN_ ## s,                                                   \
608         (SCALED_NEAREST_FLAGS           |                               \
609          FAST_PATH_PAD_REPEAT           |                               \
610          FAST_PATH_X_UNIT_POSITIVE),                                    \
611         PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
612         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
613         fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,      \
614     }
615
/*
 * Expands to one brace-enclosed initializer (presumably a fast path table
 * entry; the struct itself is not declared in this part of the file) for
 * nearest scaling with an a8 mask and NONE repeat:
 *   operator     PIXMAN_OP_<op>
 *   source       PIXMAN_<s>, flags SCALED_NEAREST_FLAGS |
 *                FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE
 *   mask         PIXMAN_a8, flags MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA)
 *   destination  PIXMAN_<d>, flags FAST_PATH_STD_DEST_FLAGS
 *   function     fast_composite_scaled_nearest_<func>_none_<op>
 */
616 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func)              \
617     {   PIXMAN_OP_ ## op,                                               \
618         PIXMAN_ ## s,                                                   \
619         (SCALED_NEAREST_FLAGS           |                               \
620          FAST_PATH_NONE_REPEAT          |                               \
621          FAST_PATH_X_UNIT_POSITIVE),                                    \
622         PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
623         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
624         fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,     \
625     }
626
/*
 * Expands to one brace-enclosed initializer (presumably a fast path table
 * entry; the struct itself is not declared in this part of the file) for
 * the 'cover' variant of nearest scaling with an a8 mask:
 *   operator     PIXMAN_OP_<op>
 *   source       PIXMAN_<s>, flags SCALED_NEAREST_FLAGS |
 *                FAST_PATH_SAMPLES_COVER_CLIP_NEAREST
 *   mask         PIXMAN_a8, flags MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA)
 *   destination  PIXMAN_<d>, flags FAST_PATH_STD_DEST_FLAGS
 *   function     fast_composite_scaled_nearest_<func>_cover_<op>
 *
 * No repeat flag and no FAST_PATH_X_UNIT_POSITIVE flag is included.
 */
627 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func)             \
628     {   PIXMAN_OP_ ## op,                                               \
629         PIXMAN_ ## s,                                                   \
630         SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,    \
631         PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
632         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
633         fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,    \
634     }
635
/*
 * Expands to one brace-enclosed initializer (presumably a fast path table
 * entry; the struct itself is not declared in this part of the file) for
 * nearest scaling with a solid mask and NORMAL repeat:
 *   operator     PIXMAN_OP_<op>
 *   source       PIXMAN_<s>, flags SCALED_NEAREST_FLAGS |
 *                FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE
 *   mask         PIXMAN_solid, flags MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)
 *   destination  PIXMAN_<d>, flags FAST_PATH_STD_DEST_FLAGS
 *   function     fast_composite_scaled_nearest_<func>_normal_<op>
 */
636 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)         \
637     {   PIXMAN_OP_ ## op,                                               \
638         PIXMAN_ ## s,                                                   \
639         (SCALED_NEAREST_FLAGS           |                               \
640          FAST_PATH_NORMAL_REPEAT        |                               \
641          FAST_PATH_X_UNIT_POSITIVE),                                    \
642         PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
643         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
644         fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,   \
645     }
646
/*
 * Expands to one brace-enclosed initializer (presumably a fast path table
 * entry; the struct itself is not declared in this part of the file) for
 * nearest scaling with a solid mask and PAD repeat:
 *   operator     PIXMAN_OP_<op>
 *   source       PIXMAN_<s>, flags SCALED_NEAREST_FLAGS |
 *                FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE
 *   mask         PIXMAN_solid, flags MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)
 *   destination  PIXMAN_<d>, flags FAST_PATH_STD_DEST_FLAGS
 *   function     fast_composite_scaled_nearest_<func>_pad_<op>
 */
647 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)            \
648     {   PIXMAN_OP_ ## op,                                               \
649         PIXMAN_ ## s,                                                   \
650         (SCALED_NEAREST_FLAGS           |                               \
651          FAST_PATH_PAD_REPEAT           |                               \
652          FAST_PATH_X_UNIT_POSITIVE),                                    \
653         PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
654         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
655         fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,      \
656     }
657
/*
 * Expands to one brace-enclosed initializer (presumably a fast path table
 * entry; the struct itself is not declared in this part of the file) for
 * nearest scaling with a solid mask and NONE repeat:
 *   operator     PIXMAN_OP_<op>
 *   source       PIXMAN_<s>, flags SCALED_NEAREST_FLAGS |
 *                FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE
 *   mask         PIXMAN_solid, flags MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)
 *   destination  PIXMAN_<d>, flags FAST_PATH_STD_DEST_FLAGS
 *   function     fast_composite_scaled_nearest_<func>_none_<op>
 */
658 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)           \
659     {   PIXMAN_OP_ ## op,                                               \
660         PIXMAN_ ## s,                                                   \
661         (SCALED_NEAREST_FLAGS           |                               \
662          FAST_PATH_NONE_REPEAT          |                               \
663          FAST_PATH_X_UNIT_POSITIVE),                                    \
664         PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
665         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
666         fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,     \
667     }
668
/*
 * Expands to one brace-enclosed initializer (presumably a fast path table
 * entry; the struct itself is not declared in this part of the file) for
 * the 'cover' variant of nearest scaling with a solid mask:
 *   operator     PIXMAN_OP_<op>
 *   source       PIXMAN_<s>, flags SCALED_NEAREST_FLAGS |
 *                FAST_PATH_SAMPLES_COVER_CLIP_NEAREST
 *   mask         PIXMAN_solid, flags MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)
 *   destination  PIXMAN_<d>, flags FAST_PATH_STD_DEST_FLAGS
 *   function     fast_composite_scaled_nearest_<func>_cover_<op>
 *
 * No repeat flag and no FAST_PATH_X_UNIT_POSITIVE flag is included.
 */
669 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)          \
670     {   PIXMAN_OP_ ## op,                                               \
671         PIXMAN_ ## s,                                                   \
672         SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,    \
673         PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
674         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
675         fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,    \
676     }
677
678 /* Prefer the use of 'cover' variant, because it is faster
 *
 * Expands to four comma-separated unmasked entries for the same
 * (op, s, d, func), in the order COVER, NONE, PAD, NORMAL. There is no
 * trailing comma after the last entry, so the user supplies it.
 */
679 #define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)                           \
680     SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),                       \
681     SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),                        \
682     SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func),                         \
683     SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
684
/*
 * Expands to three comma-separated a8-mask entries for the same
 * (op, s, d, func), in the order COVER, NONE, PAD. There is no trailing
 * comma after the last entry.
 *
 * Note: no NORMAL-repeat entry is emitted here, although
 * SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL exists and can be listed
 * separately by the user.
 */
685 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)                   \
686     SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),               \
687     SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),                \
688     SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
689
/*
 * Expands to three comma-separated solid-mask entries for the same
 * (op, s, d, func), in the order COVER, NONE, PAD. There is no trailing
 * comma after the last entry.
 *
 * Note: no NORMAL-repeat entry is emitted here, although
 * SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL exists and can be listed
 * separately by the user.
 */
690 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func)                \
691     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),            \
692     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),             \
693     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
694
695 /*****************************************************************************/
696
697 /*
698  * Identify 5 zones in each scanline for bilinear scaling. Depending on
699  * whether 2 pixels to be interpolated are fetched from the image itself,
700  * from the padding area around it or from both image and padding area.
701  */
702 static force_inline void
703 bilinear_pad_repeat_get_scanline_bounds (int32_t         source_image_width,
704                                          pixman_fixed_t  vx,
705                                          pixman_fixed_t  unit_x,
706                                          int32_t *       left_pad,
707                                          int32_t *       left_tz,
708                                          int32_t *       width,
709                                          int32_t *       right_tz,
710                                          int32_t *       right_pad)
711 {
712         int width1 = *width, left_pad1, right_pad1;
713         int width2 = *width, left_pad2, right_pad2;
714
715         pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
716                                         &width1, &left_pad1, &right_pad1);
717         pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
718                                         unit_x, &width2, &left_pad2, &right_pad2);
719
720         *left_pad = left_pad2;
721         *left_tz = left_pad1 - left_pad2;
722         *right_tz = right_pad2 - right_pad1;
723         *right_pad = right_pad1;
724         *width -= *left_pad + *left_tz + *right_tz + *right_pad;
725 }
726
727 /*
728  * Main loop template for single pass bilinear scaling. It needs to be
729  * provided with 'scanline_func' which should do the compositing operation.
730  * The needed function has the following prototype:
731  *
732  *      scanline_func (dst_type_t *       dst,
733  *                     const mask_type_t * mask,
734  *                     const src_type_t * src_top,
735  *                     const src_type_t * src_bottom,
736  *                     int32_t            width,
737  *                     int                weight_top,
738  *                     int                weight_bottom,
739  *                     pixman_fixed_t     vx,
740  *                     pixman_fixed_t     unit_x,
741  *                     pixman_fixed_t     max_vx,
742  *                     pixman_bool_t      zero_src)
743  *
744  * Where:
745  *  dst                 - destination scanline buffer for storing results
746  *  mask                - mask buffer (or single value for solid mask)
747  *  src_top, src_bottom - two source scanlines
748  *  width               - number of pixels to process
749  *  weight_top          - weight of the top row for interpolation
750  *  weight_bottom       - weight of the bottom row for interpolation
751  *  vx                  - initial position for fetching the first pair of
752  *                        pixels from the source buffer
753  *  unit_x              - position increment needed to move to the next pair
754  *                        of pixels
755  *  max_vx              - image size as a fixed point value, can be used for
756  *                        implementing NORMAL repeat (when it is supported)
757  *  zero_src            - boolean hint variable, which is set to TRUE when
758  *                        all source pixels are fetched from zero padding
759  *                        zone for NONE repeat
760  *
761  * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256,
762  *       but sometimes it may be less than that for NONE repeat when handling
763  *       fuzzy antialiased top or bottom image edges. Also both top and
764  *       bottom weight variables are guaranteed to have value in 0-255
765  *       range and can fit into unsigned byte or be used with 8-bit SIMD
766  *       multiplication instructions.
767  */
768 #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,     \
769                                   dst_type_t, repeat_mode, flags)                               \
770 static void                                                                                     \
771 fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,                \
772                                                    pixman_composite_info_t *info)               \
773 {                                                                                               \
774     PIXMAN_COMPOSITE_ARGS (info);                                                               \
775     dst_type_t *dst_line;                                                                       \
776     mask_type_t *mask_line;                                                                     \
777     src_type_t *src_first_line;                                                                 \
778     int       y1, y2;                                                                           \
779     pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */            \
780     pixman_vector_t v;                                                                          \
781     pixman_fixed_t vx, vy;                                                                      \
782     pixman_fixed_t unit_x, unit_y;                                                              \
783     int32_t left_pad, left_tz, right_tz, right_pad;                                             \
784                                                                                                 \
785     dst_type_t *dst;                                                                            \
786     mask_type_t solid_mask;                                                                     \
787     const mask_type_t *mask = &solid_mask;                                                      \
788     int src_stride, mask_stride, dst_stride;                                                    \
789                                                                                                 \
790     int src_width;                                                                              \
791     pixman_fixed_t src_width_fixed;                                                             \
792     int max_x;                                                                                  \
793     pixman_bool_t need_src_extension;                                                           \
794                                                                                                 \
795     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);    \
796     if (flags & FLAG_HAVE_SOLID_MASK)                                                           \
797     {                                                                                           \
798         solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);        \
799         mask_stride = 0;                                                                        \
800     }                                                                                           \
801     else if (flags & FLAG_HAVE_NON_SOLID_MASK)                                                  \
802     {                                                                                           \
803         PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,                         \
804                                mask_stride, mask_line, 1);                                      \
805     }                                                                                           \
806                                                                                                 \
807     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be                  \
808      * transformed from destination space to source space */                                    \
809     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);         \
810                                                                                                 \
811     /* reference point is the center of the pixel */                                            \
812     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;                             \
813     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;                             \
814     v.vector[2] = pixman_fixed_1;                                                               \
815                                                                                                 \
816     if (!pixman_transform_point_3d (src_image->common.transform, &v))                           \
817         return;                                                                                 \
818                                                                                                 \
819     unit_x = src_image->common.transform->matrix[0][0];                                         \
820     unit_y = src_image->common.transform->matrix[1][1];                                         \
821                                                                                                 \
822     v.vector[0] -= pixman_fixed_1 / 2;                                                          \
823     v.vector[1] -= pixman_fixed_1 / 2;                                                          \
824                                                                                                 \
825     vy = v.vector[1];                                                                           \
826                                                                                                 \
827     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||                                   \
828         PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                                    \
829     {                                                                                           \
830         bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,    \
831                                         &left_pad, &left_tz, &width, &right_tz, &right_pad);    \
832         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)                                 \
833         {                                                                                       \
834             /* PAD repeat does not need special handling for 'transition zones' and */          \
835             /* they can be combined with 'padding zones' safely */                              \
836             left_pad += left_tz;                                                                \
837             right_pad += right_tz;                                                              \
838             left_tz = right_tz = 0;                                                             \
839         }                                                                                       \
840         v.vector[0] += left_pad * unit_x;                                                       \
841     }                                                                                           \
842                                                                                                 \
843     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                                  \
844     {                                                                                           \
845         vx = v.vector[0];                                                                       \
846         repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width));         \
847         max_x = pixman_fixed_to_int (vx + (width - 1) * unit_x) + 1;                            \
848                                                                                                 \
849         if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH)                                    \
850         {                                                                                       \
851             src_width = 0;                                                                      \
852                                                                                                 \
853             while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x)                   \
854                 src_width += src_image->bits.width;                                             \
855                                                                                                 \
856             need_src_extension = TRUE;                                                          \
857         }                                                                                       \
858         else                                                                                    \
859         {                                                                                       \
860             src_width = src_image->bits.width;                                                  \
861             need_src_extension = FALSE;                                                         \
862         }                                                                                       \
863                                                                                                 \
864         src_width_fixed = pixman_int_to_fixed (src_width);                                      \
865     }                                                                                           \
866                                                                                                 \
867     while (--height >= 0)                                                                       \
868     {                                                                                           \
869         int weight1, weight2;                                                                   \
870         dst = dst_line;                                                                         \
871         dst_line += dst_stride;                                                                 \
872         vx = v.vector[0];                                                                       \
873         if (flags & FLAG_HAVE_NON_SOLID_MASK)                                                   \
874         {                                                                                       \
875             mask = mask_line;                                                                   \
876             mask_line += mask_stride;                                                           \
877         }                                                                                       \
878                                                                                                 \
879         y1 = pixman_fixed_to_int (vy);                                                          \
880         weight2 = (vy >> 8) & 0xff;                                                             \
881         if (weight2)                                                                            \
882         {                                                                                       \
883             /* normal case, both row weights are in 0-255 range and fit unsigned byte */        \
884             y2 = y1 + 1;                                                                        \
885             weight1 = 256 - weight2;                                                            \
886         }                                                                                       \
887         else                                                                                    \
888         {                                                                                       \
889             /* set both top and bottom row to the same scanline, and weights to 128+128 */      \
890             y2 = y1;                                                                            \
891             weight1 = weight2 = 128;                                                            \
892         }                                                                                       \
893         vy += unit_y;                                                                           \
894         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)                                 \
895         {                                                                                       \
896             src_type_t *src1, *src2;                                                            \
897             src_type_t buf1[2];                                                                 \
898             src_type_t buf2[2];                                                                 \
899             repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);                            \
900             repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);                            \
901             src1 = src_first_line + src_stride * y1;                                            \
902             src2 = src_first_line + src_stride * y2;                                            \
903                                                                                                 \
904             if (left_pad > 0)                                                                   \
905             {                                                                                   \
906                 buf1[0] = buf1[1] = src1[0];                                                    \
907                 buf2[0] = buf2[1] = src2[0];                                                    \
908                 scanline_func (dst, mask,                                                       \
909                                buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE);         \
910                 dst += left_pad;                                                                \
911                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
912                     mask += left_pad;                                                           \
913             }                                                                                   \
914             if (width > 0)                                                                      \
915             {                                                                                   \
916                 scanline_func (dst, mask,                                                       \
917                                src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);      \
918                 dst += width;                                                                   \
919                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
920                     mask += width;                                                              \
921             }                                                                                   \
922             if (right_pad > 0)                                                                  \
923             {                                                                                   \
924                 buf1[0] = buf1[1] = src1[src_image->bits.width - 1];                            \
925                 buf2[0] = buf2[1] = src2[src_image->bits.width - 1];                            \
926                 scanline_func (dst, mask,                                                       \
927                                buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE);        \
928             }                                                                                   \
929         }                                                                                       \
930         else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                           \
931         {                                                                                       \
932             src_type_t *src1, *src2;                                                            \
933             src_type_t buf1[2];                                                                 \
934             src_type_t buf2[2];                                                                 \
935             /* handle top/bottom zero padding by just setting weights to 0 if needed */         \
936             if (y1 < 0)                                                                         \
937             {                                                                                   \
938                 weight1 = 0;                                                                    \
939                 y1 = 0;                                                                         \
940             }                                                                                   \
941             if (y1 >= src_image->bits.height)                                                   \
942             {                                                                                   \
943                 weight1 = 0;                                                                    \
944                 y1 = src_image->bits.height - 1;                                                \
945             }                                                                                   \
946             if (y2 < 0)                                                                         \
947             {                                                                                   \
948                 weight2 = 0;                                                                    \
949                 y2 = 0;                                                                         \
950             }                                                                                   \
951             if (y2 >= src_image->bits.height)                                                   \
952             {                                                                                   \
953                 weight2 = 0;                                                                    \
954                 y2 = src_image->bits.height - 1;                                                \
955             }                                                                                   \
956             src1 = src_first_line + src_stride * y1;                                            \
957             src2 = src_first_line + src_stride * y2;                                            \
958                                                                                                 \
959             if (left_pad > 0)                                                                   \
960             {                                                                                   \
961                 buf1[0] = buf1[1] = 0;                                                          \
962                 buf2[0] = buf2[1] = 0;                                                          \
963                 scanline_func (dst, mask,                                                       \
964                                buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE);          \
965                 dst += left_pad;                                                                \
966                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
967                     mask += left_pad;                                                           \
968             }                                                                                   \
969             if (left_tz > 0)                                                                    \
970             {                                                                                   \
971                 buf1[0] = 0;                                                                    \
972                 buf1[1] = src1[0];                                                              \
973                 buf2[0] = 0;                                                                    \
974                 buf2[1] = src2[0];                                                              \
975                 scanline_func (dst, mask,                                                       \
976                                buf1, buf2, left_tz, weight1, weight2,                           \
977                                pixman_fixed_frac (vx), unit_x, 0, FALSE);                       \
978                 dst += left_tz;                                                                 \
979                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
980                     mask += left_tz;                                                            \
981                 vx += left_tz * unit_x;                                                         \
982             }                                                                                   \
983             if (width > 0)                                                                      \
984             {                                                                                   \
985                 scanline_func (dst, mask,                                                       \
986                                src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);      \
987                 dst += width;                                                                   \
988                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
989                     mask += width;                                                              \
990                 vx += width * unit_x;                                                           \
991             }                                                                                   \
992             if (right_tz > 0)                                                                   \
993             {                                                                                   \
994                 buf1[0] = src1[src_image->bits.width - 1];                                      \
995                 buf1[1] = 0;                                                                    \
996                 buf2[0] = src2[src_image->bits.width - 1];                                      \
997                 buf2[1] = 0;                                                                    \
998                 scanline_func (dst, mask,                                                       \
999                                buf1, buf2, right_tz, weight1, weight2,                          \
1000                                pixman_fixed_frac (vx), unit_x, 0, FALSE);                       \
1001                 dst += right_tz;                                                                \
1002                 if (flags & FLAG_HAVE_NON_SOLID_MASK)                                           \
1003                     mask += right_tz;                                                           \
1004             }                                                                                   \
1005             if (right_pad > 0)                                                                  \
1006             {                                                                                   \
1007                 buf1[0] = buf1[1] = 0;                                                          \
1008                 buf2[0] = buf2[1] = 0;                                                          \
1009                 scanline_func (dst, mask,                                                       \
1010                                buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE);         \
1011             }                                                                                   \
1012         }                                                                                       \
1013         else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                         \
1014         {                                                                                       \
1015             int32_t         num_pixels;                                                         \
1016             int32_t         width_remain;                                                       \
1017             src_type_t *    src_line_top;                                                       \
1018             src_type_t *    src_line_bottom;                                                    \
1019             src_type_t      buf1[2];                                                            \
1020             src_type_t      buf2[2];                                                            \
1021             src_type_t      extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2];                      \
1022             src_type_t      extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2];                      \
1023             int             i, j;                                                               \
1024                                                                                                 \
1025             repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);                         \
1026             repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);                         \
1027             src_line_top = src_first_line + src_stride * y1;                                    \
1028             src_line_bottom = src_first_line + src_stride * y2;                                 \
1029                                                                                                 \
1030             if (need_src_extension)                                                             \
1031             {                                                                                   \
1032                 for (i=0; i<src_width;)                                                         \
1033                 {                                                                               \
1034                     for (j=0; j<src_image->bits.width; j++, i++)                                \
1035                     {                                                                           \
1036                         extended_src_line0[i] = src_line_top[j];                                \
1037                         extended_src_line1[i] = src_line_bottom[j];                             \
1038                     }                                                                           \
1039                 }                                                                               \
1040                                                                                                 \
1041                 src_line_top = &extended_src_line0[0];                                          \
1042                 src_line_bottom = &extended_src_line1[0];                                       \
1043             }                                                                                   \
1044                                                                                                 \
1045             /* Top & Bottom wrap around buffer */                                               \
1046             buf1[0] = src_line_top[src_width - 1];                                              \
1047             buf1[1] = src_line_top[0];                                                          \
1048             buf2[0] = src_line_bottom[src_width - 1];                                           \
1049             buf2[1] = src_line_bottom[0];                                                       \
1050                                                                                                 \
1051             width_remain = width;                                                               \
1052                                                                                                 \
1053             while (width_remain > 0)                                                            \
1054             {                                                                                   \
1055                 /* We use src_width_fixed because it can make vx in original source range */    \
1056                 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);                            \
1057                                                                                                 \
1058                 /* Wrap around part */                                                          \
1059                 if (pixman_fixed_to_int (vx) == src_width - 1)                                  \
1060                 {                                                                               \
1061                     /* for positive unit_x                                                      \
1062                      * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed           \
1063                      *                                                                          \
1064                      * vx is in range [0, src_width_fixed - pixman_fixed_e]                     \
1065                      * So we are safe from overflow.                                            \
1066                      */                                                                         \
1067                     num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1;        \
1068                                                                                                 \
1069                     if (num_pixels > width_remain)                                              \
1070                         num_pixels = width_remain;                                              \
1071                                                                                                 \
1072                     scanline_func (dst, mask, buf1, buf2, num_pixels,                           \
1073                                    weight1, weight2, pixman_fixed_frac(vx),                     \
1074                                    unit_x, src_width_fixed, FALSE);                             \
1075                                                                                                 \
1076                     width_remain -= num_pixels;                                                 \
1077                     vx += num_pixels * unit_x;                                                  \
1078                     dst += num_pixels;                                                          \
1079                                                                                                 \
1080                     if (flags & FLAG_HAVE_NON_SOLID_MASK)                                       \
1081                         mask += num_pixels;                                                     \
1082                                                                                                 \
1083                     repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);                        \
1084                 }                                                                               \
1085                                                                                                 \
1086                 /* Normal scanline composite */                                                 \
1087                 if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0)              \
1088                 {                                                                               \
1089                     /* for positive unit_x                                                      \
1090                      * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1)     \
1091                      *                                                                          \
1092                      * vx is in range [0, src_width_fixed - pixman_fixed_e]                     \
1093                      * So we are safe from overflow here.                                       \
1094                      */                                                                         \
1095                     num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e)      \
1096                                   / unit_x) + 1;                                                \
1097                                                                                                 \
1098                     if (num_pixels > width_remain)                                              \
1099                         num_pixels = width_remain;                                              \
1100                                                                                                 \
1101                     scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels,        \
1102                                    weight1, weight2, vx, unit_x, src_width_fixed, FALSE);       \
1103                                                                                                 \
1104                     width_remain -= num_pixels;                                                 \
1105                     vx += num_pixels * unit_x;                                                  \
1106                     dst += num_pixels;                                                          \
1107                                                                                                 \
1108                     if (flags & FLAG_HAVE_NON_SOLID_MASK)                                       \
1109                         mask += num_pixels;                                                     \
1110                 }                                                                               \
1111             }                                                                                   \
1112         }                                                                                       \
1113         else                                                                                    \
1114         {                                                                                       \
1115             scanline_func (dst, mask, src_first_line + src_stride * y1,                         \
1116                            src_first_line + src_stride * y2, width,                             \
1117                            weight1, weight2, vx, unit_x, max_vx, FALSE);                        \
1118         }                                                                                       \
1119     }                                                                                           \
1120 }
1121
/*
 * A workaround for old sun studio, see:
 * https://bugs.freedesktop.org/show_bug.cgi?id=32764
 *
 * Thin forwarding wrapper: every argument is handed on unchanged to
 * FAST_BILINEAR_MAINLOOP_INT, except scale_func_name, which is token-pasted
 * onto a leading underscore first (foo is passed on as _foo).
 */
#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,  \
                                  dst_type_t, repeat_mode, flags)                               \
        FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
                                  dst_type_t, repeat_mode, flags)
1127
/*
 * Source-image flag set shared by all scaled bilinear fast path entries in
 * this header: the bitwise OR of the five FAST_PATH_* flags listed below.
 * The expansion is fully parenthesized so it can be OR-ed with more flags.
 */
#define SCALED_BILINEAR_FLAGS                                           \
    (FAST_PATH_SCALE_TRANSFORM  |                                       \
     FAST_PATH_NO_ALPHA_MAP     |                                       \
     FAST_PATH_BILINEAR_FILTER  |                                       \
     FAST_PATH_NO_ACCESSORS     |                                       \
     FAST_PATH_NARROW_FORMAT)
1134
/*
 * Brace-enclosed table entry (presumably a pixman_fast_path_t initializer;
 * the struct is declared elsewhere) for an unmasked scaled bilinear
 * operation whose source carries FAST_PATH_PAD_REPEAT.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_
 *   s, d - source / destination format suffixes, pasted onto PIXMAN_
 *   func - infix of the composite function name, which is built as
 *          fast_composite_scaled_bilinear_<func>_pad_<op>
 *
 * The mask slot is PIXMAN_null with flags 0.
 */
#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func)                      \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT |                 \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_null, 0,                                                 \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,     \
    }
1145
/*
 * Brace-enclosed table entry (presumably a pixman_fast_path_t initializer;
 * the struct is declared elsewhere) for an unmasked scaled bilinear
 * operation whose source carries FAST_PATH_NONE_REPEAT.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_
 *   s, d - source / destination format suffixes, pasted onto PIXMAN_
 *   func - infix of the composite function name, which is built as
 *          fast_composite_scaled_bilinear_<func>_none_<op>
 *
 * The mask slot is PIXMAN_null with flags 0.
 */
#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func)                     \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT |                \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_null, 0,                                                 \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,    \
    }
1156
/*
 * Brace-enclosed table entry (presumably a pixman_fast_path_t initializer;
 * the struct is declared elsewhere) for an unmasked scaled bilinear
 * operation whose source carries FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR.
 * Unlike the pad/none/normal variants, no repeat flag and no
 * FAST_PATH_X_UNIT_POSITIVE flag is requested.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_
 *   s, d - source / destination format suffixes, pasted onto PIXMAN_
 *   func - infix of the composite function name, which is built as
 *          fast_composite_scaled_bilinear_<func>_cover_<op>
 *
 * The mask slot is PIXMAN_null with flags 0.
 */
#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func)                    \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),\
        PIXMAN_null, 0,                                                 \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,   \
    }
1165
/*
 * Brace-enclosed table entry (presumably a pixman_fast_path_t initializer;
 * the struct is declared elsewhere) for an unmasked scaled bilinear
 * operation whose source carries FAST_PATH_NORMAL_REPEAT.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_
 *   s, d - source / destination format suffixes, pasted onto PIXMAN_
 *   func - infix of the composite function name, which is built as
 *          fast_composite_scaled_bilinear_<func>_normal_<op>
 *
 * The mask slot is PIXMAN_null with flags 0.
 */
#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func)                   \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT |              \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_null, 0,                                                 \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,  \
    }
1176
/*
 * Brace-enclosed table entry (presumably a pixman_fast_path_t initializer;
 * the struct is declared elsewhere) for a scaled bilinear operation with a
 * PIXMAN_a8 mask and a source carrying FAST_PATH_PAD_REPEAT.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_
 *   s, d - source / destination format suffixes, pasted onto PIXMAN_
 *   func - infix of the composite function name, which is built as
 *          fast_composite_scaled_bilinear_<func>_pad_<op>
 *
 * Mask flags come from MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA); MASK_FLAGS
 * is defined outside this header section.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func)              \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT |                 \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,     \
    }
1187
/*
 * Brace-enclosed table entry (presumably a pixman_fast_path_t initializer;
 * the struct is declared elsewhere) for a scaled bilinear operation with a
 * PIXMAN_a8 mask and a source carrying FAST_PATH_NONE_REPEAT.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_
 *   s, d - source / destination format suffixes, pasted onto PIXMAN_
 *   func - infix of the composite function name, which is built as
 *          fast_composite_scaled_bilinear_<func>_none_<op>
 *
 * Mask flags come from MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA); MASK_FLAGS
 * is defined outside this header section.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func)             \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT |                \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,    \
    }
1198
/*
 * Brace-enclosed table entry (presumably a pixman_fast_path_t initializer;
 * the struct is declared elsewhere) for a scaled bilinear operation with a
 * PIXMAN_a8 mask and a source carrying
 * FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR.  No repeat flag and no
 * FAST_PATH_X_UNIT_POSITIVE flag is requested.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_
 *   s, d - source / destination format suffixes, pasted onto PIXMAN_
 *   func - infix of the composite function name, which is built as
 *          fast_composite_scaled_bilinear_<func>_cover_<op>
 *
 * Mask flags come from MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA); MASK_FLAGS
 * is defined outside this header section.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func)            \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),\
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,   \
    }
1207
/*
 * Brace-enclosed table entry (presumably a pixman_fast_path_t initializer;
 * the struct is declared elsewhere) for a scaled bilinear operation with a
 * PIXMAN_a8 mask and a source carrying FAST_PATH_NORMAL_REPEAT.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_
 *   s, d - source / destination format suffixes, pasted onto PIXMAN_
 *   func - infix of the composite function name, which is built as
 *          fast_composite_scaled_bilinear_<func>_normal_<op>
 *
 * Mask flags come from MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA); MASK_FLAGS
 * is defined outside this header section.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)           \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT |              \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),            \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,  \
    }
1218
/*
 * Brace-enclosed table entry (presumably a pixman_fast_path_t initializer;
 * the struct is declared elsewhere) for a scaled bilinear operation with a
 * PIXMAN_solid mask and a source carrying FAST_PATH_PAD_REPEAT.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_
 *   s, d - source / destination format suffixes, pasted onto PIXMAN_
 *   func - infix of the composite function name, which is built as
 *          fast_composite_scaled_bilinear_<func>_pad_<op>
 *
 * Mask flags come from MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA);
 * MASK_FLAGS is defined outside this header section.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)           \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT |                 \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,     \
    }
1229
/*
 * Brace-enclosed table entry (presumably a pixman_fast_path_t initializer;
 * the struct is declared elsewhere) for a scaled bilinear operation with a
 * PIXMAN_solid mask and a source carrying FAST_PATH_NONE_REPEAT.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_
 *   s, d - source / destination format suffixes, pasted onto PIXMAN_
 *   func - infix of the composite function name, which is built as
 *          fast_composite_scaled_bilinear_<func>_none_<op>
 *
 * Mask flags come from MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA);
 * MASK_FLAGS is defined outside this header section.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)          \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT |                \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,    \
    }
1240
/*
 * Brace-enclosed table entry (presumably a pixman_fast_path_t initializer;
 * the struct is declared elsewhere) for a scaled bilinear operation with a
 * PIXMAN_solid mask and a source carrying
 * FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR.  No repeat flag and no
 * FAST_PATH_X_UNIT_POSITIVE flag is requested.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_
 *   s, d - source / destination format suffixes, pasted onto PIXMAN_
 *   func - infix of the composite function name, which is built as
 *          fast_composite_scaled_bilinear_<func>_cover_<op>
 *
 * Mask flags come from MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA);
 * MASK_FLAGS is defined outside this header section.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)         \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),\
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,   \
    }
1249
/*
 * Brace-enclosed table entry (presumably a pixman_fast_path_t initializer;
 * the struct is declared elsewhere) for a scaled bilinear operation with a
 * PIXMAN_solid mask and a source carrying FAST_PATH_NORMAL_REPEAT.
 *
 *   op   - operator suffix, pasted onto PIXMAN_OP_
 *   s, d - source / destination format suffixes, pasted onto PIXMAN_
 *   func - infix of the composite function name, which is built as
 *          fast_composite_scaled_bilinear_<func>_normal_<op>
 *
 * Mask flags come from MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA);
 * MASK_FLAGS is defined outside this header section.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)        \
    {   PIXMAN_OP_ ## op,                                               \
        PIXMAN_ ## s,                                                   \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT |              \
         FAST_PATH_X_UNIT_POSITIVE),                                    \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),      \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,  \
    }
1260
/*
 * Expands to four comma-separated unmasked table entries for the same
 * (op, s, d, func), in the order COVER, NONE, PAD, NORMAL.
 *
 * Prefer the use of 'cover' variant, because it is faster; it is therefore
 * listed first (presumably the table is searched in order -- confirm
 * against the lookup code).
 */
#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func)                          \
    SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func),                      \
    SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func),                       \
    SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func),                        \
    SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
1267
/*
 * Expands to four comma-separated a8-mask table entries for the same
 * (op, s, d, func), in the order COVER, NONE, PAD, NORMAL.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func)                  \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func),              \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func),               \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func),                \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
1273
/*
 * Expands to four comma-separated solid-mask table entries for the same
 * (op, s, d, func), in the order COVER, NONE, PAD, NORMAL.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func)               \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),           \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),            \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func),             \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
1279
1280 #endif