Rename FAST_PATH_NO_WIDE_FORMAT to FAST_PATH_NARROW_FORMAT
[profile/ivi/pixman.git] / pixman / pixman-fast-path.h
1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3  * Copyright © 2000 SuSE, Inc.
4  * Copyright © 2007 Red Hat, Inc.
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of SuSE not be used in advertising or
11  * publicity pertaining to distribution of the software without specific,
12  * written prior permission.  SuSE makes no representations about the
13  * suitability of this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  *
16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  * Author:  Keith Packard, SuSE, Inc.
24  */
25
26 #ifndef PIXMAN_FAST_PATH_H__
27 #define PIXMAN_FAST_PATH_H__
28
29 #include "pixman-private.h"
30
/*
 * Extra repeat-mode selector for the scaler macros in this file, which
 * compare PIXMAN_REPEAT_ ## repeat_mode against PIXMAN_REPEAT_NORMAL,
 * PIXMAN_REPEAT_PAD and PIXMAN_REPEAT_NONE.
 * NOTE(review): -1 is presumably chosen so that it can never equal a real
 * pixman_repeat_t value - confirm against the enum definition.
 *
 * The replacement list is parenthesized so the negative constant always
 * expands as a single operand.
 */
#define PIXMAN_REPEAT_COVER (-1)
32
33 static force_inline pixman_bool_t
34 repeat (pixman_repeat_t repeat, int *c, int size)
35 {
36     if (repeat == PIXMAN_REPEAT_NONE)
37     {
38         if (*c < 0 || *c >= size)
39             return FALSE;
40     }
41     else if (repeat == PIXMAN_REPEAT_NORMAL)
42     {
43         while (*c >= size)
44             *c -= size;
45         while (*c < 0)
46             *c += size;
47     }
48     else if (repeat == PIXMAN_REPEAT_PAD)
49     {
50         *c = CLIP (*c, 0, size - 1);
51     }
52     else /* REFLECT */
53     {
54         *c = MOD (*c, size * 2);
55         if (*c >= size)
56             *c = size * 2 - *c - 1;
57     }
58     return TRUE;
59 }
60
61 /*
62  * For each scanline fetched from source image with PAD repeat:
63  * - calculate how many pixels need to be padded on the left side
64  * - calculate how many pixels need to be padded on the right side
65  * - update width to only count pixels which are fetched from the image
66  * All this information is returned via 'width', 'left_pad', 'right_pad'
67  * arguments. The code is assuming that 'unit_x' is positive.
68  *
69  * Note: 64-bit math is used in order to avoid potential overflows, which
70  *       is probably excessive in many cases. This particular function
71  *       may need its own correctness test and performance tuning.
72  */
73 static force_inline void
74 pad_repeat_get_scanline_bounds (int32_t         source_image_width,
75                                 pixman_fixed_t  vx,
76                                 pixman_fixed_t  unit_x,
77                                 int32_t *       width,
78                                 int32_t *       left_pad,
79                                 int32_t *       right_pad)
80 {
81     int64_t max_vx = (int64_t) source_image_width << 16;
82     int64_t tmp;
83     if (vx < 0)
84     {
85         tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
86         if (tmp > *width)
87         {
88             *left_pad = *width;
89             *width = 0;
90         }
91         else
92         {
93             *left_pad = (int32_t) tmp;
94             *width -= (int32_t) tmp;
95         }
96     }
97     else
98     {
99         *left_pad = 0;
100     }
101     tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
102     if (tmp < 0)
103     {
104         *right_pad = *width;
105         *width = 0;
106     }
107     else if (tmp >= *width)
108     {
109         *right_pad = 0;
110     }
111     else
112     {
113         *right_pad = *width - (int32_t) tmp;
114         *width = (int32_t) tmp;
115     }
116 }
117
118 /* A macroified version of specialized nearest scalers for some
119  * common 8888 and 565 formats. It supports SRC and OVER ops.
120  *
121  * There are two repeat versions, one that handles repeat normal,
122  * and one without repeat handling that only works if the src region
123  * used is completely covered by the pre-repeated source samples.
124  *
125  * The loops are unrolled to process two pixels per iteration for better
126  * performance on most CPU architectures (superscalar processors
127  * can issue several operations simultaneously, other processors can hide
128  * instruction latencies by pipelining operations). Unrolling more
129  * does not make much sense because the compiler will start running out
130  * of spare registers soon.
131  */
132
/*
 * Alpha accessors selected by FAST_NEAREST_SCANLINE through
 * GET_ ## SRC_FORMAT ## _ALPHA. GET_8888_ALPHA yields the bits of s from
 * bit 24 upwards; GET_0565_ALPHA ignores s and always yields 0xff.
 */
133 #define GET_8888_ALPHA(s) ((s) >> 24)
134  /* This is not actually used since we don't have an OVER with
135     565 source, but it is needed to build. */
136 #define GET_0565_ALPHA(s) 0xff
137
/*
 * FAST_NEAREST_SCANLINE - generate a nearest-neighbour scanline function.
 *
 * Expands to a static force_inline function called scanline_func_name that
 * writes w pixels to dst. Pixel i is read from src[vx >> 16], after which
 * vx is advanced by unit_x.
 *
 * Macro arguments:
 *   SRC_FORMAT, DST_FORMAT - name fragments pasted into the helper macros
 *                            GET_<SRC>_ALPHA, CONVERT_<SRC>_TO_<DST>,
 *                            CONVERT_<SRC>_TO_8888, CONVERT_<DST>_TO_8888
 *                            and CONVERT_8888_TO_<DST>.
 *   src_type_t, dst_type_t - element types of the src and dst arrays.
 *   OP                     - pasted onto PIXMAN_OP_; anything other than
 *                            SRC or OVER makes the function call abort().
 *   repeat_mode            - pasted onto PIXMAN_REPEAT_; for NORMAL, vx is
 *                            reduced by max_vx after every step until it is
 *                            below max_vx (this relies on unit_x being
 *                            positive). For other modes max_vx is not read.
 *
 * OVER handling per pixel: when the source alpha is 0xff the converted
 * source pixel is stored directly; otherwise, if the source pixel is
 * non-zero, the destination is converted to 8888, combined with the source
 * through UN8x4_MUL_UN8_ADD_UN8x4 using the inverted alpha (a ^ 0xff) and
 * converted back. A zero source pixel leaves the destination untouched.
 *
 * The main loop handles two pixels per iteration. Because w is decremented
 * by 2 before each test, it ends at -1 when one pixel is left and at -2
 * when none is, which is what the trailing (w & 1) check distinguishes.
 */
138 #define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT,                       \
139                               src_type_t, dst_type_t, OP, repeat_mode)                          \
140 static force_inline void                                                                        \
141 scanline_func_name (dst_type_t     *dst,                                                        \
142                     src_type_t     *src,                                                        \
143                     int32_t         w,                                                          \
144                     pixman_fixed_t  vx,                                                         \
145                     pixman_fixed_t  unit_x,                                                     \
146                     pixman_fixed_t  max_vx)                                                     \
147 {                                                                                               \
148         uint32_t   d;                                                                           \
149         src_type_t s1, s2;                                                                      \
150         uint8_t    a1, a2;                                                                      \
151         int        x1, x2;                                                                      \
152                                                                                                 \
153         if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER)            \
154             abort();                                                                            \
155                                                                                                 \
156         while ((w -= 2) >= 0)                                                                   \
157         {                                                                                       \
158             x1 = vx >> 16;                                                                      \
159             vx += unit_x;                                                                       \
160             if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                          \
161             {                                                                                   \
162                 /* This works because we know that unit_x is positive */                        \
163                 while (vx >= max_vx)                                                            \
164                     vx -= max_vx;                                                               \
165             }                                                                                   \
166             s1 = src[x1];                                                                       \
167                                                                                                 \
168             x2 = vx >> 16;                                                                      \
169             vx += unit_x;                                                                       \
170             if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                          \
171             {                                                                                   \
172                 /* This works because we know that unit_x is positive */                        \
173                 while (vx >= max_vx)                                                            \
174                     vx -= max_vx;                                                               \
175             }                                                                                   \
176             s2 = src[x2];                                                                       \
177                                                                                                 \
178             if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)                                             \
179             {                                                                                   \
180                 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);                                          \
181                 a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2);                                          \
182                                                                                                 \
183                 if (a1 == 0xff)                                                                 \
184                 {                                                                               \
185                     *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);                   \
186                 }                                                                               \
187                 else if (s1)                                                                    \
188                 {                                                                               \
189                     d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst);                              \
190                     s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1);                               \
191                     a1 ^= 0xff;                                                                 \
192                     UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);                                        \
193                     *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);                                  \
194                 }                                                                               \
195                 dst++;                                                                          \
196                                                                                                 \
197                 if (a2 == 0xff)                                                                 \
198                 {                                                                               \
199                     *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2);                   \
200                 }                                                                               \
201                 else if (s2)                                                                    \
202                 {                                                                               \
203                     d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst);                               \
204                     s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2);                                \
205                     a2 ^= 0xff;                                                                 \
206                     UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2);                                        \
207                     *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);                                  \
208                 }                                                                               \
209                 dst++;                                                                          \
210             }                                                                                   \
211             else /* PIXMAN_OP_SRC */                                                            \
212             {                                                                                   \
213                 *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);                     \
214                 *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2);                     \
215             }                                                                                   \
216         }                                                                                       \
217                                                                                                 \
218         if (w & 1)                                                                              \
219         {                                                                                       \
220             x1 = vx >> 16;                                                                      \
221             s1 = src[x1];                                                                       \
222                                                                                                 \
223             if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)                                             \
224             {                                                                                   \
225                 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);                                          \
226                                                                                                 \
227                 if (a1 == 0xff)                                                                 \
228                 {                                                                               \
229                     *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);                   \
230                 }                                                                               \
231                 else if (s1)                                                                    \
232                 {                                                                               \
233                     d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst);                               \
234                     s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1);                               \
235                     a1 ^= 0xff;                                                                 \
236                     UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);                                        \
237                     *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);                                  \
238                 }                                                                               \
239                 dst++;                                                                          \
240             }                                                                                   \
241             else /* PIXMAN_OP_SRC */                                                            \
242             {                                                                                   \
243                 *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);                     \
244             }                                                                                   \
245         }                                                                                       \
246 }
247
/*
 * FAST_NEAREST_MAINLOOP - generate the per-image driver of a nearest scaler.
 *
 * Expands to a static function fast_composite_scaled_nearest_<scale_func_name>.
 * The generated function:
 *   - transforms the centre of the pixel at (src_x, src_y) with
 *     src_image->common.transform and subtracts pixman_fixed_e so that a
 *     coordinate of exactly x.5 rounds down;
 *   - takes the per-pixel steps unit_x / unit_y from matrix[0][0] and
 *     matrix[1][1] of that transform;
 *   - for every destination row selects source row (vy >> 16) and hands the
 *     row to scanline_func.
 *
 * repeat_mode is pasted onto PIXMAN_REPEAT_ and selects the behaviour at
 * compile time:
 *   NORMAL - vx and vy are wrapped into the image with repeat ();
 *   PAD    - the row index is clamped with repeat (); pixels left / right
 *            of the image are produced by calling scanline_func with
 *            vx = unit_x = 0 on the first / last pixel of the source row;
 *   NONE   - rows outside the image, and pixels left / right of it, are
 *            produced from a static zero-valued source pixel;
 *   other  - no adjustment, the row is passed to scanline_func as is.
 *
 * The imp, op, mask_image, mask_x and mask_y parameters are not used by the
 * generated body.
 */
#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t,           \
                              repeat_mode)                                                      \
static void                                                                                     \
fast_composite_scaled_nearest_ ## scale_func_name (pixman_implementation_t *imp,                \
                                                   pixman_op_t              op,                 \
                                                   pixman_image_t *         src_image,          \
                                                   pixman_image_t *         mask_image,         \
                                                   pixman_image_t *         dst_image,          \
                                                   int32_t                  src_x,              \
                                                   int32_t                  src_y,              \
                                                   int32_t                  mask_x,             \
                                                   int32_t                  mask_y,             \
                                                   int32_t                  dst_x,              \
                                                   int32_t                  dst_y,              \
                                                   int32_t                  width,              \
                                                   int32_t                  height)             \
{                                                                                               \
    dst_type_t *dst_line;                                                                       \
    src_type_t *src_first_line;                                                                 \
    int       y;                                                                                \
    /* max_vx and max_vy are assigned only for NORMAL repeat; they start at 0 so that           \
     * the max_vx passed to scanline_func in the final else branch below is never an            \
     * indeterminate value. */                                                                  \
    pixman_fixed_t max_vx = 0;                                                                  \
    pixman_fixed_t max_vy = 0;                                                                  \
    pixman_vector_t v;                                                                          \
    pixman_fixed_t vx, vy;                                                                      \
    pixman_fixed_t unit_x, unit_y;                                                              \
    /* Only computed for PAD and NONE repeat */                                                 \
    int32_t left_pad = 0, right_pad = 0;                                                        \
                                                                                                \
    src_type_t *src;                                                                            \
    dst_type_t *dst;                                                                            \
    int       src_stride, dst_stride;                                                           \
                                                                                                \
    PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1);       \
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be                  \
     * transformed from destination space to source space */                                    \
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);         \
                                                                                                \
    /* reference point is the center of the pixel */                                            \
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;                             \
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;                             \
    v.vector[2] = pixman_fixed_1;                                                               \
                                                                                                \
    if (!pixman_transform_point_3d (src_image->common.transform, &v))                           \
        return;                                                                                 \
                                                                                                \
    unit_x = src_image->common.transform->matrix[0][0];                                         \
    unit_y = src_image->common.transform->matrix[1][1];                                         \
                                                                                                \
    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */                   \
    v.vector[0] -= pixman_fixed_e;                                                              \
    v.vector[1] -= pixman_fixed_e;                                                              \
                                                                                                \
    vx = v.vector[0];                                                                           \
    vy = v.vector[1];                                                                           \
                                                                                                \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                                  \
    {                                                                                           \
        /* Clamp repeating positions inside the actual samples */                               \
        max_vx = src_image->bits.width << 16;                                                   \
        max_vy = src_image->bits.height << 16;                                                  \
                                                                                                \
        repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx);                                             \
        repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                             \
    }                                                                                           \
                                                                                                \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||                                   \
        PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                                    \
    {                                                                                           \
        /* Split each scanline into left padding, image pixels and right padding,               \
         * then move vx past the left padding */                                                \
        pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,                      \
                                        &width, &left_pad, &right_pad);                         \
        vx += left_pad * unit_x;                                                                \
    }                                                                                           \
                                                                                                \
    while (--height >= 0)                                                                       \
    {                                                                                           \
        dst = dst_line;                                                                         \
        dst_line += dst_stride;                                                                 \
                                                                                                \
        y = vy >> 16;                                                                           \
        vy += unit_y;                                                                           \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                              \
            repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                         \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)                                 \
        {                                                                                       \
            repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);                             \
            src = src_first_line + src_stride * y;                                              \
            if (left_pad > 0)                                                                   \
            {                                                                                   \
                scanline_func (dst, src, left_pad, 0, 0, 0);                                    \
            }                                                                                   \
            if (width > 0)                                                                      \
            {                                                                                   \
                scanline_func (dst + left_pad, src, width, vx, unit_x, 0);                      \
            }                                                                                   \
            if (right_pad > 0)                                                                  \
            {                                                                                   \
                scanline_func (dst + left_pad + width, src + src_image->bits.width - 1,         \
                                right_pad, 0, 0, 0);                                            \
            }                                                                                   \
        }                                                                                       \
        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                           \
        {                                                                                       \
            static src_type_t zero = 0;                                                         \
            if (y < 0 || y >= src_image->bits.height)                                           \
            {                                                                                   \
                scanline_func (dst, &zero, left_pad + width + right_pad, 0, 0, 0);              \
                continue;                                                                       \
            }                                                                                   \
            src = src_first_line + src_stride * y;                                              \
            if (left_pad > 0)                                                                   \
            {                                                                                   \
                scanline_func (dst, &zero, left_pad, 0, 0, 0);                                  \
            }                                                                                   \
            if (width > 0)                                                                      \
            {                                                                                   \
                scanline_func (dst + left_pad, src, width, vx, unit_x, 0);                      \
            }                                                                                   \
            if (right_pad > 0)                                                                  \
            {                                                                                   \
                scanline_func (dst + left_pad + width, &zero, right_pad, 0, 0, 0);              \
            }                                                                                   \
        }                                                                                       \
        else                                                                                    \
        {                                                                                       \
            src = src_first_line + src_stride * y;                                              \
            scanline_func (dst, src, width, vx, unit_x, max_vx);                                \
        }                                                                                       \
    }                                                                                           \
}
376
/*
 * Instantiate one complete nearest scaler: a scanline function named
 * scaled_nearest_scanline_<scale_func_name>_<OP> (through
 * FAST_NEAREST_SCANLINE) followed by its driver
 * fast_composite_scaled_nearest_<scale_func_name>_<OP> (through
 * FAST_NEAREST_MAINLOOP). The remaining arguments are forwarded unchanged.
 */
377 #define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,                           \
378                      src_type_t, dst_type_t, OP, repeat_mode)                           \
379     FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,       \
380                           SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t,               \
381                           OP, repeat_mode)                                              \
382     FAST_NEAREST_MAINLOOP(scale_func_name##_##OP,                                       \
383                           scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,       \
384                           src_type_t, dst_type_t, repeat_mode)
385
386
/*
 * Flag set shared by every SIMPLE_NEAREST_FAST_PATH_* table entry; each
 * entry ORs its own repeat / coverage flags onto it.
 */
387 #define SCALED_NEAREST_FLAGS                                            \
388     (FAST_PATH_SCALE_TRANSFORM  |                                       \
389      FAST_PATH_NO_ALPHA_MAP     |                                       \
390      FAST_PATH_NEAREST_FILTER   |                                       \
391      FAST_PATH_NO_ACCESSORS     |                                       \
392      FAST_PATH_NARROW_FORMAT)
393
/*
 * Brace-enclosed table entry for the NORMAL-repeat scaler: operator
 * PIXMAN_OP_<op>, format PIXMAN_<s> with SCALED_NEAREST_FLAGS plus
 * FAST_PATH_NORMAL_REPEAT and FAST_PATH_X_UNIT_POSITIVE, then
 * PIXMAN_null / 0, then format PIXMAN_<d> with FAST_PATH_STD_DEST_FLAGS,
 * and finally fast_composite_scaled_nearest_<func>_normal_<op>.
 * NOTE(review): the positions are presumably source, mask and destination
 * of a fast path table struct - confirm against its declaration.
 */
394 #define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func)                    \
395     {   PIXMAN_OP_ ## op,                                               \
396         PIXMAN_ ## s,                                                   \
397         (SCALED_NEAREST_FLAGS           |                               \
398          FAST_PATH_NORMAL_REPEAT        |                               \
399          FAST_PATH_X_UNIT_POSITIVE),                                    \
400         PIXMAN_null, 0,                                                 \
401         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
402         fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,   \
403     }
404
/*
 * Brace-enclosed table entry for the PAD-repeat scaler: operator
 * PIXMAN_OP_<op>, format PIXMAN_<s> with SCALED_NEAREST_FLAGS plus
 * FAST_PATH_PAD_REPEAT and FAST_PATH_X_UNIT_POSITIVE, then
 * PIXMAN_null / 0, then format PIXMAN_<d> with FAST_PATH_STD_DEST_FLAGS,
 * and finally fast_composite_scaled_nearest_<func>_pad_<op>.
 * NOTE(review): the positions are presumably source, mask and destination
 * of a fast path table struct - confirm against its declaration.
 */
405 #define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func)                       \
406     {   PIXMAN_OP_ ## op,                                               \
407         PIXMAN_ ## s,                                                   \
408         (SCALED_NEAREST_FLAGS           |                               \
409          FAST_PATH_PAD_REPEAT           |                               \
410          FAST_PATH_X_UNIT_POSITIVE),                                    \
411         PIXMAN_null, 0,                                                 \
412         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
413         fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,      \
414     }
415
/*
 * Brace-enclosed table entry for the NONE-repeat scaler: operator
 * PIXMAN_OP_<op>, format PIXMAN_<s> with SCALED_NEAREST_FLAGS plus
 * FAST_PATH_NONE_REPEAT and FAST_PATH_X_UNIT_POSITIVE, then
 * PIXMAN_null / 0, then format PIXMAN_<d> with FAST_PATH_STD_DEST_FLAGS,
 * and finally fast_composite_scaled_nearest_<func>_none_<op>.
 * NOTE(review): the positions are presumably source, mask and destination
 * of a fast path table struct - confirm against its declaration.
 */
416 #define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func)                      \
417     {   PIXMAN_OP_ ## op,                                               \
418         PIXMAN_ ## s,                                                   \
419         (SCALED_NEAREST_FLAGS           |                               \
420          FAST_PATH_NONE_REPEAT          |                               \
421          FAST_PATH_X_UNIT_POSITIVE),                                    \
422         PIXMAN_null, 0,                                                 \
423         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
424         fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,     \
425     }
426
/*
 * Brace-enclosed table entry for the 'cover' scaler: operator
 * PIXMAN_OP_<op>, format PIXMAN_<s> with SCALED_NEAREST_FLAGS plus
 * FAST_PATH_SAMPLES_COVER_CLIP, then PIXMAN_null / 0, then format
 * PIXMAN_<d> with FAST_PATH_STD_DEST_FLAGS, and finally
 * fast_composite_scaled_nearest_<func>_cover_<op>. Unlike the NORMAL, PAD
 * and NONE entries it does not require FAST_PATH_X_UNIT_POSITIVE.
 * NOTE(review): the positions are presumably source, mask and destination
 * of a fast path table struct - confirm against its declaration.
 */
427 #define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func)                     \
428     {   PIXMAN_OP_ ## op,                                               \
429         PIXMAN_ ## s,                                                   \
430         SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,            \
431         PIXMAN_null, 0,                                                 \
432         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
433         fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,    \
434     }
435
/*
 * Expands to four comma-separated table entries for one (op, s, d, func)
 * combination, in the order COVER, NONE, PAD, NORMAL. There is no trailing
 * comma after the last entry.
 */
436 /* Prefer the use of 'cover' variant, because it is faster */
437 #define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)                           \
438     SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),                       \
439     SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),                        \
440     SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func),                         \
441     SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
442
443 #endif