Use the implementation's combiner's
[profile/ivi/pixman.git] / pixman / pixman-general.c
1 /*
2  * Copyright © 2009 Red Hat, Inc.
3  * Copyright © 2000 SuSE, Inc.
4  * Copyright © 2007 Red Hat, Inc.
5  * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
6  *             2005 Lars Knoll & Zack Rusin, Trolltech
7  *             2008 Aaron Plattner, NVIDIA Corporation
8  *
9  * Permission to use, copy, modify, distribute, and sell this software and its
10  * documentation for any purpose is hereby granted without fee, provided that
11  * the above copyright notice appear in all copies and that both that
12  * copyright notice and this permission notice appear in supporting
13  * documentation, and that the name of Red Hat not be used in advertising or
14  * publicity pertaining to distribution of the software without specific,
15  * written prior permission.  Red Hat makes no representations about the
16  * suitability of this software for any purpose.  It is provided "as is"
17  * without express or implied warranty.
18  *
19  * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
20  * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
21  * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
22  * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
23  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
24  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
25  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
26  * SOFTWARE.
27  */
28 #include <config.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32 #include <assert.h>
33 #include <limits.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include "pixman-private.h"
38 #include "pixman-mmx.h"
39 #include "pixman-vmx.h"
40 #include "pixman-sse2.h"
41 #include "pixman-arm-simd.h"
42 #include "pixman-combine32.h"
43 #include "pixman-private.h"
44
45
46 static void
47 general_combine_32 (pixman_implementation_t *imp, pixman_op_t op,
48                     uint32_t *dest, const uint32_t *src, const uint32_t *mask,
49                     int width)
50 {
51     CombineFunc32 f = pixman_composeFunctions.combineU[op];
52
53     f (dest, src, mask, width);
54 }
55
56 static void
57 general_combine_32_ca (pixman_implementation_t *imp, pixman_op_t op,
58                        uint32_t *dest, const uint32_t *src, const uint32_t *mask,
59                        int width)
60 {
61     CombineFunc32 f = pixman_composeFunctions.combineC[op];
62
63     f (dest, src, mask, width);
64 }
65
66 static void
67 general_combine_64 (pixman_implementation_t *imp, pixman_op_t op,
68                     uint64_t *dest, const uint64_t *src, const uint64_t *mask,
69                     int width)
70 {
71     CombineFunc64 f = pixman_composeFunctions64.combineU[op];
72
73     f (dest, src, mask, width);
74 }
75
76 static void
77 general_combine_64_ca (pixman_implementation_t *imp, pixman_op_t op,
78                        uint64_t *dest, const uint64_t *src, const uint64_t *mask,
79                        int width)
80 {
81     CombineFunc64 f = pixman_composeFunctions64.combineC[op];
82
83     f (dest, src, mask, width);
84 }
85
86 static void
87 pixman_composite_rect_general_internal (pixman_implementation_t *imp,
88                                         const FbComposeData *data,
89                                         void *src_buffer, void *mask_buffer, 
90                                         void *dest_buffer, const int wide)
91 {
92     int i;
93     scanStoreProc store;
94     scanFetchProc fetchSrc = NULL, fetchMask = NULL, fetchDest = NULL;
95     uint32_t *bits;
96     int32_t stride;
97     source_pict_class_t srcClass, maskClass;
98     pixman_bool_t component_alpha;
99
100     srcClass = _pixman_image_classify (data->src,
101                                        data->xSrc, data->ySrc,
102                                        data->width, data->height);
103
104     maskClass = SOURCE_IMAGE_CLASS_UNKNOWN;
105     if (data->mask)
106     {
107         maskClass = _pixman_image_classify (data->mask,
108                                             data->xSrc, data->ySrc,
109                                             data->width, data->height);
110     }
111     
112     if (data->op == PIXMAN_OP_CLEAR)
113         fetchSrc = NULL;
114     else if (wide)
115         fetchSrc = _pixman_image_get_scanline_64;
116     else
117         fetchSrc = _pixman_image_get_scanline_32;
118
119     if (!data->mask || data->op == PIXMAN_OP_CLEAR)
120         fetchMask = NULL;
121     else if (wide)
122         fetchMask = _pixman_image_get_scanline_64;
123     else
124         fetchMask = _pixman_image_get_scanline_32;
125
126     if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
127         fetchDest = NULL;
128     else if (wide)
129         fetchDest = _pixman_image_get_scanline_64;
130     else
131         fetchDest = _pixman_image_get_scanline_32;
132
133     if (wide)
134         store = _pixman_image_store_scanline_64;
135     else
136         store = _pixman_image_store_scanline_32;
137
138     // Skip the store step and composite directly into the
139     // destination if the output format of the compose func matches
140     // the destination format.
141     if (!wide &&
142         !data->dest->common.alpha_map &&
143         !data->dest->common.write_func && 
144         (data->op == PIXMAN_OP_ADD || data->op == PIXMAN_OP_OVER) &&
145         (data->dest->bits.format == PIXMAN_a8r8g8b8 ||
146          data->dest->bits.format == PIXMAN_x8r8g8b8))
147     {
148         store = NULL;
149     }
150
151     if (!store)
152     {
153         bits = data->dest->bits.bits;
154         stride = data->dest->bits.rowstride;
155     }
156     else
157     {
158         bits = NULL;
159         stride = 0;
160     }
161
162     component_alpha =
163         fetchSrc                   &&
164         fetchMask                  &&
165         data->mask                 &&
166         data->mask->common.type == BITS &&
167         data->mask->common.component_alpha &&
168         PIXMAN_FORMAT_RGB (data->mask->bits.format);
169
170     {
171         pixman_combine_32_func_t compose;
172
173         if (wide)
174         {
175             if (component_alpha)
176                 compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64_ca;
177             else
178                 compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64;
179         }
180         else
181         {
182             if (component_alpha)
183                 compose = _pixman_implementation_combine_32_ca;
184             else
185                 compose = _pixman_implementation_combine_32;
186         }
187
188         if (!compose)
189             return;
190
191         if (!fetchMask)
192             mask_buffer = NULL;
193         
194         for (i = 0; i < data->height; ++i)
195         {
196             /* fill first half of scanline with source */
197             if (fetchSrc)
198             {
199                 if (fetchMask)
200                 {
201                     /* fetch mask before source so that fetching of
202                        source can be optimized */
203                     fetchMask (data->mask, data->xMask, data->yMask + i,
204                                data->width, mask_buffer, 0, 0);
205
206                     if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
207                         fetchMask = NULL;
208                 }
209
210                 if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
211                 {
212                     fetchSrc (data->src, data->xSrc, data->ySrc + i,
213                               data->width, src_buffer, 0, 0);
214                     fetchSrc = NULL;
215                 }
216                 else
217                 {
218                     fetchSrc (data->src, data->xSrc, data->ySrc + i,
219                               data->width, src_buffer, mask_buffer,
220                               0xffffffff);
221                 }
222             }
223             else if (fetchMask)
224             {
225                 fetchMask (data->mask, data->xMask, data->yMask + i,
226                            data->width, mask_buffer, 0, 0);
227             }
228
229             if (store)
230             {
231                 /* fill dest into second half of scanline */
232                 if (fetchDest)
233                     fetchDest (data->dest, data->xDest, data->yDest + i,
234                                data->width, dest_buffer, 0, 0);
235
236                 /* blend */
237                 compose (imp, data->op, dest_buffer, src_buffer, mask_buffer, data->width);
238
239                 /* write back */
240                 store (&(data->dest->bits), data->xDest, data->yDest + i, data->width,
241                        dest_buffer);
242             }
243             else
244             {
245                 /* blend */
246                 compose (imp, data->op, bits + (data->yDest + i) * stride +
247                          data->xDest,
248                          src_buffer, mask_buffer, data->width);
249             }
250         }
251     }
252 }
253
254 #define SCANLINE_BUFFER_LENGTH 8192
255
256 static void
257 general_composite_rect (pixman_implementation_t *imp,
258                         const FbComposeData *data)
259 {
260     uint8_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH * 3];
261     const pixman_format_code_t srcFormat =
262         data->src->type == BITS ? data->src->bits.format : 0;
263     const pixman_format_code_t maskFormat =
264         data->mask && data->mask->type == BITS ? data->mask->bits.format : 0;
265     const pixman_format_code_t destFormat = data->dest->type == BITS ? data->dest->bits.format : 0;
266     const int srcWide = PIXMAN_FORMAT_16BPC(srcFormat);
267     const int maskWide = data->mask && PIXMAN_FORMAT_16BPC(maskFormat);
268     const int destWide = PIXMAN_FORMAT_16BPC(destFormat);
269     const int wide = srcWide || maskWide || destWide;
270     const int Bpp = wide ? 8 : 4;
271     uint8_t *scanline_buffer = stack_scanline_buffer;
272     uint8_t *src_buffer, *mask_buffer, *dest_buffer;
273     
274     if (data->width * Bpp > SCANLINE_BUFFER_LENGTH)
275     {
276         scanline_buffer = pixman_malloc_abc (data->width, 3, Bpp);
277
278         if (!scanline_buffer)
279             return;
280     }
281
282     src_buffer = scanline_buffer;
283     mask_buffer = src_buffer + data->width * Bpp;
284     dest_buffer = mask_buffer + data->width * Bpp;
285
286     pixman_composite_rect_general_internal (imp, data, src_buffer,
287                                             mask_buffer, dest_buffer,
288                                             wide);
289
290     if (scanline_buffer != stack_scanline_buffer)
291         free (scanline_buffer);
292 }
293
294 static void
295 pixman_image_composite_rect  (pixman_implementation_t *imp,
296                               pixman_op_t                   op,
297                               pixman_image_t               *src,
298                               pixman_image_t               *mask,
299                               pixman_image_t               *dest,
300                               int32_t                       src_x,
301                               int32_t                       src_y,
302                               int32_t                       mask_x,
303                               int32_t                       mask_y,
304                               int32_t                       dest_x,
305                               int32_t                       dest_y,
306                               int32_t                      width,
307                               int32_t                      height)
308 {
309     FbComposeData compose_data;
310
311     return_if_fail (src != NULL);
312     return_if_fail (dest != NULL);
313
314     compose_data.op = op;
315     compose_data.src = src;
316     compose_data.mask = mask;
317     compose_data.dest = dest;
318     compose_data.xSrc = src_x;
319     compose_data.ySrc = src_y;
320     compose_data.xMask = mask_x;
321     compose_data.yMask = mask_y;
322     compose_data.xDest = dest_x;
323     compose_data.yDest = dest_y;
324     compose_data.width = width;
325     compose_data.height = height;
326
327     general_composite_rect (imp, &compose_data);
328 }
329
330 #if defined(USE_SSE2) && defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
331
332 /*
333  * Work around GCC bug causing crashes in Mozilla with SSE2
334  * 
335  * When using SSE2 intrinsics, gcc assumes that the stack is 16 byte
336  * aligned. Unfortunately some code, such as Mozilla and Mono contain
337  * code that aligns the stack to 4 bytes.
338  *
339  * The __force_align_arg_pointer__ makes gcc generate a prologue that
340  * realigns the stack pointer to 16 bytes.
341  *
342  * On x86-64 this is not necessary because the standard ABI already
343  * calls for a 16 byte aligned stack.
344  *
345  * See https://bugs.freedesktop.org/show_bug.cgi?id=15693
346  */
347
348 __attribute__((__force_align_arg_pointer__))
349 #endif
350 static void
351 general_composite (pixman_implementation_t *    imp,
352                    pixman_op_t                  op,
353                    pixman_image_t *             src,
354                    pixman_image_t *             mask,
355                    pixman_image_t *             dest,
356                    int32_t                      src_x,
357                    int32_t                      src_y,
358                    int32_t                      mask_x,
359                    int32_t                      mask_y,
360                    int32_t                      dest_x,
361                    int32_t                      dest_y,
362                    int32_t                      width,
363                    int32_t                      height)
364 {
365     pixman_bool_t srcRepeat = src->type == BITS && src->common.repeat == PIXMAN_REPEAT_NORMAL;
366     pixman_bool_t maskRepeat = FALSE;
367     pixman_bool_t srcTransform = src->common.transform != NULL;
368     pixman_bool_t maskTransform = FALSE;
369
370 #ifdef USE_VMX
371     fbComposeSetupVMX();
372 #endif
373
374     if (srcRepeat && srcTransform &&
375         src->bits.width == 1 &&
376         src->bits.height == 1)
377     {
378         srcTransform = FALSE;
379     }
380
381     if (mask && mask->type == BITS)
382     {
383         maskRepeat = mask->common.repeat == PIXMAN_REPEAT_NORMAL;
384
385         maskTransform = mask->common.transform != 0;
386         if (mask->common.filter == PIXMAN_FILTER_CONVOLUTION)
387             maskTransform = TRUE;
388
389         if (maskRepeat && maskTransform &&
390             mask->bits.width == 1 &&
391             mask->bits.height == 1)
392         {
393             maskTransform = FALSE;
394         }
395     }
396     
397 #ifdef USE_VMX
398     if (_pixman_run_fast_path (vmx_fast_paths, imp,
399                                op, src, mask, dest,
400                                src_x, src_y,
401                                mask_x, mask_y,
402                                dest_x, dest_y,
403                                width, height))
404         return;
405 #endif
406
407 #ifdef USE_ARM_NEON
408     if (pixman_have_arm_neon() && _pixman_run_fast_path (arm_neon_fast_paths, imp,
409                                                          op, src, mask, dest,
410                                                          src_x, src_y,
411                                                          mask_x, mask_y,
412                                                          dest_x, dest_y,
413                                                          width, height))
414         return;
415 #endif
416
417 #ifdef USE_ARM_SIMD
418     if (pixman_have_arm_simd() && _pixman_run_fast_path (arm_simd_fast_paths, imp,
419                                                          op, src, mask, dest,
420                                                          src_x, src_y,
421                                                          mask_x, mask_y,
422                                                          dest_x, dest_y,
423                                                          width, height))
424         return;
425 #endif
426
427     if (pixman_have_arm_simd() && _pixman_run_fast_path (c_fast_paths, imp,
428                                                          op, src, mask, dest,
429                                                          src_x, src_y,
430                                                          mask_x, mask_y,
431                                                          dest_x, dest_y,
432                                                          width, height))
433         return;
434     
435     /* CompositeGeneral optimizes 1x1 repeating images itself */
436     if (src->type == BITS &&
437         src->bits.width == 1 && src->bits.height == 1)
438     {
439         srcRepeat = FALSE;
440     }
441     
442     if (mask && mask->type == BITS &&
443         mask->bits.width == 1 && mask->bits.height == 1)
444     {
445         maskRepeat = FALSE;
446     }
447     
448     /* if we are transforming, repeats are handled in fbFetchTransformed */
449     if (srcTransform)
450         srcRepeat = FALSE;
451     
452     if (maskTransform)
453         maskRepeat = FALSE;
454
455     _pixman_walk_composite_region (imp, op, src, mask, dest, src_x, src_y,
456                                    mask_x, mask_y, dest_x, dest_y, width, height,
457                                    srcRepeat, maskRepeat, pixman_image_composite_rect);
458 }
459
460 pixman_implementation_t *
461 _pixman_implementation_create_general (pixman_implementation_t *toplevel)
462 {
463     pixman_implementation_t *imp = _pixman_implementation_create (toplevel, NULL);
464     int i;
465
466     imp->composite = general_composite;
467     
468     for (i = 0; i < PIXMAN_OP_LAST; ++i)
469     {
470         imp->combine_32[i] = general_combine_32;
471         imp->combine_32_ca[i] = general_combine_32_ca;
472         imp->combine_64[i] = general_combine_64;
473         imp->combine_64_ca[i] = general_combine_64_ca;
474     }
475
476     return imp;
477 }