mmx: add add_0565_0565

[profile/ivi/pixman.git] / pixman / pixman.c
diff --git a/pixman/pixman.c b/pixman/pixman.c

index 9af6e2f..8fb5356 100644 (file)
--- a/pixman/pixman.c
+++ b/pixman/pixman.c
@@ -335,210 +335,77 @@ pixman_compute_composite_region32 (pixman_region32_t * region,
      return TRUE;
  }
  
-#define N_CACHED_FAST_PATHS 8
-
  typedef struct
  {
-    struct
-    {
-       pixman_implementation_t *       imp;
-       pixman_fast_path_t              fast_path;
-    } cache [N_CACHED_FAST_PATHS];
-} cache_t;
-
-PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache);
-
-static force_inline pixman_bool_t
-lookup_composite_function (pixman_op_t                 op,
-                          pixman_format_code_t         src_format,
-                          uint32_t                     src_flags,
-                          pixman_format_code_t         mask_format,
-                          uint32_t                     mask_flags,
-                          pixman_format_code_t         dest_format,
-                          uint32_t                     dest_flags,
-                          pixman_implementation_t    **out_imp,
-                          pixman_composite_func_t     *out_func)
-{
-    pixman_implementation_t *imp;
-    cache_t *cache;
-    int i;
-
-    /* Check cache for fast paths */
-    cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache);
-
-    for (i = 0; i < N_CACHED_FAST_PATHS; ++i)
-    {
-       const pixman_fast_path_t *info = &(cache->cache[i].fast_path);
-
-       /* Note that we check for equality here, not whether
-        * the cached fast path matches. This is to prevent
-        * us from selecting an overly general fast path
-        * when a more specific one would work.
-        */
-       if (info->op == op                      &&
-           info->src_format == src_format      &&
-           info->mask_format == mask_format    &&
-           info->dest_format == dest_format    &&
-           info->src_flags == src_flags        &&
-           info->mask_flags == mask_flags      &&
-           info->dest_flags == dest_flags      &&
-           info->func)
-       {
-           *out_imp = cache->cache[i].imp;
-           *out_func = cache->cache[i].fast_path.func;
-
-           goto update_cache;
-       }
-    }
-
-    for (imp = get_implementation (); imp != NULL; imp = imp->delegate)
-    {
-       const pixman_fast_path_t *info = imp->fast_paths;
-
-       while (info->op != PIXMAN_OP_NONE)
-       {
-           if ((info->op == op || info->op == PIXMAN_OP_any)           &&
-               /* Formats */
-               ((info->src_format == src_format) ||
-                (info->src_format == PIXMAN_any))                      &&
-               ((info->mask_format == mask_format) ||
-                (info->mask_format == PIXMAN_any))                     &&
-               ((info->dest_format == dest_format) ||
-                (info->dest_format == PIXMAN_any))                     &&
-               /* Flags */
-               (info->src_flags & src_flags) == info->src_flags        &&
-               (info->mask_flags & mask_flags) == info->mask_flags     &&
-               (info->dest_flags & dest_flags) == info->dest_flags)
-           {
-               *out_imp = imp;
-               *out_func = info->func;
-
-               /* Set i to the last spot in the cache so that the
-                * move-to-front code below will work
-                */
-               i = N_CACHED_FAST_PATHS - 1;
-
-               goto update_cache;
-           }
-
-           ++info;
-       }
-    }
-    return FALSE;
-
-update_cache:
-    if (i)
-    {
-       while (i--)
-           cache->cache[i + 1] = cache->cache[i];
-
-       cache->cache[0].imp = *out_imp;
-       cache->cache[0].fast_path.op = op;
-       cache->cache[0].fast_path.src_format = src_format;
-       cache->cache[0].fast_path.src_flags = src_flags;
-       cache->cache[0].fast_path.mask_format = mask_format;
-       cache->cache[0].fast_path.mask_flags = mask_flags;
-       cache->cache[0].fast_path.dest_format = dest_format;
-       cache->cache[0].fast_path.dest_flags = dest_flags;
-       cache->cache[0].fast_path.func = *out_func;
-    }
-
-    return TRUE;
-}
+    pixman_fixed_48_16_t       x1;
+    pixman_fixed_48_16_t       y1;
+    pixman_fixed_48_16_t       x2;
+    pixman_fixed_48_16_t       y2;
+} box_48_16_t;
  
  static pixman_bool_t
-compute_sample_extents (pixman_transform_t *transform,
-                       pixman_box32_t *extents,
-                       pixman_fixed_t x_off, pixman_fixed_t y_off,
-                       pixman_fixed_t width, pixman_fixed_t height)
+compute_transformed_extents (pixman_transform_t *transform,
+                            const pixman_box32_t *extents,
+                            box_48_16_t *transformed)
  {
-    pixman_fixed_t x1, y1, x2, y2;
      pixman_fixed_48_16_t tx1, ty1, tx2, ty2;
+    pixman_fixed_t x1, y1, x2, y2;
+    int i;
  
-    /* We have checked earlier that (extents->x1 - x) etc. fit in a pixman_fixed_t */
-    x1 = (pixman_fixed_48_16_t)pixman_int_to_fixed (extents->x1) + pixman_fixed_1 / 2;
-    y1 = (pixman_fixed_48_16_t)pixman_int_to_fixed (extents->y1) + pixman_fixed_1 / 2;
-    x2 = (pixman_fixed_48_16_t)pixman_int_to_fixed (extents->x2) - pixman_fixed_1 / 2;
-    y2 = (pixman_fixed_48_16_t)pixman_int_to_fixed (extents->y2) - pixman_fixed_1 / 2;
+    x1 = pixman_int_to_fixed (extents->x1) + pixman_fixed_1 / 2;
+    y1 = pixman_int_to_fixed (extents->y1) + pixman_fixed_1 / 2;
+    x2 = pixman_int_to_fixed (extents->x2) - pixman_fixed_1 / 2;
+    y2 = pixman_int_to_fixed (extents->y2) - pixman_fixed_1 / 2;
  
      if (!transform)
      {
-       tx1 = (pixman_fixed_48_16_t)x1;
-       ty1 = (pixman_fixed_48_16_t)y1;
-       tx2 = (pixman_fixed_48_16_t)x2;
-       ty2 = (pixman_fixed_48_16_t)y2;
-    }
-    else
-    {
-       int i;
+       transformed->x1 = x1;
+       transformed->y1 = y1;
+       transformed->x2 = x2;
+       transformed->y2 = y2;
  
-       /* Silence GCC */
-       tx1 = ty1 = tx2 = ty2 = 0;
+       return TRUE;
+    }
  
-       for (i = 0; i < 4; ++i)
-       {
-           pixman_fixed_48_16_t tx, ty;
-           pixman_vector_t v;
+    tx1 = ty1 = INT64_MAX;
+    tx2 = ty2 = INT64_MIN;
  
-           v.vector[0] = (i & 0x01)? x1 : x2;
-           v.vector[1] = (i & 0x02)? y1 : y2;
-           v.vector[2] = pixman_fixed_1;
+    for (i = 0; i < 4; ++i)
+    {
+       pixman_fixed_48_16_t tx, ty;
+       pixman_vector_t v;
  
-           if (!pixman_transform_point (transform, &v))
-               return FALSE;
+       v.vector[0] = (i & 0x01)? x1 : x2;
+       v.vector[1] = (i & 0x02)? y1 : y2;
+       v.vector[2] = pixman_fixed_1;
  
-           tx = (pixman_fixed_48_16_t)v.vector[0];
-           ty = (pixman_fixed_48_16_t)v.vector[1];
+       if (!pixman_transform_point (transform, &v))
+           return FALSE;
  
-           if (i == 0)
-           {
-               tx1 = tx;
-               ty1 = ty;
-               tx2 = tx;
-               ty2 = ty;
-           }
-           else
-           {
-               if (tx < tx1)
-                   tx1 = tx;
-               if (ty < ty1)
-                   ty1 = ty;
-               if (tx > tx2)
-                   tx2 = tx;
-               if (ty > ty2)
-                   ty2 = ty;
-           }
-       }
+       tx = (pixman_fixed_48_16_t)v.vector[0];
+       ty = (pixman_fixed_48_16_t)v.vector[1];
+
+       if (tx < tx1)
+           tx1 = tx;
+       if (ty < ty1)
+           ty1 = ty;
+       if (tx > tx2)
+           tx2 = tx;
+       if (ty > ty2)
+           ty2 = ty;
      }
  
-    /* Expand the source area by a tiny bit so account of different rounding that
-     * may happen during sampling. Note that (8 * pixman_fixed_e) is very far from
-     * 0.5 so this won't cause the area computed to be overly pessimistic.
-     */
-    tx1 += x_off - 8 * pixman_fixed_e;
-    ty1 += y_off - 8 * pixman_fixed_e;
-    tx2 += x_off + width + 8 * pixman_fixed_e;
-    ty2 += y_off + height + 8 * pixman_fixed_e;
-
-    if (tx1 < pixman_min_fixed_48_16 || tx1 > pixman_max_fixed_48_16 ||
-       ty1 < pixman_min_fixed_48_16 || ty1 > pixman_max_fixed_48_16 ||
-       tx2 < pixman_min_fixed_48_16 || tx2 > pixman_max_fixed_48_16 ||
-       ty2 < pixman_min_fixed_48_16 || ty2 > pixman_max_fixed_48_16)
-    {
-       return FALSE;
-    }
-    else
-    {
-       extents->x1 = pixman_fixed_to_int (tx1);
-       extents->y1 = pixman_fixed_to_int (ty1);
-       extents->x2 = pixman_fixed_to_int (tx2) + 1;
-       extents->y2 = pixman_fixed_to_int (ty2) + 1;
+    transformed->x1 = tx1;
+    transformed->y1 = ty1;
+    transformed->x2 = tx2;
+    transformed->y2 = ty2;
  
-       return TRUE;
-    }
+    return TRUE;
  }
  
  #define IS_16BIT(x) (((x) >= INT16_MIN) && ((x) <= INT16_MAX))
+#define ABS(f)      (((f) < 0)?  (-(f)) : (f))
+#define IS_16_16(f) (((f) >= pixman_min_fixed_48_16 && ((f) <= pixman_max_fixed_48_16)))
  
  static pixman_bool_t
  analyze_extent (pixman_image_t       *image,
@@ -546,10 +413,11 @@ analyze_extent (pixman_image_t       *image,
                 uint32_t             *flags)
  {
      pixman_transform_t *transform;
-    pixman_fixed_t *params;
      pixman_fixed_t x_off, y_off;
      pixman_fixed_t width, height;
-    pixman_box32_t ex;
+    pixman_fixed_t *params;
+    box_48_16_t transformed;
+    pixman_box32_t exp_extents;
  
      if (!image)
         return TRUE;
@@ -577,15 +445,13 @@ analyze_extent (pixman_image_t       *image,
         if (image->bits.width >= 0x7fff || image->bits.height >= 0x7fff)
             return FALSE;
  
-#define ID_AND_NEAREST (FAST_PATH_ID_TRANSFORM | FAST_PATH_NEAREST_FILTER)
-
-       if ((image->common.flags & ID_AND_NEAREST) == ID_AND_NEAREST &&
+       if ((image->common.flags & FAST_PATH_ID_TRANSFORM) == FAST_PATH_ID_TRANSFORM &&
             extents->x1 >= 0 &&
             extents->y1 >= 0 &&
             extents->x2 <= image->bits.width &&
             extents->y2 <= image->bits.height)
         {
-           *flags |= FAST_PATH_SAMPLES_COVER_CLIP;
+           *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
             return TRUE;
         }
  
@@ -619,17 +485,6 @@ analyze_extent (pixman_image_t       *image,
         default:
             return FALSE;
         }
-
-       /* Check whether the non-expanded, transformed extent is entirely within
-        * the source image, and set the FAST_PATH_SAMPLES_COVER_CLIP if it is.
-        */
-       ex = *extents;
-       if (compute_sample_extents (transform, &ex, x_off, y_off, width, height) &&
-           ex.x1 >= 0 && ex.y1 >= 0 &&
-           ex.x2 <= image->bits.width && ex.y2 <= image->bits.height)
-       {
-           *flags |= FAST_PATH_SAMPLES_COVER_CLIP;
-       }
      }
      else
      {
@@ -639,17 +494,57 @@ analyze_extent (pixman_image_t       *image,
         height = 0;
      }
  
-    /* Check that the extents expanded by one don't overflow. This ensures that
-     * compositing functions can simply walk the source space using 16.16
-     * variables without worrying about overflow.
+    if (!compute_transformed_extents (transform, extents, &transformed))
+       return FALSE;
+
+    /* Expand the source area by a tiny bit to account for different rounding that
+     * may happen during sampling. Note that (8 * pixman_fixed_e) is very far from
+     * 0.5 so this won't cause the area computed to be overly pessimistic.
+     */
+    transformed.x1 -= 8 * pixman_fixed_e;
+    transformed.y1 -= 8 * pixman_fixed_e;
+    transformed.x2 += 8 * pixman_fixed_e;
+    transformed.y2 += 8 * pixman_fixed_e;
+
+    if (image->common.type == BITS)
+    {
+       if (pixman_fixed_to_int (transformed.x1) >= 0                   &&
+           pixman_fixed_to_int (transformed.y1) >= 0                   &&
+           pixman_fixed_to_int (transformed.x2) < image->bits.width    &&
+           pixman_fixed_to_int (transformed.y2) < image->bits.height)
+       {
+           *flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
+       }
+
+       if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_1 / 2) >= 0                &&
+           pixman_fixed_to_int (transformed.y1 - pixman_fixed_1 / 2) >= 0                &&
+           pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2) < image->bits.width &&
+           pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2) < image->bits.height)
+       {
+           *flags |= FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR;
+       }
+    }
+
+    /* Check we don't overflow when the destination extents are expanded by one.
+     * This ensures that compositing functions can simply walk the source space
+     * using 16.16 variables without worrying about overflow.
       */
-    ex.x1 = extents->x1 - 1;
-    ex.y1 = extents->y1 - 1;
-    ex.x2 = extents->x2 + 1;
-    ex.y2 = extents->y2 + 1;
+    exp_extents = *extents;
+    exp_extents.x1 -= 1;
+    exp_extents.y1 -= 1;
+    exp_extents.x2 += 1;
+    exp_extents.y2 += 1;
  
-    if (!compute_sample_extents (transform, &ex, x_off, y_off, width, height))
+    if (!compute_transformed_extents (transform, &exp_extents, &transformed))
+       return FALSE;
+    
+    if (!IS_16_16 (transformed.x1 + x_off - 8 * pixman_fixed_e)        ||
+       !IS_16_16 (transformed.y1 + y_off - 8 * pixman_fixed_e) ||
+       !IS_16_16 (transformed.x2 + x_off + 8 * pixman_fixed_e + width) ||
+       !IS_16_16 (transformed.y2 + y_off + 8 * pixman_fixed_e + height))
+    {
         return FALSE;
+    }
  
      return TRUE;
  }
@@ -755,16 +650,27 @@ pixman_image_composite32 (pixman_op_t      op,
      if (!analyze_extent (mask, &extents, &mask_flags))
         goto out;
  
-    /* If the clip is within the source samples, and the samples are opaque,
-     * then the source is effectively opaque.
+    /* If the clip is within the source samples, and the samples are
+     * opaque, then the source is effectively opaque.
       */
-#define BOTH (FAST_PATH_SAMPLES_OPAQUE | FAST_PATH_SAMPLES_COVER_CLIP)
-
-    if ((src_flags & BOTH) == BOTH)
+#define NEAREST_OPAQUE (FAST_PATH_SAMPLES_OPAQUE |                     \
+                        FAST_PATH_NEAREST_FILTER |                     \
+                        FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
+#define BILINEAR_OPAQUE        (FAST_PATH_SAMPLES_OPAQUE |                     \
+                        FAST_PATH_BILINEAR_FILTER |                    \
+                        FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR)
+
+    if ((src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
+       (src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
+    {
         src_flags |= FAST_PATH_IS_OPAQUE;
+    }
  
-    if ((mask_flags & BOTH) == BOTH)
+    if ((mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
+       (mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
+    {
         mask_flags |= FAST_PATH_IS_OPAQUE;
+    }
  
      /*
       * Check if we can replace our operator by a simpler one
@@ -773,11 +679,10 @@ pixman_image_composite32 (pixman_op_t      op,
       */
      op = optimize_operator (op, src_flags, mask_flags, dest_flags);
  
-    if (lookup_composite_function (op,
-                                  src_format, src_flags,
-                                  mask_format, mask_flags,
-                                  dest_format, dest_flags,
-                                  &imp, &func))
+    if (_pixman_lookup_composite_function (
+           get_implementation (), op,
+           src_format, src_flags, mask_format, mask_flags, dest_format, dest_flags,
+           &imp, &func))
      {
         pixman_composite_info_t info;
         const pixman_box32_t *pbox;
@@ -787,6 +692,9 @@ pixman_image_composite32 (pixman_op_t      op,
         info.src_image = src;
         info.mask_image = mask;
         info.dest_image = dest;
+       info.src_flags = src_flags;
+       info.mask_flags = mask_flags;
+       info.dest_flags = dest_flags;
  
         pbox = pixman_region32_rectangles (&region, &n);