Nearest scaling fast path macro split into two parts
authorSiarhei Siamashka <siarhei.siamashka@nokia.com>
Thu, 16 Sep 2010 10:02:18 +0000 (13:02 +0300)
committerSiarhei Siamashka <siarhei.siamashka@nokia.com>
Tue, 21 Sep 2010 10:29:55 +0000 (13:29 +0300)
Scanline processing is now split into a separate function. This provides
an easy way of overriding it with a platform specific implementation,
which may use SIMD optimizations. Only basic C data types are used as
the arguments for this function, so it may be implemented entirely in
assembly or be generated by some JIT engine.

Also as a result of this split, the complexity of code is reduced a
bit and now it should be easier to introduce support for the currently
missing NONE, PAD and REFLECT repeat types.

pixman/pixman-fast-path.h

index f289288..7babd66 100644 (file)
@@ -76,98 +76,24 @@ repeat (pixman_repeat_t repeat, int *c, int size)
     565 source, but it is needed to build. */
 #define GET_0565_ALPHA(s) 0xff
 
-#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,                                  \
-                    src_type_t, dst_type_t, OP, repeat_mode)                                   \
-static void                                                                                    \
-fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementation_t *imp,    \
-                                                             pixman_op_t              op,      \
-                                                             pixman_image_t *         src_image, \
-                                                             pixman_image_t *         mask_image, \
-                                                             pixman_image_t *         dst_image, \
-                                                             int32_t                  src_x,   \
-                                                             int32_t                  src_y,   \
-                                                             int32_t                  mask_x,  \
-                                                             int32_t                  mask_y,  \
-                                                             int32_t                  dst_x,   \
-                                                             int32_t                  dst_y,   \
-                                                             int32_t                  width,   \
-                                                             int32_t                  height)  \
+#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT,                      \
+                             src_type_t, dst_type_t, OP, repeat_mode)                          \
+static force_inline void                                                                       \
+scanline_func_name (dst_type_t     *dst,                                                       \
+                   src_type_t     *src,                                                        \
+                   int32_t         w,                                                          \
+                   pixman_fixed_t  vx,                                                         \
+                   pixman_fixed_t  unit_x,                                                     \
+                   pixman_fixed_t  max_vx)                                                     \
 {                                                                                              \
-    dst_type_t *dst_line;                                                                      \
-    src_type_t *src_first_line;                                                                        \
-    uint32_t   d;                                                                              \
-    src_type_t s1, s2;                                                                         \
-    uint8_t   a1, a2;                                                                          \
-    int       w;                                                                               \
-    int       x1, x2, y;                                                                       \
-    pixman_fixed_t orig_vx;                                                                    \
-    pixman_fixed_t max_vx, max_vy;                                                             \
-    pixman_vector_t v;                                                                         \
-    pixman_fixed_t vx, vy;                                                                     \
-    pixman_fixed_t unit_x, unit_y;                                                             \
-                                                                                               \
-    src_type_t *src;                                                                           \
-    dst_type_t *dst;                                                                           \
-    int       src_stride, dst_stride;                                                          \
-                                                                                               \
-    if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER)               \
-       abort();                                                                                \
-                                                                                               \
-    if (PIXMAN_REPEAT_ ## repeat_mode != PIXMAN_REPEAT_NORMAL          &&                      \
-       PIXMAN_REPEAT_ ## repeat_mode != PIXMAN_REPEAT_NONE)                                    \
-    {                                                                                          \
-       abort();                                                                                \
-    }                                                                                          \
-                                                                                               \
-    PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1);      \
-    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be                 \
-     * transformed from destination space to source space */                                   \
-    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);                \
-                                                                                               \
-    /* reference point is the center of the pixel */                                           \
-    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;                            \
-    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;                            \
-    v.vector[2] = pixman_fixed_1;                                                              \
-                                                                                               \
-    if (!pixman_transform_point_3d (src_image->common.transform, &v))                          \
-       return;                                                                                 \
-                                                                                               \
-    unit_x = src_image->common.transform->matrix[0][0];                                                \
-    unit_y = src_image->common.transform->matrix[1][1];                                                \
+       uint32_t   d;                                                                           \
+       src_type_t s1, s2;                                                                      \
+       uint8_t    a1, a2;                                                                      \
+       int        x1, x2;                                                                      \
                                                                                                \
-    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */                  \
-    v.vector[0] -= pixman_fixed_e;                                                             \
-    v.vector[1] -= pixman_fixed_e;                                                             \
-                                                                                               \
-    vx = v.vector[0];                                                                          \
-    vy = v.vector[1];                                                                          \
-                                                                                               \
-    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                                 \
-    {                                                                                          \
-       /* Clamp repeating positions inside the actual samples */                               \
-       max_vx = src_image->bits.width << 16;                                                   \
-       max_vy = src_image->bits.height << 16;                                                  \
-                                                                                               \
-       repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx);                                             \
-       repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                             \
-    }                                                                                          \
-                                                                                               \
-    orig_vx = vx;                                                                              \
+       if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER)            \
+           abort();                                                                            \
                                                                                                \
-    while (--height >= 0)                                                                      \
-    {                                                                                          \
-       dst = dst_line;                                                                         \
-       dst_line += dst_stride;                                                                 \
-                                                                                               \
-       y = vy >> 16;                                                                           \
-       vy += unit_y;                                                                           \
-       if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                              \
-           repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                         \
-                                                                                               \
-       src = src_first_line + src_stride * y;                                                  \
-                                                                                               \
-       w = width;                                                                              \
-       vx = orig_vx;                                                                           \
        while ((w -= 2) >= 0)                                                                   \
        {                                                                                       \
            x1 = vx >> 16;                                                                      \
@@ -258,9 +184,103 @@ fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementat
                *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);                     \
            }                                                                                   \
        }                                                                                       \
+}
+
+#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t,          \
+                             repeat_mode)                                                      \
+static void                                                                                    \
+fast_composite_scaled_nearest_ ## scale_func_name (pixman_implementation_t *imp,               \
+                                                  pixman_op_t              op,                 \
+                                                  pixman_image_t *         src_image,          \
+                                                  pixman_image_t *         mask_image,         \
+                                                  pixman_image_t *         dst_image,          \
+                                                  int32_t                  src_x,              \
+                                                  int32_t                  src_y,              \
+                                                  int32_t                  mask_x,             \
+                                                  int32_t                  mask_y,             \
+                                                  int32_t                  dst_x,              \
+                                                  int32_t                  dst_y,              \
+                                                  int32_t                  width,              \
+                                                  int32_t                  height)             \
+{                                                                                              \
+    dst_type_t *dst_line;                                                                      \
+    src_type_t *src_first_line;                                                                        \
+    int       y;                                                                               \
+    pixman_fixed_t max_vx = max_vx; /* suppress uninitialized variable warning */              \
+    pixman_fixed_t max_vy;                                                                     \
+    pixman_vector_t v;                                                                         \
+    pixman_fixed_t vx, vy;                                                                     \
+    pixman_fixed_t unit_x, unit_y;                                                             \
+                                                                                               \
+    src_type_t *src;                                                                           \
+    dst_type_t *dst;                                                                           \
+    int       src_stride, dst_stride;                                                          \
+                                                                                               \
+    if (PIXMAN_REPEAT_ ## repeat_mode != PIXMAN_REPEAT_NORMAL          &&                      \
+       PIXMAN_REPEAT_ ## repeat_mode != PIXMAN_REPEAT_NONE)                                    \
+    {                                                                                          \
+       abort();                                                                                \
+    }                                                                                          \
+                                                                                               \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1);      \
+    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be                 \
+     * transformed from destination space to source space */                                   \
+    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);                \
+                                                                                               \
+    /* reference point is the center of the pixel */                                           \
+    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;                            \
+    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;                            \
+    v.vector[2] = pixman_fixed_1;                                                              \
+                                                                                               \
+    if (!pixman_transform_point_3d (src_image->common.transform, &v))                          \
+       return;                                                                                 \
+                                                                                               \
+    unit_x = src_image->common.transform->matrix[0][0];                                                \
+    unit_y = src_image->common.transform->matrix[1][1];                                                \
+                                                                                               \
+    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */                  \
+    v.vector[0] -= pixman_fixed_e;                                                             \
+    v.vector[1] -= pixman_fixed_e;                                                             \
+                                                                                               \
+    vx = v.vector[0];                                                                          \
+    vy = v.vector[1];                                                                          \
+                                                                                               \
+    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                                 \
+    {                                                                                          \
+       /* Clamp repeating positions inside the actual samples */                               \
+       max_vx = src_image->bits.width << 16;                                                   \
+       max_vy = src_image->bits.height << 16;                                                  \
+                                                                                               \
+       repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx);                                             \
+       repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                             \
+    }                                                                                          \
+                                                                                               \
+    while (--height >= 0)                                                                      \
+    {                                                                                          \
+       dst = dst_line;                                                                         \
+       dst_line += dst_stride;                                                                 \
+                                                                                               \
+       y = vy >> 16;                                                                           \
+       vy += unit_y;                                                                           \
+       if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                              \
+           repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                         \
+                                                                                               \
+       src = src_first_line + src_stride * y;                                                  \
+                                                                                               \
+       scanline_func (dst, src, width, vx, unit_x, max_vx);                                    \
     }                                                                                          \
 }
 
+#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,                          \
+                    src_type_t, dst_type_t, OP, repeat_mode)                           \
+    FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,      \
+                         SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t,               \
+                         OP, repeat_mode)                                              \
+    FAST_NEAREST_MAINLOOP(scale_func_name##_##OP,                                      \
+                         scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,       \
+                         src_type_t, dst_type_t, repeat_mode)
+
+
 #define SCALED_NEAREST_FLAGS                                           \
     (FAST_PATH_SCALE_TRANSFORM |                                       \
      FAST_PATH_NO_ALPHA_MAP    |                                       \
@@ -268,7 +288,7 @@ fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementat
      FAST_PATH_NO_ACCESSORS    |                                       \
      FAST_PATH_NO_WIDE_FORMAT)
 
-#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)                          \
+#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func)                   \
     {   PIXMAN_OP_ ## op,                                              \
        PIXMAN_ ## s,                                                   \
        (SCALED_NEAREST_FLAGS           |                               \
@@ -277,7 +297,9 @@ fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementat
        PIXMAN_null, 0,                                                 \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,   \
-    },                                                                 \
+    }
+
+#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func)                    \
     {   PIXMAN_OP_ ## op,                                              \
        PIXMAN_ ## s,                                                   \
        SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,            \
@@ -286,4 +308,9 @@ fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementat
        fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,     \
     }
 
+/* Prefer the use of 'cover' variant, because it is faster */
+#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)                          \
+    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),                      \
+    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
+
 #endif