Specialize the fast_composite_scaled_nearest_* scalers to positive x units
author: Søren Sandmann Pedersen <ssp@redhat.com>
Wed, 17 Mar 2010 14:50:42 +0000 (10:50 -0400)
committer: Søren Sandmann Pedersen <ssp@redhat.com>
Wed, 17 Mar 2010 15:14:20 +0000 (11:14 -0400)
This avoids a test in the inner loop, which improves performance,
especially for tiled sources.

On x86-32, I get these results:

Before:
op=1, src_fmt=20028888, dst_fmt=20028888, speed=306.96 MPix/s (73.18 FPS)
op=1, src_fmt=20028888, dst_fmt=10020565, speed=102.67 MPix/s (24.48 FPS)
op=1, src_fmt=10020565, dst_fmt=10020565, speed=324.85 MPix/s (77.45 FPS)

After:
op=1, src_fmt=20028888, dst_fmt=20028888, speed=332.19 MPix/s (79.20 FPS)
op=1, src_fmt=20028888, dst_fmt=10020565, speed=110.41 MPix/s (26.32 FPS)
op=1, src_fmt=10020565, dst_fmt=10020565, speed=363.28 MPix/s (86.61 FPS)

pixman/pixman-fast-path.c

index 5b8ff5c..bf5b298 100644 (file)
@@ -1485,13 +1485,21 @@ fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementat
            x1 = vx >> 16;                                                                      \
            vx += unit_x;                                                                       \
            if (do_repeat)                                                                      \
-               repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx);                                     \
+           {                                                                                   \
+               /* This works because we know that unit_x is positive */                        \
+               while (vx >= max_vx)                                                            \
+                   vx -= max_vx;                                                               \
+           }                                                                                   \
            s1 = src[x1];                                                                       \
                                                                                                \
            x2 = vx >> 16;                                                                      \
            vx += unit_x;                                                                       \
            if (do_repeat)                                                                      \
-               repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx);                                     \
+           {                                                                                   \
+               /* This works because we know that unit_x is positive */                        \
+               while (vx >= max_vx)                                                            \
+                   vx -= max_vx;                                                               \
+           }                                                                                   \
            s2 = src[x2];                                                                       \
                                                                                                \
            if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)                                             \
@@ -1537,7 +1545,11 @@ fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementat
            x1 = vx >> 16;                                                                      \
            vx += unit_x;                                                                       \
            if (do_repeat)                                                                      \
-               repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx);                                     \
+           {                                                                                   \
+               /* This works because we know that unit_x is positive */                        \
+               while (vx >= max_vx)                                                            \
+                   vx -= max_vx;                                                               \
+           }                                                                                   \
            s1 = src[x1];                                                                       \
                                                                                                \
            if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)                                             \
@@ -1806,7 +1818,7 @@ static const pixman_fast_path_t c_fast_paths[] =
 #define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)                          \
     {   PIXMAN_OP_ ## op,                                              \
        PIXMAN_ ## s,                                                   \
-       SCALED_NEAREST_FLAGS | HAS_NORMAL_REPEAT_FLAGS | FAST_PATH_16BIT_SAFE \
+       SCALED_NEAREST_FLAGS | HAS_NORMAL_REPEAT_FLAGS | FAST_PATH_16BIT_SAFE | FAST_PATH_X_UNIT_POSITIVE, \
        PIXMAN_null, 0,                                                 \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
        fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,   \