mmx: Add nearest over_8888_8888
author Matt Turner <mattst88@gmail.com>
Wed, 2 Jan 2013 19:16:12 +0000 (11:16 -0800)
committer Matt Turner <mattst88@gmail.com>
Fri, 5 Sep 2014 07:22:07 +0000 (00:22 -0700)
lowlevel-blt-bench -n, over_8888_8888, 15 iterations on Loongson 2f:

           Before          After
          Mean StdDev     Mean StdDev   Change
    L1    15.8   0.02     24.0   0.06   +52.0%
    L2    14.8   0.15     23.3   0.13   +56.9%
    M     10.3   0.01     13.8   0.03   +33.6%
    HT    10.0   0.02     14.5   0.05   +44.7%
    VT     9.7   0.02     13.5   0.04   +39.2%
    R      9.1   0.01     12.2   0.04   +34.4%
    RT     7.1   0.06      8.9   0.09   +25.2%

pixman/pixman-mmx.c

index 63f4cdf82f2bf12400d0fa311c79142c9b0f51ab..c7fd5035823381604d423222b2a9b389912e1736 100644 (file)
@@ -3555,6 +3555,46 @@ mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
     _mm_empty ();
 }
 
+/*
+ * Nearest-neighbour scaled OVER of one scanline: composites 32-bit source
+ * pixels onto 32-bit destination pixels, one destination pixel at a time.
+ *
+ * pd:       destination scanline; w pixels are read and rewritten in place.
+ * ps:       source scanline base, indexed by the integer part of vx.
+ * w:        number of destination pixels to process.
+ * vx:       fixed-point source x coordinate used for the first pixel.
+ * unit_x:   fixed-point step added to vx after every pixel.
+ * src_width_fixed:
+ *           amount subtracted from vx for as long as vx is non-negative.
+ *           NOTE(review): presumably the source width in fixed point, with
+ *           the caller biasing ps and vx so that vx stays negative --
+ *           confirm against the FAST_NEAREST_MAINLOOP definition.
+ * fully_transparent_src:
+ *           when set, the function returns at once and the destination is
+ *           left untouched.
+ */
+static force_inline void
+scaled_nearest_scanline_mmx_8888_8888_OVER (uint32_t*       pd,
+                                            const uint32_t* ps,
+                                            int32_t         w,
+                                            pixman_fixed_t  vx,
+                                            pixman_fixed_t  unit_x,
+                                            pixman_fixed_t  src_width_fixed,
+                                            pixman_bool_t   fully_transparent_src)
+{
+    /* No MMX instruction has been issued yet, so a plain return is fine. */
+    if (fully_transparent_src)
+       return;
+
+    while (w)
+    {
+       __m64 d = load (pd);
+       __m64 s = load (ps + pixman_fixed_to_int (vx));
+
+       /* Step to the next source position and fold vx back below zero. */
+       vx += unit_x;
+       while (vx >= 0)
+           vx -= src_width_fixed;
+
+       store8888 (pd, core_combine_over_u_pixel_mmx (s, d));
+       pd++;
+
+       w--;
+    }
+
+    /*
+     * Clear the MMX state before returning, as the other MMX routines in
+     * this file do; without it the registers shared with the x87 FPU are
+     * left marked as in use for whatever floating-point code runs next.
+     */
+    _mm_empty ();
+}
+
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_cover_OVER, /* instantiation tagged with the COVER mode token */
+                      scaled_nearest_scanline_mmx_8888_8888_OVER, /* scanline routine defined above */
+                      uint32_t, uint32_t, COVER) /* NOTE(review): presumably source type, destination type, repeat mode -- confirm against the macro definition */
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_none_OVER, /* same scanline routine, NONE mode token */
+                      scaled_nearest_scanline_mmx_8888_8888_OVER,
+                      uint32_t, uint32_t, NONE)
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_pad_OVER, /* same scanline routine, PAD mode token */
+                      scaled_nearest_scanline_mmx_8888_8888_OVER,
+                      uint32_t, uint32_t, PAD)
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_normal_OVER, /* same scanline routine, NORMAL mode token */
+                      scaled_nearest_scanline_mmx_8888_8888_OVER,
+                      uint32_t, uint32_t, NORMAL)
+
 static force_inline void
 scaled_nearest_scanline_mmx_8888_n_8888_OVER (const uint32_t * mask,
                                              uint32_t *       dst,
@@ -4048,6 +4088,23 @@ static const pixman_fast_path_t mmx_fast_paths[] =
     PIXMAN_STD_FAST_PATH    (IN,   a8,       null,     a8,       mmx_composite_in_8_8              ),
     PIXMAN_STD_FAST_PATH    (IN,   solid,    a8,       a8,       mmx_composite_in_n_8_8            ),
 
+    SIMPLE_NEAREST_FAST_PATH_COVER  (OVER,   a8r8g8b8, x8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_COVER  (OVER,   a8b8g8r8, x8b8g8r8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_COVER  (OVER,   a8r8g8b8, a8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_COVER  (OVER,   a8b8g8r8, a8b8g8r8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NONE   (OVER,   a8r8g8b8, x8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NONE   (OVER,   a8b8g8r8, x8b8g8r8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NONE   (OVER,   a8r8g8b8, a8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NONE   (OVER,   a8b8g8r8, a8b8g8r8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_PAD    (OVER,   a8r8g8b8, x8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_PAD    (OVER,   a8b8g8r8, x8b8g8r8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_PAD    (OVER,   a8r8g8b8, a8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_PAD    (OVER,   a8b8g8r8, a8b8g8r8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER,   a8r8g8b8, x8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER,   a8b8g8r8, x8b8g8r8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER,   a8r8g8b8, a8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER,   a8b8g8r8, a8b8g8r8, mmx_8888_8888                     ),
+
     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_n_8888                 ),
     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_n_8888                 ),
     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_n_8888                 ),