Add SSE2 fetcher for 0565
authorSøren Sandmann Pedersen <ssp@redhat.com>
Sun, 23 Jan 2011 21:53:26 +0000 (16:53 -0500)
committerSøren Sandmann Pedersen <ssp@redhat.com>
Thu, 3 Feb 2011 08:25:05 +0000 (03:25 -0500)
Before:

add_0565_0565 = L1:  61.08  L2:  61.03  M: 60.57 ( 10.95%)  HT: 46.85  VT: 45.25  R: 39.99  RT: 20.41 ( 233Kops/s)

After:

add_0565_0565 = L1:  77.84  L2:  76.25  M: 75.38 ( 13.71%)  HT: 55.99  VT: 54.56  R: 45.41  RT: 21.95 ( 255Kops/s)

pixman/pixman-sse2.c

index e28b986..91adc05 100644 (file)
@@ -6082,6 +6082,52 @@ sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
 }
 
 static uint32_t *
+sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
+{
+    int w = iter->width;
+    uint32_t *dst = iter->buffer;
+    uint16_t *src = (uint16_t *)iter->bits;
+    __m128i ff000000 = mask_ff000000;
+
+    iter->bits += iter->stride;
+
+    while (w && ((unsigned long)dst) & 0x0f)
+    {
+       uint16_t s = *src++;
+
+       *dst++ = CONVERT_0565_TO_8888 (s);
+       w--;
+    }
+
+    while (w >= 8)
+    {
+       __m128i lo, hi, s;
+
+       s = _mm_loadu_si128 ((__m128i *)src);
+
+       lo = unpack_565_to_8888 (_mm_unpacklo_epi16 (s, _mm_setzero_si128 ()));
+       hi = unpack_565_to_8888 (_mm_unpackhi_epi16 (s, _mm_setzero_si128 ()));
+
+       save_128_aligned ((__m128i *)(dst + 0), _mm_or_si128 (lo, ff000000));
+       save_128_aligned ((__m128i *)(dst + 4), _mm_or_si128 (hi, ff000000));
+
+       dst += 8;
+       src += 8;
+       w -= 8;
+    }
+
+    while (w)
+    {
+       uint16_t s = *src++;
+
+       *dst++ = CONVERT_0565_TO_8888 (s);
+       w--;
+    }
+
+    return iter->buffer;
+}
+
+static uint32_t *
 sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
 {
     int w = iter->width;
@@ -6136,6 +6182,7 @@ typedef struct
 static const fetcher_info_t fetchers[] =
 {
     { PIXMAN_x8r8g8b8,         sse2_fetch_x8r8g8b8 },
+    { PIXMAN_r5g6b5,           sse2_fetch_r5g6b5 },
     { PIXMAN_a8,               sse2_fetch_a8 },
     { PIXMAN_null }
 };