MIPS DSPR2 optimization of routine fetchUntransformedRGB16
authorDamir Tatalovic <dtatalovic@mips.com>
Wed, 4 Apr 2012 15:54:58 +0000 (17:54 +0200)
committerQt by Nokia <qt-info@nokia.com>
Wed, 16 May 2012 02:25:04 +0000 (04:25 +0200)
Change-Id: I109deb969009214c4d81677e127f50120443acd2
Reviewed-by: Bradley T. Hughes <bradley.hughes@nokia.com>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Samuel Rødal <samuel.rodal@nokia.com>
src/gui/painting/qdrawhelper.cpp
src/gui/painting/qdrawhelper_mips_dsp_p.h
src/gui/painting/qdrawhelper_mips_dspr2_asm.S

index aaaf261..3ba8381 100644 (file)
@@ -555,8 +555,12 @@ static const uint *QT_FASTCALL fetchUntransformedRGB16(uint *buffer, const Opera
                                                        int length)
 {
     const quint16 *scanLine = (const quint16 *)data->texture.scanLine(y) + x;
+#ifdef QT_HAVE_MIPS_DSPR2
+    qConvertRgb16To32_asm_mips_dspr2(buffer, scanLine, length);
+#else
     for (int i = 0; i < length; ++i)
         buffer[i] = qConvertRgb16To32(scanLine[i]);
+#endif
     return buffer;
 }
 
index c68a3ff..55affca 100644 (file)
@@ -71,6 +71,12 @@ uint * QT_FASTCALL qt_destFetchARGB32_mips_dsp(uint *buffer,
 void QT_FASTCALL qt_destStoreARGB32_mips_dsp(QRasterBuffer *rasterBuffer, int x, int y,
                                              const uint *buffer, int length);
 
+#ifdef QT_HAVE_MIPS_DSPR2
+
+extern "C" void  qConvertRgb16To32_asm_mips_dspr2(quint32 *dest, const quint16 *src, int length);
+
+#endif // QT_HAVE_MIPS_DSPR2
+
 #endif // QT_HAVE_MIPS_DSP
 
 QT_END_NAMESPACE
index 688d16b..213fcf8 100644 (file)
@@ -93,3 +93,85 @@ LEAF_MIPS_DSPR2(BYTE_MUL_asm_mips_dspr2)
     j                 ra
 
 END(BYTE_MUL_asm_mips_dspr2)
+
+LEAF_MIPS_DSPR2(qConvertRgb16To32_asm_mips_dspr2)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (r5g6b5)
+ * a2 - w
+ */
+
+    beqz              a2, 3f
+     nop
+    addiu             t1, a2, -1
+    beqz              t1, 2f
+     nop
+    li                t4, 0x07e007e0
+    li                t5, 0x001F001F
+/* Convert two pixels at time (2 x rgb565 -> 2 x rgb8888) */
+1:
+    lhu               t0, 0(a1)
+    lhu               t1, 2(a1)
+    addiu             a1, a1, 4
+    addiu             a2, a2, -2
+
+    sll               t6, t0, 16
+    or                t6, t6, t1          /* t6 = R1 G1 B1 | R2 G2 B2 */
+    lui               t3, 0xff00
+    ori               t3, t3, 0xff00      /* t3 = FF 00 | FF 00 (in place) */
+    shrl.ph           t7, t6, 11          /* t7 = 0 R1 | 0 R2 */
+    and               t8, t6, t4          /* t8 = 0 G1 0 | 0 G2 0 */
+    shra.ph           t9, t7, 2           /* t9 = 0 R1 | 0 R2   (lower) */
+    shll.ph           t7, t7, 3           /* t7 = 0 R1 | 0 R2   (higher) */
+    shll.ph           t8, t8, 5           /* t8 = G1 0 | G2 0   (higher) */
+    or                t7, t7, t9          /* t7 = 0 R1 | 0 R2   (in place) */
+    shrl.qb           t9, t8, 6           /* t9 = G1 0 | G2 0   (lower) */
+    or                t3, t3, t7          /* t3 = FF R1 | FF R2 (in place) */
+    or                t8, t8, t9          /* t8 = G1 0 | G2 0   (in place) */
+    and               t6, t6, t5          /* t6 = 0 B1 | 0 B2 */
+    shll.ph           t7, t6, 3           /* t7 = 0 B1 | 0 B2   (higher) */
+    shra.ph           t9, t6, 2           /* t9 = 0 B1 | 0 B2   (lower) */
+    or                t7, t7, t9          /* t7 = 0 B1 | 0 B2   (in place) */
+    or                t8, t7, t8          /* t8 = G1 B1 | G2 B2 (in place) */
+    precrq.ph.w       t2, t3, t8          /* t2 = FF R1 G1 B1   (in place) */
+    precr_sra.ph.w    t3, t8, 0           /* t3 = FF R2 G2 B2   (in place) */
+
+    sw                t2, 0(a0)
+    sw                t3, 4(a0)
+
+    addiu             t2, a2, -1
+    bgtz              t2, 1b
+     addiu            a0, a0, 8
+2:
+    beqz              a2, 3f
+     nop
+    lhu               t0, 0(a1)
+
+/* Remaining pixel conversion (rgb565 -> rgb8888) */
+    lui               t1, 0xff00
+    sll               t2, t0, 0x3
+    andi              t3, t2, 0xff
+    ext               t2, t0, 0x2, 0x3
+    or                t2, t3, t2
+    or                t1, t1, t2
+
+    sll               t2, t0, 0x5
+    andi              t2, t2, 0xfc00
+    srl               t3, t0, 0x1
+    andi              t3, t3, 0x300
+    or                t3, t2, t3
+    or                t1, t1, t3
+
+    andi              t2, t0, 0xf800
+    srl               t3, t2, 0x5
+    andi              t3, t3, 0xff00
+    or                t2, t2, t3
+    sll               t2, t2, 0x8
+    or                t1, t1, t2
+
+    sw                t1, 0(a0)
+3:
+    j                 ra
+     nop
+
+END(qConvertRgb16To32_asm_mips_dspr2)