ps2 idct bugfix patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
author: Leon van Stuivenberg <leonvs@iae.nl>
Tue, 24 Sep 2002 23:57:17 +0000 (23:57 +0000)
committer: Michael Niedermayer <michaelni@gmx.at>
Tue, 24 Sep 2002 23:57:17 +0000 (23:57 +0000)
Originally committed as revision 970 to svn://svn.ffmpeg.org/ffmpeg/trunk

libavcodec/ps2/idct_mmi.c

index d885872..e80e124 100644 (file)
 
 #define BITS_INV_ACC   5       // 4 or 5 for IEEE
 #define SHIFT_INV_ROW  (16 - BITS_INV_ACC)
-#define SHIFT_INV_COL   (BITS_INV_ACC) //(1 + BITS_INV_ACC)  no, FP15 is used
-
-#define Rounder_0      0
-#define Rounder_1      16
-#define Rounder_2      32
-#define Rounder_3      48
-#define Rounder_4      64
-#define Rounder_5      80
-#define Rounder_6      96
-#define Rounder_7      112
+#define SHIFT_INV_COL   (1 + BITS_INV_ACC)
 
 // assume SHIFT_INV_ROW == 11
 static int roundertable[8][4] align16 = {
-    {65535, 65535, 65535, 65535},
-    { 1023,  1023,  1023,  1023},
-    { 1023,  1023,  1023,  1023},
-    { 1023,  1023,  1023,  1023},
-    {    0,     0,     0,     0},
-    { 1023,  1023,  1023,  1023},
-    { 1023,  1023,  1023,  1023},
+    { 0x103ff, 0x103ff, 0x103ff, 0x103ff },
     { 1023,  1023,  1023,  1023}
 };
 
@@ -97,7 +82,7 @@ static short coltable[7][8] align16 = {
 #define        noprevh(rt, rd)
 
 
-#define DCT_8_INV_ROW1(rowoff, taboff, rounder, outreg) { \
+#define DCT_8_INV_ROW1(rowoff, taboff, rnd, outreg) { \
 \
        lq($4, rowoff, $16);    /* r16 = x7  x6  x5  x4  x3  x2  x1  x0 */ \
        lq($24, 0+taboff, $17); /* r17 = w19 w17 w3  w1  w18 w16 w2  w0 */ \
@@ -114,7 +99,6 @@ static short coltable[7][8] align16 = {
        paddw($18, $21, $18);   /* r18 = (--)(--)(b1)(a1) */ \
        pcpyud($19, $19, $21);  \
        phmadh($20, $16, $20);  /* r12 = (b3'')(a3'')(b3')(a3') */ \
-       lq($7, rounder, $22);   /* r22 = rounder */ \
        paddw($19, $21, $19);   /* r19 = (--)(--)(b2)(a2) */ \
        pextlw($19, $17, $16);  /* r16 = (b2)(b0)(a2)(a0) */ \
        pcpyud($20, $20, $21);  \
@@ -122,7 +106,7 @@ static short coltable[7][8] align16 = {
        pextlw($20, $18, $17);  /* r17 = (b3)(b1)(a3)(a1) */ \
        pextlw($17, $16, $20);  /* r20 = (a3)(a2)(a1)(a0)" */ \
        pextuw($17, $16, $21);  /* r21 = (b3)(b2)(b1)(b0) */ \
-       paddw($20, $22, $20);   /* r20 = (a3)(a2)(a1)(a0) */\
+       paddw($20, rnd, $20);   /* r20 = (a3)(a2)(a1)(a0) */\
        paddw($20, $21, $17);   /* r17 = ()()()(a0+b0) */ \
        psubw($20, $21, $18);   /* r18 = ()()()(a0-b0) */ \
        psraw($17, SHIFT_INV_ROW, $17); /* r17 = (y3 y2 y1 y0) */ \
@@ -173,8 +157,8 @@ static short coltable[7][8] align16 = {
 \
        paddw($16, $20, $2);    /* y0  a0+b0 */         \
        psubw($16, $20, $16);   /* y7  a0-b0 */         \
-       psraw($2, SHIFT_INV_COL+16, $2);                \
-       psraw($16, SHIFT_INV_COL+16, $16);              \
+       psraw($2, SHIFT_INV_COL+15, $2);                \
+       psraw($16, SHIFT_INV_COL+15, $16);              \
        ppach($0, $2, $2);                              \
        ppach($0, $16, $16);                            \
        revop($2, $2);                                  \
@@ -184,8 +168,8 @@ static short coltable[7][8] align16 = {
 \
        paddw($17, $21, $3);    /* y1  a1+b1 */         \
        psubw($17, $21, $17);   /* y6  a1-b1 */         \
-       psraw($3, SHIFT_INV_COL+16, $3);                \
-       psraw($17, SHIFT_INV_COL+16, $17);              \
+       psraw($3, SHIFT_INV_COL+15, $3);                \
+       psraw($17, SHIFT_INV_COL+15, $17);              \
        ppach($0, $3, $3);                              \
        ppach($0, $17, $17);                    \
        revop($3, $3);                          \
@@ -195,8 +179,8 @@ static short coltable[7][8] align16 = {
 \
        paddw($18, $22, $2);    /* y2  a2+b2 */ \
        psubw($18, $22, $18);   /* y5  a2-b2 */ \
-       psraw($2, SHIFT_INV_COL+16, $2);        \
-       psraw($18, SHIFT_INV_COL+16, $18);      \
+       psraw($2, SHIFT_INV_COL+15, $2);        \
+       psraw($18, SHIFT_INV_COL+15, $18);      \
        ppach($0, $2, $2);                      \
        ppach($0, $18, $18);                    \
        revop($2, $2);                          \
@@ -206,8 +190,8 @@ static short coltable[7][8] align16 = {
 \
        paddw($19, $23, $3);    /* y3  a3+b3 */ \
        psubw($19, $23, $19);   /* y4  a3-b3 */ \
-       psraw($3, SHIFT_INV_COL+16, $3);        \
-       psraw($19, SHIFT_INV_COL+16, $19);      \
+       psraw($3, SHIFT_INV_COL+15, $3);        \
+       psraw($19, SHIFT_INV_COL+15, $19);      \
        ppach($0, $3, $3);                      \
        ppach($0, $19, $19);                    \
        revop($3, $3);                          \
@@ -220,16 +204,18 @@ static short coltable[7][8] align16 = {
 void ff_mmi_idct(int16_t * block)
 {
     /* $4 = block */
-    __asm__ __volatile__("la $24, %0"::"m"(rowtable[0][0]));
     __asm__ __volatile__("la $7, %0"::"m"(roundertable[0][0]));
-    DCT_8_INV_ROW1(0, TAB_i_04, Rounder_0, $8);
-    DCT_8_INV_ROW1(16, TAB_i_17, Rounder_1, $9);
-    DCT_8_INV_ROW1(32, TAB_i_26, Rounder_2, $10);
-    DCT_8_INV_ROW1(48, TAB_i_35, Rounder_3, $11);
-    DCT_8_INV_ROW1(64, TAB_i_04, Rounder_4, $12);
-    DCT_8_INV_ROW1(80, TAB_i_35, Rounder_5, $13);
-    DCT_8_INV_ROW1(96, TAB_i_26, Rounder_6, $14);
-    DCT_8_INV_ROW1(112, TAB_i_17, Rounder_7, $15);
+    __asm__ __volatile__("la $24, %0"::"m"(rowtable[0][0]));
+    lq($7, 0, $6);
+    lq($7, 16, $7);
+    DCT_8_INV_ROW1(0, TAB_i_04, $6, $8);
+    DCT_8_INV_ROW1(16, TAB_i_17, $7, $9);
+    DCT_8_INV_ROW1(32, TAB_i_26, $7, $10);
+    DCT_8_INV_ROW1(48, TAB_i_35, $7, $11);
+    DCT_8_INV_ROW1(64, TAB_i_04, $7, $12);
+    DCT_8_INV_ROW1(80, TAB_i_35, $7, $13);
+    DCT_8_INV_ROW1(96, TAB_i_26, $7, $14);
+    DCT_8_INV_ROW1(112, TAB_i_17, $7, $15);
 
     __asm__ __volatile__("la $24, %0"::"m"(coltable[0][0]));
     DCT_8_INV_COL4(pextlh, 0, noprevh);
@@ -237,5 +223,6 @@ void ff_mmi_idct(int16_t * block)
 
     //let savedtemp regs be saved
     __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21",
-                        "$22", "$23");
+                        "$22", "$23", "$6", "$7");
 }
+