}
/* butterfly */
+ s = 0.5 * sqrt(2.0);
for(i=0;i<4;i++) {
for(j=0;j<8;j++) {
- block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * 0.5;
- block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * 0.5;
+ block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
+ block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
}
}
/* clamp and store the result */
for(i=0;i<8;i++) {
for(j=0;j<8;j++) {
- v = block3[8*i+j] + 128.0;
+ v = block3[8*i+j];
if (v < 0)
v = 0;
else if (v > 255)
important here) */
err_max = 0;
for(it=0;it<NB_ITS;it++) {
- for(i=0;i<64;i++)
- block1[i] = (random() % 512) - 256;
+ /* XXX: use forward transform to generate values */
+ for(i=0;i<64;i++)
+ block1[i] = (random() % 256) - 128;
+ block1[0] += 1024;
+
for(i=0; i<64; i++)
block[i]= block1[i];
idct248_ref(img_dest1, 8, block);
+ for(i=0; i<64; i++)
+ block[i]= block1[i];
+ idct248_put(img_dest, 8, block);
+
+ for(i=0;i<64;i++) {
+ v = abs((int)img_dest[i] - (int)img_dest1[i]);
+ if (v == 255)
+ printf("%d %d\n", img_dest[i], img_dest1[i]);
+ if (v > err_max)
+ err_max = v;
+ }
#if 0
printf("ref=\n");
for(i=0;i<8;i++) {
}
printf("\n");
}
-#endif
-
- for(i=0; i<64; i++)
- block[i]= block1[i];
- idct248_put(img_dest, 8, block);
-#if 0
printf("out=\n");
for(i=0;i<8;i++) {
int j;
printf("\n");
}
#endif
- for(i=0;i<64;i++) {
- v = abs(img_dest[i] - img_dest1[i]);
- if (v > err_max)
- err_max = v;
- }
}
printf("%s %s: err_inf=%d\n",
1 ? "IDCT248" : "DCT248",
#define CN_SHIFT 12
#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
-#define C0 C_FIX(0.7071067811)
-#define C1 C_FIX(0.9238795324)
-#define C2 C_FIX(0.3826834324)
+#define C1 C_FIX(0.6532814824)
+#define C2 C_FIX(0.2705980501)
-/* row idct is multiple by 16 * sqrt(2.0), col idct4 is multiplied by
- sqrt(2). An extra division by two is needed for the first butterfly
- stage */
-#define C_SHIFT (4+1+12+1)
+/* row idct is multiplied by 16 * sqrt(2.0), col idct4 is normalized,
+ and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
+#define C_SHIFT (4+1+12)
static inline void idct4col(UINT8 *dest, int line_size, const INT16 *col)
{
a1 = col[8*2];
a2 = col[8*4];
a3 = col[8*6];
- c0 = (a0 + a2) * C0 + (1 << (C_SHIFT - 1)) + (128 << C_SHIFT);
- c2 = (a0 - a2) * C0 + (1 << (C_SHIFT - 1)) + (128 << C_SHIFT);
+ c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
+ c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
c1 = a1 * C1 + a3 * C2;
c3 = a1 * C2 - a3 * C1;
dest[0] = cm[(c0 + c1) >> C_SHIFT];