ip/xjpg_dct.c

   1 /* libhpojip -- HP OfficeJet image-processing library. */
   2
   3 /* Copyright (C) 1995-2002 Hewlett-Packard Company
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License as
   7  * published by the Free Software Foundation; either version 2 of the
   8  * License, or (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful, but
  11  * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
  12  * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
  13  * NON-INFRINGEMENT.  See the GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  18  * MA 02111-1307, USA.
  19  *
  20  * In addition, as a special exception, Hewlett-Packard Company
  21  * gives permission to link the code of this program with any
  22  * version of the OpenSSL library which is distributed under a
  23  * license identical to that listed in the included LICENSE.OpenSSL
  24  * file, and distribute linked combinations including the two.
  25  * You must obey the GNU General Public License in all respects
  26  * for all of the code used other than OpenSSL.  If you modify
  27  * this file, you may extend this exception to your version of the
  28  * file, but you are not obligated to do so.  If you do not wish to
  29  * do so, delete this exception statement from your version.
  30  */
  31
  32 /* Original author: Mark Overton and others.
  33  *
  34  * Ported to Linux by David Paschal.
  35  */
  36
  37 /*____________________________________________________________________________
  38  |            |                                                               |
  39  | xjpg_dct.c | Computes forward and inverse DCT for JPEG                     |
  40  |____________|_______________________________________________________________|
  41  |                                                                            |
  42  | Mark Overton, May 1997                                                     |
  43  |____________________________________________________________________________|
  44 */
  45
  46 #include "xjpg_dct.h"
  47
  48
  49 /*____________________________________________________________________________
  50  |             |                                                              |
  51  | SUB_AND_ADD | replaces a with a-b, and b with a+b using no temp registers  |
  52  |_____________|______________________________________________________________|
  53 */
  54 #define SUB_AND_ADD(a,b) {              \
  55     b += a;                             \
  56     a += a;                             \
  57     a -= b;                             \
  58 }
  59
  60
  61
  62 /*____________________________________________________________________________
  63  |           |                                                                |
  64  | MUL_ROUND | computes x = round(x*c), using no temp registers               |
  65  |___________|________________________________________________________________|
  66  |                                                                            |
  67  | c is assumed to have CONST_FRAC_BITS bits of fraction.                     |
  68  |____________________________________________________________________________|
  69 */
  70 #if (defined _WINDOWS) && !(defined _WIN32)
  71
  72     /* We are compiling for 16-bit Windows 3.1 */
  73
  74     #define MUL_ROUND(c,x) {                                         \
  75         long product;                                                \
  76         product = (long)(x) * ((long)(c) << (16-CONST_FRAC_BITS));   \
  77         x = (product+0x8000L) >> 16;                                 \
  78     }
  79
  80 #else
  81
  82     #define MUL_ROUND(c,x) {                                     \
  83         x = (short)(x) * (short)(c);                             \
  84         x = ((x)+(1l<<(CONST_FRAC_BITS-1))) >> CONST_FRAC_BITS;  \
  85     }
  86
  87 #endif
  88
  89
  90
  91 /*____________________________________________________________________________
  92  |             |                                                              |
  93  | dct_forward | computes DCT for JPEG                                        |
  94  |_____________|______________________________________________________________|
  95  |                                                                            |
  96  | This is the DCT algorithm based on the small FFT Winograd transform        |
  97  | from Trans. IEICE, vol. E 71(11), 1095-1097, Nov. 1988                     |
  98  |                                                                            |
  99  | Input:  'block' is 64 level-shifted pixels (-128..127 each).               |
 100  |                                                                            |
 101  | Output: 'block' is the DCT (64 words).                                     |
 102  |         These values need to be scaled by the forward correction matrix    |
 103  |         for the Winograd DCT.                                              |
 104  |____________________________________________________________________________|
 105 */
 106 void dct_forward (register int *block_p)
 107 {
 108     #define CONST_FRAC_BITS 14   /* bits of frac in CONST_1-CONST_5 below */
 109
 110     #define CONST_1 (23170/2)  /* 15 bits of frac shifted down to 14 */
 111     #define CONST_2 (17734/2)
 112     #define CONST_3 (23170/2)
 113     #define CONST_4 (42813/2)  /* this one wouldn't fit with 15 bits of frac */
 114     #define CONST_5 (12540/2)
 115
 116     int *data_p;
 117     int  d0, d1, d2, d3, d4, d5, d6, d7;
 118
 119       /******************/
 120      /* Transform Rows */
 121     /******************/
 122
 123     for (data_p=block_p; data_p<block_p+64; data_p+=8)
 124     {
 125         d0 = data_p[0];
 126         d1 = data_p[1];
 127         d2 = data_p[2];
 128         d3 = data_p[3];
 129         d4 = data_p[4];
 130         d5 = data_p[5];
 131         d6 = data_p[6];
 132         d7 = data_p[7];
 133
 134         SUB_AND_ADD (d0, d7)
 135         SUB_AND_ADD (d1, d6)
 136         SUB_AND_ADD (d2, d5)
 137         SUB_AND_ADD (d4, d3)
 138
 139         SUB_AND_ADD (d7, d3)
 140         SUB_AND_ADD (d6, d5)
 141
 142         SUB_AND_ADD (d3, d5)
 143         data_p[4] = d3;
 144         data_p[0] = d5;
 145
 146         d6 += d7;
 147         MUL_ROUND (CONST_1, d6)
 148         SUB_AND_ADD (d7, d6)
 149         data_p[6] = d7;
 150         data_p[2] = d6;
 151
 152         /* At this point, the only live math vars are in:  d0, d1, d2, d4 */
 153
 154         d7 = d1 + d2;
 155         MUL_ROUND (CONST_3, d7)
 156         d1 += d0;
 157         SUB_AND_ADD (d0, d7)
 158         d4 -= d2;
 159         d6 = d1 + d4;
 160         MUL_ROUND (CONST_5, d6)
 161         MUL_ROUND (CONST_4, d1)
 162         d1 -= d6;
 163
 164         SUB_AND_ADD (d7, d1)
 165         data_p[7] = d7;
 166         data_p[1] = d1;
 167
 168         MUL_ROUND (CONST_2, d4)
 169         d4 += d6;
 170         SUB_AND_ADD (d0, d4)
 171         data_p[5] = d0;
 172         data_p[3] = d4;
 173     }
 174
 175       /*********************/
 176      /* Transform Columns */
 177     /*********************/
 178
 179     for (data_p=block_p; data_p<block_p+8; data_p++)
 180     {
 181         d0 = data_p[0*8];
 182         d7 = data_p[7*8];
 183         SUB_AND_ADD (d0, d7)
 184
 185         d4 = data_p[4*8];
 186         d3 = data_p[3*8];
 187         SUB_AND_ADD (d4, d3)
 188
 189         d1 = data_p[1*8];
 190         d6 = data_p[6*8];
 191         SUB_AND_ADD (d1, d6)
 192
 193         d2 = data_p[2*8];
 194         d5 = data_p[5*8];
 195         SUB_AND_ADD (d2, d5)
 196
 197         SUB_AND_ADD (d7, d3)
 198         SUB_AND_ADD (d6, d5)
 199
 200         SUB_AND_ADD (d3, d5)
 201         data_p[4*8] = d3;
 202         data_p[0*8] = d5;
 203
 204         d6 += d7;
 205         MUL_ROUND (CONST_1, d6)
 206         SUB_AND_ADD (d7, d6)
 207         data_p[6*8] = d7;
 208         data_p[2*8] = d6;
 209
 210         /* At this point, the only live math vars are in:  d0, d1, d2, d4 */
 211
 212         d7 = d1 + d2;
 213         MUL_ROUND (CONST_3, d7)
 214         d1 += d0;
 215         SUB_AND_ADD (d0, d7)
 216         d4 -= d2;
 217         d6 = d1 + d4;
 218         MUL_ROUND (CONST_5, d6)
 219         MUL_ROUND (CONST_4, d1)
 220         d1 -= d6;
 221
 222         SUB_AND_ADD (d7, d1)
 223         data_p[7*8] = d7;
 224         data_p[1*8] = d1;
 225
 226         MUL_ROUND (CONST_2, d4)
 227         d4 += d6;
 228         SUB_AND_ADD (d0, d4)
 229         data_p[5*8] = d0;
 230         data_p[3*8] = d4;
 231     }
 232
 233     #undef CONST_FRAC_BITS
 234     #undef CONST_1
 235     #undef CONST_2
 236     #undef CONST_3
 237     #undef CONST_4
 238     #undef CONST_5
 239 } /* end of dct_forward */
 240
 241
 242
 243 /*____________________________________________________________________________
 244  |             |                                                              |
 245  | dct_inverse | computes inverse DCT for JPEG                                |
 246  |_____________|______________________________________________________________|
 247  |                                                                            |
 248  | This is the DCT algorithm based on the small FFT Winograd transform        |
 249  | from Trans. IEICE, vol. E 71(11), 1095-1097, Nov. 1988                     |
 250  |                                                                            |
 251  | Input:  'block' is the DCT (64 words).                                     |
 252  |         These values are assumed to have been scaled by the inverse        |
 253  |         correction matrix for the Winograd DCT.                            |
 254  |                                                                            |
 255  | Output: 'block' is 64 level-shifted pixels.  These values will have        |
 256  |         as many bits of fraction as the input DCT had.  After rounding     |
 257  |         and level-shifting, you must clamp these values to 0..255.         |
 258  |____________________________________________________________________________|
 259 */
 260 void dct_inverse (register int *block_p)
 261 {
 262     #define CONST_FRAC_BITS 13  /* bits of frac in CONST_1-CONST_5 below */
 263
 264     #define CONST_1 ((46341+2)/4)   /* 15 bits of frac shifted down to 13 */
 265     #define CONST_2 ((85627+2)/4)
 266     #define CONST_3 ((46341+2)/4)
 267     #define CONST_4 ((35468+2)/4)
 268     #define CONST_5 ((25080+2)/4)
 269
 270     int *data_p;
 271     int  d0, d1, d2, d3, d4, d5, d6, d7, tmp;
 272
 273       /*********************/
 274      /* Transform Columns */
 275     /*********************/
 276
 277     for (data_p=block_p; data_p<block_p+8; data_p++)
 278     {
 279         d0 = data_p[0*8];
 280         d4 = data_p[4*8];
 281         SUB_AND_ADD (d0, d4)
 282
 283         d1 = data_p[1*8];
 284         d7 = data_p[7*8];
 285         SUB_AND_ADD (d1, d7)
 286
 287         d2 = data_p[2*8];
 288         d6 = data_p[6*8];
 289         SUB_AND_ADD (d2, d6)
 290
 291         d5 = data_p[5*8];
 292         d3 = data_p[3*8];
 293         SUB_AND_ADD (d5, d3)
 294
 295         MUL_ROUND (CONST_1, d2)
 296         d2 -= d6;
 297         SUB_AND_ADD (d0, d2)
 298         SUB_AND_ADD (d4, d6)
 299         SUB_AND_ADD (d7, d3)
 300
 301         tmp = d3;
 302         SUB_AND_ADD (d6, d3)
 303         data_p[7*8] = d6;
 304         data_p[0*8] = d3;
 305
 306         d6 = d5 - d1;
 307         MUL_ROUND (CONST_5, d6);
 308         MUL_ROUND (CONST_4, d1)
 309         d1 -= d6;
 310         d1 -= tmp;
 311         MUL_ROUND (CONST_3, d7)
 312         d7 -= d1;
 313         MUL_ROUND (CONST_2, d5)
 314         d6 -= d5;
 315         d6 += d7;
 316
 317         SUB_AND_ADD (d2, d1)
 318         data_p[6*8] = d2;
 319         data_p[1*8] = d1;
 320
 321         SUB_AND_ADD (d0, d7)
 322         data_p[5*8] = d0;
 323         data_p[2*8] = d7;
 324
 325         SUB_AND_ADD (d4, d6)
 326         data_p[3*8] = d4;
 327         data_p[4*8] = d6;
 328     }
 329
 330       /******************/
 331      /* Transform Rows */
 332     /******************/
 333
 334     for (data_p=block_p; data_p<block_p+64; data_p+=8)
 335     {
 336         d0 = data_p[0];
 337         d1 = data_p[1];
 338         d2 = data_p[2];
 339         d3 = data_p[3];
 340         d4 = data_p[4];
 341         d5 = data_p[5];
 342         d6 = data_p[6];
 343         d7 = data_p[7];
 344
 345         SUB_AND_ADD (d0, d4)
 346         SUB_AND_ADD (d1, d7)
 347         SUB_AND_ADD (d2, d6)
 348         SUB_AND_ADD (d5, d3)
 349
 350         MUL_ROUND (CONST_1, d2)
 351         d2 -= d6;
 352         SUB_AND_ADD (d0, d2)
 353         SUB_AND_ADD (d4, d6)
 354         SUB_AND_ADD (d7, d3)
 355
 356         tmp = d3;
 357         SUB_AND_ADD (d6, d3)
 358         data_p[7] = d6;
 359         data_p[0] = d3;
 360
 361         d6 = d5 - d1;
 362         MUL_ROUND (CONST_5, d6)
 363         MUL_ROUND (CONST_4, d1)
 364         d1 -= d6;
 365         d1 -= tmp;
 366         MUL_ROUND (CONST_3, d7)
 367         d7 -= d1;
 368         MUL_ROUND (CONST_2, d5)
 369         d6 -= d5;
 370         d6 += d7;
 371
 372         SUB_AND_ADD (d2, d1)
 373         data_p[6] = d2;
 374         data_p[1] = d1;
 375
 376         SUB_AND_ADD (d0, d7)
 377         data_p[5] = d0;
 378         data_p[2] = d7;
 379
 380         SUB_AND_ADD (d4, d6)
 381         data_p[3] = d4;
 382         data_p[4] = d6;
 383     }
 384
 385     #undef CONST_FRAC_BITS
 386     #undef CONST_1
 387     #undef CONST_2
 388     #undef CONST_3
 389     #undef CONST_4
 390     #undef CONST_5
 391 } /* end of dct_inverse */
 392
 393 /* End of File */