jcdctmgr.c

   1 /*
   2  * jcdctmgr.c
   3  *
   4  * Copyright (C) 1994-1996, Thomas G. Lane.
   5  * Copyright (C) 1999-2006, MIYASAKA Masaru.
   6  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
   7  * Copyright (C) 2011 D. R. Commander
   8  * This file is part of the Independent JPEG Group's software.
   9  * For conditions of distribution and use, see the accompanying README file.
  10  *
  11  * This file contains the forward-DCT management logic.
  12  * This code selects a particular DCT implementation to be used,
  13  * and it performs related housekeeping chores including coefficient
  14  * quantization.
  15  */
  16
  17 #define JPEG_INTERNALS
  18 #include "jinclude.h"
  19 #include "jpeglib.h"
  20 #include "jdct.h"               /* Private declarations for DCT subsystem */
  21 #include "jsimddct.h"
  22
  23
  24 /* Private subobject for this module */
  25
  26 typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
  27 typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
  28
  29 typedef JMETHOD(void, convsamp_method_ptr,
  30                 (JSAMPARRAY sample_data, JDIMENSION start_col,
  31                  DCTELEM * workspace));
  32 typedef JMETHOD(void, float_convsamp_method_ptr,
  33                 (JSAMPARRAY sample_data, JDIMENSION start_col,
  34                  FAST_FLOAT *workspace));
  35
  36 typedef JMETHOD(void, quantize_method_ptr,
  37                 (JCOEFPTR coef_block, DCTELEM * divisors,
  38                  DCTELEM * workspace));
  39 typedef JMETHOD(void, float_quantize_method_ptr,
  40                 (JCOEFPTR coef_block, FAST_FLOAT * divisors,
  41                  FAST_FLOAT * workspace));
  42
  43 METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
  44
  45 typedef struct {
  46   struct jpeg_forward_dct pub;  /* public fields */
  47
  48   /* Pointer to the DCT routine actually in use */
  49   forward_DCT_method_ptr dct;
  50   convsamp_method_ptr convsamp;
  51   quantize_method_ptr quantize;
  52
  53   /* The actual post-DCT divisors --- not identical to the quant table
  54    * entries, because of scaling (especially for an unnormalized DCT).
  55    * Each table is given in normal array order.
  56    */
  57   DCTELEM * divisors[NUM_QUANT_TBLS];
  58
  59   /* work area for FDCT subroutine */
  60   DCTELEM * workspace;
  61
  62 #ifdef DCT_FLOAT_SUPPORTED
  63   /* Same as above for the floating-point case. */
  64   float_DCT_method_ptr float_dct;
  65   float_convsamp_method_ptr float_convsamp;
  66   float_quantize_method_ptr float_quantize;
  67   FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
  68   FAST_FLOAT * float_workspace;
  69 #endif
  70 } my_fdct_controller;
  71
  72 typedef my_fdct_controller * my_fdct_ptr;
  73
  74
  75 /*
  76  * Find the highest bit in an integer through binary search.
  77  */
  78 LOCAL(int)
  79 flss (UINT16 val)
  80 {
  81   int bit;
  82
  83   bit = 16;
  84
  85   if (!val)
  86     return 0;
  87
  88   if (!(val & 0xff00)) {
  89     bit -= 8;
  90     val <<= 8;
  91   }
  92   if (!(val & 0xf000)) {
  93     bit -= 4;
  94     val <<= 4;
  95   }
  96   if (!(val & 0xc000)) {
  97     bit -= 2;
  98     val <<= 2;
  99   }
 100   if (!(val & 0x8000)) {
 101     bit -= 1;
 102     val <<= 1;
 103   }
 104
 105   return bit;
 106 }
 107
 108 /*
 109  * Compute values to do a division using reciprocal.
 110  *
 111  * This implementation is based on an algorithm described in
 112  *   "How to optimize for the Pentium family of microprocessors"
 113  *   (http://www.agner.org/assem/).
 114  * More information about the basic algorithm can be found in
 115  * the paper "Integer Division Using Reciprocals" by Robert Alverson.
 116  *
 117  * The basic idea is to replace x/d by x * d^-1. In order to store
 118  * d^-1 with enough precision we shift it left a few places. It turns
 119  * out that this algoright gives just enough precision, and also fits
 120  * into DCTELEM:
 121  *
 122  *   b = (the number of significant bits in divisor) - 1
 123  *   r = (word size) + b
 124  *   f = 2^r / divisor
 125  *
 126  * f will not be an integer for most cases, so we need to compensate
 127  * for the rounding error introduced:
 128  *
 129  *   no fractional part:
 130  *
 131  *       result = input >> r
 132  *
 133  *   fractional part of f < 0.5:
 134  *
 135  *       round f down to nearest integer
 136  *       result = ((input + 1) * f) >> r
 137  *
 138  *   fractional part of f > 0.5:
 139  *
 140  *       round f up to nearest integer
 141  *       result = (input * f) >> r
 142  *
 143  * This is the original algorithm that gives truncated results. But we
 144  * want properly rounded results, so we replace "input" with
 145  * "input + divisor/2".
 146  *
 147  * In order to allow SIMD implementations we also tweak the values to
 148  * allow the same calculation to be made at all times:
 149  *
 150  *   dctbl[0] = f rounded to nearest integer
 151  *   dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
 152  *   dctbl[2] = 1 << ((word size) * 2 - r)
 153  *   dctbl[3] = r - (word size)
 154  *
 155  * dctbl[2] is for stupid instruction sets where the shift operation
 156  * isn't member wise (e.g. MMX).
 157  *
 158  * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
 159  * is that most SIMD implementations have a "multiply and store top
 160  * half" operation.
 161  *
 162  * Lastly, we store each of the values in their own table instead
 163  * of in a consecutive manner, yet again in order to allow SIMD
 164  * routines.
 165  */
 166 LOCAL(int)
 167 compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
 168 {
 169   UDCTELEM2 fq, fr;
 170   UDCTELEM c;
 171   int b, r;
 172
 173   b = flss(divisor) - 1;
 174   r  = sizeof(DCTELEM) * 8 + b;
 175
 176   fq = ((UDCTELEM2)1 << r) / divisor;
 177   fr = ((UDCTELEM2)1 << r) % divisor;
 178
 179   c = divisor / 2; /* for rounding */
 180
 181   if (fr == 0) { /* divisor is power of two */
 182     /* fq will be one bit too large to fit in DCTELEM, so adjust */
 183     fq >>= 1;
 184     r--;
 185   } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
 186     c++;
 187   } else { /* fractional part is > 0.5 */
 188     fq++;
 189   }
 190
 191   dtbl[DCTSIZE2 * 0] = (DCTELEM) fq;      /* reciprocal */
 192   dtbl[DCTSIZE2 * 1] = (DCTELEM) c;       /* correction + roundfactor */
 193   dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r));  /* scale */
 194   dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
 195
 196   if(r <= 16) return 0;
 197   else return 1;
 198 }
 199
 200 /*
 201  * Initialize for a processing pass.
 202  * Verify that all referenced Q-tables are present, and set up
 203  * the divisor table for each one.
 204  * In the current implementation, DCT of all components is done during
 205  * the first pass, even if only some components will be output in the
 206  * first scan.  Hence all components should be examined here.
 207  */
 208
 209 METHODDEF(void)
 210 start_pass_fdctmgr (j_compress_ptr cinfo)
 211 {
 212   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
 213   int ci, qtblno, i;
 214   jpeg_component_info *compptr;
 215   JQUANT_TBL * qtbl;
 216   DCTELEM * dtbl;
 217
 218   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
 219        ci++, compptr++) {
 220     qtblno = compptr->quant_tbl_no;
 221     /* Make sure specified quantization table is present */
 222     if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
 223         cinfo->quant_tbl_ptrs[qtblno] == NULL)
 224       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
 225     qtbl = cinfo->quant_tbl_ptrs[qtblno];
 226     /* Compute divisors for this quant table */
 227     /* We may do this more than once for same table, but it's not a big deal */
 228     switch (cinfo->dct_method) {
 229 #ifdef DCT_ISLOW_SUPPORTED
 230     case JDCT_ISLOW:
 231       /* For LL&M IDCT method, divisors are equal to raw quantization
 232        * coefficients multiplied by 8 (to counteract scaling).
 233        */
 234       if (fdct->divisors[qtblno] == NULL) {
 235         fdct->divisors[qtblno] = (DCTELEM *)
 236           (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 237                                       (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
 238       }
 239       dtbl = fdct->divisors[qtblno];
 240       for (i = 0; i < DCTSIZE2; i++) {
 241         if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
 242           && fdct->quantize == jsimd_quantize)
 243           fdct->quantize = quantize;
 244       }
 245       break;
 246 #endif
 247 #ifdef DCT_IFAST_SUPPORTED
 248     case JDCT_IFAST:
 249       {
 250         /* For AA&N IDCT method, divisors are equal to quantization
 251          * coefficients scaled by scalefactor[row]*scalefactor[col], where
 252          *   scalefactor[0] = 1
 253          *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
 254          * We apply a further scale factor of 8.
 255          */
 256 #define CONST_BITS 14
 257         static const INT16 aanscales[DCTSIZE2] = {
 258           /* precomputed values scaled up by 14 bits */
 259           16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
 260           22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
 261           21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
 262           19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
 263           16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
 264           12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
 265            8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
 266            4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
 267         };
 268         SHIFT_TEMPS
 269
 270         if (fdct->divisors[qtblno] == NULL) {
 271           fdct->divisors[qtblno] = (DCTELEM *)
 272             (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 273                                         (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
 274         }
 275         dtbl = fdct->divisors[qtblno];
 276         for (i = 0; i < DCTSIZE2; i++) {
 277           if(!compute_reciprocal(
 278             DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
 279                                   (INT32) aanscales[i]),
 280                     CONST_BITS-3), &dtbl[i])
 281             && fdct->quantize == jsimd_quantize)
 282             fdct->quantize = quantize;
 283         }
 284       }
 285       break;
 286 #endif
 287 #ifdef DCT_FLOAT_SUPPORTED
 288     case JDCT_FLOAT:
 289       {
 290         /* For float AA&N IDCT method, divisors are equal to quantization
 291          * coefficients scaled by scalefactor[row]*scalefactor[col], where
 292          *   scalefactor[0] = 1
 293          *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
 294          * We apply a further scale factor of 8.
 295          * What's actually stored is 1/divisor so that the inner loop can
 296          * use a multiplication rather than a division.
 297          */
 298         FAST_FLOAT * fdtbl;
 299         int row, col;
 300         static const double aanscalefactor[DCTSIZE] = {
 301           1.0, 1.387039845, 1.306562965, 1.175875602,
 302           1.0, 0.785694958, 0.541196100, 0.275899379
 303         };
 304
 305         if (fdct->float_divisors[qtblno] == NULL) {
 306           fdct->float_divisors[qtblno] = (FAST_FLOAT *)
 307             (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 308                                         DCTSIZE2 * SIZEOF(FAST_FLOAT));
 309         }
 310         fdtbl = fdct->float_divisors[qtblno];
 311         i = 0;
 312         for (row = 0; row < DCTSIZE; row++) {
 313           for (col = 0; col < DCTSIZE; col++) {
 314             fdtbl[i] = (FAST_FLOAT)
 315               (1.0 / (((double) qtbl->quantval[i] *
 316                        aanscalefactor[row] * aanscalefactor[col] * 8.0)));
 317             i++;
 318           }
 319         }
 320       }
 321       break;
 322 #endif
 323     default:
 324       ERREXIT(cinfo, JERR_NOT_COMPILED);
 325       break;
 326     }
 327   }
 328 }
 329
 330
 331 /*
 332  * Load data into workspace, applying unsigned->signed conversion.
 333  */
 334
 335 METHODDEF(void)
 336 convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
 337 {
 338   register DCTELEM *workspaceptr;
 339   register JSAMPROW elemptr;
 340   register int elemr;
 341
 342   workspaceptr = workspace;
 343   for (elemr = 0; elemr < DCTSIZE; elemr++) {
 344     elemptr = sample_data[elemr] + start_col;
 345
 346 #if DCTSIZE == 8                /* unroll the inner loop */
 347     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 348     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 349     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 350     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 351     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 352     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 353     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 354     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 355 #else
 356     {
 357       register int elemc;
 358       for (elemc = DCTSIZE; elemc > 0; elemc--)
 359         *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
 360     }
 361 #endif
 362   }
 363 }
 364
 365
 366 /*
 367  * Quantize/descale the coefficients, and store into coef_blocks[].
 368  */
 369
 370 METHODDEF(void)
 371 quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
 372 {
 373   int i;
 374   DCTELEM temp;
 375   UDCTELEM recip, corr, shift;
 376   UDCTELEM2 product;
 377   JCOEFPTR output_ptr = coef_block;
 378
 379   for (i = 0; i < DCTSIZE2; i++) {
 380     temp = workspace[i];
 381     recip = divisors[i + DCTSIZE2 * 0];
 382     corr =  divisors[i + DCTSIZE2 * 1];
 383     shift = divisors[i + DCTSIZE2 * 3];
 384
 385     if (temp < 0) {
 386       temp = -temp;
 387       product = (UDCTELEM2)(temp + corr) * recip;
 388       product >>= shift + sizeof(DCTELEM)*8;
 389       temp = product;
 390       temp = -temp;
 391     } else {
 392       product = (UDCTELEM2)(temp + corr) * recip;
 393       product >>= shift + sizeof(DCTELEM)*8;
 394       temp = product;
 395     }
 396
 397     output_ptr[i] = (JCOEF) temp;
 398   }
 399 }
 400
 401
 402 /*
 403  * Perform forward DCT on one or more blocks of a component.
 404  *
 405  * The input samples are taken from the sample_data[] array starting at
 406  * position start_row/start_col, and moving to the right for any additional
 407  * blocks. The quantized coefficients are returned in coef_blocks[].
 408  */
 409
 410 METHODDEF(void)
 411 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
 412              JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
 413              JDIMENSION start_row, JDIMENSION start_col,
 414              JDIMENSION num_blocks)
 415 /* This version is used for integer DCT implementations. */
 416 {
 417   /* This routine is heavily used, so it's worth coding it tightly. */
 418   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
 419   DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
 420   DCTELEM * workspace;
 421   JDIMENSION bi;
 422
 423   /* Make sure the compiler doesn't look up these every pass */
 424   forward_DCT_method_ptr do_dct = fdct->dct;
 425   convsamp_method_ptr do_convsamp = fdct->convsamp;
 426   quantize_method_ptr do_quantize = fdct->quantize;
 427   workspace = fdct->workspace;
 428
 429   sample_data += start_row;     /* fold in the vertical offset once */
 430
 431   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
 432     /* Load data into workspace, applying unsigned->signed conversion */
 433     (*do_convsamp) (sample_data, start_col, workspace);
 434
 435     /* Perform the DCT */
 436     (*do_dct) (workspace);
 437
 438     /* Quantize/descale the coefficients, and store into coef_blocks[] */
 439     (*do_quantize) (coef_blocks[bi], divisors, workspace);
 440   }
 441 }
 442
 443
 444 #ifdef DCT_FLOAT_SUPPORTED
 445
 446
 447 METHODDEF(void)
 448 convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
 449 {
 450   register FAST_FLOAT *workspaceptr;
 451   register JSAMPROW elemptr;
 452   register int elemr;
 453
 454   workspaceptr = workspace;
 455   for (elemr = 0; elemr < DCTSIZE; elemr++) {
 456     elemptr = sample_data[elemr] + start_col;
 457 #if DCTSIZE == 8                /* unroll the inner loop */
 458     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 459     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 460     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 461     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 462     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 463     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 464     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 465     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 466 #else
 467     {
 468       register int elemc;
 469       for (elemc = DCTSIZE; elemc > 0; elemc--)
 470         *workspaceptr++ = (FAST_FLOAT)
 471                           (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
 472     }
 473 #endif
 474   }
 475 }
 476
 477
 478 METHODDEF(void)
 479 quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
 480 {
 481   register FAST_FLOAT temp;
 482   register int i;
 483   register JCOEFPTR output_ptr = coef_block;
 484
 485   for (i = 0; i < DCTSIZE2; i++) {
 486     /* Apply the quantization and scaling factor */
 487     temp = workspace[i] * divisors[i];
 488
 489     /* Round to nearest integer.
 490      * Since C does not specify the direction of rounding for negative
 491      * quotients, we have to force the dividend positive for portability.
 492      * The maximum coefficient size is +-16K (for 12-bit data), so this
 493      * code should work for either 16-bit or 32-bit ints.
 494      */
 495     output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
 496   }
 497 }
 498
 499
 500 METHODDEF(void)
 501 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
 502                    JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
 503                    JDIMENSION start_row, JDIMENSION start_col,
 504                    JDIMENSION num_blocks)
 505 /* This version is used for floating-point DCT implementations. */
 506 {
 507   /* This routine is heavily used, so it's worth coding it tightly. */
 508   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
 509   FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
 510   FAST_FLOAT * workspace;
 511   JDIMENSION bi;
 512
 513
 514   /* Make sure the compiler doesn't look up these every pass */
 515   float_DCT_method_ptr do_dct = fdct->float_dct;
 516   float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
 517   float_quantize_method_ptr do_quantize = fdct->float_quantize;
 518   workspace = fdct->float_workspace;
 519
 520   sample_data += start_row;     /* fold in the vertical offset once */
 521
 522   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
 523     /* Load data into workspace, applying unsigned->signed conversion */
 524     (*do_convsamp) (sample_data, start_col, workspace);
 525
 526     /* Perform the DCT */
 527     (*do_dct) (workspace);
 528
 529     /* Quantize/descale the coefficients, and store into coef_blocks[] */
 530     (*do_quantize) (coef_blocks[bi], divisors, workspace);
 531   }
 532 }
 533
 534 #endif /* DCT_FLOAT_SUPPORTED */
 535
 536
 537 /*
 538  * Initialize FDCT manager.
 539  */
 540
 541 GLOBAL(void)
 542 jinit_forward_dct (j_compress_ptr cinfo)
 543 {
 544   my_fdct_ptr fdct;
 545   int i;
 546
 547   fdct = (my_fdct_ptr)
 548     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 549                                 SIZEOF(my_fdct_controller));
 550   cinfo->fdct = (struct jpeg_forward_dct *) fdct;
 551   fdct->pub.start_pass = start_pass_fdctmgr;
 552
 553   /* First determine the DCT... */
 554   switch (cinfo->dct_method) {
 555 #ifdef DCT_ISLOW_SUPPORTED
 556   case JDCT_ISLOW:
 557     fdct->pub.forward_DCT = forward_DCT;
 558     if (jsimd_can_fdct_islow())
 559       fdct->dct = jsimd_fdct_islow;
 560     else
 561       fdct->dct = jpeg_fdct_islow;
 562     break;
 563 #endif
 564 #ifdef DCT_IFAST_SUPPORTED
 565   case JDCT_IFAST:
 566     fdct->pub.forward_DCT = forward_DCT;
 567     if (jsimd_can_fdct_ifast())
 568       fdct->dct = jsimd_fdct_ifast;
 569     else
 570       fdct->dct = jpeg_fdct_ifast;
 571     break;
 572 #endif
 573 #ifdef DCT_FLOAT_SUPPORTED
 574   case JDCT_FLOAT:
 575     fdct->pub.forward_DCT = forward_DCT_float;
 576     if (jsimd_can_fdct_float())
 577       fdct->float_dct = jsimd_fdct_float;
 578     else
 579       fdct->float_dct = jpeg_fdct_float;
 580     break;
 581 #endif
 582   default:
 583     ERREXIT(cinfo, JERR_NOT_COMPILED);
 584     break;
 585   }
 586
 587   /* ...then the supporting stages. */
 588   switch (cinfo->dct_method) {
 589 #ifdef DCT_ISLOW_SUPPORTED
 590   case JDCT_ISLOW:
 591 #endif
 592 #ifdef DCT_IFAST_SUPPORTED
 593   case JDCT_IFAST:
 594 #endif
 595 #if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
 596     if (jsimd_can_convsamp())
 597       fdct->convsamp = jsimd_convsamp;
 598     else
 599       fdct->convsamp = convsamp;
 600     if (jsimd_can_quantize())
 601       fdct->quantize = jsimd_quantize;
 602     else
 603       fdct->quantize = quantize;
 604     break;
 605 #endif
 606 #ifdef DCT_FLOAT_SUPPORTED
 607   case JDCT_FLOAT:
 608     if (jsimd_can_convsamp_float())
 609       fdct->float_convsamp = jsimd_convsamp_float;
 610     else
 611       fdct->float_convsamp = convsamp_float;
 612     if (jsimd_can_quantize_float())
 613       fdct->float_quantize = jsimd_quantize_float;
 614     else
 615       fdct->float_quantize = quantize_float;
 616     break;
 617 #endif
 618   default:
 619     ERREXIT(cinfo, JERR_NOT_COMPILED);
 620     break;
 621   }
 622
 623   /* Allocate workspace memory */
 624 #ifdef DCT_FLOAT_SUPPORTED
 625   if (cinfo->dct_method == JDCT_FLOAT)
 626     fdct->float_workspace = (FAST_FLOAT *)
 627       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 628                                   SIZEOF(FAST_FLOAT) * DCTSIZE2);
 629   else
 630 #endif
 631     fdct->workspace = (DCTELEM *)
 632       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 633                                   SIZEOF(DCTELEM) * DCTSIZE2);
 634
 635   /* Mark divisor tables unallocated */
 636   for (i = 0; i < NUM_QUANT_TBLS; i++) {
 637     fdct->divisors[i] = NULL;
 638 #ifdef DCT_FLOAT_SUPPORTED
 639     fdct->float_divisors[i] = NULL;
 640 #endif
 641   }
 642 }