src/loaders/jpg/tvgJpgd.cpp

   1 /*
   2  * Copyright (c) 2021 - 2023 the ThorVG project. All rights reserved.
   3
   4  * Permission is hereby granted, free of charge, to any person obtaining a copy
   5  * of this software and associated documentation files (the "Software"), to deal
   6  * in the Software without restriction, including without limitation the rights
   7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   8  * copies of the Software, and to permit persons to whom the Software is
   9  * furnished to do so, subject to the following conditions:
  10
  11  * The above copyright notice and this permission notice shall be included in all
  12  * copies or substantial portions of the Software.
  13
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20  * SOFTWARE.
  21  */
  22
  23 // jpgd.cpp - C++ class for JPEG decompression.
  24 // Public domain, Rich Geldreich <richgel99@gmail.com>
  25 // Alex Evans: Linear memory allocator (taken from jpge.h).
  26 // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
  27 //
  28 // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
  29 //
  30 // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
  31 // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
  32 // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
  33
  34 #include <memory.h>
  35 #include <stdlib.h>
  36 #include <stdio.h>
  37 #include <setjmp.h>
  38 #include <stdint.h>
  39 #include "tvgJpgd.h"
  40
  41 #ifdef _MSC_VER
  42   #pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable
  43   #define JPGD_NORETURN __declspec(noreturn)
  44 #elif defined(__GNUC__)
  45   #define JPGD_NORETURN __attribute__ ((noreturn))
  46 #else
  47   #define JPGD_NORETURN
  48 #endif
  49
  50 /************************************************************************/
  51 /* Internal Class Implementation                                        */
  52 /************************************************************************/
  53
  54
  55 // Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling).
  56 // This is slower, but results in higher quality on images with highly saturated colors.
  57 #define JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING 1
  58
  59 #define JPGD_ASSERT(x)
  60 #define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b))
  61 #define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b))
  62
  63 typedef int16_t jpgd_quant_t;
  64 typedef int16_t jpgd_block_t;
  65
  66 // Success/failure error codes.
  67 enum jpgd_status
  68 {
  69     JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
  70     JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
  71     JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
  72     JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
  73     JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
  74     JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
  75     JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
  76     JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR,
  77     JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM
  78 };
  79
  80 enum
  81 {
  82     JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
  83     JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384
  84 };
  85
  86 // Input stream interface.
  87 // Derive from this class to read input data from sources other than files or memory. Set m_eof_flag to true when no more data is available.
  88 // The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set.
  89 // It the input stream contains data after the JPEG stream's EOI (end of image) marker it will probably be pulled into the internal buffer.
  90 // Call the get_total_bytes_read() method to determine the actual size of the JPEG stream after successful decoding.
  91 struct jpeg_decoder_stream
  92 {
  93     jpeg_decoder_stream() { }
  94     virtual ~jpeg_decoder_stream() { }
  95
  96     // The read() method is called when the internal input buffer is empty.
  97     // Parameters:
  98     // pBuf - input buffer
  99     // max_bytes_to_read - maximum bytes that can be written to pBuf
 100     // pEOF_flag - set this to true if at end of stream (no more bytes remaining)
 101     // Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0).
 102     // Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
 103     virtual int read(uint8_t *pBuf, int max_bytes_to_read, bool *pEOF_flag) = 0;
 104 };
 105
 106
 107 // stdio FILE stream class.
 108 class jpeg_decoder_file_stream : public jpeg_decoder_stream
 109 {
 110     jpeg_decoder_file_stream(const jpeg_decoder_file_stream &);
 111     jpeg_decoder_file_stream &operator =(const jpeg_decoder_file_stream &);
 112
 113     FILE *m_pFile = nullptr;
 114     bool m_eof_flag = false;
 115     bool m_error_flag = false;
 116
 117 public:
 118     jpeg_decoder_file_stream() {}
 119     virtual ~jpeg_decoder_file_stream();
 120     bool open(const char *Pfilename);
 121     void close();
 122     virtual int read(uint8_t *pBuf, int max_bytes_to_read, bool *pEOF_flag);
 123   };
 124
 125
 126 // Memory stream class.
 127 class jpeg_decoder_mem_stream : public jpeg_decoder_stream
 128 {
 129     const uint8_t *m_pSrc_data;
 130     uint32_t m_ofs, m_size;
 131
 132 public:
 133     jpeg_decoder_mem_stream() : m_pSrc_data(nullptr), m_ofs(0), m_size(0) {}
 134     jpeg_decoder_mem_stream(const uint8_t *pSrc_data, uint32_t size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) {}
 135     virtual ~jpeg_decoder_mem_stream() {}
 136     bool open(const uint8_t *pSrc_data, uint32_t size);
 137     void close() { m_pSrc_data = nullptr; m_ofs = 0; m_size = 0; }
 138     virtual int read(uint8_t *pBuf, int max_bytes_to_read, bool *pEOF_flag);
 139 };
 140
 141
 142 class jpeg_decoder
 143 {
 144 public:
 145     // Call get_error_code() after constructing to determine if the stream is valid or not. You may call the get_width(), get_height(), etc.
 146     // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
 147     jpeg_decoder(jpeg_decoder_stream *pStream);
 148     ~jpeg_decoder();
 149
 150     // Call this method after constructing the object to begin decompression.
 151     // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
 152     int begin_decoding();
 153     // Returns the next scan line.
 154     // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1).
 155     // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4).
 156     // Returns JPGD_SUCCESS if a scan line has been returned.
 157     // Returns JPGD_DONE if all scan lines have been returned.
 158     // Returns JPGD_FAILED if an error occurred. Call get_error_code() for a more info.
 159     int decode(const void** pScan_line, uint32_t* pScan_line_len);
 160     inline jpgd_status get_error_code() const { return m_error_code; }
 161     inline int get_width() const { return m_image_x_size; }
 162     inline int get_height() const { return m_image_y_size; }
 163     inline int get_num_components() const { return m_comps_in_frame; }
 164     inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; }
 165     inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); }
 166     // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
 167     inline int get_total_bytes_read() const { return m_total_bytes_read; }
 168
 169 private:
 170     jpeg_decoder(const jpeg_decoder &);
 171     jpeg_decoder &operator =(const jpeg_decoder &);
 172
 173     typedef void (*pDecode_block_func)(jpeg_decoder *, int, int, int);
 174
 175     struct huff_tables
 176     {
 177       bool ac_table;
 178       uint32_t  look_up[256];
 179       uint32_t  look_up2[256];
 180       uint8_t code_size[256];
 181       uint32_t  tree[512];
 182     };
 183
 184     struct coeff_buf
 185     {
 186       uint8_t *pData;
 187       int block_num_x, block_num_y;
 188       int block_len_x, block_len_y;
 189       int block_size;
 190     };
 191
 192     struct mem_block
 193     {
 194       mem_block *m_pNext;
 195       size_t m_used_count;
 196       size_t m_size;
 197       char m_data[1];
 198     };
 199
 200     jmp_buf m_jmp_state;
 201     mem_block *m_pMem_blocks;
 202     int m_image_x_size;
 203     int m_image_y_size;
 204     jpeg_decoder_stream *m_pStream;
 205     int m_progressive_flag;
 206     uint8_t m_huff_ac[JPGD_MAX_HUFF_TABLES];
 207     uint8_t* m_huff_num[JPGD_MAX_HUFF_TABLES];      // pointer to number of Huffman codes per bit size
 208     uint8_t* m_huff_val[JPGD_MAX_HUFF_TABLES];      // pointer to Huffman codes per bit size
 209     jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables
 210     int m_scan_type;                              // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
 211     int m_comps_in_frame;                         // # of components in frame
 212     int m_comp_h_samp[JPGD_MAX_COMPONENTS];       // component's horizontal sampling factor
 213     int m_comp_v_samp[JPGD_MAX_COMPONENTS];       // component's vertical sampling factor
 214     int m_comp_quant[JPGD_MAX_COMPONENTS];        // component's quantization table selector
 215     int m_comp_ident[JPGD_MAX_COMPONENTS];        // component's ID
 216     int m_comp_h_blocks[JPGD_MAX_COMPONENTS];
 217     int m_comp_v_blocks[JPGD_MAX_COMPONENTS];
 218     int m_comps_in_scan;                          // # of components in scan
 219     int m_comp_list[JPGD_MAX_COMPS_IN_SCAN];      // components in this scan
 220     int m_comp_dc_tab[JPGD_MAX_COMPONENTS];       // component's DC Huffman coding table selector
 221     int m_comp_ac_tab[JPGD_MAX_COMPONENTS];       // component's AC Huffman coding table selector
 222     int m_spectral_start;                         // spectral selection start
 223     int m_spectral_end;                           // spectral selection end
 224     int m_successive_low;                         // successive approximation low
 225     int m_successive_high;                        // successive approximation high
 226     int m_max_mcu_x_size;                         // MCU's max. X size in pixels
 227     int m_max_mcu_y_size;                         // MCU's max. Y size in pixels
 228     int m_blocks_per_mcu;
 229     int m_max_blocks_per_row;
 230     int m_mcus_per_row, m_mcus_per_col;
 231     int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU];
 232     int m_total_lines_left;                       // total # lines left in image
 233     int m_mcu_lines_left;                         // total # lines left in this MCU
 234     int m_real_dest_bytes_per_scan_line;
 235     int m_dest_bytes_per_scan_line;               // rounded up
 236     int m_dest_bytes_per_pixel;                   // 4 (RGB) or 1 (Y)
 237     huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES];
 238     coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS];
 239     coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS];
 240     int m_eob_run;
 241     int m_block_y_mcu[JPGD_MAX_COMPONENTS];
 242     uint8_t* m_pIn_buf_ofs;
 243     int m_in_buf_left;
 244     int m_tem_flag;
 245     bool m_eof_flag;
 246     uint8_t m_in_buf_pad_start[128];
 247     uint8_t m_in_buf[JPGD_IN_BUF_SIZE + 128];
 248     uint8_t m_in_buf_pad_end[128];
 249     int m_bits_left;
 250     uint32_t m_bit_buf;
 251     int m_restart_interval;
 252     int m_restarts_left;
 253     int m_next_restart_num;
 254     int m_max_mcus_per_row;
 255     int m_max_blocks_per_mcu;
 256     int m_expanded_blocks_per_mcu;
 257     int m_expanded_blocks_per_row;
 258     int m_expanded_blocks_per_component;
 259     bool  m_freq_domain_chroma_upsample;
 260     int m_max_mcus_per_col;
 261     uint32_t m_last_dc_val[JPGD_MAX_COMPONENTS];
 262     jpgd_block_t* m_pMCU_coefficients;
 263     int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU];
 264     uint8_t* m_pSample_buf;
 265     int m_crr[256];
 266     int m_cbb[256];
 267     int m_crg[256];
 268     int m_cbg[256];
 269     uint8_t* m_pScan_line_0;
 270     uint8_t* m_pScan_line_1;
 271     jpgd_status m_error_code;
 272     bool m_ready_flag;
 273     int m_total_bytes_read;
 274
 275     void free_all_blocks();
 276     JPGD_NORETURN void stop_decoding(jpgd_status status);
 277     void *alloc(size_t n, bool zero = false);
 278     void word_clear(void *p, uint16_t c, uint32_t n);
 279     void prep_in_buffer();
 280     void read_dht_marker();
 281     void read_dqt_marker();
 282     void read_sof_marker();
 283     void skip_variable_marker();
 284     void read_dri_marker();
 285     void read_sos_marker();
 286     int next_marker();
 287     int process_markers();
 288     void locate_soi_marker();
 289     void locate_sof_marker();
 290     int locate_sos_marker();
 291     void init(jpeg_decoder_stream * pStream);
 292     void create_look_ups();
 293     void fix_in_buffer();
 294     void transform_mcu(int mcu_row);
 295     void transform_mcu_expand(int mcu_row);
 296     coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y);
 297     inline jpgd_block_t *coeff_buf_getp(coeff_buf *cb, int block_x, int block_y);
 298     void load_next_row();
 299     void decode_next_row();
 300     void make_huff_table(int index, huff_tables *pH);
 301     void check_quant_tables();
 302     void check_huff_tables();
 303     void calc_mcu_block_order();
 304     int init_scan();
 305     void init_frame();
 306     void process_restart();
 307     void decode_scan(pDecode_block_func decode_block_func);
 308     void init_progressive();
 309     void init_sequential();
 310     void decode_start();
 311     void decode_init(jpeg_decoder_stream * pStream);
 312     void H2V2Convert();
 313     void H2V1Convert();
 314     void H1V2Convert();
 315     void H1V1Convert();
 316     void gray_convert();
 317     void expanded_convert();
 318     void find_eoi();
 319     inline uint32_t get_char();
 320     inline uint32_t get_char(bool *pPadding_flag);
 321     inline void stuff_char(uint8_t q);
 322     inline uint8_t get_octet();
 323     inline uint32_t get_bits(int num_bits);
 324     inline uint32_t get_bits_no_markers(int numbits);
 325     inline int huff_decode(huff_tables *pH);
 326     inline int huff_decode(huff_tables *pH, int& extrabits);
 327     static inline uint8_t clamp(int i);
 328     static void decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y);
 329     static void decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y);
 330     static void decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y);
 331     static void decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y);
 332 };
 333
 334
 335 // DCT coefficients are stored in this sequence.
 336 static int g_ZAG[64] = {  0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
 337
 338 enum JPEG_MARKER
 339 {
 340   M_SOF0  = 0xC0, M_SOF1  = 0xC1, M_SOF2  = 0xC2, M_SOF3  = 0xC3, M_SOF5  = 0xC5, M_SOF6  = 0xC6, M_SOF7  = 0xC7, M_JPG   = 0xC8,
 341   M_SOF9  = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT   = 0xC4, M_DAC   = 0xCC,
 342   M_RST0  = 0xD0, M_RST1  = 0xD1, M_RST2  = 0xD2, M_RST3  = 0xD3, M_RST4  = 0xD4, M_RST5  = 0xD5, M_RST6  = 0xD6, M_RST7  = 0xD7,
 343   M_SOI   = 0xD8, M_EOI   = 0xD9, M_SOS   = 0xDA, M_DQT   = 0xDB, M_DNL   = 0xDC, M_DRI   = 0xDD, M_DHP   = 0xDE, M_EXP   = 0xDF,
 344   M_APP0  = 0xE0, M_APP15 = 0xEF, M_JPG0  = 0xF0, M_JPG13 = 0xFD, M_COM   = 0xFE, M_TEM   = 0x01, M_ERROR = 0x100, RST0   = 0xD0
 345 };
 346
 347 enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
 348
 349 #define CONST_BITS  13
 350 #define PASS1_BITS  2
 351 #define SCALEDONE ((int32_t)1)
 352 #define DESCALE(x,n)  (((x) + (SCALEDONE << ((n)-1))) >> (n))
 353 #define DESCALE_ZEROSHIFT(x,n)  (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n))
 354 #define MULTIPLY(var, cnst)  ((var) * (cnst))
 355 #define CLAMP(i) ((static_cast<uint32_t>(i) > 255) ? (((~i) >> 31) & 0xFF) : (i))
 356
 357 #define FIX_0_298631336  ((int32_t)2446)        /* FIX(0.298631336) */
 358 #define FIX_0_390180644  ((int32_t)3196)        /* FIX(0.390180644) */
 359 #define FIX_0_541196100  ((int32_t)4433)        /* FIX(0.541196100) */
 360 #define FIX_0_765366865  ((int32_t)6270)        /* FIX(0.765366865) */
 361 #define FIX_0_899976223  ((int32_t)7373)        /* FIX(0.899976223) */
 362 #define FIX_1_175875602  ((int32_t)9633)        /* FIX(1.175875602) */
 363 #define FIX_1_501321110  ((int32_t)12299)       /* FIX(1.501321110) */
 364 #define FIX_1_847759065  ((int32_t)15137)       /* FIX(1.847759065) */
 365 #define FIX_1_961570560  ((int32_t)16069)       /* FIX(1.961570560) */
 366 #define FIX_2_053119869  ((int32_t)16819)       /* FIX(2.053119869) */
 367 #define FIX_2_562915447  ((int32_t)20995)       /* FIX(2.562915447) */
 368 #define FIX_3_072711026  ((int32_t)25172)       /* FIX(3.072711026) */
 369
 370
 371 // Compiler creates a fast path 1D IDCT for X non-zero columns
 372 template <int NONZERO_COLS>
 373 struct Row
 374 {
 375     static void idct(int* pTemp, const jpgd_block_t* pSrc)
 376     {
 377         // ACCESS_COL() will be optimized at compile time to either an array access, or 0.
 378         #define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
 379
 380         const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6);
 381         const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
 382         const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
 383         const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
 384
 385         const int tmp0 = static_cast<unsigned int>(ACCESS_COL(0) + ACCESS_COL(4)) << CONST_BITS;
 386         const int tmp1 = static_cast<unsigned int>(ACCESS_COL(0) - ACCESS_COL(4)) << CONST_BITS;
 387
 388         const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
 389
 390         const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1);
 391
 392         const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
 393         const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
 394
 395         const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
 396         const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
 397         const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
 398         const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
 399
 400         const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
 401         const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
 402         const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
 403         const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
 404
 405         pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS);
 406         pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS);
 407         pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS);
 408         pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS);
 409         pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS);
 410         pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS);
 411         pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS);
 412         pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS);
 413     }
 414 };
 415
 416
 417 template <>
 418 struct Row<0>
 419 {
 420     static void idct(int* pTemp, const jpgd_block_t* pSrc)
 421     {
 422 #ifdef _MSC_VER
 423       pTemp; pSrc;
 424 #endif
 425     }
 426 };
 427
 428
 429 template <>
 430 struct Row<1>
 431 {
 432     static void idct(int* pTemp, const jpgd_block_t* pSrc)
 433     {
 434         const int dcval = (pSrc[0] << PASS1_BITS);
 435
 436         pTemp[0] = dcval;
 437         pTemp[1] = dcval;
 438         pTemp[2] = dcval;
 439         pTemp[3] = dcval;
 440         pTemp[4] = dcval;
 441         pTemp[5] = dcval;
 442         pTemp[6] = dcval;
 443         pTemp[7] = dcval;
 444     }
 445 };
 446
 447
 448 // Compiler creates a fast path 1D IDCT for X non-zero rows
 449 template <int NONZERO_ROWS>
 450 struct Col
 451 {
 452     static void idct(uint8_t* pDst_ptr, const int* pTemp)
 453     {
 454         // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
 455         #define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
 456
 457         const int z2 = ACCESS_ROW(2);
 458         const int z3 = ACCESS_ROW(6);
 459
 460         const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
 461         const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
 462         const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
 463
 464         const int tmp0 = static_cast<unsigned int>(ACCESS_ROW(0) + ACCESS_ROW(4)) << CONST_BITS;
 465         const int tmp1 = static_cast<unsigned int>(ACCESS_ROW(0) - ACCESS_ROW(4)) << CONST_BITS;
 466
 467         const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
 468
 469         const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1);
 470
 471         const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
 472         const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
 473
 474         const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
 475         const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
 476         const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
 477         const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
 478
 479         const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
 480         const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
 481         const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
 482         const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
 483
 484         int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3);
 485         pDst_ptr[8*0] = (uint8_t)CLAMP(i);
 486
 487         i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3);
 488         pDst_ptr[8*7] = (uint8_t)CLAMP(i);
 489
 490         i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3);
 491         pDst_ptr[8*1] = (uint8_t)CLAMP(i);
 492
 493         i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3);
 494         pDst_ptr[8*6] = (uint8_t)CLAMP(i);
 495
 496         i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3);
 497         pDst_ptr[8*2] = (uint8_t)CLAMP(i);
 498
 499         i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3);
 500         pDst_ptr[8*5] = (uint8_t)CLAMP(i);
 501
 502         i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3);
 503         pDst_ptr[8*3] = (uint8_t)CLAMP(i);
 504
 505         i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3);
 506         pDst_ptr[8*4] = (uint8_t)CLAMP(i);
 507     }
 508 };
 509
 510
 511 template <>
 512 struct Col<1>
 513 {
 514     static void idct(uint8_t* pDst_ptr, const int* pTemp)
 515     {
 516         int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
 517         const uint8_t dcval_clamped = (uint8_t)CLAMP(dcval);
 518         pDst_ptr[0*8] = dcval_clamped;
 519         pDst_ptr[1*8] = dcval_clamped;
 520         pDst_ptr[2*8] = dcval_clamped;
 521         pDst_ptr[3*8] = dcval_clamped;
 522         pDst_ptr[4*8] = dcval_clamped;
 523         pDst_ptr[5*8] = dcval_clamped;
 524         pDst_ptr[6*8] = dcval_clamped;
 525         pDst_ptr[7*8] = dcval_clamped;
 526     }
 527 };
 528
 529
 530 static const uint8_t s_idct_row_table[] = {
 531     1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
 532     4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
 533     6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
 534     6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
 535     8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
 536     8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
 537     8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
 538     8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
 539 };
 540
 541
 542 static const uint8_t s_idct_col_table[] = { 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };
 543
 544
 545 void idct(const jpgd_block_t* pSrc_ptr, uint8_t* pDst_ptr, int block_max_zag)
 546 {
 547     JPGD_ASSERT(block_max_zag >= 1);
 548     JPGD_ASSERT(block_max_zag <= 64);
 549
 550     if (block_max_zag <= 1) {
 551         int k = ((pSrc_ptr[0] + 4) >> 3) + 128;
 552         k = CLAMP(k);
 553         k = k | (k<<8);
 554         k = k | (k<<16);
 555         for (int i = 8; i > 0; i--) {
 556             *(int*)&pDst_ptr[0] = k;
 557             *(int*)&pDst_ptr[4] = k;
 558             pDst_ptr += 8;
 559         }
 560       return;
 561     }
 562
 563     int temp[64];
 564     const jpgd_block_t* pSrc = pSrc_ptr;
 565     int* pTemp = temp;
 566     const uint8_t* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8];
 567     int i;
 568     for (i = 8; i > 0; i--, pRow_tab++) {
 569         switch (*pRow_tab) {
 570             case 0: Row<0>::idct(pTemp, pSrc); break;
 571             case 1: Row<1>::idct(pTemp, pSrc); break;
 572             case 2: Row<2>::idct(pTemp, pSrc); break;
 573             case 3: Row<3>::idct(pTemp, pSrc); break;
 574             case 4: Row<4>::idct(pTemp, pSrc); break;
 575             case 5: Row<5>::idct(pTemp, pSrc); break;
 576             case 6: Row<6>::idct(pTemp, pSrc); break;
 577             case 7: Row<7>::idct(pTemp, pSrc); break;
 578             case 8: Row<8>::idct(pTemp, pSrc); break;
 579         }
 580         pSrc += 8;
 581         pTemp += 8;
 582     }
 583
 584     pTemp = temp;
 585
 586     const int nonzero_rows = s_idct_col_table[block_max_zag - 1];
 587     for (i = 8; i > 0; i--) {
 588         switch (nonzero_rows) {
 589             case 1: Col<1>::idct(pDst_ptr, pTemp); break;
 590             case 2: Col<2>::idct(pDst_ptr, pTemp); break;
 591             case 3: Col<3>::idct(pDst_ptr, pTemp); break;
 592             case 4: Col<4>::idct(pDst_ptr, pTemp); break;
 593             case 5: Col<5>::idct(pDst_ptr, pTemp); break;
 594             case 6: Col<6>::idct(pDst_ptr, pTemp); break;
 595             case 7: Col<7>::idct(pDst_ptr, pTemp); break;
 596             case 8: Col<8>::idct(pDst_ptr, pTemp); break;
 597         }
 598         pTemp++;
 599         pDst_ptr++;
 600     }
 601 }
 602
 603
 604 void idct_4x4(const jpgd_block_t* pSrc_ptr, uint8_t* pDst_ptr)
 605 {
 606     int temp[64];
 607     int* pTemp = temp;
 608     const jpgd_block_t* pSrc = pSrc_ptr;
 609
 610     for (int i = 4; i > 0; i--) {
 611         Row<4>::idct(pTemp, pSrc);
 612         pSrc += 8;
 613         pTemp += 8;
 614     }
 615
 616     pTemp = temp;
 617
 618     for (int i = 8; i > 0; i--) {
 619         Col<4>::idct(pDst_ptr, pTemp);
 620         pTemp++;
 621         pDst_ptr++;
 622     }
 623 }
 624
 625
 626 // Retrieve one character from the input stream.
 627 inline uint32_t jpeg_decoder::get_char()
 628 {
 629     // Any bytes remaining in buffer?
 630     if (!m_in_buf_left) {
 631         // Try to get more bytes.
 632         prep_in_buffer();
 633         // Still nothing to get?
 634         if (!m_in_buf_left) {
 635             // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
 636             int t = m_tem_flag;
 637             m_tem_flag ^= 1;
 638             if (t) return 0xD9;
 639             else return 0xFF;
 640         }
 641     }
 642     uint32_t c = *m_pIn_buf_ofs++;
 643     m_in_buf_left--;
 644     return c;
 645 }
 646
 647
 648 // Same as previous method, except can indicate if the character is a pad character or not.
 649 inline uint32_t jpeg_decoder::get_char(bool *pPadding_flag)
 650 {
 651     if (!m_in_buf_left) {
 652         prep_in_buffer();
 653         if (!m_in_buf_left) {
 654             *pPadding_flag = true;
 655             int t = m_tem_flag;
 656             m_tem_flag ^= 1;
 657             if (t) return 0xD9;
 658             else return 0xFF;
 659         }
 660     }
 661     *pPadding_flag = false;
 662     uint32_t c = *m_pIn_buf_ofs++;
 663     m_in_buf_left--;
 664
 665     return c;
 666 }
 667
 668
 669 // Inserts a previously retrieved character back into the input buffer.
 670 inline void jpeg_decoder::stuff_char(uint8_t q)
 671 {
 672     *(--m_pIn_buf_ofs) = q;
 673     m_in_buf_left++;
 674 }
 675
 676
 677 // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
 678 inline uint8_t jpeg_decoder::get_octet()
 679 {
 680     bool padding_flag;
 681     int c = get_char(&padding_flag);
 682
 683     if (c == 0xFF) {
 684         if (padding_flag) return 0xFF;
 685
 686         c = get_char(&padding_flag);
 687         if (padding_flag) {
 688             stuff_char(0xFF);
 689             return 0xFF;
 690         }
 691         if (c == 0x00) return 0xFF;
 692         else {
 693             stuff_char(static_cast<uint8_t>(c));
 694             stuff_char(0xFF);
 695             return 0xFF;
 696         }
 697     }
 698     return static_cast<uint8_t>(c);
 699 }
 700
 701
// Retrieves a variable number of bits from the input stream. Does not recognize markers.
// The returned bits are taken from the top (most significant end) of m_bit_buf. When the request
// uses up the buffered bits, two more bytes are fetched with get_char() and merged in.
// NOTE(review): the refill adds only 16 bits at a time, so num_bits is presumably expected to be at most 16 - confirm against callers.
inline uint32_t jpeg_decoder::get_bits(int num_bits)
{
    if (!num_bits) return 0;

    // The result is the top num_bits bits of the bit buffer.
    uint32_t i = m_bit_buf >> (32 - num_bits);

    if ((m_bits_left -= num_bits) <= 0) {
        // m_bits_left is now <= 0, so num_bits becomes the count of bits that were still buffered; shift those out first.
        m_bit_buf <<= (num_bits += m_bits_left);
        uint32_t c1 = get_char();
        uint32_t c2 = get_char();
        // Place the two new bytes in the low 16 bits of the buffer.
        m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
        // Shift out the remainder of the request (-m_bits_left bits), then account for the 16 new bits.
        m_bit_buf <<= -m_bits_left;
        m_bits_left += 16;
        JPGD_ASSERT(m_bits_left >= 0);
    }
    else m_bit_buf <<= num_bits;

    return i;
}
 722
 723
// Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
// Same bit-buffer bookkeeping as get_bits(); only the way the two refill bytes are obtained differs.
inline uint32_t jpeg_decoder::get_bits_no_markers(int num_bits)
{
    if (!num_bits)return 0;

    // The result is the top num_bits bits of the bit buffer.
    uint32_t i = m_bit_buf >> (32 - num_bits);

    if ((m_bits_left -= num_bits) <= 0) {
        // Shift out the bits that were still buffered before merging in two new bytes.
        m_bit_buf <<= (num_bits += m_bits_left);
        // Slow path: fewer than two bytes buffered, or one of the next two bytes is 0xFF (possible marker or stuffed byte), so let get_octet() handle them.
        if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF)) {
            uint32_t c1 = get_octet();
            uint32_t c2 = get_octet();
            m_bit_buf |= (c1 << 8) | c2;
        } else {
            // Fast path: take two plain bytes straight from the input buffer.
            m_bit_buf |= ((uint32_t)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
            m_in_buf_left -= 2;
            m_pIn_buf_ofs += 2;
        }
        // Shift out the remainder of the request, then account for the 16 new bits.
        m_bit_buf <<= -m_bits_left;
        m_bits_left += 16;
        JPGD_ASSERT(m_bits_left >= 0);
    } else m_bit_buf <<= num_bits;

    return i;
}
 749
 750
 751 // Decodes a Huffman encoded symbol.
 752 inline int jpeg_decoder::huff_decode(huff_tables *pH)
 753 {
 754     int symbol;
 755
 756     // Check first 8-bits: do we have a complete symbol?
 757     if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0) {
 758         // Decode more bits, use a tree traversal to find symbol.
 759         int ofs = 23;
 760         do {
 761             symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))];
 762             ofs--;
 763         } while (symbol < 0);
 764         get_bits_no_markers(8 + (23 - ofs));
 765     } else get_bits_no_markers(pH->code_size[symbol]);
 766
 767   return symbol;
 768 }
 769
 770
// Decodes a Huffman encoded symbol.
// This overload also fetches the symbol's extra bits (their count is the low nibble of the symbol) and returns them through extra_bits.
// Layout of a non-negative pH->look_up2 entry, as used below:
//   bits 0-7  : the symbol (its low 4 bits are the number of extra bits)
//   bits 8-12 : number of bits to consume when the 0x8000 flag is set, otherwise the code size
//   bit 15    : 0x8000 flag - the extra bits are already stored in the entry
//   bits 16+  : the pre-decoded extra bits (only meaningful with the 0x8000 flag)
inline int jpeg_decoder::huff_decode(huff_tables *pH, int& extra_bits)
{
    int symbol;

    // Check first 8-bits: do we have a complete symbol?
    if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0) {
        // Use a tree traversal to find symbol.
        int ofs = 23;
        do {
            symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))];
            ofs--;
        } while (symbol < 0);

        // Consume the code bits (8 for the lookup plus one per tree step), then read the extra bits separately.
        get_bits_no_markers(8 + (23 - ofs));
        extra_bits = get_bits_no_markers(symbol & 0xF);
    } else {
        JPGD_ASSERT(((symbol >> 8) & 31) == pH->code_size[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));

        if (symbol & 0x8000) {
            // Code and extra bits both fit in the lookup: consume them together and take the extra bits from the entry.
            get_bits_no_markers((symbol >> 8) & 31);
            extra_bits = symbol >> 16;
        } else  {
            int code_size = (symbol >> 8) & 31;
            int num_extra_bits = symbol & 0xF;
            int bits = code_size + num_extra_bits;
            // Fetch code and extra bits in one call when the bound below allows it, keeping only the low num_extra_bits bits.
            if (bits <= (m_bits_left + 16)) extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
            else {
                // Otherwise consume the code first and read the extra bits in a second call.
                get_bits_no_markers(code_size);
                extra_bits = get_bits_no_markers(num_extra_bits);
            }
        }
        // Strip the packed fields, leaving only the symbol itself.
        symbol &= 0xFF;
    }
    return symbol;
}
 807
 808
 809 // Tables and macro used to fully decode the DPCM differences.
 810 static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
 811 static const unsigned int s_extend_offset[16] = { 0, ((~0u)<<1) + 1, ((~0u)<<2) + 1, ((~0u)<<3) + 1, ((~0u)<<4) + 1, ((~0u)<<5) + 1, ((~0u)<<6) + 1, ((~0u)<<7) + 1, ((~0u)<<8) + 1, ((~0u)<<9) + 1, ((~0u)<<10) + 1, ((~0u)<<11) + 1, ((~0u)<<12) + 1, ((~0u)<<13) + 1, ((~0u)<<14) + 1, ((~0u)<<15) + 1 };
 812
 813 // The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this)
 814 #define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x))
 815
 816
 817 // Clamps a value between 0-255.
 818 inline uint8_t jpeg_decoder::clamp(int i)
 819 {
 820     if (static_cast<uint32_t>(i) > 255) i = (((~i) >> 31) & 0xFF);
 821     return static_cast<uint8_t>(i);
 822 }
 823
 824
 825 namespace DCT_Upsample
 826 {
 827     struct Matrix44
 828     {
 829         typedef int Element_Type;
 830         enum { NUM_ROWS = 4, NUM_COLS = 4 };
 831
 832         Element_Type v[NUM_ROWS][NUM_COLS];
 833
 834         inline int rows() const { return NUM_ROWS; }
 835         inline int cols() const { return NUM_COLS; }
 836         inline const Element_Type & at(int r, int c) const { return v[r][c]; }
 837         inline       Element_Type & at(int r, int c)       { return v[r][c]; }
 838
 839         inline Matrix44() {}
 840
 841         inline Matrix44& operator += (const Matrix44& a)
 842         {
 843             for (int r = 0; r < NUM_ROWS; r++) {
 844                 at(r, 0) += a.at(r, 0);
 845                 at(r, 1) += a.at(r, 1);
 846                 at(r, 2) += a.at(r, 2);
 847                 at(r, 3) += a.at(r, 3);
 848             }
 849             return *this;
 850         }
 851
 852         inline Matrix44& operator -= (const Matrix44& a)
 853         {
 854             for (int r = 0; r < NUM_ROWS; r++) {
 855                 at(r, 0) -= a.at(r, 0);
 856                 at(r, 1) -= a.at(r, 1);
 857                 at(r, 2) -= a.at(r, 2);
 858                 at(r, 3) -= a.at(r, 3);
 859             }
 860             return *this;
 861         }
 862
 863         friend inline Matrix44 operator + (const Matrix44& a, const Matrix44& b)
 864         {
 865             Matrix44 ret;
 866             for (int r = 0; r < NUM_ROWS; r++) {
 867                 ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
 868                 ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
 869                 ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
 870                 ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
 871             }
 872             return ret;
 873         }
 874
 875         friend inline Matrix44 operator - (const Matrix44& a, const Matrix44& b)
 876         {
 877             Matrix44 ret;
 878             for (int r = 0; r < NUM_ROWS; r++) {
 879                 ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
 880                 ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
 881                 ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
 882                 ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
 883             }
 884             return ret;
 885         }
 886
 887         static inline void add_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
 888         {
 889             for (int r = 0; r < 4; r++) {
 890                 pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) + b.at(r, 0));
 891                 pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) + b.at(r, 1));
 892                 pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) + b.at(r, 2));
 893                 pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) + b.at(r, 3));
 894             }
 895         }
 896
 897         static inline void sub_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
 898         {
 899             for (int r = 0; r < 4; r++) {
 900                 pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) - b.at(r, 0));
 901                 pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) - b.at(r, 1));
 902                 pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) - b.at(r, 2));
 903                 pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) - b.at(r, 3));
 904             }
 905         }
 906     };
 907
    // Fixed-point format used by the constant multiplies in this namespace: FRACT_BITS fractional bits, i.e. values scaled by SCALE.
    const int FRACT_BITS = 10;
    const int SCALE = 1 << FRACT_BITS;

    typedef int Temp_Type;
    // D(i): scales a fixed-point sum back down by FRACT_BITS, adding half of SCALE first so the shift rounds.
    #define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
    // F(i): converts a floating-point constant to fixed point (multiplied by SCALE, plus 0.5, then truncated to int).
    #define F(i) ((int)((i) * SCALE + .5f))

    // AT(c, r): the coefficient at column c, row r of the 8-wide block pSrc; positions at or beyond NUM_COLS / NUM_ROWS read as 0.
    // It relies on pSrc, NUM_ROWS and NUM_COLS being in scope at the point of use.
    // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
    #define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])
 917
    // Computes the 4x4 matrices P and Q from the 8x8 coefficient block pSrc using fixed-point constant multiplies.
    // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
    // (AT() reads everything outside that region as 0, so those terms can be folded away at compile time.)
    // NOTE(review): presumably part of the frequency-domain H2V2 chroma upsampling mentioned in the file header - confirm against the caller.
    template<int NUM_ROWS, int NUM_COLS>
    struct P_Q
    {
        // P, Q: outputs, every element is written.
        // pSrc: source block, addressed as pSrc[column + row*8] through AT().
        static void calc(Matrix44& P, Matrix44& Q, const jpgd_block_t* pSrc)
        {
            // 4x8 = 4x8 times 8x8, matrix 0 is constant
            // Naming: X0ij is intermediate row i (0..3) for source row j (0..7).
            //   i = 0: column 0 as is;  i = 2: column 4 as is;
            //   i = 1, 3: two different weighted sums of the odd columns 1, 3, 5, 7.
            const Temp_Type X000 = AT(0, 0);
            const Temp_Type X001 = AT(0, 1);
            const Temp_Type X002 = AT(0, 2);
            const Temp_Type X003 = AT(0, 3);
            const Temp_Type X004 = AT(0, 4);
            const Temp_Type X005 = AT(0, 5);
            const Temp_Type X006 = AT(0, 6);
            const Temp_Type X007 = AT(0, 7);
            const Temp_Type X010 = D(F(0.415735f) * AT(1, 0) + F(0.791065f) * AT(3, 0) + F(-0.352443f) * AT(5, 0) + F(0.277785f) * AT(7, 0));
            const Temp_Type X011 = D(F(0.415735f) * AT(1, 1) + F(0.791065f) * AT(3, 1) + F(-0.352443f) * AT(5, 1) + F(0.277785f) * AT(7, 1));
            const Temp_Type X012 = D(F(0.415735f) * AT(1, 2) + F(0.791065f) * AT(3, 2) + F(-0.352443f) * AT(5, 2) + F(0.277785f) * AT(7, 2));
            const Temp_Type X013 = D(F(0.415735f) * AT(1, 3) + F(0.791065f) * AT(3, 3) + F(-0.352443f) * AT(5, 3) + F(0.277785f) * AT(7, 3));
            const Temp_Type X014 = D(F(0.415735f) * AT(1, 4) + F(0.791065f) * AT(3, 4) + F(-0.352443f) * AT(5, 4) + F(0.277785f) * AT(7, 4));
            const Temp_Type X015 = D(F(0.415735f) * AT(1, 5) + F(0.791065f) * AT(3, 5) + F(-0.352443f) * AT(5, 5) + F(0.277785f) * AT(7, 5));
            const Temp_Type X016 = D(F(0.415735f) * AT(1, 6) + F(0.791065f) * AT(3, 6) + F(-0.352443f) * AT(5, 6) + F(0.277785f) * AT(7, 6));
            const Temp_Type X017 = D(F(0.415735f) * AT(1, 7) + F(0.791065f) * AT(3, 7) + F(-0.352443f) * AT(5, 7) + F(0.277785f) * AT(7, 7));
            const Temp_Type X020 = AT(4, 0);
            const Temp_Type X021 = AT(4, 1);
            const Temp_Type X022 = AT(4, 2);
            const Temp_Type X023 = AT(4, 3);
            const Temp_Type X024 = AT(4, 4);
            const Temp_Type X025 = AT(4, 5);
            const Temp_Type X026 = AT(4, 6);
            const Temp_Type X027 = AT(4, 7);
            const Temp_Type X030 = D(F(0.022887f) * AT(1, 0) + F(-0.097545f) * AT(3, 0) + F(0.490393f) * AT(5, 0) + F(0.865723f) * AT(7, 0));
            const Temp_Type X031 = D(F(0.022887f) * AT(1, 1) + F(-0.097545f) * AT(3, 1) + F(0.490393f) * AT(5, 1) + F(0.865723f) * AT(7, 1));
            const Temp_Type X032 = D(F(0.022887f) * AT(1, 2) + F(-0.097545f) * AT(3, 2) + F(0.490393f) * AT(5, 2) + F(0.865723f) * AT(7, 2));
            const Temp_Type X033 = D(F(0.022887f) * AT(1, 3) + F(-0.097545f) * AT(3, 3) + F(0.490393f) * AT(5, 3) + F(0.865723f) * AT(7, 3));
            const Temp_Type X034 = D(F(0.022887f) * AT(1, 4) + F(-0.097545f) * AT(3, 4) + F(0.490393f) * AT(5, 4) + F(0.865723f) * AT(7, 4));
            const Temp_Type X035 = D(F(0.022887f) * AT(1, 5) + F(-0.097545f) * AT(3, 5) + F(0.490393f) * AT(5, 5) + F(0.865723f) * AT(7, 5));
            const Temp_Type X036 = D(F(0.022887f) * AT(1, 6) + F(-0.097545f) * AT(3, 6) + F(0.490393f) * AT(5, 6) + F(0.865723f) * AT(7, 6));
            const Temp_Type X037 = D(F(0.022887f) * AT(1, 7) + F(-0.097545f) * AT(3, 7) + F(0.490393f) * AT(5, 7) + F(0.865723f) * AT(7, 7));

            // 4x4 = 4x8 times 8x4, matrix 1 is constant
            // P takes source rows 0 and 4 as is (columns 0 and 2) and weighted sums of the odd source rows (columns 1 and 3).
            P.at(0, 0) = X000;
            P.at(0, 1) = D(X001 * F(0.415735f) + X003 * F(0.791065f) + X005 * F(-0.352443f) + X007 * F(0.277785f));
            P.at(0, 2) = X004;
            P.at(0, 3) = D(X001 * F(0.022887f) + X003 * F(-0.097545f) + X005 * F(0.490393f) + X007 * F(0.865723f));
            P.at(1, 0) = X010;
            P.at(1, 1) = D(X011 * F(0.415735f) + X013 * F(0.791065f) + X015 * F(-0.352443f) + X017 * F(0.277785f));
            P.at(1, 2) = X014;
            P.at(1, 3) = D(X011 * F(0.022887f) + X013 * F(-0.097545f) + X015 * F(0.490393f) + X017 * F(0.865723f));
            P.at(2, 0) = X020;
            P.at(2, 1) = D(X021 * F(0.415735f) + X023 * F(0.791065f) + X025 * F(-0.352443f) + X027 * F(0.277785f));
            P.at(2, 2) = X024;
            P.at(2, 3) = D(X021 * F(0.022887f) + X023 * F(-0.097545f) + X025 * F(0.490393f) + X027 * F(0.865723f));
            P.at(3, 0) = X030;
            P.at(3, 1) = D(X031 * F(0.415735f) + X033 * F(0.791065f) + X035 * F(-0.352443f) + X037 * F(0.277785f));
            P.at(3, 2) = X034;
            P.at(3, 3) = D(X031 * F(0.022887f) + X033 * F(-0.097545f) + X035 * F(0.490393f) + X037 * F(0.865723f));
            // 40 muls 24 adds

            // 4x4 = 4x8 times 8x4, matrix 1 is constant
            // Q takes source rows 2 and 6 as is (columns 1 and 3) and a second pair of weighted sums of the odd source rows (columns 0 and 2).
            Q.at(0, 0) = D(X001 * F(0.906127f) + X003 * F(-0.318190f) + X005 * F(0.212608f) + X007 * F(-0.180240f));
            Q.at(0, 1) = X002;
            Q.at(0, 2) = D(X001 * F(-0.074658f) + X003 * F(0.513280f) + X005 * F(0.768178f) + X007 * F(-0.375330f));
            Q.at(0, 3) = X006;
            Q.at(1, 0) = D(X011 * F(0.906127f) + X013 * F(-0.318190f) + X015 * F(0.212608f) + X017 * F(-0.180240f));
            Q.at(1, 1) = X012;
            Q.at(1, 2) = D(X011 * F(-0.074658f) + X013 * F(0.513280f) + X015 * F(0.768178f) + X017 * F(-0.375330f));
            Q.at(1, 3) = X016;
            Q.at(2, 0) = D(X021 * F(0.906127f) + X023 * F(-0.318190f) + X025 * F(0.212608f) + X027 * F(-0.180240f));
            Q.at(2, 1) = X022;
            Q.at(2, 2) = D(X021 * F(-0.074658f) + X023 * F(0.513280f) + X025 * F(0.768178f) + X027 * F(-0.375330f));
            Q.at(2, 3) = X026;
            Q.at(3, 0) = D(X031 * F(0.906127f) + X033 * F(-0.318190f) + X035 * F(0.212608f) + X037 * F(-0.180240f));
            Q.at(3, 1) = X032;
            Q.at(3, 2) = D(X031 * F(-0.074658f) + X033 * F(0.513280f) + X035 * F(0.768178f) + X037 * F(-0.375330f));
            Q.at(3, 3) = X036;
            // 40 muls 24 adds
        }
    };
 997
 998
    // Computes the 4x4 matrices R and S from the 8x8 coefficient block pSrc using fixed-point constant multiplies.
    // NUM_ROWS/NUM_COLS give the number of non-zero rows/columns of the input; AT() reads everything
    // outside that region as 0, so those terms can be folded away at compile time.
    // NOTE(review): presumably part of the frequency-domain H2V2 chroma upsampling mentioned in the file header - confirm against the caller.
    template<int NUM_ROWS, int NUM_COLS>
    struct R_S
    {
        // R, S: outputs, every element is written.
        // pSrc: source block, addressed as pSrc[column + row*8] through AT().
        static void calc(Matrix44& R, Matrix44& S, const jpgd_block_t* pSrc)
        {
            // 4x8 = 4x8 times 8x8, matrix 0 is constant
            // Naming: X1ij is intermediate row i (0..3) for source row j (0..7).
            //   i = 1: column 2 as is;  i = 3: column 6 as is;
            //   i = 0, 2: two different weighted sums of the odd columns 1, 3, 5, 7.
            const Temp_Type X100 = D(F(0.906127f) * AT(1, 0) + F(-0.318190f) * AT(3, 0) + F(0.212608f) * AT(5, 0) + F(-0.180240f) * AT(7, 0));
            const Temp_Type X101 = D(F(0.906127f) * AT(1, 1) + F(-0.318190f) * AT(3, 1) + F(0.212608f) * AT(5, 1) + F(-0.180240f) * AT(7, 1));
            const Temp_Type X102 = D(F(0.906127f) * AT(1, 2) + F(-0.318190f) * AT(3, 2) + F(0.212608f) * AT(5, 2) + F(-0.180240f) * AT(7, 2));
            const Temp_Type X103 = D(F(0.906127f) * AT(1, 3) + F(-0.318190f) * AT(3, 3) + F(0.212608f) * AT(5, 3) + F(-0.180240f) * AT(7, 3));
            const Temp_Type X104 = D(F(0.906127f) * AT(1, 4) + F(-0.318190f) * AT(3, 4) + F(0.212608f) * AT(5, 4) + F(-0.180240f) * AT(7, 4));
            const Temp_Type X105 = D(F(0.906127f) * AT(1, 5) + F(-0.318190f) * AT(3, 5) + F(0.212608f) * AT(5, 5) + F(-0.180240f) * AT(7, 5));
            const Temp_Type X106 = D(F(0.906127f) * AT(1, 6) + F(-0.318190f) * AT(3, 6) + F(0.212608f) * AT(5, 6) + F(-0.180240f) * AT(7, 6));
            const Temp_Type X107 = D(F(0.906127f) * AT(1, 7) + F(-0.318190f) * AT(3, 7) + F(0.212608f) * AT(5, 7) + F(-0.180240f) * AT(7, 7));
            const Temp_Type X110 = AT(2, 0);
            const Temp_Type X111 = AT(2, 1);
            const Temp_Type X112 = AT(2, 2);
            const Temp_Type X113 = AT(2, 3);
            const Temp_Type X114 = AT(2, 4);
            const Temp_Type X115 = AT(2, 5);
            const Temp_Type X116 = AT(2, 6);
            const Temp_Type X117 = AT(2, 7);
            const Temp_Type X120 = D(F(-0.074658f) * AT(1, 0) + F(0.513280f) * AT(3, 0) + F(0.768178f) * AT(5, 0) + F(-0.375330f) * AT(7, 0));
            const Temp_Type X121 = D(F(-0.074658f) * AT(1, 1) + F(0.513280f) * AT(3, 1) + F(0.768178f) * AT(5, 1) + F(-0.375330f) * AT(7, 1));
            const Temp_Type X122 = D(F(-0.074658f) * AT(1, 2) + F(0.513280f) * AT(3, 2) + F(0.768178f) * AT(5, 2) + F(-0.375330f) * AT(7, 2));
            const Temp_Type X123 = D(F(-0.074658f) * AT(1, 3) + F(0.513280f) * AT(3, 3) + F(0.768178f) * AT(5, 3) + F(-0.375330f) * AT(7, 3));
            const Temp_Type X124 = D(F(-0.074658f) * AT(1, 4) + F(0.513280f) * AT(3, 4) + F(0.768178f) * AT(5, 4) + F(-0.375330f) * AT(7, 4));
            const Temp_Type X125 = D(F(-0.074658f) * AT(1, 5) + F(0.513280f) * AT(3, 5) + F(0.768178f) * AT(5, 5) + F(-0.375330f) * AT(7, 5));
            const Temp_Type X126 = D(F(-0.074658f) * AT(1, 6) + F(0.513280f) * AT(3, 6) + F(0.768178f) * AT(5, 6) + F(-0.375330f) * AT(7, 6));
            const Temp_Type X127 = D(F(-0.074658f) * AT(1, 7) + F(0.513280f) * AT(3, 7) + F(0.768178f) * AT(5, 7) + F(-0.375330f) * AT(7, 7));
            const Temp_Type X130 = AT(6, 0);
            const Temp_Type X131 = AT(6, 1);
            const Temp_Type X132 = AT(6, 2);
            const Temp_Type X133 = AT(6, 3);
            const Temp_Type X134 = AT(6, 4);
            const Temp_Type X135 = AT(6, 5);
            const Temp_Type X136 = AT(6, 6);
            const Temp_Type X137 = AT(6, 7);
            // 80 muls 48 adds

            // 4x4 = 4x8 times 8x4, matrix 1 is constant
            // R takes source rows 0 and 4 as is (columns 0 and 2) and weighted sums of the odd source rows (columns 1 and 3).
            R.at(0, 0) = X100;
            R.at(0, 1) = D(X101 * F(0.415735f) + X103 * F(0.791065f) + X105 * F(-0.352443f) + X107 * F(0.277785f));
            R.at(0, 2) = X104;
            R.at(0, 3) = D(X101 * F(0.022887f) + X103 * F(-0.097545f) + X105 * F(0.490393f) + X107 * F(0.865723f));
            R.at(1, 0) = X110;
            R.at(1, 1) = D(X111 * F(0.415735f) + X113 * F(0.791065f) + X115 * F(-0.352443f) + X117 * F(0.277785f));
            R.at(1, 2) = X114;
            R.at(1, 3) = D(X111 * F(0.022887f) + X113 * F(-0.097545f) + X115 * F(0.490393f) + X117 * F(0.865723f));
            R.at(2, 0) = X120;
            R.at(2, 1) = D(X121 * F(0.415735f) + X123 * F(0.791065f) + X125 * F(-0.352443f) + X127 * F(0.277785f));
            R.at(2, 2) = X124;
            R.at(2, 3) = D(X121 * F(0.022887f) + X123 * F(-0.097545f) + X125 * F(0.490393f) + X127 * F(0.865723f));
            R.at(3, 0) = X130;
            R.at(3, 1) = D(X131 * F(0.415735f) + X133 * F(0.791065f) + X135 * F(-0.352443f) + X137 * F(0.277785f));
            R.at(3, 2) = X134;
            R.at(3, 3) = D(X131 * F(0.022887f) + X133 * F(-0.097545f) + X135 * F(0.490393f) + X137 * F(0.865723f));
            // 40 muls 24 adds
            // 4x4 = 4x8 times 8x4, matrix 1 is constant
            // S takes source rows 2 and 6 as is (columns 1 and 3) and a second pair of weighted sums of the odd source rows (columns 0 and 2).
            S.at(0, 0) = D(X101 * F(0.906127f) + X103 * F(-0.318190f) + X105 * F(0.212608f) + X107 * F(-0.180240f));
            S.at(0, 1) = X102;
            S.at(0, 2) = D(X101 * F(-0.074658f) + X103 * F(0.513280f) + X105 * F(0.768178f) + X107 * F(-0.375330f));
            S.at(0, 3) = X106;
            S.at(1, 0) = D(X111 * F(0.906127f) + X113 * F(-0.318190f) + X115 * F(0.212608f) + X117 * F(-0.180240f));
            S.at(1, 1) = X112;
            S.at(1, 2) = D(X111 * F(-0.074658f) + X113 * F(0.513280f) + X115 * F(0.768178f) + X117 * F(-0.375330f));
            S.at(1, 3) = X116;
            S.at(2, 0) = D(X121 * F(0.906127f) + X123 * F(-0.318190f) + X125 * F(0.212608f) + X127 * F(-0.180240f));
            S.at(2, 1) = X122;
            S.at(2, 2) = D(X121 * F(-0.074658f) + X123 * F(0.513280f) + X125 * F(0.768178f) + X127 * F(-0.375330f));
            S.at(2, 3) = X126;
            S.at(3, 0) = D(X131 * F(0.906127f) + X133 * F(-0.318190f) + X135 * F(0.212608f) + X137 * F(-0.180240f));
            S.at(3, 1) = X132;
            S.at(3, 2) = D(X131 * F(-0.074658f) + X133 * F(0.513280f) + X135 * F(0.768178f) + X137 * F(-0.375330f));
            S.at(3, 3) = X136;
            // 40 muls 24 adds
        }
    };
1077 } // end namespace DCT_Upsample
1078
1079
1080 // Unconditionally frees all allocated m_blocks.
1081 void jpeg_decoder::free_all_blocks()
1082 {
1083     delete(m_pStream);
1084     m_pStream = nullptr;
1085
1086     for (mem_block *b = m_pMem_blocks; b; ) {
1087         mem_block *n = b->m_pNext;
1088         free(b);
1089         b = n;
1090     }
1091     m_pMem_blocks = nullptr;
1092 }
1093
1094
// This method handles all errors. It will never return.
// It records the status in m_error_code, releases the stream and all memory blocks through
// free_all_blocks(), and then jumps to the point saved in m_jmp_state, passing status as the value.
// NOTE(review): longjmp() does not run destructors of automatic objects in the frames it skips; callers
// presumably hold none with non-trivial destructors when decoding can fail - confirm before adding any.
// It could easily be changed to use C++ exceptions.
JPGD_NORETURN void jpeg_decoder::stop_decoding(jpgd_status status)
{
    m_error_code = status;
    free_all_blocks();
    longjmp(m_jmp_state, status);
}
1103
1104
1105 void *jpeg_decoder::alloc(size_t nSize, bool zero)
1106 {
1107     nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
1108     char *rv = nullptr;
1109     for (mem_block *b = m_pMem_blocks; b; b = b->m_pNext) {
1110         if ((b->m_used_count + nSize) <= b->m_size) {
1111             rv = b->m_data + b->m_used_count;
1112             b->m_used_count += nSize;
1113             break;
1114         }
1115     }
1116     if (!rv) {
1117         int capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
1118         mem_block *b = (mem_block*)malloc(sizeof(mem_block) + capacity);
1119         if (!b) stop_decoding(JPGD_NOTENOUGHMEM);
1120         b->m_pNext = m_pMem_blocks; m_pMem_blocks = b;
1121         b->m_used_count = nSize;
1122         b->m_size = capacity;
1123         rv = b->m_data;
1124     }
1125     if (zero) memset(rv, 0, nSize);
1126     return rv;
1127 }
1128
1129
1130 void jpeg_decoder::word_clear(void *p, uint16_t c, uint32_t n)
1131 {
1132     uint8_t *pD = (uint8_t*)p;
1133     const uint8_t l = c & 0xFF, h = (c >> 8) & 0xFF;
1134     while (n) {
1135         pD[0] = l; pD[1] = h; pD += 2;
1136         n--;
1137     }
1138 }
1139
1140
1141 // Refill the input buffer.
1142 // This method will sit in a loop until (A) the buffer is full or (B)
1143 // the stream's read() method reports and end of file condition.
1144 void jpeg_decoder::prep_in_buffer()
1145 {
1146     m_in_buf_left = 0;
1147     m_pIn_buf_ofs = m_in_buf;
1148
1149     if (m_eof_flag) return;
1150
1151     do {
1152         int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
1153         if (bytes_read == -1) stop_decoding(JPGD_STREAM_READ);
1154         m_in_buf_left += bytes_read;
1155     } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
1156
1157     m_total_bytes_read += m_in_buf_left;
1158
1159     // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
1160     // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
1161     word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
1162 }
1163
1164
1165 // Read a Huffman code table.
1166 void jpeg_decoder::read_dht_marker()
1167 {
1168     int i, index, count;
1169     uint8_t huff_num[17];
1170     uint8_t huff_val[256];
1171     uint32_t num_left = get_bits(16);
1172
1173     if (num_left < 2) stop_decoding(JPGD_BAD_DHT_MARKER);
1174     num_left -= 2;
1175
1176     while (num_left) {
1177         index = get_bits(8);
1178         huff_num[0] = 0;
1179         count = 0;
1180
1181         for (i = 1; i <= 16; i++) {
1182             huff_num[i] = static_cast<uint8_t>(get_bits(8));
1183             count += huff_num[i];
1184         }
1185
1186         if (count > 255) stop_decoding(JPGD_BAD_DHT_COUNTS);
1187
1188         for (i = 0; i < count; i++)
1189             huff_val[i] = static_cast<uint8_t>(get_bits(8));
1190
1191         i = 1 + 16 + count;
1192
1193         if (num_left < (uint32_t)i) stop_decoding(JPGD_BAD_DHT_MARKER);
1194         num_left -= i;
1195
1196         if ((index & 0x10) > 0x10) stop_decoding(JPGD_BAD_DHT_INDEX);
1197         index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
1198         if (index >= JPGD_MAX_HUFF_TABLES) stop_decoding(JPGD_BAD_DHT_INDEX);
1199
1200         if (!m_huff_num[index]) m_huff_num[index] = (uint8_t *)alloc(17);
1201         if (!m_huff_val[index]) m_huff_val[index] = (uint8_t *)alloc(256);
1202
1203         m_huff_ac[index] = (index & 0x10) != 0;
1204         memcpy(m_huff_num[index], huff_num, 17);
1205         memcpy(m_huff_val[index], huff_val, 256);
1206     }
1207 }
1208
1209
1210 // Read a quantization table.
1211 void jpeg_decoder::read_dqt_marker()
1212 {
1213     int n, i, prec;
1214     uint32_t temp;
1215     uint32_t num_left = get_bits(16);
1216     if (num_left < 2) stop_decoding(JPGD_BAD_DQT_MARKER);
1217     num_left -= 2;
1218
1219     while (num_left) {
1220         n = get_bits(8);
1221         prec = n >> 4;
1222         n &= 0x0F;
1223
1224         if (n >= JPGD_MAX_QUANT_TABLES) stop_decoding(JPGD_BAD_DQT_TABLE);
1225
1226         if (!m_quant[n]) m_quant[n] = (jpgd_quant_t *)alloc(64 * sizeof(jpgd_quant_t));
1227
1228         // read quantization entries, in zag order
1229         for (i = 0; i < 64; i++) {
1230             temp = get_bits(8);
1231             if (prec) temp = (temp << 8) + get_bits(8);
1232             m_quant[n][i] = static_cast<jpgd_quant_t>(temp);
1233         }
1234         i = 64 + 1;
1235         if (prec) i += 64;
1236         if (num_left < (uint32_t)i) stop_decoding(JPGD_BAD_DQT_LENGTH);
1237         num_left -= i;
1238     }
1239 }
1240
1241
1242 // Read the start of frame (SOF) marker.
1243 void jpeg_decoder::read_sof_marker()
1244 {
1245     int i;
1246     uint32_t num_left = get_bits(16);
1247
1248     if (get_bits(8) != 8) stop_decoding(JPGD_BAD_PRECISION);   /* precision: sorry, only 8-bit precision is supported right now */
1249
1250     m_image_y_size = get_bits(16);
1251     if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT)) stop_decoding(JPGD_BAD_HEIGHT);
1252
1253     m_image_x_size = get_bits(16);
1254     if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH)) stop_decoding(JPGD_BAD_WIDTH);
1255
1256     m_comps_in_frame = get_bits(8);
1257     if (m_comps_in_frame > JPGD_MAX_COMPONENTS) stop_decoding(JPGD_TOO_MANY_COMPONENTS);
1258
1259     if (num_left != (uint32_t)(m_comps_in_frame * 3 + 8)) stop_decoding(JPGD_BAD_SOF_LENGTH);
1260
1261     for (i = 0; i < m_comps_in_frame; i++) {
1262         m_comp_ident[i]  = get_bits(8);
1263         m_comp_h_samp[i] = get_bits(4);
1264         m_comp_v_samp[i] = get_bits(4);
1265         m_comp_quant[i]  = get_bits(8);
1266     }
1267 }
1268
1269
1270 // Used to skip unrecognized markers.
1271 void jpeg_decoder::skip_variable_marker()
1272 {
1273     uint32_t num_left = get_bits(16);
1274     if (num_left < 2) stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1275     num_left -= 2;
1276
1277     while (num_left) {
1278         get_bits(8);
1279         num_left--;
1280     }
1281 }
1282
1283
1284 // Read a define restart interval (DRI) marker.
1285 void jpeg_decoder::read_dri_marker()
1286 {
1287     if (get_bits(16) != 4) stop_decoding(JPGD_BAD_DRI_LENGTH);
1288     m_restart_interval = get_bits(16);
1289 }
1290
1291
1292 // Read a start of scan (SOS) marker.
1293 void jpeg_decoder::read_sos_marker()
1294 {
1295     int i, ci, c, cc;
1296     uint32_t num_left = get_bits(16);
1297     int n = get_bits(8);
1298
1299     m_comps_in_scan = n;
1300     num_left -= 3;
1301
1302     if ( (num_left != (uint32_t)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) ) stop_decoding(JPGD_BAD_SOS_LENGTH);
1303
1304     for (i = 0; i < n; i++) {
1305         cc = get_bits(8);
1306         c = get_bits(8);
1307         num_left -= 2;
1308
1309         for (ci = 0; ci < m_comps_in_frame; ci++)
1310           if (cc == m_comp_ident[ci]) break;
1311
1312         if (ci >= m_comps_in_frame) stop_decoding(JPGD_BAD_SOS_COMP_ID);
1313
1314         m_comp_list[i]    = ci;
1315         m_comp_dc_tab[ci] = (c >> 4) & 15;
1316         m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
1317     }
1318     m_spectral_start  = get_bits(8);
1319     m_spectral_end    = get_bits(8);
1320     m_successive_high = get_bits(4);
1321     m_successive_low  = get_bits(4);
1322
1323     if (!m_progressive_flag) {
1324         m_spectral_start = 0;
1325         m_spectral_end = 63;
1326     }
1327     num_left -= 3;
1328
1329     while (num_left) {    /* read past whatever is num_left */
1330         get_bits(8);
1331         num_left--;
1332     }
1333 }
1334
1335
1336 // Finds the next marker.
1337 int jpeg_decoder::next_marker()
1338 {
1339     uint32_t c, bytes = 0;
1340
1341     do {
1342         do {
1343             bytes++;
1344             c = get_bits(8);
1345         } while (c != 0xFF);
1346
1347         do {
1348             c = get_bits(8);
1349         } while (c == 0xFF);
1350     } while (c == 0);
1351
1352     // If bytes > 0 here, there where extra bytes before the marker (not good).
1353     return c;
1354 }
1355
1356
1357 // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
1358 // encountered.
1359 int jpeg_decoder::process_markers()
1360 {
1361     int c;
1362
1363     for ( ; ; ) {
1364         c = next_marker();
1365         switch (c) {
1366             case M_SOF0:
1367             case M_SOF1:
1368             case M_SOF2:
1369             case M_SOF3:
1370             case M_SOF5:
1371             case M_SOF6:
1372             case M_SOF7:
1373       //      case M_JPG:
1374             case M_SOF9:
1375             case M_SOF10:
1376             case M_SOF11:
1377             case M_SOF13:
1378             case M_SOF14:
1379             case M_SOF15:
1380             case M_SOI:
1381             case M_EOI:
1382             case M_SOS: return c;
1383             case M_DHT: {
1384                 read_dht_marker();
1385                 break;
1386             }
1387             // No arithmitic support - dumb patents!
1388             case M_DAC: {
1389                 stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1390                 break;
1391             }
1392             case M_DQT: {
1393                 read_dqt_marker();
1394                 break;
1395             }
1396             case M_DRI: {
1397                 read_dri_marker();
1398                 break;
1399             }
1400             //case M_APP0:  /* no need to read the JFIF marker */
1401             case M_JPG:
1402             case M_RST0:    /* no parameters */
1403             case M_RST1:
1404             case M_RST2:
1405             case M_RST3:
1406             case M_RST4:
1407             case M_RST5:
1408             case M_RST6:
1409             case M_RST7:
1410             case M_TEM: {
1411                 stop_decoding(JPGD_UNEXPECTED_MARKER);
1412                 break;
1413             }
1414             default: {   /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
1415                 skip_variable_marker();
1416                 break;
1417             }
1418         }
1419     }
1420 }
1421
1422
1423 // Finds the start of image (SOI) marker.
1424 // This code is rather defensive: it only checks the first 512 bytes to avoid
1425 // false positives.
1426 void jpeg_decoder::locate_soi_marker()
1427 {
1428     uint32_t lastchar = get_bits(8);
1429     uint32_t thischar = get_bits(8);
1430
1431     /* ok if it's a normal JPEG file without a special header */
1432     if ((lastchar == 0xFF) && (thischar == M_SOI)) return;
1433
1434     uint32_t bytesleft = 4096; //512;
1435
1436     while (true) {
1437         if (--bytesleft == 0) stop_decoding(JPGD_NOT_JPEG);
1438
1439         lastchar = thischar;
1440         thischar = get_bits(8);
1441
1442         if (lastchar == 0xFF) {
1443           if (thischar == M_SOI) break;
1444           else if (thischar == M_EOI) stop_decoding(JPGD_NOT_JPEG); // get_bits will keep returning M_EOI if we read past the end
1445         }
1446     }
1447
1448     // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
1449     thischar = (m_bit_buf >> 24) & 0xFF;
1450     if (thischar != 0xFF) stop_decoding(JPGD_NOT_JPEG);
1451 }
1452
1453
1454 // Find a start of frame (SOF) marker.
1455 void jpeg_decoder::locate_sof_marker()
1456 {
1457     locate_soi_marker();
1458     int c = process_markers();
1459
1460     switch (c) {
1461         case M_SOF2: m_progressive_flag = true;
1462         case M_SOF0:  /* baseline DCT */
1463         case M_SOF1: { /* extended sequential DCT */
1464           read_sof_marker();
1465           break;
1466         }
1467         case M_SOF9: {  /* Arithmitic coding */
1468           stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1469           break;
1470         }
1471         default: {
1472           stop_decoding(JPGD_UNSUPPORTED_MARKER);
1473           break;
1474         }
1475     }
1476 }
1477
1478
1479 // Find a start of scan (SOS) marker.
1480 int jpeg_decoder::locate_sos_marker()
1481 {
1482     int c = process_markers();
1483     if (c == M_EOI) return false;
1484     else if (c != M_SOS) stop_decoding(JPGD_UNEXPECTED_MARKER);
1485     read_sos_marker();
1486     return true;
1487 }
1488
1489
1490 // Reset everything to default/uninitialized state.
1491 void jpeg_decoder::init(jpeg_decoder_stream *pStream)
1492 {
1493     m_pMem_blocks = nullptr;
1494     m_error_code = JPGD_SUCCESS;
1495     m_ready_flag = false;
1496     m_image_x_size = m_image_y_size = 0;
1497     m_pStream = pStream;
1498     m_progressive_flag = false;
1499
1500     memset(m_huff_ac, 0, sizeof(m_huff_ac));
1501     memset(m_huff_num, 0, sizeof(m_huff_num));
1502     memset(m_huff_val, 0, sizeof(m_huff_val));
1503     memset(m_quant, 0, sizeof(m_quant));
1504
1505     m_scan_type = 0;
1506     m_comps_in_frame = 0;
1507
1508     memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp));
1509     memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp));
1510     memset(m_comp_quant, 0, sizeof(m_comp_quant));
1511     memset(m_comp_ident, 0, sizeof(m_comp_ident));
1512     memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks));
1513     memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks));
1514
1515     m_comps_in_scan = 0;
1516     memset(m_comp_list, 0, sizeof(m_comp_list));
1517     memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab));
1518     memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab));
1519
1520     m_spectral_start = 0;
1521     m_spectral_end = 0;
1522     m_successive_low = 0;
1523     m_successive_high = 0;
1524     m_max_mcu_x_size = 0;
1525     m_max_mcu_y_size = 0;
1526     m_blocks_per_mcu = 0;
1527     m_max_blocks_per_row = 0;
1528     m_mcus_per_row = 0;
1529     m_mcus_per_col = 0;
1530     m_expanded_blocks_per_component = 0;
1531     m_expanded_blocks_per_mcu = 0;
1532     m_expanded_blocks_per_row = 0;
1533     m_freq_domain_chroma_upsample = false;
1534
1535     memset(m_mcu_org, 0, sizeof(m_mcu_org));
1536
1537     m_total_lines_left = 0;
1538     m_mcu_lines_left = 0;
1539     m_real_dest_bytes_per_scan_line = 0;
1540     m_dest_bytes_per_scan_line = 0;
1541     m_dest_bytes_per_pixel = 0;
1542
1543     memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs));
1544
1545     memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs));
1546     memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs));
1547     memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
1548
1549     m_eob_run = 0;
1550
1551     memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
1552
1553     m_pIn_buf_ofs = m_in_buf;
1554     m_in_buf_left = 0;
1555     m_eof_flag = false;
1556     m_tem_flag = 0;
1557
1558     memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start));
1559     memset(m_in_buf, 0, sizeof(m_in_buf));
1560     memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end));
1561
1562     m_restart_interval = 0;
1563     m_restarts_left    = 0;
1564     m_next_restart_num = 0;
1565
1566     m_max_mcus_per_row = 0;
1567     m_max_blocks_per_mcu = 0;
1568     m_max_mcus_per_col = 0;
1569
1570     memset(m_last_dc_val, 0, sizeof(m_last_dc_val));
1571     m_pMCU_coefficients = nullptr;
1572     m_pSample_buf = nullptr;
1573
1574     m_total_bytes_read = 0;
1575
1576     m_pScan_line_0 = nullptr;
1577     m_pScan_line_1 = nullptr;
1578
1579     // Ready the input buffer.
1580     prep_in_buffer();
1581
1582     // Prime the bit buffer.
1583     m_bits_left = 16;
1584     m_bit_buf = 0;
1585
1586     get_bits(16);
1587     get_bits(16);
1588
1589     for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++) {
1590         m_mcu_block_max_zag[i] = 64;
1591     }
1592 }
1593
// Fixed-point helpers for the YCbCr -> RGB tables: values carry SCALEBITS
// fractional bits, ONE_HALF is 0.5 in that format, and FIX(x) converts a
// floating-point constant to it with rounding.
#define SCALEBITS 16
#define ONE_HALF  (static_cast<int>(1) << (SCALEBITS - 1))
#define FIX(x)    (static_cast<int>((x) * (1L << SCALEBITS) + 0.5f))
1597
1598
1599 // Create a few tables that allow us to quickly convert YCbCr to RGB.
1600 void jpeg_decoder::create_look_ups()
1601 {
1602   for (int i = 0; i <= 255; i++) {
1603       int k = i - 128;
1604       m_crr[i] = ( FIX(1.40200f)  * k + ONE_HALF) >> SCALEBITS;
1605       m_cbb[i] = ( FIX(1.77200f)  * k + ONE_HALF) >> SCALEBITS;
1606       m_crg[i] = (-FIX(0.71414f)) * k;
1607       m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF;
1608   }
1609 }
1610
1611
1612 // This method throws back into the stream any bytes that where read
1613 // into the bit buffer during initial marker scanning.
1614 void jpeg_decoder::fix_in_buffer()
1615 {
1616     // In case any 0xFF's where pulled into the buffer during marker scanning.
1617     JPGD_ASSERT((m_bits_left & 7) == 0);
1618
1619     if (m_bits_left == 16) stuff_char( (uint8_t)(m_bit_buf & 0xFF));
1620     if (m_bits_left >= 8) stuff_char( (uint8_t)((m_bit_buf >> 8) & 0xFF));
1621
1622     stuff_char((uint8_t)((m_bit_buf >> 16) & 0xFF));
1623     stuff_char((uint8_t)((m_bit_buf >> 24) & 0xFF));
1624
1625     m_bits_left = 16;
1626     get_bits_no_markers(16);
1627     get_bits_no_markers(16);
1628 }
1629
1630
1631 void jpeg_decoder::transform_mcu(int mcu_row)
1632 {
1633     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1634     uint8_t* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
1635
1636     for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) {
1637         idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
1638         pSrc_ptr += 64;
1639         pDst_ptr += 64;
1640     }
1641 }
1642
1643
// Indexed by (max zig-zag position - 1). Each entry packs two small numbers as
// a*16+b; transform_mcu_expand() switches on it to pick the matching
// DCT_Upsample P_Q<a, b> / R_S<a, b> specialisation.
static const uint8_t s_max_rc[64] =
{
     17,  18,  34,  50,  50,  51,  52,  52,
     52,  68,  84,  84,  84,  84,  85,  86,
     86,  86,  86,  86, 102, 118, 118, 118,
    118, 118, 118, 119, 120, 120, 120, 120,
    120, 120, 120, 136, 136, 136, 136, 136,
    136, 136, 136, 136, 136, 136, 136, 136,
    136, 136, 136, 136, 136, 136, 136, 136,
    136, 136, 136, 136, 136, 136, 136, 136
};
1651
1652
1653 void jpeg_decoder::transform_mcu_expand(int mcu_row)
1654 {
1655     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1656     uint8_t* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;
1657
1658     // Y IDCT
1659     int mcu_block;
1660     for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++) {
1661         idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
1662         pSrc_ptr += 64;
1663         pDst_ptr += 64;
1664     }
1665
1666     // Chroma IDCT, with upsampling
1667     jpgd_block_t temp_block[64];
1668
1669     for (int i = 0; i < 2; i++) {
1670         DCT_Upsample::Matrix44 P, Q, R, S;
1671         JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] >= 1);
1672         JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] <= 64);
1673
1674         int max_zag = m_mcu_block_max_zag[mcu_block++] - 1;
1675         if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
1676
1677         switch (s_max_rc[max_zag]) {
1678             case 1*16+1:
1679                 DCT_Upsample::P_Q<1, 1>::calc(P, Q, pSrc_ptr);
1680                 DCT_Upsample::R_S<1, 1>::calc(R, S, pSrc_ptr);
1681                 break;
1682             case 1*16+2:
1683                 DCT_Upsample::P_Q<1, 2>::calc(P, Q, pSrc_ptr);
1684                 DCT_Upsample::R_S<1, 2>::calc(R, S, pSrc_ptr);
1685                 break;
1686             case 2*16+2:
1687                 DCT_Upsample::P_Q<2, 2>::calc(P, Q, pSrc_ptr);
1688                 DCT_Upsample::R_S<2, 2>::calc(R, S, pSrc_ptr);
1689                 break;
1690             case 3*16+2:
1691                 DCT_Upsample::P_Q<3, 2>::calc(P, Q, pSrc_ptr);
1692                 DCT_Upsample::R_S<3, 2>::calc(R, S, pSrc_ptr);
1693                 break;
1694             case 3*16+3:
1695                 DCT_Upsample::P_Q<3, 3>::calc(P, Q, pSrc_ptr);
1696                 DCT_Upsample::R_S<3, 3>::calc(R, S, pSrc_ptr);
1697                 break;
1698             case 3*16+4:
1699                 DCT_Upsample::P_Q<3, 4>::calc(P, Q, pSrc_ptr);
1700                 DCT_Upsample::R_S<3, 4>::calc(R, S, pSrc_ptr);
1701                 break;
1702             case 4*16+4:
1703                 DCT_Upsample::P_Q<4, 4>::calc(P, Q, pSrc_ptr);
1704                 DCT_Upsample::R_S<4, 4>::calc(R, S, pSrc_ptr);
1705                 break;
1706             case 5*16+4:
1707                 DCT_Upsample::P_Q<5, 4>::calc(P, Q, pSrc_ptr);
1708                 DCT_Upsample::R_S<5, 4>::calc(R, S, pSrc_ptr);
1709                 break;
1710             case 5*16+5:
1711                 DCT_Upsample::P_Q<5, 5>::calc(P, Q, pSrc_ptr);
1712                 DCT_Upsample::R_S<5, 5>::calc(R, S, pSrc_ptr);
1713                 break;
1714             case 5*16+6:
1715                 DCT_Upsample::P_Q<5, 6>::calc(P, Q, pSrc_ptr);
1716                 DCT_Upsample::R_S<5, 6>::calc(R, S, pSrc_ptr);
1717                 break;
1718             case 6*16+6:
1719                 DCT_Upsample::P_Q<6, 6>::calc(P, Q, pSrc_ptr);
1720                 DCT_Upsample::R_S<6, 6>::calc(R, S, pSrc_ptr);
1721                 break;
1722             case 7*16+6:
1723                 DCT_Upsample::P_Q<7, 6>::calc(P, Q, pSrc_ptr);
1724                 DCT_Upsample::R_S<7, 6>::calc(R, S, pSrc_ptr);
1725                 break;
1726             case 7*16+7:
1727                 DCT_Upsample::P_Q<7, 7>::calc(P, Q, pSrc_ptr);
1728                 DCT_Upsample::R_S<7, 7>::calc(R, S, pSrc_ptr);
1729                 break;
1730             case 7*16+8:
1731                 DCT_Upsample::P_Q<7, 8>::calc(P, Q, pSrc_ptr);
1732                 DCT_Upsample::R_S<7, 8>::calc(R, S, pSrc_ptr);
1733                 break;
1734             case 8*16+8:
1735                 DCT_Upsample::P_Q<8, 8>::calc(P, Q, pSrc_ptr);
1736                 DCT_Upsample::R_S<8, 8>::calc(R, S, pSrc_ptr);
1737                 break;
1738             default:
1739                 JPGD_ASSERT(false);
1740         }
1741         DCT_Upsample::Matrix44 a(P + Q); P -= Q;
1742         DCT_Upsample::Matrix44& b = P;
1743         DCT_Upsample::Matrix44 c(R + S); R -= S;
1744         DCT_Upsample::Matrix44& d = R;
1745
1746         DCT_Upsample::Matrix44::add_and_store(temp_block, a, c);
1747         idct_4x4(temp_block, pDst_ptr);
1748         pDst_ptr += 64;
1749
1750         DCT_Upsample::Matrix44::sub_and_store(temp_block, a, c);
1751         idct_4x4(temp_block, pDst_ptr);
1752         pDst_ptr += 64;
1753
1754         DCT_Upsample::Matrix44::add_and_store(temp_block, b, d);
1755         idct_4x4(temp_block, pDst_ptr);
1756         pDst_ptr += 64;
1757
1758         DCT_Upsample::Matrix44::sub_and_store(temp_block, b, d);
1759         idct_4x4(temp_block, pDst_ptr);
1760         pDst_ptr += 64;
1761         pSrc_ptr += 64;
1762     }
1763 }
1764
1765
1766 // Loads and dequantizes the next row of (already decoded) coefficients.
1767 // Progressive images only.
1768 void jpeg_decoder::load_next_row()
1769 {
1770     int i;
1771     jpgd_block_t *p;
1772     jpgd_quant_t *q;
1773     int mcu_row, mcu_block, row_block = 0;
1774     int component_num, component_id;
1775     int block_x_mcu[JPGD_MAX_COMPONENTS];
1776
1777     memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int));
1778
1779     for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) {
1780         int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
1781
1782         for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) {
1783             component_id = m_mcu_org[mcu_block];
1784             q = m_quant[m_comp_quant[component_id]];
1785             p = m_pMCU_coefficients + 64 * mcu_block;
1786
1787             jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
1788             jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
1789             p[0] = pDC[0];
1790             memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t));
1791
1792             for (i = 63; i > 0; i--) {
1793                 if (p[g_ZAG[i]]) break;
1794             }
1795
1796             m_mcu_block_max_zag[mcu_block] = i + 1;
1797
1798             for ( ; i >= 0; i--) {
1799                 if (p[g_ZAG[i]]) {
1800                     p[g_ZAG[i]] = static_cast<jpgd_block_t>(p[g_ZAG[i]] * q[i]);
1801                 }
1802             }
1803
1804             row_block++;
1805
1806             if (m_comps_in_scan == 1) block_x_mcu[component_id]++;
1807             else {
1808                 if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) block_x_mcu_ofs = 0;
1809                 if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) {
1810                     block_y_mcu_ofs = 0;
1811                     block_x_mcu[component_id] += m_comp_h_samp[component_id];
1812                 }
1813             }
1814         }
1815         if (m_freq_domain_chroma_upsample) transform_mcu_expand(mcu_row);
1816         else transform_mcu(mcu_row);
1817     }
1818     if (m_comps_in_scan == 1) m_block_y_mcu[m_comp_list[0]]++;
1819     else {
1820         for (component_num = 0; component_num < m_comps_in_scan; component_num++) {
1821             component_id = m_comp_list[component_num];
1822             m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
1823         }
1824     }
1825 }
1826
1827
1828 // Restart interval processing.
1829 void jpeg_decoder::process_restart()
1830 {
1831     int i;
1832     int c = 0;
1833
1834     // Align to a byte boundry
1835     // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
1836     //get_bits_no_markers(m_bits_left & 7);
1837
1838     // Let's scan a little bit to find the marker, but not _too_ far.
1839     // 1536 is a "fudge factor" that determines how much to scan.
1840     for (i = 1536; i > 0; i--) {
1841         if (get_char() == 0xFF) break;
1842     }
1843     if (i == 0) stop_decoding(JPGD_BAD_RESTART_MARKER);
1844
1845     for ( ; i > 0; i--) {
1846         if ((c = get_char()) != 0xFF) break;
1847     }
1848     if (i == 0) stop_decoding(JPGD_BAD_RESTART_MARKER);
1849
1850     // Is it the expected marker? If not, something bad happened.
1851     if (c != (m_next_restart_num + M_RST0)) stop_decoding(JPGD_BAD_RESTART_MARKER);
1852
1853     // Reset each component's DC prediction values.
1854     memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint32_t));
1855
1856     m_eob_run = 0;
1857     m_restarts_left = m_restart_interval;
1858     m_next_restart_num = (m_next_restart_num + 1) & 7;
1859
1860     // Get the bit buffer going again...
1861     m_bits_left = 16;
1862     get_bits_no_markers(16);
1863     get_bits_no_markers(16);
1864 }
1865
1866
// Dequantizes one AC coefficient: the decoded value times its quantizer.
static inline int dequantize_ac(int c, int q)
{
    return c * q;
}
1872
1873 // Decodes and dequantizes the next row of coefficients.
1874 void jpeg_decoder::decode_next_row()
1875 {
1876     int row_block = 0;
1877
1878     for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) {
1879         if ((m_restart_interval) && (m_restarts_left == 0)) process_restart();
1880
1881         jpgd_block_t* p = m_pMCU_coefficients;
1882
1883         for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64) {
1884             int component_id = m_mcu_org[mcu_block];
1885             jpgd_quant_t* q = m_quant[m_comp_quant[component_id]];
1886
1887             int r, s;
1888             s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r);
1889             s = JPGD_HUFF_EXTEND(r, s);
1890
1891             m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]);
1892
1893             p[0] = static_cast<jpgd_block_t>(s * q[0]);
1894
1895             int prev_num_set = m_mcu_block_max_zag[mcu_block];
1896             huff_tables *pH = m_pHuff_tabs[m_comp_ac_tab[component_id]];
1897             int k;
1898             for (k = 1; k < 64; k++) {
1899                 int extra_bits;
1900                 s = huff_decode(pH, extra_bits);
1901                 r = s >> 4;
1902                 s &= 15;
1903
1904                 if (s) {
1905                     if (r) {
1906                         if ((k + r) > 63) stop_decoding(JPGD_DECODE_ERROR);
1907                         if (k < prev_num_set) {
1908                             int n = JPGD_MIN(r, prev_num_set - k);
1909                             int kt = k;
1910                             while (n--) p[g_ZAG[kt++]] = 0;
1911                         }
1912                         k += r;
1913                     }
1914                     s = JPGD_HUFF_EXTEND(extra_bits, s);
1915                     JPGD_ASSERT(k < 64);
1916                     p[g_ZAG[k]] = static_cast<jpgd_block_t>(dequantize_ac(s, q[k])); //s * q[k];
1917                 } else {
1918                     if (r == 15) {
1919                         if ((k + 16) > 64) stop_decoding(JPGD_DECODE_ERROR);
1920                         if (k < prev_num_set) {
1921                             int n = JPGD_MIN(16, prev_num_set - k);
1922                             int kt = k;
1923                             while (n--) {
1924                                 JPGD_ASSERT(kt <= 63);
1925                                 p[g_ZAG[kt++]] = 0;
1926                             }
1927                         }
1928                         k += 16 - 1; // - 1 because the loop counter is k
1929                         JPGD_ASSERT(p[g_ZAG[k]] == 0);
1930                     } else  break;
1931                 }
1932             }
1933
1934             if (k < prev_num_set) {
1935                 int kt = k;
1936                 while (kt < prev_num_set) p[g_ZAG[kt++]] = 0;
1937             }
1938
1939             m_mcu_block_max_zag[mcu_block] = k;
1940             row_block++;
1941         }
1942         if (m_freq_domain_chroma_upsample) transform_mcu_expand(mcu_row);
1943         else transform_mcu(mcu_row);
1944         m_restarts_left--;
1945     }
1946 }
1947
1948
1949 // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
1950 void jpeg_decoder::H1V1Convert()
1951 {
1952     int row = m_max_mcu_y_size - m_mcu_lines_left;
1953     uint8_t *d = m_pScan_line_0;
1954     uint8_t *s = m_pSample_buf + row * 8;
1955
1956     for (int i = m_max_mcus_per_row; i > 0; i--) {
1957         for (int j = 0; j < 8; j++) {
1958             int y = s[j];
1959             int cb = s[64+j];
1960             int cr = s[128+j];
1961
1962             d[0] = clamp(y + m_crr[cr]);
1963             d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
1964             d[2] = clamp(y + m_cbb[cb]);
1965             d[3] = 255;
1966             d += 4;
1967         }
1968         s += 64*3;
1969     }
1970 }
1971
1972
1973 // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
1974 void jpeg_decoder::H2V1Convert()
1975 {
1976     int row = m_max_mcu_y_size - m_mcu_lines_left;
1977     uint8_t *d0 = m_pScan_line_0;
1978     uint8_t *y = m_pSample_buf + row * 8;
1979     uint8_t *c = m_pSample_buf + 2*64 + row * 8;
1980
1981     for (int i = m_max_mcus_per_row; i > 0; i--) {
1982         for (int l = 0; l < 2; l++) {
1983             for (int j = 0; j < 4; j++) {
1984                 int cb = c[0];
1985                 int cr = c[64];
1986
1987                 int rc = m_crr[cr];
1988                 int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
1989                 int bc = m_cbb[cb];
1990
1991                 int yy = y[j<<1];
1992                 d0[0] = clamp(yy+rc);
1993                 d0[1] = clamp(yy+gc);
1994                 d0[2] = clamp(yy+bc);
1995                 d0[3] = 255;
1996
1997                 yy = y[(j<<1)+1];
1998                 d0[4] = clamp(yy+rc);
1999                 d0[5] = clamp(yy+gc);
2000                 d0[6] = clamp(yy+bc);
2001                 d0[7] = 255;
2002                 d0 += 8;
2003                 c++;
2004             }
2005             y += 64;
2006         }
2007         y += 64*4 - 64*2;
2008         c += 64*4 - 8;
2009     }
2010 }
2011
2012
2013 // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB
2014 void jpeg_decoder::H1V2Convert()
2015 {
2016     int row = m_max_mcu_y_size - m_mcu_lines_left;
2017     uint8_t *d0 = m_pScan_line_0;
2018     uint8_t *d1 = m_pScan_line_1;
2019     uint8_t *y;
2020     uint8_t *c;
2021
2022     if (row < 8) y = m_pSample_buf + row * 8;
2023     else y = m_pSample_buf + 64*1 + (row & 7) * 8;
2024
2025     c = m_pSample_buf + 64*2 + (row >> 1) * 8;
2026
2027     for (int i = m_max_mcus_per_row; i > 0; i--) {
2028         for (int j = 0; j < 8; j++) {
2029             int cb = c[0+j];
2030             int cr = c[64+j];
2031
2032             int rc = m_crr[cr];
2033             int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
2034             int bc = m_cbb[cb];
2035
2036             int yy = y[j];
2037             d0[0] = clamp(yy+rc);
2038             d0[1] = clamp(yy+gc);
2039             d0[2] = clamp(yy+bc);
2040             d0[3] = 255;
2041
2042             yy = y[8+j];
2043             d1[0] = clamp(yy+rc);
2044             d1[1] = clamp(yy+gc);
2045             d1[2] = clamp(yy+bc);
2046             d1[3] = 255;
2047
2048             d0 += 4;
2049             d1 += 4;
2050         }
2051         y += 64*4;
2052         c += 64*4;
2053     }
2054 }
2055
2056
2057 // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
2058 void jpeg_decoder::H2V2Convert()
2059 {
2060     int row = m_max_mcu_y_size - m_mcu_lines_left;
2061     uint8_t *d0 = m_pScan_line_0;
2062     uint8_t *d1 = m_pScan_line_1;
2063     uint8_t *y;
2064     uint8_t *c;
2065
2066     if (row < 8) y = m_pSample_buf + row * 8;
2067     else y = m_pSample_buf + 64*2 + (row & 7) * 8;
2068
2069     c = m_pSample_buf + 64*4 + (row >> 1) * 8;
2070
2071     for (int i = m_max_mcus_per_row; i > 0; i--) {
2072         for (int l = 0; l < 2; l++) {
2073             for (int j = 0; j < 8; j += 2) {
2074                 int cb = c[0];
2075                 int cr = c[64];
2076
2077                 int rc = m_crr[cr];
2078                 int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
2079                 int bc = m_cbb[cb];
2080
2081                 int yy = y[j];
2082                 d0[0] = clamp(yy+rc);
2083                 d0[1] = clamp(yy+gc);
2084                 d0[2] = clamp(yy+bc);
2085                 d0[3] = 255;
2086
2087                 yy = y[j+1];
2088                 d0[4] = clamp(yy+rc);
2089                 d0[5] = clamp(yy+gc);
2090                 d0[6] = clamp(yy+bc);
2091                 d0[7] = 255;
2092
2093                 yy = y[j+8];
2094                 d1[0] = clamp(yy+rc);
2095                 d1[1] = clamp(yy+gc);
2096                 d1[2] = clamp(yy+bc);
2097                 d1[3] = 255;
2098
2099                 yy = y[j+8+1];
2100                 d1[4] = clamp(yy+rc);
2101                 d1[5] = clamp(yy+gc);
2102                 d1[6] = clamp(yy+bc);
2103                 d1[7] = 255;
2104
2105                 d0 += 8;
2106                 d1 += 8;
2107
2108                 c++;
2109             }
2110             y += 64;
2111         }
2112         y += 64*6 - 64*2;
2113         c += 64*6 - 8;
2114     }
2115 }
2116
2117
2118 // Y (1 block per MCU) to 8-bit grayscale
2119 void jpeg_decoder::gray_convert()
2120 {
2121     int row = m_max_mcu_y_size - m_mcu_lines_left;
2122     uint8_t *d = m_pScan_line_0;
2123     uint8_t *s = m_pSample_buf + row * 8;
2124
2125     for (int i = m_max_mcus_per_row; i > 0; i--) {
2126         *(uint32_t *)d = *(uint32_t *)s;
2127         *(uint32_t *)(&d[4]) = *(uint32_t *)(&s[4]);
2128         s += 64;
2129         d += 8;
2130     }
2131 }
2132
2133
2134 void jpeg_decoder::expanded_convert()
2135 {
2136     int row = m_max_mcu_y_size - m_mcu_lines_left;
2137     uint8_t* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp[0] + (row & 7) * 8;
2138     uint8_t* d = m_pScan_line_0;
2139
2140     for (int i = m_max_mcus_per_row; i > 0; i--) {
2141         for (int k = 0; k < m_max_mcu_x_size; k += 8) {
2142             const int Y_ofs = k * 8;
2143             const int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
2144             const int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
2145             for (int j = 0; j < 8; j++) {
2146                 int y = Py[Y_ofs + j];
2147                 int cb = Py[Cb_ofs + j];
2148                 int cr = Py[Cr_ofs + j];
2149
2150                 d[0] = clamp(y + m_crr[cr]);
2151                 d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
2152                 d[2] = clamp(y + m_cbb[cb]);
2153                 d[3] = 255;
2154
2155                 d += 4;
2156             }
2157         }
2158         Py += 64 * m_expanded_blocks_per_mcu;
2159     }
2160 }
2161
2162
2163 // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
2164 void jpeg_decoder::find_eoi()
2165 {
2166     if (!m_progressive_flag) {
2167         // Attempt to read the EOI marker.
2168         //get_bits_no_markers(m_bits_left & 7);
2169
2170         // Prime the bit buffer
2171         m_bits_left = 16;
2172         get_bits(16);
2173         get_bits(16);
2174
2175         // The next marker _should_ be EOI
2176         process_markers();
2177     }
2178     m_total_bytes_read -= m_in_buf_left;
2179 }
2180
2181
2182 int jpeg_decoder::decode(const void** pScan_line, uint32_t* pScan_line_len)
2183 {
2184     if ((m_error_code) || (!m_ready_flag)) return JPGD_FAILED;
2185     if (m_total_lines_left == 0) return JPGD_DONE;
2186     if (m_mcu_lines_left == 0) {
2187         if (setjmp(m_jmp_state)) return JPGD_FAILED;
2188         if (m_progressive_flag) load_next_row();
2189         else decode_next_row();
2190         // Find the EOI marker if that was the last row.
2191         if (m_total_lines_left <= m_max_mcu_y_size) find_eoi();
2192         m_mcu_lines_left = m_max_mcu_y_size;
2193     }
2194
2195     if (m_freq_domain_chroma_upsample) {
2196         expanded_convert();
2197         *pScan_line = m_pScan_line_0;
2198     } else {
2199         switch (m_scan_type) {
2200             case JPGD_YH2V2: {
2201                 if ((m_mcu_lines_left & 1) == 0) {
2202                     H2V2Convert();
2203                     *pScan_line = m_pScan_line_0;
2204                 }
2205               else *pScan_line = m_pScan_line_1;
2206               break;
2207             }
2208             case JPGD_YH2V1: {
2209                 H2V1Convert();
2210                 *pScan_line = m_pScan_line_0;
2211                 break;
2212             }
2213             case JPGD_YH1V2: {
2214                 if ((m_mcu_lines_left & 1) == 0) {
2215                     H1V2Convert();
2216                     *pScan_line = m_pScan_line_0;
2217                 } else *pScan_line = m_pScan_line_1;
2218                 break;
2219             }
2220             case JPGD_YH1V1: {
2221                 H1V1Convert();
2222                 *pScan_line = m_pScan_line_0;
2223                 break;
2224             }
2225             case JPGD_GRAYSCALE: {
2226                 gray_convert();
2227                 *pScan_line = m_pScan_line_0;
2228                 break;
2229             }
2230         }
2231     }
2232
2233     *pScan_line_len = m_real_dest_bytes_per_scan_line;
2234     m_mcu_lines_left--;
2235     m_total_lines_left--;
2236
2237     return JPGD_SUCCESS;
2238 }
2239
2240
2241 // Creates the tables needed for efficient Huffman decoding.
2242 void jpeg_decoder::make_huff_table(int index, huff_tables *pH)
2243 {
2244     int p, i, l, si;
2245     uint8_t huffsize[257];
2246     uint32_t huffcode[257];
2247     uint32_t code;
2248     uint32_t subtree;
2249     int code_size;
2250     int lastp;
2251     int nextfreeentry;
2252     int currententry;
2253
2254     pH->ac_table = m_huff_ac[index] != 0;
2255     p = 0;
2256
2257     for (l = 1; l <= 16; l++)  {
2258         for (i = 1; i <= m_huff_num[index][l]; i++) {
2259             huffsize[p++] = static_cast<uint8_t>(l);
2260         }
2261     }
2262
2263     huffsize[p] = 0;
2264     lastp = p;
2265     code = 0;
2266     si = huffsize[0];
2267     p = 0;
2268
2269     while (huffsize[p]) {
2270         while (huffsize[p] == si) {
2271             huffcode[p++] = code;
2272             code++;
2273         }
2274         code <<= 1;
2275         si++;
2276     }
2277
2278     memset(pH->look_up, 0, sizeof(pH->look_up));
2279     memset(pH->look_up2, 0, sizeof(pH->look_up2));
2280     memset(pH->tree, 0, sizeof(pH->tree));
2281     memset(pH->code_size, 0, sizeof(pH->code_size));
2282
2283     nextfreeentry = -1;
2284     p = 0;
2285
2286     while (p < lastp) {
2287         i = m_huff_val[index][p];
2288         code = huffcode[p];
2289         code_size = huffsize[p];
2290         pH->code_size[i] = static_cast<uint8_t>(code_size);
2291
2292         if (code_size <= 8) {
2293             code <<= (8 - code_size);
2294             for (l = 1 << (8 - code_size); l > 0; l--) {
2295                 JPGD_ASSERT(i < 256);
2296                 pH->look_up[code] = i;
2297                 bool has_extrabits = false;
2298                 int extra_bits = 0;
2299                 int num_extra_bits = i & 15;
2300                 int bits_to_fetch = code_size;
2301
2302                 if (num_extra_bits) {
2303                     int total_codesize = code_size + num_extra_bits;
2304                     if (total_codesize <= 8) {
2305                         has_extrabits = true;
2306                         extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
2307                         JPGD_ASSERT(extra_bits <= 0x7FFF);
2308                         bits_to_fetch += num_extra_bits;
2309                     }
2310                 }
2311                 if (!has_extrabits) pH->look_up2[code] = i | (bits_to_fetch << 8);
2312                 else pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
2313                 code++;
2314             }
2315         } else {
2316             subtree = (code >> (code_size - 8)) & 0xFF;
2317             currententry = pH->look_up[subtree];
2318
2319             if (currententry == 0) {
2320                 pH->look_up[subtree] = currententry = nextfreeentry;
2321                 pH->look_up2[subtree] = currententry = nextfreeentry;
2322                 nextfreeentry -= 2;
2323             }
2324
2325             code <<= (16 - (code_size - 8));
2326
2327             for (l = code_size; l > 9; l--) {
2328                 if ((code & 0x8000) == 0) currententry--;
2329                 if (pH->tree[-currententry - 1] == 0) {
2330                     pH->tree[-currententry - 1] = nextfreeentry;
2331                     currententry = nextfreeentry;
2332                     nextfreeentry -= 2;
2333                 } else currententry = pH->tree[-currententry - 1];
2334                 code <<= 1;
2335             }
2336             if ((code & 0x8000) == 0) currententry--;
2337             pH->tree[-currententry - 1] = i;
2338         }
2339         p++;
2340     }
2341 }
2342
2343
2344 // Verifies the quantization tables needed for this scan are available.
2345 void jpeg_decoder::check_quant_tables()
2346 {
2347     for (int i = 0; i < m_comps_in_scan; i++) {
2348         if (m_quant[m_comp_quant[m_comp_list[i]]] == nullptr) stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
2349     }
2350 }
2351
2352
2353 // Verifies that all the Huffman tables needed for this scan are available.
2354 void jpeg_decoder::check_huff_tables()
2355 {
2356     for (int i = 0; i < m_comps_in_scan; i++) {
2357       if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == nullptr)) stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2358       if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == nullptr)) stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2359     }
2360
2361     for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++) {
2362         if (m_huff_num[i]) {
2363             if (!m_pHuff_tabs[i]) m_pHuff_tabs[i] = (huff_tables *)alloc(sizeof(huff_tables));
2364             make_huff_table(i, m_pHuff_tabs[i]);
2365         }
2366     }
2367 }
2368
2369
2370 // Determines the component order inside each MCU.
2371 // Also calcs how many MCU's are on each row, etc.
2372 void jpeg_decoder::calc_mcu_block_order()
2373 {
2374     int component_num, component_id;
2375     int max_h_samp = 0, max_v_samp = 0;
2376
2377     for (component_id = 0; component_id < m_comps_in_frame; component_id++) {
2378         if (m_comp_h_samp[component_id] > max_h_samp) {
2379           max_h_samp = m_comp_h_samp[component_id];
2380         }
2381         if (m_comp_v_samp[component_id] > max_v_samp) {
2382           max_v_samp = m_comp_v_samp[component_id];
2383         }
2384     }
2385
2386     for (component_id = 0; component_id < m_comps_in_frame; component_id++) {
2387         m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
2388         m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
2389     }
2390
2391     if (m_comps_in_scan == 1) {
2392         m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]];
2393         m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]];
2394     } else {
2395         m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
2396         m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
2397     }
2398
2399     if (m_comps_in_scan == 1) {
2400         m_mcu_org[0] = m_comp_list[0];
2401         m_blocks_per_mcu = 1;
2402     } else {
2403         m_blocks_per_mcu = 0;
2404
2405         for (component_num = 0; component_num < m_comps_in_scan; component_num++) {
2406             int num_blocks;
2407             component_id = m_comp_list[component_num];
2408             num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id];
2409             while (num_blocks--) m_mcu_org[m_blocks_per_mcu++] = component_id;
2410         }
2411     }
2412 }
2413
2414
2415 // Starts a new scan.
2416 int jpeg_decoder::init_scan()
2417 {
2418     if (!locate_sos_marker()) return false;
2419
2420     calc_mcu_block_order();
2421     check_huff_tables();
2422     check_quant_tables();
2423
2424     memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint32_t));
2425
2426     m_eob_run = 0;
2427
2428     if (m_restart_interval) {
2429         m_restarts_left = m_restart_interval;
2430         m_next_restart_num = 0;
2431     }
2432     fix_in_buffer();
2433     return true;
2434 }
2435
2436
2437 // Starts a frame. Determines if the number of components or sampling factors
2438 // are supported.
2439 void jpeg_decoder::init_frame()
2440 {
2441     int i;
2442
2443     if (m_comps_in_frame == 1) {
2444         if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1)) stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2445         m_scan_type = JPGD_GRAYSCALE;
2446         m_max_blocks_per_mcu = 1;
2447         m_max_mcu_x_size = 8;
2448         m_max_mcu_y_size = 8;
2449     } else if (m_comps_in_frame == 3) {
2450         if (((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) || ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1)))
2451             stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2452
2453         if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1)) {
2454             m_scan_type = JPGD_YH1V1;
2455             m_max_blocks_per_mcu = 3;
2456             m_max_mcu_x_size = 8;
2457             m_max_mcu_y_size = 8;
2458         } else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1)) {
2459             m_scan_type = JPGD_YH2V1;
2460             m_max_blocks_per_mcu = 4;
2461             m_max_mcu_x_size = 16;
2462             m_max_mcu_y_size = 8;
2463         } else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2)) {
2464             m_scan_type = JPGD_YH1V2;
2465             m_max_blocks_per_mcu = 4;
2466             m_max_mcu_x_size = 8;
2467             m_max_mcu_y_size = 16;
2468         } else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2)) {
2469             m_scan_type = JPGD_YH2V2;
2470             m_max_blocks_per_mcu = 6;
2471             m_max_mcu_x_size = 16;
2472             m_max_mcu_y_size = 16;
2473         } else stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2474     } else stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2475
2476     m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
2477     m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
2478
2479     // These values are for the *destination* pixels: after conversion.
2480     if (m_scan_type == JPGD_GRAYSCALE) m_dest_bytes_per_pixel = 1;
2481     else m_dest_bytes_per_pixel = 4;
2482
2483     m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
2484     m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
2485
2486     // Initialize two scan line buffers.
2487     m_pScan_line_0 = (uint8_t *)alloc(m_dest_bytes_per_scan_line, true);
2488     if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2)) {
2489         m_pScan_line_1 = (uint8_t *)alloc(m_dest_bytes_per_scan_line, true);
2490     }
2491
2492     m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
2493
2494     // Should never happen
2495     if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW) stop_decoding(JPGD_ASSERTION_ERROR);
2496
2497     // Allocate the coefficient buffer, enough for one MCU
2498     m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t));
2499
2500     for (i = 0; i < m_max_blocks_per_mcu; i++) {
2501         m_mcu_block_max_zag[i] = 64;
2502     }
2503
2504     m_expanded_blocks_per_component = m_comp_h_samp[0] * m_comp_v_samp[0];
2505     m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
2506     m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
2507     // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
2508     m_freq_domain_chroma_upsample = false;
2509 #if JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING
2510     m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
2511 #endif
2512
2513     if (m_freq_domain_chroma_upsample)
2514         m_pSample_buf = (uint8_t *)alloc(m_expanded_blocks_per_row * 64);
2515     else
2516         m_pSample_buf = (uint8_t *)alloc(m_max_blocks_per_row * 64);
2517
2518     m_total_lines_left = m_image_y_size;
2519     m_mcu_lines_left = 0;
2520     create_look_ups();
2521 }
2522
2523
2524 // The coeff_buf series of methods originally stored the coefficients
2525 // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2526 // was used to make this process more efficient. Now, we can store the entire
2527 // thing in RAM.
2528 jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y)
2529 {
2530     coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf));
2531     cb->block_num_x = block_num_x;
2532     cb->block_num_y = block_num_y;
2533     cb->block_len_x = block_len_x;
2534     cb->block_len_y = block_len_y;
2535     cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t);
2536     cb->pData = (uint8_t *)alloc(cb->block_size * block_num_x * block_num_y, true);
2537     return cb;
2538 }
2539
2540
2541 inline jpgd_block_t *jpeg_decoder::coeff_buf_getp(coeff_buf *cb, int block_x, int block_y)
2542 {
2543     JPGD_ASSERT((block_x < cb->block_num_x) && (block_y < cb->block_num_y));
2544     return (jpgd_block_t *)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x));
2545 }
2546
2547
2548 // The following methods decode the various types of m_blocks encountered
2549 // in progressively encoded images.
2550 void jpeg_decoder::decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
2551 {
2552     int s, r;
2553     jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
2554
2555     if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0) {
2556         r = pD->get_bits_no_markers(s);
2557         s = JPGD_HUFF_EXTEND(r, s);
2558     }
2559     pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]);
2560     p[0] = static_cast<jpgd_block_t>(static_cast<unsigned int>(s) << pD->m_successive_low);
2561 }
2562
2563
2564 void jpeg_decoder::decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
2565 {
2566     if (pD->get_bits_no_markers(1)) {
2567         jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
2568         p[0] |= (1 << pD->m_successive_low);
2569     }
2570 }
2571
2572
2573 void jpeg_decoder::decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
2574 {
2575     int k, s, r;
2576
2577     if (pD->m_eob_run) {
2578         pD->m_eob_run--;
2579         return;
2580     }
2581     jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
2582
2583     for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++) {
2584         s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
2585         r = s >> 4;
2586         s &= 15;
2587         if (s) {
2588             if ((k += r) > 63) pD->stop_decoding(JPGD_DECODE_ERROR);
2589             r = pD->get_bits_no_markers(s);
2590             s = JPGD_HUFF_EXTEND(r, s);
2591             p[g_ZAG[k]] = static_cast<jpgd_block_t>(static_cast<unsigned int>(s) << pD->m_successive_low);
2592         } else {
2593             if (r == 15) {
2594                 if ((k += 15) > 63) pD->stop_decoding(JPGD_DECODE_ERROR);
2595             } else {
2596                 pD->m_eob_run = 1 << r;
2597                 if (r) pD->m_eob_run += pD->get_bits_no_markers(r);
2598                 pD->m_eob_run--;
2599                 break;
2600             }
2601         }
2602     }
2603 }
2604
2605
2606 void jpeg_decoder::decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
2607 {
2608     int s, k, r;
2609     int p1 = 1 << pD->m_successive_low;
2610     int m1 = static_cast<unsigned int>(-1) << pD->m_successive_low;
2611     jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
2612
2613     JPGD_ASSERT(pD->m_spectral_end <= 63);
2614
2615     k = pD->m_spectral_start;
2616
2617     if (pD->m_eob_run == 0) {
2618         for ( ; k <= pD->m_spectral_end; k++) {
2619             s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
2620             r = s >> 4;
2621             s &= 15;
2622             if (s) {
2623                 if (s != 1) pD->stop_decoding(JPGD_DECODE_ERROR);
2624                 if (pD->get_bits_no_markers(1)) s = p1;
2625                 else s = m1;
2626             } else {
2627                 if (r != 15) {
2628                     pD->m_eob_run = 1 << r;
2629                     if (r) pD->m_eob_run += pD->get_bits_no_markers(r);
2630                     break;
2631                 }
2632             }
2633
2634             do {
2635                 jpgd_block_t *this_coef = p + g_ZAG[k & 63];
2636
2637                 if (*this_coef != 0) {
2638                     if (pD->get_bits_no_markers(1)) {
2639                         if ((*this_coef & p1) == 0) {
2640                             if (*this_coef >= 0) *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
2641                             else *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
2642                         }
2643                     }
2644                 } else {
2645                     if (--r < 0) break;
2646                 }
2647                 k++;
2648             } while (k <= pD->m_spectral_end);
2649
2650             if ((s) && (k < 64)) {
2651               p[g_ZAG[k]] = static_cast<jpgd_block_t>(s);
2652             }
2653         }
2654     }
2655
2656     if (pD->m_eob_run > 0) {
2657         for ( ; k <= pD->m_spectral_end; k++) {
2658             jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis
2659
2660             if (*this_coef != 0) {
2661                 if (pD->get_bits_no_markers(1)) {
2662                     if ((*this_coef & p1) == 0) {
2663                         if (*this_coef >= 0) *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
2664                         else *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
2665                     }
2666                 }
2667             }
2668         }
2669         pD->m_eob_run--;
2670     }
2671 }
2672
2673
2674 // Decode a scan in a progressively encoded image.
2675 void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func)
2676 {
2677     int mcu_row, mcu_col, mcu_block;
2678     int block_x_mcu[JPGD_MAX_COMPONENTS], m_block_y_mcu[JPGD_MAX_COMPONENTS];
2679
2680     memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
2681
2682     for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++) {
2683         int component_num, component_id;
2684         memset(block_x_mcu, 0, sizeof(block_x_mcu));
2685
2686         for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) {
2687             int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
2688
2689             if ((m_restart_interval) && (m_restarts_left == 0)) process_restart();
2690
2691             for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) {
2692                 component_id = m_mcu_org[mcu_block];
2693                 decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
2694
2695                 if (m_comps_in_scan == 1) block_x_mcu[component_id]++;
2696                 else {
2697                     if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) {
2698                         block_x_mcu_ofs = 0;
2699
2700                         if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) {
2701                             block_y_mcu_ofs = 0;
2702                             block_x_mcu[component_id] += m_comp_h_samp[component_id];
2703                         }
2704                     }
2705                 }
2706             }
2707             m_restarts_left--;
2708         }
2709
2710         if (m_comps_in_scan == 1) m_block_y_mcu[m_comp_list[0]]++;
2711         else {
2712             for (component_num = 0; component_num < m_comps_in_scan; component_num++) {
2713                 component_id = m_comp_list[component_num];
2714                 m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
2715             }
2716         }
2717     }
2718 }
2719
2720
2721 // Decode a progressively encoded image.
2722 void jpeg_decoder::init_progressive()
2723 {
2724     int i;
2725
2726     if (m_comps_in_frame == 4) stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2727
2728     // Allocate the coefficient buffers.
2729     for (i = 0; i < m_comps_in_frame; i++) {
2730         m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1);
2731         m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8);
2732     }
2733
2734     while (true) {
2735         int dc_only_scan, refinement_scan;
2736         pDecode_block_func decode_block_func;
2737
2738         if (!init_scan()) break;
2739
2740         dc_only_scan = (m_spectral_start == 0);
2741         refinement_scan = (m_successive_high != 0);
2742
2743         if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63)) stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2744
2745         if (dc_only_scan) {
2746             if (m_spectral_end) stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2747         } else if (m_comps_in_scan != 1) {  /* AC scans can only contain one component */
2748             stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2749         }
2750
2751         if ((refinement_scan) && (m_successive_low != m_successive_high - 1)) stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
2752
2753         if (dc_only_scan) {
2754             if (refinement_scan) decode_block_func = decode_block_dc_refine;
2755             else decode_block_func = decode_block_dc_first;
2756         } else {
2757             if (refinement_scan) decode_block_func = decode_block_ac_refine;
2758             else decode_block_func = decode_block_ac_first;
2759         }
2760         decode_scan(decode_block_func);
2761         m_bits_left = 16;
2762         get_bits(16);
2763         get_bits(16);
2764     }
2765
2766     m_comps_in_scan = m_comps_in_frame;
2767
2768     for (i = 0; i < m_comps_in_frame; i++) {
2769         m_comp_list[i] = i;
2770     }
2771
2772     calc_mcu_block_order();
2773 }
2774
2775
2776 void jpeg_decoder::init_sequential()
2777 {
2778     if (!init_scan()) stop_decoding(JPGD_UNEXPECTED_MARKER);
2779 }
2780
2781
2782 void jpeg_decoder::decode_start()
2783 {
2784     init_frame();
2785     if (m_progressive_flag) init_progressive();
2786     else init_sequential();
2787 }
2788
2789
2790 void jpeg_decoder::decode_init(jpeg_decoder_stream *pStream)
2791 {
2792     init(pStream);
2793     locate_sof_marker();
2794 }
2795
2796
2797 jpeg_decoder::jpeg_decoder(jpeg_decoder_stream *pStream)
2798 {
2799     if (setjmp(m_jmp_state)) return;
2800     decode_init(pStream);
2801 }
2802
2803
2804 int jpeg_decoder::begin_decoding()
2805 {
2806     if (m_ready_flag) return JPGD_SUCCESS;
2807     if (m_error_code) return JPGD_FAILED;
2808     if (setjmp(m_jmp_state)) return JPGD_FAILED;
2809
2810     decode_start();
2811     m_ready_flag = true;
2812
2813     return JPGD_SUCCESS;
2814 }
2815
2816
2817 jpeg_decoder::~jpeg_decoder()
2818 {
2819     free_all_blocks();
2820 }
2821
2822
2823 void jpeg_decoder_file_stream::close()
2824 {
2825     if (m_pFile) {
2826         fclose(m_pFile);
2827         m_pFile = nullptr;
2828     }
2829     m_eof_flag = false;
2830     m_error_flag = false;
2831 }
2832
2833
2834 jpeg_decoder_file_stream::~jpeg_decoder_file_stream()
2835 {
2836     close();
2837 }
2838
2839
2840 bool jpeg_decoder_file_stream::open(const char *Pfilename)
2841 {
2842     close();
2843
2844     m_eof_flag = false;
2845     m_error_flag = false;
2846
2847 #if defined(_MSC_VER)
2848     m_pFile = nullptr;
2849     fopen_s(&m_pFile, Pfilename, "rb");
2850 #else
2851     m_pFile = fopen(Pfilename, "rb");
2852 #endif
2853     return m_pFile != nullptr;
2854 }
2855
2856
2857 int jpeg_decoder_file_stream::read(uint8_t *pBuf, int max_bytes_to_read, bool *pEOF_flag)
2858 {
2859     if (!m_pFile) return -1;
2860
2861     if (m_eof_flag) {
2862         *pEOF_flag = true;
2863         return 0;
2864     }
2865
2866     if (m_error_flag) return -1;
2867
2868     int bytes_read = static_cast<int>(fread(pBuf, 1, max_bytes_to_read, m_pFile));
2869     if (bytes_read < max_bytes_to_read) {
2870         if (ferror(m_pFile)) {
2871             m_error_flag = true;
2872             return -1;
2873         }
2874         m_eof_flag = true;
2875         *pEOF_flag = true;
2876     }
2877     return bytes_read;
2878 }
2879
2880
2881 bool jpeg_decoder_mem_stream::open(const uint8_t *pSrc_data, uint32_t size)
2882 {
2883     close();
2884     m_pSrc_data = pSrc_data;
2885     m_ofs = 0;
2886     m_size = size;
2887     return true;
2888 }
2889
2890
2891 int jpeg_decoder_mem_stream::read(uint8_t *pBuf, int max_bytes_to_read, bool *pEOF_flag)
2892 {
2893     *pEOF_flag = false;
2894     if (!m_pSrc_data) return -1;
2895
2896     uint32_t bytes_remaining = m_size - m_ofs;
2897     if ((uint32_t)max_bytes_to_read > bytes_remaining) {
2898         max_bytes_to_read = bytes_remaining;
2899         *pEOF_flag = true;
2900     }
2901     memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read);
2902     m_ofs += max_bytes_to_read;
2903
2904     return max_bytes_to_read;
2905 }
2906
2907
2908 /************************************************************************/
2909 /* External Class Implementation                                        */
2910 /************************************************************************/
2911
2912
2913 jpeg_decoder* jpgdHeader(const char* data, int size, int* width, int* height)
2914 {
2915     auto decoder = new jpeg_decoder(new jpeg_decoder_mem_stream((const uint8_t*)data, size));
2916     if (decoder->get_error_code() != JPGD_SUCCESS) {
2917         delete(decoder);
2918         return nullptr;
2919     }
2920
2921     if (width) *width = decoder->get_width();
2922     if (height) *height = decoder->get_height();
2923
2924     return decoder;
2925 }
2926
2927
2928 jpeg_decoder* jpgdHeader(const char* filename, int* width, int* height)
2929 {
2930     auto fileStream = new jpeg_decoder_file_stream();
2931     if (!fileStream->open(filename)) return nullptr;
2932
2933     auto decoder = new jpeg_decoder(fileStream);
2934     if (decoder->get_error_code() != JPGD_SUCCESS) {
2935         delete(decoder);
2936         return nullptr;
2937     }
2938
2939     if (width) *width = decoder->get_width();
2940     if (height) *height = decoder->get_height();
2941
2942     return decoder;
2943 }
2944
2945
2946 void jpgdDelete(jpeg_decoder* decoder)
2947 {
2948     delete(decoder);
2949 }
2950
2951
2952 unsigned char* jpgdDecompress(jpeg_decoder* decoder)
2953 {
2954     if (!decoder) return nullptr;
2955
2956     int req_comps = 4;  //TODO: fixed 4 channel components now?
2957     if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4)) return nullptr;
2958
2959     auto image_width = decoder->get_width();
2960     auto image_height = decoder->get_height();
2961     //auto actual_comps = decoder->get_num_components();
2962
2963     if (decoder->begin_decoding() != JPGD_SUCCESS) return nullptr;
2964
2965     const int dst_bpl = image_width * req_comps;
2966     uint8_t *pImage_data = (uint8_t*)malloc(dst_bpl * image_height);
2967     if (!pImage_data) return nullptr;
2968
2969     for (int y = 0; y < image_height; y++) {
2970         const uint8_t* pScan_line;
2971         uint32_t scan_line_len;
2972         if (decoder->decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
2973             free(pImage_data);
2974             return nullptr;
2975         }
2976
2977         uint8_t *pDst = pImage_data + y * dst_bpl;
2978
2979         //Return as BGRA
2980         if ((req_comps == 4) && (decoder->get_num_components() == 3)) {
2981             for (int x = 0; x < image_width; x++) {
2982                 pDst[0] = pScan_line[x*4+2];
2983                 pDst[1] = pScan_line[x*4+1];
2984                 pDst[2] = pScan_line[x*4+0];
2985                 pDst[3] = 255;
2986                 pDst += 4;
2987             }
2988         } else if (((req_comps == 1) && (decoder->get_num_components() == 1)) || ((req_comps == 4) && (decoder->get_num_components() == 3))) {
2989             memcpy(pDst, pScan_line, dst_bpl);
2990         } else if (decoder->get_num_components() == 1) {
2991             if (req_comps == 3) {
2992                 for (int x = 0; x < image_width; x++) {
2993                     uint8_t luma = pScan_line[x];
2994                     pDst[0] = luma;
2995                     pDst[1] = luma;
2996                     pDst[2] = luma;
2997                     pDst += 3;
2998                 }
2999             } else {
3000                 for (int x = 0; x < image_width; x++) {
3001                     uint8_t luma = pScan_line[x];
3002                     pDst[0] = luma;
3003                     pDst[1] = luma;
3004                     pDst[2] = luma;
3005                     pDst[3] = 255;
3006                     pDst += 4;
3007                 }
3008             }
3009         } else if (decoder->get_num_components() == 3) {
3010             if (req_comps == 1) {
3011                 const int YR = 19595, YG = 38470, YB = 7471;
3012                 for (int x = 0; x < image_width; x++) {
3013                     int r = pScan_line[x*4+0];
3014                     int g = pScan_line[x*4+1];
3015                     int b = pScan_line[x*4+2];
3016                     *pDst++ = static_cast<uint8_t>((r * YR + g * YG + b * YB + 32768) >> 16);
3017                 }
3018             } else {
3019                 for (int x = 0; x < image_width; x++) {
3020                     pDst[0] = pScan_line[x*4+0];
3021                     pDst[1] = pScan_line[x*4+1];
3022                     pDst[2] = pScan_line[x*4+2];
3023                     pDst += 3;
3024                 }
3025             }
3026         }
3027     }
3028     return pImage_data;
3029 }