2 * Copyright (c) 2021 - 2023 the ThorVG project. All rights reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in all
12 * copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 // jpgd.cpp - C++ class for JPEG decompression.
24 // Public domain, Rich Geldreich <richgel99@gmail.com>
25 // Alex Evans: Linear memory allocator (taken from jpge.h).
26 // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
28 // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
30 // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
31 // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
32 // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
// JPGD_NORETURN expands to a compiler-specific "this function never returns" annotation.
// It is applied to jpeg_decoder::stop_decoding() in the class declaration further down.
42 #pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable
// __declspec spelling of the annotation.
43 #define JPGD_NORETURN __declspec(noreturn)
44 #elif defined(__GNUC__)
// GNU attribute spelling of the annotation.
45 #define JPGD_NORETURN __attribute__ ((noreturn))
50 /************************************************************************/
51 /* Internal Class Implementation */
52 /************************************************************************/
55 // Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling).
56 // This is slower, but results in higher quality on images with highly saturated colors.
57 #define JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING 1
// JPGD_ASSERT expands to nothing, so the asserted expression is never evaluated.
59 #define JPGD_ASSERT(x)
// Function-like max/min. The selected argument is evaluated twice, so do not pass expressions with side effects.
60 #define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b))
61 #define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b))
// Element type of the quantization tables (see jpeg_decoder::m_quant).
63 typedef int16_t jpgd_quant_t;
// Element type of coefficient blocks (see jpeg_decoder::m_pMCU_coefficients and the idct() inputs).
64 typedef int16_t jpgd_block_t;
66 // Success/failure error codes.
// These are the jpgd_status values returned by jpeg_decoder::get_error_code().
// The three generic results come first; the specific failure codes start at -256 and each following
// enumerator without an explicit value is one greater than the previous one.
69 JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
70 JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
71 JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
72 JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
73 JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
74 JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
75 JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
76 JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR,
77 JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM
// Compile-time limits of the decoder. JPGD_IN_BUF_SIZE sizes jpeg_decoder::m_in_buf; the JPGD_MAX_* values are the
// array extents of the per-table, per-component and per-MCU members of jpeg_decoder.
82 JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
83 JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384
86 // Input stream interface.
87 // Derive from this class to read input data from sources other than files or memory. Set *pEOF_flag to true in read() when no more data is available.
88 // The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set.
89 // If the input stream contains data after the JPEG stream's EOI (end of image) marker it will probably be pulled into the internal buffer.
90 // Call the get_total_bytes_read() method to determine the actual size of the JPEG stream after successful decoding.
91 struct jpeg_decoder_stream
93 jpeg_decoder_stream() { }
// Virtual so that derived stream objects can be destroyed through a jpeg_decoder_stream pointer.
94 virtual ~jpeg_decoder_stream() { }
96 // The read() method is called when the internal input buffer is empty.
98 // pBuf - input buffer
99 // max_bytes_to_read - maximum bytes that can be written to pBuf
100 // pEOF_flag - set this to true if at end of stream (no more bytes remaining)
101 // Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0).
102 // Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
103 virtual int read(uint8_t *pBuf, int max_bytes_to_read, bool *pEOF_flag) = 0;
107 // stdio FILE stream class.
// Implements jpeg_decoder_stream::read() on top of a FILE handle (m_pFile).
108 class jpeg_decoder_file_stream : public jpeg_decoder_stream
// Copy constructor and copy assignment are only declared here.
// NOTE(review): presumably left undefined to make the stream non-copyable — confirm.
110 jpeg_decoder_file_stream(const jpeg_decoder_file_stream &);
111 jpeg_decoder_file_stream &operator =(const jpeg_decoder_file_stream &);
// State defaults: no file attached, no EOF and no error recorded.
113 FILE *m_pFile = nullptr;
114 bool m_eof_flag = false;
115 bool m_error_flag = false;
118 jpeg_decoder_file_stream() {}
119 virtual ~jpeg_decoder_file_stream();
// Takes the file name to read from; the bool result presumably reports whether the file could be opened (definition not shown here).
120 bool open(const char *Pfilename);
// See jpeg_decoder_stream::read() for the contract of the parameters and the return value.
122 virtual int read(uint8_t *pBuf, int max_bytes_to_read, bool *pEOF_flag);
126 // Memory stream class.
// Implements jpeg_decoder_stream::read() over a caller-supplied byte range. The class only stores the pointer it is given.
127 class jpeg_decoder_mem_stream : public jpeg_decoder_stream
// Source pointer, plus an offset (m_ofs) and size (m_size) in bytes.
129 const uint8_t *m_pSrc_data;
130 uint32_t m_ofs, m_size;
// Default construction leaves the stream empty (null source, zero size).
133 jpeg_decoder_mem_stream() : m_pSrc_data(nullptr), m_ofs(0), m_size(0) {}
// Attaches pSrc_data/size immediately; the offset starts at 0.
134 jpeg_decoder_mem_stream(const uint8_t *pSrc_data, uint32_t size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) {}
135 virtual ~jpeg_decoder_mem_stream() {}
136 bool open(const uint8_t *pSrc_data, uint32_t size);
// Detaches from the source data and resets the offset and size to 0. Nothing is freed.
137 void close() { m_pSrc_data = nullptr; m_ofs = 0; m_size = 0; }
// See jpeg_decoder_stream::read() for the contract of the parameters and the return value.
138 virtual int read(uint8_t *pBuf, int max_bytes_to_read, bool *pEOF_flag);
145 // Call get_error_code() after constructing to determine if the stream is valid or not. You may call the get_width(), get_height(), etc.
146 // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
147 jpeg_decoder(jpeg_decoder_stream *pStream);
150 // Call this method after constructing the object to begin decompression.
151 // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
152 int begin_decoding();
153 // Returns the next scan line.
154 // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1).
155 // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4).
156 // Returns JPGD_SUCCESS if a scan line has been returned.
157 // Returns JPGD_DONE if all scan lines have been returned.
158 // Returns JPGD_FAILED if an error occurred. Call get_error_code() for more info.
159 int decode(const void** pScan_line, uint32_t* pScan_line_len);
// Simple accessors over the decoder state; none of them modify the object.
160 inline jpgd_status get_error_code() const { return m_error_code; }
161 inline int get_width() const { return m_image_x_size; }
162 inline int get_height() const { return m_image_y_size; }
163 inline int get_num_components() const { return m_comps_in_frame; }
164 inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; }
// Image width multiplied by get_bytes_per_pixel(), i.e. without any row padding.
165 inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); }
166 // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
167 inline int get_total_bytes_read() const { return m_total_bytes_read; }
// Copy constructor and copy assignment are only declared here.
// NOTE(review): presumably left undefined to make the decoder non-copyable — confirm.
170 jpeg_decoder(const jpeg_decoder &);
171 jpeg_decoder &operator =(const jpeg_decoder &);
// Signature of the per-block callbacks handed to decode_scan(). It matches the static decode_block_* members:
// (decoder, component_id, block_x, block_y).
173 typedef void (*pDecode_block_func)(jpeg_decoder *, int, int, int);
// Huffman table arrays, 256 entries each. huff_decode() reads look_up / look_up2 / code_size through a huff_tables pointer.
178 uint32_t look_up[256];
179 uint32_t look_up2[256];
180 uint8_t code_size[256];
// Block-grid geometry fields; the names mirror the parameters of coeff_buf_open().
// NOTE(review): presumably members of coeff_buf — the enclosing struct header is not visible here.
187 int block_num_x, block_num_y;
188 int block_len_x, block_len_y;
// ---- Decoder state (data members of jpeg_decoder) ----
201 mem_block *m_pMem_blocks;
// Input source supplied by the caller.
204 jpeg_decoder_stream *m_pStream;
205 int m_progressive_flag;
// Huffman and quantization tables, indexed by table number.
206 uint8_t m_huff_ac[JPGD_MAX_HUFF_TABLES];
207 uint8_t* m_huff_num[JPGD_MAX_HUFF_TABLES]; // pointer to number of Huffman codes per bit size
208 uint8_t* m_huff_val[JPGD_MAX_HUFF_TABLES]; // pointer to Huffman codes per bit size
209 jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables
210 int m_scan_type; // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
// Per-component frame parameters, indexed by component number.
211 int m_comps_in_frame; // # of components in frame
212 int m_comp_h_samp[JPGD_MAX_COMPONENTS]; // component's horizontal sampling factor
213 int m_comp_v_samp[JPGD_MAX_COMPONENTS]; // component's vertical sampling factor
214 int m_comp_quant[JPGD_MAX_COMPONENTS]; // component's quantization table selector
215 int m_comp_ident[JPGD_MAX_COMPONENTS]; // component's ID
216 int m_comp_h_blocks[JPGD_MAX_COMPONENTS];
217 int m_comp_v_blocks[JPGD_MAX_COMPONENTS];
// Per-scan parameters.
218 int m_comps_in_scan; // # of components in scan
219 int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan
220 int m_comp_dc_tab[JPGD_MAX_COMPONENTS]; // component's DC Huffman coding table selector
221 int m_comp_ac_tab[JPGD_MAX_COMPONENTS]; // component's AC Huffman coding table selector
222 int m_spectral_start; // spectral selection start
223 int m_spectral_end; // spectral selection end
224 int m_successive_low; // successive approximation low
225 int m_successive_high; // successive approximation high
// MCU layout.
226 int m_max_mcu_x_size; // MCU's max. X size in pixels
227 int m_max_mcu_y_size; // MCU's max. Y size in pixels
228 int m_blocks_per_mcu;
229 int m_max_blocks_per_row;
230 int m_mcus_per_row, m_mcus_per_col;
231 int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU];
// Output progress and scan-line sizing.
232 int m_total_lines_left; // total # lines left in image
233 int m_mcu_lines_left; // total # lines left in this MCU
234 int m_real_dest_bytes_per_scan_line;
235 int m_dest_bytes_per_scan_line; // rounded up
236 int m_dest_bytes_per_pixel; // 4 (RGB) or 1 (Y)
237 huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES];
238 coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS];
239 coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS];
241 int m_block_y_mcu[JPGD_MAX_COMPONENTS];
// Read cursor into the input buffer; get_char() post-increments it and stuff_char() pre-decrements it.
242 uint8_t* m_pIn_buf_ofs;
// The input buffer is declared between two 128-byte pad arrays.
// NOTE(review): presumably the leading pad gives stuff_char() room to push bytes back before m_in_buf — confirm.
246 uint8_t m_in_buf_pad_start[128];
247 uint8_t m_in_buf[JPGD_IN_BUF_SIZE + 128];
248 uint8_t m_in_buf_pad_end[128];
251 int m_restart_interval;
253 int m_next_restart_num;
254 int m_max_mcus_per_row;
255 int m_max_blocks_per_mcu;
256 int m_expanded_blocks_per_mcu;
257 int m_expanded_blocks_per_row;
258 int m_expanded_blocks_per_component;
259 bool m_freq_domain_chroma_upsample;
260 int m_max_mcus_per_col;
261 uint32_t m_last_dc_val[JPGD_MAX_COMPONENTS];
262 jpgd_block_t* m_pMCU_coefficients;
263 int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU];
264 uint8_t* m_pSample_buf;
269 uint8_t* m_pScan_line_0;
270 uint8_t* m_pScan_line_1;
// Value returned by get_error_code().
271 jpgd_status m_error_code;
// Value returned by get_total_bytes_read().
273 int m_total_bytes_read;
// ---- Internal helpers of jpeg_decoder (declarations only; most definitions are not part of this section) ----
275 void free_all_blocks();
// Declared JPGD_NORETURN: callers may assume control never comes back from this call.
276 JPGD_NORETURN void stop_decoding(jpgd_status status);
277 void *alloc(size_t n, bool zero = false);
278 void word_clear(void *p, uint16_t c, uint32_t n);
279 void prep_in_buffer();
// Marker-segment readers and locators.
280 void read_dht_marker();
281 void read_dqt_marker();
282 void read_sof_marker();
283 void skip_variable_marker();
284 void read_dri_marker();
285 void read_sos_marker();
287 int process_markers();
288 void locate_soi_marker();
289 void locate_sof_marker();
290 int locate_sos_marker();
291 void init(jpeg_decoder_stream * pStream);
292 void create_look_ups();
293 void fix_in_buffer();
294 void transform_mcu(int mcu_row);
295 void transform_mcu_expand(int mcu_row);
296 coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y);
297 inline jpgd_block_t *coeff_buf_getp(coeff_buf *cb, int block_x, int block_y);
298 void load_next_row();
299 void decode_next_row();
300 void make_huff_table(int index, huff_tables *pH);
301 void check_quant_tables();
302 void check_huff_tables();
303 void calc_mcu_block_order();
306 void process_restart();
// Takes one of the static decode_block_* functions declared below as its callback.
307 void decode_scan(pDecode_block_func decode_block_func);
308 void init_progressive();
309 void init_sequential();
311 void decode_init(jpeg_decoder_stream * pStream);
317 void expanded_convert();
// Byte- and bit-level input helpers.
319 inline uint32_t get_char();
320 inline uint32_t get_char(bool *pPadding_flag);
321 inline void stuff_char(uint8_t q);
322 inline uint8_t get_octet();
323 inline uint32_t get_bits(int num_bits);
324 inline uint32_t get_bits_no_markers(int numbits);
// Two Huffman decode overloads; the second also returns the extra bits that follow the symbol through 'extrabits'.
325 inline int huff_decode(huff_tables *pH);
326 inline int huff_decode(huff_tables *pH, int& extrabits);
327 static inline uint8_t clamp(int i);
// Per-block decode callbacks; their signature matches pDecode_block_func.
328 static void decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y);
329 static void decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y);
330 static void decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y);
331 static void decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y);
335 // DCT coefficients are stored in this sequence.
// 64-entry table: g_ZAG[k] is the index within an 8x8 block (row * 8 + column) of the k-th coefficient in zig-zag order.
// NOTE(review): the array is not declared const although nothing in this section writes to it — confirm before adding const.
336 static int g_ZAG[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
// JPEG marker codes: each value is the byte that follows 0xFF in the stream.
// M_ERROR (0x100) lies outside the single-byte range, so it cannot collide with a real marker byte.
// RST0 has the same value as M_RST0.
340 M_SOF0 = 0xC0, M_SOF1 = 0xC1, M_SOF2 = 0xC2, M_SOF3 = 0xC3, M_SOF5 = 0xC5, M_SOF6 = 0xC6, M_SOF7 = 0xC7, M_JPG = 0xC8,
341 M_SOF9 = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT = 0xC4, M_DAC = 0xCC,
342 M_RST0 = 0xD0, M_RST1 = 0xD1, M_RST2 = 0xD2, M_RST3 = 0xD3, M_RST4 = 0xD4, M_RST5 = 0xD5, M_RST6 = 0xD6, M_RST7 = 0xD7,
343 M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_DNL = 0xDC, M_DRI = 0xDD, M_DHP = 0xDE, M_EXP = 0xDF,
344 M_APP0 = 0xE0, M_APP15 = 0xEF, M_JPG0 = 0xF0, M_JPG13 = 0xFD, M_COM = 0xFE, M_TEM = 0x01, M_ERROR = 0x100, RST0 = 0xD0
// Chroma subsampling layouts, numbered 0..4 in declaration order (grayscale, H1V1, H2V1, H1V2, H2V2).
// NOTE(review): presumably the values stored in jpeg_decoder::m_scan_type — confirm.
347 enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
// Fixed-point helpers for the integer IDCT below. Each FIX_* constant is its real value scaled by 2^CONST_BITS (2^13 = 8192) and rounded.
349 #define CONST_BITS 13
351 #define SCALEDONE ((int32_t)1)
// DESCALE: right-shift by n with rounding (adds half of 2^n before shifting).
352 #define DESCALE(x,n) (((x) + (SCALEDONE << ((n)-1))) >> (n))
// DESCALE_ZEROSHIFT: as DESCALE, but also adds 128 << n first, so the result comes out biased by +128.
353 #define DESCALE_ZEROSHIFT(x,n) (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n))
354 #define MULTIPLY(var, cnst) ((var) * (cnst))
// CLAMP: values already in 0..255 pass through. Otherwise a negative i yields 0 and i > 255 yields 0xFF, because
// (~i) >> 31 is 0 for negative i and all-ones for i > 255 (this relies on an arithmetic right shift of negative ints).
// Note that 'i' is not parenthesized in '~i', so pass a simple variable rather than an expression.
355 #define CLAMP(i) ((static_cast<uint32_t>(i) > 255) ? (((~i) >> 31) & 0xFF) : (i))
357 #define FIX_0_298631336 ((int32_t)2446) /* FIX(0.298631336) */
358 #define FIX_0_390180644 ((int32_t)3196) /* FIX(0.390180644) */
359 #define FIX_0_541196100 ((int32_t)4433) /* FIX(0.541196100) */
360 #define FIX_0_765366865 ((int32_t)6270) /* FIX(0.765366865) */
361 #define FIX_0_899976223 ((int32_t)7373) /* FIX(0.899976223) */
362 #define FIX_1_175875602 ((int32_t)9633) /* FIX(1.175875602) */
363 #define FIX_1_501321110 ((int32_t)12299) /* FIX(1.501321110) */
364 #define FIX_1_847759065 ((int32_t)15137) /* FIX(1.847759065) */
365 #define FIX_1_961570560 ((int32_t)16069) /* FIX(1.961570560) */
366 #define FIX_2_053119869 ((int32_t)16819) /* FIX(2.053119869) */
367 #define FIX_2_562915447 ((int32_t)20995) /* FIX(2.562915447) */
368 #define FIX_3_072711026 ((int32_t)25172) /* FIX(3.072711026) */
371 // Compiler creates a fast path 1D IDCT for X non-zero columns
// First IDCT pass (invoked as Row<N>::idct from idct()): reads up to 8 coefficients from pSrc and writes 8 ints to pTemp.
// NONZERO_COLS is the number of leading entries of pSrc that are actually read; the remaining ones are treated as 0.
372 template <int NONZERO_COLS>
375 static void idct(int* pTemp, const jpgd_block_t* pSrc)
377 // ACCESS_COL() will be optimized at compile time to either an array access, or 0.
378 #define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
// Even part: built from coefficients 0, 2, 4 and 6.
380 const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6);
381 const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
382 const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
383 const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
// The shift is done on an unsigned value (presumably to avoid left-shifting a negative signed int) and the result is converted back to int.
385 const int tmp0 = static_cast<unsigned int>(ACCESS_COL(0) + ACCESS_COL(4)) << CONST_BITS;
386 const int tmp1 = static_cast<unsigned int>(ACCESS_COL(0) - ACCESS_COL(4)) << CONST_BITS;
388 const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
// Odd part: built from coefficients 7, 5, 3 and 1.
390 const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1);
392 const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
393 const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
395 const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
396 const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
397 const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
398 const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
400 const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
401 const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
402 const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
403 const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
// Combine even and odd parts as sum/difference pairs (0/7, 1/6, 2/5, 3/4). The results keep PASS1_BITS extra fractional bits.
405 pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS);
406 pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS);
407 pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS);
408 pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS);
409 pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS);
410 pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS);
411 pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS);
412 pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS);
// Specialized overload with the same signature (idct() dispatches to Row<0> and Row<1> as well as the general template).
420 static void idct(int* pTemp, const jpgd_block_t* pSrc)
// Specialized overload that derives everything from the first coefficient only.
432 static void idct(int* pTemp, const jpgd_block_t* pSrc)
// NOTE(review): unlike the general template, this left shift is applied directly to a signed (possibly negative) value.
434 const int dcval = (pSrc[0] << PASS1_BITS);
448 // Compiler creates a fast path 1D IDCT for X non-zero rows
// Second IDCT pass (invoked as Col<N>::idct from idct()): reads one column of pTemp (stride 8 ints) and writes 8 clamped
// bytes to pDst_ptr (stride 8 bytes). NONZERO_ROWS is the number of leading rows actually read; the rest are treated as 0.
449 template <int NONZERO_ROWS>
452 static void idct(uint8_t* pDst_ptr, const int* pTemp)
454 // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
455 #define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
// Even part: built from rows 0, 2, 4 and 6.
457 const int z2 = ACCESS_ROW(2);
458 const int z3 = ACCESS_ROW(6);
460 const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
461 const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
462 const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
// The shift is done on an unsigned value (presumably to avoid left-shifting a negative signed int) and the result is converted back to int.
464 const int tmp0 = static_cast<unsigned int>(ACCESS_ROW(0) + ACCESS_ROW(4)) << CONST_BITS;
465 const int tmp1 = static_cast<unsigned int>(ACCESS_ROW(0) - ACCESS_ROW(4)) << CONST_BITS;
467 const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
// Odd part: built from rows 7, 5, 3 and 1.
469 const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1);
471 const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
472 const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
474 const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
475 const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
476 const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
477 const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
479 const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
480 const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
481 const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
482 const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
// Each output: remove the accumulated scaling (CONST_BITS + PASS1_BITS + 3), add the +128 bias, then clamp to 0..255.
484 int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3);
485 pDst_ptr[8*0] = (uint8_t)CLAMP(i);
487 i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3);
488 pDst_ptr[8*7] = (uint8_t)CLAMP(i);
490 i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3);
491 pDst_ptr[8*1] = (uint8_t)CLAMP(i);
493 i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3);
494 pDst_ptr[8*6] = (uint8_t)CLAMP(i);
496 i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3);
497 pDst_ptr[8*2] = (uint8_t)CLAMP(i);
499 i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3);
500 pDst_ptr[8*5] = (uint8_t)CLAMP(i);
502 i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3);
503 pDst_ptr[8*3] = (uint8_t)CLAMP(i);
505 i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3);
506 pDst_ptr[8*4] = (uint8_t)CLAMP(i);
// Specialized overload: the whole output column is the single value derived from pTemp[0].
514 static void idct(uint8_t* pDst_ptr, const int* pTemp)
516 int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
517 const uint8_t dcval_clamped = (uint8_t)CLAMP(dcval);
// Write the same clamped value to all 8 rows of the column.
518 pDst_ptr[0*8] = dcval_clamped;
519 pDst_ptr[1*8] = dcval_clamped;
520 pDst_ptr[2*8] = dcval_clamped;
521 pDst_ptr[3*8] = dcval_clamped;
522 pDst_ptr[4*8] = dcval_clamped;
523 pDst_ptr[5*8] = dcval_clamped;
524 pDst_ptr[6*8] = dcval_clamped;
525 pDst_ptr[7*8] = dcval_clamped;
// Groups of 8 entries. idct() starts reading at index (block_max_zag - 1) * 8 and advances one entry per row iteration.
// The entries range over 0..8, matching the Row<0>..Row<8> cases in idct().
// NOTE(review): presumably each entry is the number of non-zero columns in that row — confirm against the switch in idct().
530 static const uint8_t s_idct_row_table[] = {
531 1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
532 4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
533 6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
534 6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
535 8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
536 8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
537 8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
538 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
// Indexed by block_max_zag - 1 in idct(); the value (1..8) is used there as 'nonzero_rows' to pick the Col<N> specialization.
542 static const uint8_t s_idct_col_table[] = { 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };
// 8x8 inverse DCT driver.
//   pSrc_ptr      - input coefficient block
//   pDst_ptr      - output bytes
//   block_max_zag - selects how much of the block is processed; documented by the assertions as 1..64.
//                   JPGD_ASSERT expands to nothing, so this range is not enforced at run time.
545 void idct(const jpgd_block_t* pSrc_ptr, uint8_t* pDst_ptr, int block_max_zag)
547 JPGD_ASSERT(block_max_zag >= 1);
548 JPGD_ASSERT(block_max_zag <= 64);
// Fast path for block_max_zag <= 1: only the first coefficient contributes.
550 if (block_max_zag <= 1) {
// Rounded divide by 8, then add the 128 bias.
551 int k = ((pSrc_ptr[0] + 4) >> 3) + 128;
// Eight iterations, each storing k through two int-sized writes at byte offsets 0 and 4 of pDst_ptr.
// NOTE(review): the int* casts assume a 4-byte int and that type-punned, possibly unaligned stores are acceptable on the target — confirm.
555 for (int i = 8; i > 0; i--) {
556 *(int*)&pDst_ptr[0] = k;
557 *(int*)&pDst_ptr[4] = k;
564 const jpgd_block_t* pSrc = pSrc_ptr;
// First pass: pRow_tab walks the 8 table entries belonging to this block_max_zag, one per iteration.
566 const uint8_t* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8];
568 for (i = 8; i > 0; i--, pRow_tab++) {
// Dispatch to the Row<N> specialization (presumably selected by *pRow_tab).
570 case 0: Row<0>::idct(pTemp, pSrc); break;
571 case 1: Row<1>::idct(pTemp, pSrc); break;
572 case 2: Row<2>::idct(pTemp, pSrc); break;
573 case 3: Row<3>::idct(pTemp, pSrc); break;
574 case 4: Row<4>::idct(pTemp, pSrc); break;
575 case 5: Row<5>::idct(pTemp, pSrc); break;
576 case 6: Row<6>::idct(pTemp, pSrc); break;
577 case 7: Row<7>::idct(pTemp, pSrc); break;
578 case 8: Row<8>::idct(pTemp, pSrc); break;
// Second pass: the same Col<N> specialization is used for all 8 iterations.
586 const int nonzero_rows = s_idct_col_table[block_max_zag - 1];
587 for (i = 8; i > 0; i--) {
588 switch (nonzero_rows) {
589 case 1: Col<1>::idct(pDst_ptr, pTemp); break;
590 case 2: Col<2>::idct(pDst_ptr, pTemp); break;
591 case 3: Col<3>::idct(pDst_ptr, pTemp); break;
592 case 4: Col<4>::idct(pDst_ptr, pTemp); break;
593 case 5: Col<5>::idct(pDst_ptr, pTemp); break;
594 case 6: Col<6>::idct(pDst_ptr, pTemp); break;
595 case 7: Col<7>::idct(pDst_ptr, pTemp); break;
596 case 8: Col<8>::idct(pDst_ptr, pTemp); break;
// IDCT variant that always uses the <4> specializations: 4 Row<4> passes followed by 8 Col<4> passes.
// NOTE(review): presumably intended for blocks whose non-zero coefficients are confined to the top-left 4x4 — confirm with callers.
604 void idct_4x4(const jpgd_block_t* pSrc_ptr, uint8_t* pDst_ptr)
608 const jpgd_block_t* pSrc = pSrc_ptr;
// First pass: only 4 iterations.
610 for (int i = 4; i > 0; i--) {
611 Row<4>::idct(pTemp, pSrc);
// Second pass: 8 iterations.
618 for (int i = 8; i > 0; i--) {
619 Col<4>::idct(pDst_ptr, pTemp);
626 // Retrieve one character from the input stream.
// Returns the next input byte widened to uint32_t. When the buffer is empty a refill is attempted first; if nothing
// is available after that, padding bytes are returned instead (see the comment in the inner branch).
627 inline uint32_t jpeg_decoder::get_char()
629 // Any bytes remaining in buffer?
630 if (!m_in_buf_left) {
631 // Try to get more bytes.
633 // Still nothing to get?
634 if (!m_in_buf_left) {
635 // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
// Normal case: read the byte under the cursor and advance the cursor.
642 uint32_t c = *m_pIn_buf_ofs++;
648 // Same as previous method, except can indicate if the character is a pad character or not.
// pPadding_flag - output; set to true when the input is exhausted and a pad byte is returned, false for a real stream byte.
649 inline uint32_t jpeg_decoder::get_char(bool *pPadding_flag)
651 if (!m_in_buf_left) {
653 if (!m_in_buf_left) {
// The buffer is still empty after the refill attempt: report padding.
654 *pPadding_flag = true;
// A real byte is available: clear the flag, read the byte and advance the cursor.
661 *pPadding_flag = false;
662 uint32_t c = *m_pIn_buf_ofs++;
669 // Inserts a previously retrieved character back into the input buffer.
// q - byte to push back. The read cursor is moved back by one and q is written at the new position.
670 inline void jpeg_decoder::stuff_char(uint8_t q)
// No bounds check here: the caller must not push back more bytes than there is room for before the cursor.
672 *(--m_pIn_buf_ofs) = q;
677 // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
678 inline uint8_t jpeg_decoder::get_octet()
681 int c = get_char(&padding_flag);
// A pad byte (input exhausted) is reported as 0xFF.
684 if (padding_flag) return 0xFF;
// Look at the following byte (presumably only reached when the first byte was 0xFF — the guarding lines are not visible here).
686 c = get_char(&padding_flag);
// A 0x00 in this position makes the function return a plain 0xFF data byte.
691 if (c == 0x00) return 0xFF;
// Otherwise the byte just read is pushed back into the input buffer.
693 stuff_char(static_cast<uint8_t>(c));
698 return static_cast<uint8_t>(c);
702 // Retrieves a variable number of bits from the input stream. Does not recognize markers.
// num_bits - number of bits to take from the top of the 32-bit bit buffer (m_bit_buf).
703 inline uint32_t jpeg_decoder::get_bits(int num_bits)
// Guard: with num_bits == 0 the shift below would be by 32 bits, which is undefined for a 32-bit value.
705 if (!num_bits) return 0;
// The result is the top num_bits bits of the bit buffer.
707 uint32_t i = m_bit_buf >> (32 - num_bits);
// When the bit count drops to zero or below, two more bytes are fetched with get_char() and merged in.
709 if ((m_bits_left -= num_bits) <= 0) {
// Shift out only the bits that were valid before the refill (num_bits + the now non-positive m_bits_left).
710 m_bit_buf <<= (num_bits += m_bits_left);
711 uint32_t c1 = get_char();
712 uint32_t c2 = get_char();
// Place the two new bytes in the low 16 bits, keeping the high 16 bits.
713 m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
// Shift out the remainder of the requested bits (m_bits_left is <= 0 here, so the shift amount is non-negative).
714 m_bit_buf <<= -m_bits_left;
716 JPGD_ASSERT(m_bits_left >= 0);
// Enough bits were buffered: just discard the consumed bits.
718 else m_bit_buf <<= num_bits;
724 // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
// num_bits - number of bits to take from the top of the 32-bit bit buffer (m_bit_buf).
725 inline uint32_t jpeg_decoder::get_bits_no_markers(int num_bits)
// Guard: with num_bits == 0 the shift below would be by 32 bits, which is undefined for a 32-bit value.
727 if (!num_bits)return 0;
729 uint32_t i = m_bit_buf >> (32 - num_bits);
731 if ((m_bits_left -= num_bits) <= 0) {
// Shift out only the bits that were valid before the refill.
732 m_bit_buf <<= (num_bits += m_bits_left);
// Slow path: fewer than two bytes are buffered, or one of the next two bytes is 0xFF. Go through get_octet().
733 if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF)) {
734 uint32_t c1 = get_octet();
735 uint32_t c2 = get_octet();
736 m_bit_buf |= (c1 << 8) | c2;
// Fast path: read the next two bytes directly from the input buffer.
738 m_bit_buf |= ((uint32_t)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
// Shift out the remainder of the requested bits (m_bits_left is <= 0 here).
742 m_bit_buf <<= -m_bits_left;
744 JPGD_ASSERT(m_bits_left >= 0);
745 } else m_bit_buf <<= num_bits;
751 // Decodes a Huffman encoded symbol.
// pH - Huffman tables to decode with. The bits used by the code are consumed via get_bits_no_markers().
752 inline int jpeg_decoder::huff_decode(huff_tables *pH)
756 // Check first 8-bits: do we have a complete symbol?
// look_up is indexed by the top 8 bits of the bit buffer; a negative entry means those 8 bits did not resolve to a symbol.
757 if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0) {
758 // Decode more bits, use a tree traversal to find symbol.
// Each step picks a tree slot from the negated current value plus the next bit of the bit buffer (bit position 'ofs').
761 symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))];
763 } while (symbol < 0);
// Consume the 8 looked-up bits plus (23 - ofs) more.
764 get_bits_no_markers(8 + (23 - ofs));
// Symbol found directly in the table: consume exactly its code length.
765 } else get_bits_no_markers(pH->code_size[symbol]);
771 // Decodes a Huffman encoded symbol.
// Variant that also fetches the extra bits following the code.
//   pH         - Huffman tables to decode with
//   extra_bits - output; receives the value of the extra bits that follow the code
772 inline int jpeg_decoder::huff_decode(huff_tables *pH, int& extra_bits)
776 // Check first 8-bits: do we have a complete symbol?
// look_up2 is indexed by the top 8 bits of the bit buffer; a negative entry means the tree walk below is needed.
777 if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0) {
778 // Use a tree traversal to find symbol.
781 symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))];
783 } while (symbol < 0);
// Consume the code bits, then read the extra bits; their count is the low 4 bits of the symbol.
785 get_bits_no_markers(8 + (23 - ofs));
786 extra_bits = get_bits_no_markers(symbol & 0xF);
// Packed look_up2 entry, as used below: bits 0-3 hold the number of extra bits, bits 8-12 a bit count,
// bit 15 flags that the extra-bits value is already stored in the entry, and bits 16 and up hold that value.
788 JPGD_ASSERT(((symbol >> 8) & 31) == pH->code_size[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
790 if (symbol & 0x8000) {
// Flag set: consume the stored bit count in one call and take the extra bits from the entry itself.
791 get_bits_no_markers((symbol >> 8) & 31);
792 extra_bits = symbol >> 16;
794 int code_size = (symbol >> 8) & 31;
795 int num_extra_bits = symbol & 0xF;
796 int bits = code_size + num_extra_bits;
// If code and extra bits fit in what is buffered plus one 16-bit refill, fetch both at once and mask off the extra bits.
797 if (bits <= (m_bits_left + 16)) extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
// Otherwise fetch the code and the extra bits with two separate calls.
799 get_bits_no_markers(code_size);
800 extra_bits = get_bits_no_markers(num_extra_bits);
809 // Tables and macro used to fully decode the DPCM differences.
// s_extend_test[s] is 2^(s-1) for s >= 1 (0 for s == 0).
810 static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
// s_extend_offset[s] is (~0u << s) + 1, i.e. 1 - 2^s in unsigned wrap-around arithmetic (0 for s == 0).
811 static const unsigned int s_extend_offset[16] = { 0, ((~0u)<<1) + 1, ((~0u)<<2) + 1, ((~0u)<<3) + 1, ((~0u)<<4) + 1, ((~0u)<<5) + 1, ((~0u)<<6) + 1, ((~0u)<<7) + 1, ((~0u)<<8) + 1, ((~0u)<<9) + 1, ((~0u)<<10) + 1, ((~0u)<<11) + 1, ((~0u)<<12) + 1, ((~0u)<<13) + 1, ((~0u)<<14) + 1, ((~0u)<<15) + 1 };
813 // The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this)
// JPGD_HUFF_EXTEND(x, s): if x is below 2^(s-1), add the offset for s to it; otherwise the result is x unchanged.
// Because the offset is unsigned, the adjusted branch is computed in unsigned arithmetic. 's' is not parenthesized
// in the expansion, so pass a simple expression.
814 #define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x))
817 // Clamps a value between 0-255.
// i - value to clamp. Returns 0 for negative input, 255 for input above 255, otherwise i itself.
818 inline uint8_t jpeg_decoder::clamp(int i)
// The unsigned comparison catches both negative and > 255 values. (~i) >> 31 is then 0 for negative i and all-ones
// for i > 255 (this relies on an arithmetic right shift of negative ints), which the mask turns into 0 or 0xFF.
820 if (static_cast<uint32_t>(i) > 255) i = (((~i) >> 31) & 0xFF);
821 return static_cast<uint8_t>(i);
825 namespace DCT_Upsample
// Fixed-size 4x4 matrix of int elements (the type is referred to as Matrix44 by the operators that follow).
829 typedef int Element_Type;
830 enum { NUM_ROWS = 4, NUM_COLS = 4 };
// Element storage, indexed as v[row][column].
832 Element_Type v[NUM_ROWS][NUM_COLS];
834 inline int rows() const { return NUM_ROWS; }
835 inline int cols() const { return NUM_COLS; }
// Element access by (row, column). No bounds checking is performed.
836 inline const Element_Type & at(int r, int c) const { return v[r][c]; }
837 inline Element_Type & at(int r, int c) { return v[r][c]; }
// In-place element-wise addition of 'a' to this matrix. The four columns of each row are unrolled by hand.
841 inline Matrix44& operator += (const Matrix44& a)
843 for (int r = 0; r < NUM_ROWS; r++) {
844 at(r, 0) += a.at(r, 0);
845 at(r, 1) += a.at(r, 1);
846 at(r, 2) += a.at(r, 2);
847 at(r, 3) += a.at(r, 3);
// In-place element-wise subtraction of 'a' from this matrix. The four columns of each row are unrolled by hand.
852 inline Matrix44& operator -= (const Matrix44& a)
854 for (int r = 0; r < NUM_ROWS; r++) {
855 at(r, 0) -= a.at(r, 0);
856 at(r, 1) -= a.at(r, 1);
857 at(r, 2) -= a.at(r, 2);
858 at(r, 3) -= a.at(r, 3);
// Element-wise sum a + b, written into the matrix 'ret'; neither operand is modified.
863 friend inline Matrix44 operator + (const Matrix44& a, const Matrix44& b)
866 for (int r = 0; r < NUM_ROWS; r++) {
867 ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
868 ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
869 ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
870 ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
// Element-wise difference a - b, written into the matrix 'ret'; neither operand is modified.
875 friend inline Matrix44 operator - (const Matrix44& a, const Matrix44& b)
878 for (int r = 0; r < NUM_ROWS; r++) {
879 ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
880 ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
881 ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
882 ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
// Computes a + b element-wise and stores the result into pDst, which is addressed with a stride of 8 elements.
// The store is transposed: element (r, c) of the sum is written to pDst[c*8 + r].
// Each sum is narrowed from int to jpgd_block_t (int16_t) by the cast.
887 static inline void add_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
889 for (int r = 0; r < 4; r++) {
890 pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) + b.at(r, 0));
891 pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) + b.at(r, 1));
892 pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) + b.at(r, 2));
893 pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) + b.at(r, 3));
// Computes a - b element-wise and stores the result into pDst, which is addressed with a stride of 8 elements.
// The store is transposed: element (r, c) of the difference is written to pDst[c*8 + r].
// Each difference is narrowed from int to jpgd_block_t (int16_t) by the cast.
897 static inline void sub_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
899 for (int r = 0; r < 4; r++) {
900 pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) - b.at(r, 0));
901 pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) - b.at(r, 1));
902 pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) - b.at(r, 2));
903 pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) - b.at(r, 3));
// Fixed-point helpers for the upsampling math: values carry FRACT_BITS (10) fractional bits, so SCALE is 1024.
908 const int FRACT_BITS = 10;
909 const int SCALE = 1 << FRACT_BITS;
911 typedef int Temp_Type;
// D(i): convert a fixed-point value back to an integer with rounding (adds SCALE/2 before shifting right).
912 #define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
// F(i): convert a real constant to fixed point. Adding .5f before the int cast truncates toward zero,
// so positive constants are rounded to nearest while negative ones are not.
913 #define F(i) ((int)((i) * SCALE + .5f))
915 // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
// AT(c, r) reads pSrc[c + r*8], or yields 0 when c >= NUM_COLS or r >= NUM_ROWS.
// It relies on pSrc, NUM_COLS and NUM_ROWS being in scope where the macro is expanded.
916 #define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])
918 // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
// calc() fills entries of P and Q from the 8x8 coefficient block at pSrc using
// the fixed-point helpers D()/F(). Coefficients outside NUM_ROWS/NUM_COLS are
// read as 0 through AT(), so their terms drop out of the sums.
919 template<int NUM_ROWS, int NUM_COLS>
922 static void calc(Matrix44& P, Matrix44& Q, const jpgd_block_t* pSrc)
924 // 4x8 = 4x8 times 8x8, matrix 0 is constant
// First stage. In X0ij, j is the source row index passed to AT().
// X00j and X02j are plain copies of source columns 0 and 4.
925 const Temp_Type X000 = AT(0, 0);
926 const Temp_Type X001 = AT(0, 1);
927 const Temp_Type X002 = AT(0, 2);
928 const Temp_Type X003 = AT(0, 3);
929 const Temp_Type X004 = AT(0, 4);
930 const Temp_Type X005 = AT(0, 5);
931 const Temp_Type X006 = AT(0, 6);
932 const Temp_Type X007 = AT(0, 7);
// X01j: weighted sum of the odd source columns 1, 3, 5 and 7.
933 const Temp_Type X010 = D(F(0.415735f) * AT(1, 0) + F(0.791065f) * AT(3, 0) + F(-0.352443f) * AT(5, 0) + F(0.277785f) * AT(7, 0));
934 const Temp_Type X011 = D(F(0.415735f) * AT(1, 1) + F(0.791065f) * AT(3, 1) + F(-0.352443f) * AT(5, 1) + F(0.277785f) * AT(7, 1));
935 const Temp_Type X012 = D(F(0.415735f) * AT(1, 2) + F(0.791065f) * AT(3, 2) + F(-0.352443f) * AT(5, 2) + F(0.277785f) * AT(7, 2));
936 const Temp_Type X013 = D(F(0.415735f) * AT(1, 3) + F(0.791065f) * AT(3, 3) + F(-0.352443f) * AT(5, 3) + F(0.277785f) * AT(7, 3));
937 const Temp_Type X014 = D(F(0.415735f) * AT(1, 4) + F(0.791065f) * AT(3, 4) + F(-0.352443f) * AT(5, 4) + F(0.277785f) * AT(7, 4));
938 const Temp_Type X015 = D(F(0.415735f) * AT(1, 5) + F(0.791065f) * AT(3, 5) + F(-0.352443f) * AT(5, 5) + F(0.277785f) * AT(7, 5));
939 const Temp_Type X016 = D(F(0.415735f) * AT(1, 6) + F(0.791065f) * AT(3, 6) + F(-0.352443f) * AT(5, 6) + F(0.277785f) * AT(7, 6));
940 const Temp_Type X017 = D(F(0.415735f) * AT(1, 7) + F(0.791065f) * AT(3, 7) + F(-0.352443f) * AT(5, 7) + F(0.277785f) * AT(7, 7));
941 const Temp_Type X020 = AT(4, 0);
942 const Temp_Type X021 = AT(4, 1);
943 const Temp_Type X022 = AT(4, 2);
944 const Temp_Type X023 = AT(4, 3);
945 const Temp_Type X024 = AT(4, 4);
946 const Temp_Type X025 = AT(4, 5);
947 const Temp_Type X026 = AT(4, 6);
948 const Temp_Type X027 = AT(4, 7);
// X03j: a second weighted sum of the odd source columns, with different weights.
949 const Temp_Type X030 = D(F(0.022887f) * AT(1, 0) + F(-0.097545f) * AT(3, 0) + F(0.490393f) * AT(5, 0) + F(0.865723f) * AT(7, 0));
950 const Temp_Type X031 = D(F(0.022887f) * AT(1, 1) + F(-0.097545f) * AT(3, 1) + F(0.490393f) * AT(5, 1) + F(0.865723f) * AT(7, 1));
951 const Temp_Type X032 = D(F(0.022887f) * AT(1, 2) + F(-0.097545f) * AT(3, 2) + F(0.490393f) * AT(5, 2) + F(0.865723f) * AT(7, 2));
952 const Temp_Type X033 = D(F(0.022887f) * AT(1, 3) + F(-0.097545f) * AT(3, 3) + F(0.490393f) * AT(5, 3) + F(0.865723f) * AT(7, 3));
953 const Temp_Type X034 = D(F(0.022887f) * AT(1, 4) + F(-0.097545f) * AT(3, 4) + F(0.490393f) * AT(5, 4) + F(0.865723f) * AT(7, 4));
954 const Temp_Type X035 = D(F(0.022887f) * AT(1, 5) + F(-0.097545f) * AT(3, 5) + F(0.490393f) * AT(5, 5) + F(0.865723f) * AT(7, 5));
955 const Temp_Type X036 = D(F(0.022887f) * AT(1, 6) + F(-0.097545f) * AT(3, 6) + F(0.490393f) * AT(5, 6) + F(0.865723f) * AT(7, 6));
956 const Temp_Type X037 = D(F(0.022887f) * AT(1, 7) + F(-0.097545f) * AT(3, 7) + F(0.490393f) * AT(5, 7) + F(0.865723f) * AT(7, 7));
958 // 4x4 = 4x8 times 8x4, matrix 1 is constant
// Second stage for P: columns 1 and 3 of each row are weighted sums of the
// odd-indexed first-stage values (X0i1, X0i3, X0i5, X0i7), using the same two
// weight sets as the first stage.
960 P.at(0, 1) = D(X001 * F(0.415735f) + X003 * F(0.791065f) + X005 * F(-0.352443f) + X007 * F(0.277785f));
962 P.at(0, 3) = D(X001 * F(0.022887f) + X003 * F(-0.097545f) + X005 * F(0.490393f) + X007 * F(0.865723f));
964 P.at(1, 1) = D(X011 * F(0.415735f) + X013 * F(0.791065f) + X015 * F(-0.352443f) + X017 * F(0.277785f));
966 P.at(1, 3) = D(X011 * F(0.022887f) + X013 * F(-0.097545f) + X015 * F(0.490393f) + X017 * F(0.865723f));
968 P.at(2, 1) = D(X021 * F(0.415735f) + X023 * F(0.791065f) + X025 * F(-0.352443f) + X027 * F(0.277785f));
970 P.at(2, 3) = D(X021 * F(0.022887f) + X023 * F(-0.097545f) + X025 * F(0.490393f) + X027 * F(0.865723f));
972 P.at(3, 1) = D(X031 * F(0.415735f) + X033 * F(0.791065f) + X035 * F(-0.352443f) + X037 * F(0.277785f));
974 P.at(3, 3) = D(X031 * F(0.022887f) + X033 * F(-0.097545f) + X035 * F(0.490393f) + X037 * F(0.865723f));
977 // 4x4 = 4x8 times 8x4, matrix 1 is constant
// Second stage for Q: columns 0 and 2 of each row, built from the same
// odd-indexed first-stage values with a further two weight sets.
978 Q.at(0, 0) = D(X001 * F(0.906127f) + X003 * F(-0.318190f) + X005 * F(0.212608f) + X007 * F(-0.180240f));
980 Q.at(0, 2) = D(X001 * F(-0.074658f) + X003 * F(0.513280f) + X005 * F(0.768178f) + X007 * F(-0.375330f));
982 Q.at(1, 0) = D(X011 * F(0.906127f) + X013 * F(-0.318190f) + X015 * F(0.212608f) + X017 * F(-0.180240f));
984 Q.at(1, 2) = D(X011 * F(-0.074658f) + X013 * F(0.513280f) + X015 * F(0.768178f) + X017 * F(-0.375330f));
986 Q.at(2, 0) = D(X021 * F(0.906127f) + X023 * F(-0.318190f) + X025 * F(0.212608f) + X027 * F(-0.180240f));
988 Q.at(2, 2) = D(X021 * F(-0.074658f) + X023 * F(0.513280f) + X025 * F(0.768178f) + X027 * F(-0.375330f));
990 Q.at(3, 0) = D(X031 * F(0.906127f) + X033 * F(-0.318190f) + X035 * F(0.212608f) + X037 * F(-0.180240f));
992 Q.at(3, 2) = D(X031 * F(-0.074658f) + X033 * F(0.513280f) + X035 * F(0.768178f) + X037 * F(-0.375330f));
// calc() fills entries of R and S from the 8x8 coefficient block at pSrc using
// the fixed-point helpers D()/F(). NUM_ROWS/NUM_COLS bound the coefficients
// that AT() actually reads; everything outside them is treated as 0.
999 template<int NUM_ROWS, int NUM_COLS>
1002 static void calc(Matrix44& R, Matrix44& S, const jpgd_block_t* pSrc)
1004 // 4x8 = 4x8 times 8x8, matrix 0 is constant
// First stage. In X1ij, j is the source row index passed to AT().
// X10j: weighted sum of the odd source columns 1, 3, 5 and 7.
1005 const Temp_Type X100 = D(F(0.906127f) * AT(1, 0) + F(-0.318190f) * AT(3, 0) + F(0.212608f) * AT(5, 0) + F(-0.180240f) * AT(7, 0));
1006 const Temp_Type X101 = D(F(0.906127f) * AT(1, 1) + F(-0.318190f) * AT(3, 1) + F(0.212608f) * AT(5, 1) + F(-0.180240f) * AT(7, 1));
1007 const Temp_Type X102 = D(F(0.906127f) * AT(1, 2) + F(-0.318190f) * AT(3, 2) + F(0.212608f) * AT(5, 2) + F(-0.180240f) * AT(7, 2));
1008 const Temp_Type X103 = D(F(0.906127f) * AT(1, 3) + F(-0.318190f) * AT(3, 3) + F(0.212608f) * AT(5, 3) + F(-0.180240f) * AT(7, 3));
1009 const Temp_Type X104 = D(F(0.906127f) * AT(1, 4) + F(-0.318190f) * AT(3, 4) + F(0.212608f) * AT(5, 4) + F(-0.180240f) * AT(7, 4));
1010 const Temp_Type X105 = D(F(0.906127f) * AT(1, 5) + F(-0.318190f) * AT(3, 5) + F(0.212608f) * AT(5, 5) + F(-0.180240f) * AT(7, 5));
1011 const Temp_Type X106 = D(F(0.906127f) * AT(1, 6) + F(-0.318190f) * AT(3, 6) + F(0.212608f) * AT(5, 6) + F(-0.180240f) * AT(7, 6));
1012 const Temp_Type X107 = D(F(0.906127f) * AT(1, 7) + F(-0.318190f) * AT(3, 7) + F(0.212608f) * AT(5, 7) + F(-0.180240f) * AT(7, 7));
// X11j: plain copy of source column 2.
1013 const Temp_Type X110 = AT(2, 0);
1014 const Temp_Type X111 = AT(2, 1);
1015 const Temp_Type X112 = AT(2, 2);
1016 const Temp_Type X113 = AT(2, 3);
1017 const Temp_Type X114 = AT(2, 4);
1018 const Temp_Type X115 = AT(2, 5);
1019 const Temp_Type X116 = AT(2, 6);
1020 const Temp_Type X117 = AT(2, 7);
// X12j: a second weighted sum of the odd source columns, with different weights.
1021 const Temp_Type X120 = D(F(-0.074658f) * AT(1, 0) + F(0.513280f) * AT(3, 0) + F(0.768178f) * AT(5, 0) + F(-0.375330f) * AT(7, 0));
1022 const Temp_Type X121 = D(F(-0.074658f) * AT(1, 1) + F(0.513280f) * AT(3, 1) + F(0.768178f) * AT(5, 1) + F(-0.375330f) * AT(7, 1));
1023 const Temp_Type X122 = D(F(-0.074658f) * AT(1, 2) + F(0.513280f) * AT(3, 2) + F(0.768178f) * AT(5, 2) + F(-0.375330f) * AT(7, 2));
1024 const Temp_Type X123 = D(F(-0.074658f) * AT(1, 3) + F(0.513280f) * AT(3, 3) + F(0.768178f) * AT(5, 3) + F(-0.375330f) * AT(7, 3));
1025 const Temp_Type X124 = D(F(-0.074658f) * AT(1, 4) + F(0.513280f) * AT(3, 4) + F(0.768178f) * AT(5, 4) + F(-0.375330f) * AT(7, 4));
1026 const Temp_Type X125 = D(F(-0.074658f) * AT(1, 5) + F(0.513280f) * AT(3, 5) + F(0.768178f) * AT(5, 5) + F(-0.375330f) * AT(7, 5));
1027 const Temp_Type X126 = D(F(-0.074658f) * AT(1, 6) + F(0.513280f) * AT(3, 6) + F(0.768178f) * AT(5, 6) + F(-0.375330f) * AT(7, 6));
1028 const Temp_Type X127 = D(F(-0.074658f) * AT(1, 7) + F(0.513280f) * AT(3, 7) + F(0.768178f) * AT(5, 7) + F(-0.375330f) * AT(7, 7));
// X13j: plain copy of source column 6.
1029 const Temp_Type X130 = AT(6, 0);
1030 const Temp_Type X131 = AT(6, 1);
1031 const Temp_Type X132 = AT(6, 2);
1032 const Temp_Type X133 = AT(6, 3);
1033 const Temp_Type X134 = AT(6, 4);
1034 const Temp_Type X135 = AT(6, 5);
1035 const Temp_Type X136 = AT(6, 6);
1036 const Temp_Type X137 = AT(6, 7);
1039 // 4x4 = 4x8 times 8x4, matrix 1 is constant
// Second stage for R: columns 1 and 3 of each row are weighted sums of the
// odd-indexed first-stage values (X1i1, X1i3, X1i5, X1i7).
1041 R.at(0, 1) = D(X101 * F(0.415735f) + X103 * F(0.791065f) + X105 * F(-0.352443f) + X107 * F(0.277785f));
1043 R.at(0, 3) = D(X101 * F(0.022887f) + X103 * F(-0.097545f) + X105 * F(0.490393f) + X107 * F(0.865723f));
1045 R.at(1, 1) = D(X111 * F(0.415735f) + X113 * F(0.791065f) + X115 * F(-0.352443f) + X117 * F(0.277785f));
1047 R.at(1, 3) = D(X111 * F(0.022887f) + X113 * F(-0.097545f) + X115 * F(0.490393f) + X117 * F(0.865723f));
1049 R.at(2, 1) = D(X121 * F(0.415735f) + X123 * F(0.791065f) + X125 * F(-0.352443f) + X127 * F(0.277785f));
1051 R.at(2, 3) = D(X121 * F(0.022887f) + X123 * F(-0.097545f) + X125 * F(0.490393f) + X127 * F(0.865723f));
1053 R.at(3, 1) = D(X131 * F(0.415735f) + X133 * F(0.791065f) + X135 * F(-0.352443f) + X137 * F(0.277785f));
1055 R.at(3, 3) = D(X131 * F(0.022887f) + X133 * F(-0.097545f) + X135 * F(0.490393f) + X137 * F(0.865723f));
1057 // 4x4 = 4x8 times 8x4, matrix 1 is constant
// Second stage for S: columns 0 and 2 of each row, built from the same
// odd-indexed first-stage values with a further two weight sets.
1058 S.at(0, 0) = D(X101 * F(0.906127f) + X103 * F(-0.318190f) + X105 * F(0.212608f) + X107 * F(-0.180240f));
1060 S.at(0, 2) = D(X101 * F(-0.074658f) + X103 * F(0.513280f) + X105 * F(0.768178f) + X107 * F(-0.375330f));
1062 S.at(1, 0) = D(X111 * F(0.906127f) + X113 * F(-0.318190f) + X115 * F(0.212608f) + X117 * F(-0.180240f));
1064 S.at(1, 2) = D(X111 * F(-0.074658f) + X113 * F(0.513280f) + X115 * F(0.768178f) + X117 * F(-0.375330f));
1066 S.at(2, 0) = D(X121 * F(0.906127f) + X123 * F(-0.318190f) + X125 * F(0.212608f) + X127 * F(-0.180240f));
1068 S.at(2, 2) = D(X121 * F(-0.074658f) + X123 * F(0.513280f) + X125 * F(0.768178f) + X127 * F(-0.375330f));
1070 S.at(3, 0) = D(X131 * F(0.906127f) + X133 * F(-0.318190f) + X135 * F(0.212608f) + X137 * F(-0.180240f));
1072 S.at(3, 2) = D(X131 * F(-0.074658f) + X133 * F(0.513280f) + X135 * F(0.768178f) + X137 * F(-0.375330f));
1077 } // end namespace DCT_Upsample
1080 // Unconditionally frees all allocated m_blocks.
// Also clears m_pStream and leaves the m_pMem_blocks list head null, so the
// allocator starts from an empty chain afterwards.
1081 void jpeg_decoder::free_all_blocks()
1084 m_pStream = nullptr;
// Walk the singly linked chain; the successor is read before the current
// node is dealt with so the traversal can continue past it.
1086 for (mem_block *b = m_pMem_blocks; b; ) {
1087 mem_block *n = b->m_pNext;
1091 m_pMem_blocks = nullptr;
1095 // This method handles all errors. It will never return.
1096 // It could easily be changed to use C++ exceptions.
// Records `status` in m_error_code, then transfers control to the setjmp()
// site that saved m_jmp_state, passing `status` as the longjmp value.
// Because the exit is a longjmp, destructors of objects on the abandoned
// stack frames are not run - keep such frames free of owning locals.
1097 JPGD_NORETURN void jpeg_decoder::stop_decoding(jpgd_status status)
1099 m_error_code = status;
1101 longjmp(m_jmp_state, status);
// Hands out nSize bytes from the decoder's chain of mem_blocks; the bytes are
// cleared when `zero` is set. The request is first rounded up to a multiple
// of 4 (a request of 0 becomes 4). If malloc() fails the call does not return:
// it goes through stop_decoding(JPGD_NOTENOUGHMEM).
1105 void *jpeg_decoder::alloc(size_t nSize, bool zero)
1107 nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
// First try to carve the request out of an existing block with enough room left.
1109 for (mem_block *b = m_pMem_blocks; b; b = b->m_pNext) {
1110 if ((b->m_used_count + nSize) <= b->m_size) {
1111 rv = b->m_data + b->m_used_count;
1112 b->m_used_count += nSize;
// New block (presumably only when no existing block had room): at least
// 32768 - 256 bytes, or nSize rounded up to a multiple of 2048 if that is larger.
// The new block is pushed onto the front of the chain.
// NOTE(review): capacity is an int while nSize is a size_t, so a very large
// request would be narrowed here - confirm callers never ask for that much.
1117 int capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
1118 mem_block *b = (mem_block*)malloc(sizeof(mem_block) + capacity);
1119 if (!b) stop_decoding(JPGD_NOTENOUGHMEM);
1120 b->m_pNext = m_pMem_blocks; m_pMem_blocks = b;
1121 b->m_used_count = nSize;
1122 b->m_size = capacity;
1125 if (zero) memset(rv, 0, nSize);
// Fills memory at p with the 16-bit value c written byte by byte, low byte
// first, so the result does not depend on host endianness or on p's alignment.
// n is presumably the number of 16-bit words to write (2 bytes are emitted
// per step) - TODO confirm against the loop condition.
1130 void jpeg_decoder::word_clear(void *p, uint16_t c, uint32_t n)
1132 uint8_t *pD = (uint8_t*)p;
1133 const uint8_t l = c & 0xFF, h = (c >> 8) & 0xFF;
1135 pD[0] = l; pD[1] = h; pD += 2;
1141 // Refill the input buffer.
1142 // This method will sit in a loop until (A) the buffer is full or (B)
1143 // the stream's read() method reports an end of file condition.
// A read() result of -1 is treated as a stream error and aborts decoding
// via stop_decoding(JPGD_STREAM_READ).
1144 void jpeg_decoder::prep_in_buffer()
1147 m_pIn_buf_ofs = m_in_buf;
// Nothing more to fetch once the stream has reported end of file.
1149 if (m_eof_flag) return;
// Each read appends after the bytes already gathered and asks only for the
// space that is still free in the buffer.
1152 int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
1153 if (bytes_read == -1) stop_decoding(JPGD_STREAM_READ);
1154 m_in_buf_left += bytes_read;
1155 } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
1157 m_total_bytes_read += m_in_buf_left;
1159 // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
1160 // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
// word_clear() emits the low byte first, so 0xD9FF becomes the byte pair FF D9.
// The padding is written just past the valid data, which can be past the end
// of m_in_buf when the buffer is full - presumably m_in_buf_pad_end provides
// that room; TODO confirm its size covers 64 words.
1161 word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
1165 // Read a Huffman code table.
// Reads the segment length, then per table: a one-byte table selector, 16
// code-length counts (huff_num[1..16]) and `count` symbol values. The result
// is copied into decoder-owned storage in m_huff_num[]/m_huff_val[].
// Malformed input aborts through stop_decoding() with a JPGD_BAD_DHT_* code.
1166 void jpeg_decoder::read_dht_marker()
1168 int i, index, count;
1169 uint8_t huff_num[17];
1170 uint8_t huff_val[256];
1171 uint32_t num_left = get_bits(16);
1173 if (num_left < 2) stop_decoding(JPGD_BAD_DHT_MARKER);
1177 index = get_bits(8);
// Sum the per-length code counts; the total is the number of symbols that follow.
1181 for (i = 1; i <= 16; i++) {
1182 huff_num[i] = static_cast<uint8_t>(get_bits(8));
1183 count += huff_num[i];
// Guards the huff_val[256] writes below.
1186 if (count > 255) stop_decoding(JPGD_BAD_DHT_COUNTS);
1188 for (i = 0; i < count; i++)
1189 huff_val[i] = static_cast<uint8_t>(get_bits(8));
1193 if (num_left < (uint32_t)i) stop_decoding(JPGD_BAD_DHT_MARKER);
// NOTE(review): (index & 0x10) can only be 0 or 0x10, so this comparison is
// never true and the check is dead. A mask covering the whole high nibble was
// probably intended - confirm before changing.
1196 if ((index & 0x10) > 0x10) stop_decoding(JPGD_BAD_DHT_INDEX);
// Remap the selector: low nibble is the table number, bit 4 moves the table
// into the upper half of the [0, JPGD_MAX_HUFF_TABLES) slot range.
1197 index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
1198 if (index >= JPGD_MAX_HUFF_TABLES) stop_decoding(JPGD_BAD_DHT_INDEX);
// Storage for each slot is allocated once and reused if the table is redefined.
1200 if (!m_huff_num[index]) m_huff_num[index] = (uint8_t *)alloc(17);
1201 if (!m_huff_val[index]) m_huff_val[index] = (uint8_t *)alloc(256);
// NOTE(review): `index` was remapped above, so this tests bit 4 of the slot
// number rather than of the selector byte read from the stream. Given the
// range check above, it is non-zero only if JPGD_MAX_HUFF_TABLES exceeds 16 -
// verify that m_huff_ac ends up with the intended value.
1203 m_huff_ac[index] = (index & 0x10) != 0;
// The full local arrays are copied regardless of `count`; entries of huff_val
// at or beyond `count` were not written by the loop above.
1204 memcpy(m_huff_num[index], huff_num, 17);
1205 memcpy(m_huff_val[index], huff_val, 256);
1210 // Read a quantization table.
// Reads the segment length, validates the table number n, and stores 64
// entries into m_quant[n]. Malformed input aborts through stop_decoding()
// with a JPGD_BAD_DQT_* code.
1211 void jpeg_decoder::read_dqt_marker()
1215 uint32_t num_left = get_bits(16);
1216 if (num_left < 2) stop_decoding(JPGD_BAD_DQT_MARKER);
1224 if (n >= JPGD_MAX_QUANT_TABLES) stop_decoding(JPGD_BAD_DQT_TABLE);
// Table storage is allocated on first use and reused on redefinition.
1226 if (!m_quant[n]) m_quant[n] = (jpgd_quant_t *)alloc(64 * sizeof(jpgd_quant_t));
1228 // read quantization entries, in zag order
1229 for (i = 0; i < 64; i++) {
// When prec is non-zero each entry takes a second byte, appended as the low 8 bits.
1231 if (prec) temp = (temp << 8) + get_bits(8);
1232 m_quant[n][i] = static_cast<jpgd_quant_t>(temp);
// i presumably holds the number of bytes this table consumed at this point;
// the segment must have at least that much left - TODO confirm.
1236 if (num_left < (uint32_t)i) stop_decoding(JPGD_BAD_DQT_LENGTH);
1242 // Read the start of frame (SOF) marker.
// Fills in the image dimensions, the component count and, per component, its
// identifier, horizontal/vertical sampling factors and quantization table
// number. Any value outside the supported range aborts via stop_decoding().
1243 void jpeg_decoder::read_sof_marker()
1246 uint32_t num_left = get_bits(16);
1248 if (get_bits(8) != 8) stop_decoding(JPGD_BAD_PRECISION); /* precision: sorry, only 8-bit precision is supported right now */
1250 m_image_y_size = get_bits(16);
1251 if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT)) stop_decoding(JPGD_BAD_HEIGHT);
1253 m_image_x_size = get_bits(16);
1254 if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH)) stop_decoding(JPGD_BAD_WIDTH);
1256 m_comps_in_frame = get_bits(8);
1257 if (m_comps_in_frame > JPGD_MAX_COMPONENTS) stop_decoding(JPGD_TOO_MANY_COMPONENTS);
// The segment length must be exactly 8 header bytes plus 3 bytes per component.
1259 if (num_left != (uint32_t)(m_comps_in_frame * 3 + 8)) stop_decoding(JPGD_BAD_SOF_LENGTH);
1261 for (i = 0; i < m_comps_in_frame; i++) {
1262 m_comp_ident[i] = get_bits(8);
// The two sampling factors share one byte: horizontal is read first, then vertical.
1263 m_comp_h_samp[i] = get_bits(4);
1264 m_comp_v_samp[i] = get_bits(4);
1265 m_comp_quant[i] = get_bits(8);
1270 // Used to skip unrecognized markers.
// Reads the 16-bit segment length; a length below 2 is rejected with
// JPGD_BAD_VARIABLE_MARKER.
1271 void jpeg_decoder::skip_variable_marker()
1273 uint32_t num_left = get_bits(16);
1274 if (num_left < 2) stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1284 // Read a define restart interval (DRI) marker.
// The segment length must be exactly 4; the 16-bit value that follows is
// stored in m_restart_interval.
1285 void jpeg_decoder::read_dri_marker()
1287 if (get_bits(16) != 4) stop_decoding(JPGD_BAD_DRI_LENGTH);
1288 m_restart_interval = get_bits(16);
1292 // Read a start of scan (SOS) marker.
// Records which frame components take part in the scan, their DC/AC Huffman
// table selectors, and the spectral-selection / successive-approximation
// parameters. Inconsistent input aborts via stop_decoding().
1293 void jpeg_decoder::read_sos_marker()
1296 uint32_t num_left = get_bits(16);
1297 int n = get_bits(8);
// Note: m_comps_in_scan is stored before n is validated on the next check; a
// bad n never survives because stop_decoding() does not return.
1299 m_comps_in_scan = n;
// Length must be 3 bytes plus 2 per component, and n must be within 1..JPGD_MAX_COMPS_IN_SCAN.
1302 if ( (num_left != (uint32_t)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) ) stop_decoding(JPGD_BAD_SOS_LENGTH);
1304 for (i = 0; i < n; i++) {
// Map the component id from the stream onto its index in the frame's component list.
1309 for (ci = 0; ci < m_comps_in_frame; ci++)
1310 if (cc == m_comp_ident[ci]) break;
1312 if (ci >= m_comps_in_frame) stop_decoding(JPGD_BAD_SOS_COMP_ID);
1314 m_comp_list[i] = ci;
// High nibble selects the DC table; low nibble selects the AC table, which
// lives in the upper half of the Huffman table slots.
1315 m_comp_dc_tab[ci] = (c >> 4) & 15;
1316 m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
1318 m_spectral_start = get_bits(8);
1319 m_spectral_end = get_bits(8);
1320 m_successive_high = get_bits(4);
1321 m_successive_low = get_bits(4);
// For non-progressive images the spectral range read above is overridden with the full 0..63.
1323 if (!m_progressive_flag) {
1324 m_spectral_start = 0;
1325 m_spectral_end = 63;
1329 while (num_left) { /* read past whatever is num_left */
1336 // Finds the next marker.
// Scans forward to a 0xFF byte, then past any run of further 0xFF bytes; the
// first byte after that run is the candidate marker code held in c.
1337 int jpeg_decoder::next_marker()
1339 uint32_t c, bytes = 0;
1345 } while (c != 0xFF);
1349 } while (c == 0xFF);
1352 // If bytes > 0 here, there were extra bytes before the marker (not good).
1357 // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is encountered.
// Markers that carry no decodable meaning for this decoder are skipped via
// skip_variable_marker(); unsupported or misplaced ones abort via stop_decoding().
1359 int jpeg_decoder::process_markers()
1382 case M_SOS: return c;
1387 // No arithmetic support - dumb patents!
1389 stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1400 //case M_APP0: /* no need to read the JFIF marker */
1402 case M_RST0: /* no parameters */
1411 stop_decoding(JPGD_UNEXPECTED_MARKER);
1414 default: { /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
1415 skip_variable_marker();
1423 // Finds the start of image (SOI) marker.
1424 // This code is rather defensive: it only scans a bounded number of bytes (bytesleft starts at 4096; it used to be 512)
// instead of searching the whole stream. Failure aborts with JPGD_NOT_JPEG.
1426 void jpeg_decoder::locate_soi_marker()
1428 uint32_t lastchar = get_bits(8);
1429 uint32_t thischar = get_bits(8);
1431 /* ok if it's a normal JPEG file without a special header */
1432 if ((lastchar == 0xFF) && (thischar == M_SOI)) return;
1434 uint32_t bytesleft = 4096; //512;
// Slide a two-byte window over the input until FF,SOI is seen or the budget runs out.
1437 if (--bytesleft == 0) stop_decoding(JPGD_NOT_JPEG);
1439 lastchar = thischar;
1440 thischar = get_bits(8);
1442 if (lastchar == 0xFF) {
1443 if (thischar == M_SOI) break;
1444 else if (thischar == M_EOI) stop_decoding(JPGD_NOT_JPEG); // get_bits will keep returning M_EOI if we read past the end
1448 // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
// The byte is peeked from the top 8 bits of the bit buffer without consuming it.
1449 thischar = (m_bit_buf >> 24) & 0xFF;
1450 if (thischar != 0xFF) stop_decoding(JPGD_NOT_JPEG);
1454 // Find a start of frame (SOF) marker.
// Locates SOI first, then lets process_markers() run until it returns a marker
// code, and dispatches on that code. Arithmetic-coded and other unsupported
// frame types abort via stop_decoding().
1455 void jpeg_decoder::locate_sof_marker()
1457 locate_soi_marker();
1458 int c = process_markers();
// M_SOF2 sets the progressive flag and then deliberately falls through to
// share the handling of the sequential frame types below.
1461 case M_SOF2: m_progressive_flag = true;
1462 case M_SOF0: /* baseline DCT */
1463 case M_SOF1: { /* extended sequential DCT */
1467 case M_SOF9: { /* Arithmetic coding */
1468 stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1472 stop_decoding(JPGD_UNSUPPORTED_MARKER);
1479 // Find a start of scan (SOS) marker.
// Returns false (0) when the end-of-image marker is reached instead of a scan.
// Any marker other than EOI or SOS aborts with JPGD_UNEXPECTED_MARKER.
// The function is declared int but is used as a boolean result.
1480 int jpeg_decoder::locate_sos_marker()
1482 int c = process_markers();
1483 if (c == M_EOI) return false;
1484 else if (c != M_SOS) stop_decoding(JPGD_UNEXPECTED_MARKER);
1490 // Reset everything to default/uninitialized state.
// Binds the decoder to pStream and clears every table, counter and buffer
// pointer. The memory block list head is simply set to null here; nothing is
// freed by this function.
1491 void jpeg_decoder::init(jpeg_decoder_stream *pStream)
1493 m_pMem_blocks = nullptr;
1494 m_error_code = JPGD_SUCCESS;
1495 m_ready_flag = false;
1496 m_image_x_size = m_image_y_size = 0;
1497 m_pStream = pStream;
1498 m_progressive_flag = false;
// Huffman and quantization tables.
1500 memset(m_huff_ac, 0, sizeof(m_huff_ac));
1501 memset(m_huff_num, 0, sizeof(m_huff_num));
1502 memset(m_huff_val, 0, sizeof(m_huff_val));
1503 memset(m_quant, 0, sizeof(m_quant));
// Per-frame component description.
1506 m_comps_in_frame = 0;
1508 memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp));
1509 memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp));
1510 memset(m_comp_quant, 0, sizeof(m_comp_quant));
1511 memset(m_comp_ident, 0, sizeof(m_comp_ident));
1512 memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks));
1513 memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks));
// Per-scan component description.
1515 m_comps_in_scan = 0;
1516 memset(m_comp_list, 0, sizeof(m_comp_list));
1517 memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab));
1518 memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab));
// Scan parameters and MCU geometry.
1520 m_spectral_start = 0;
1522 m_successive_low = 0;
1523 m_successive_high = 0;
1524 m_max_mcu_x_size = 0;
1525 m_max_mcu_y_size = 0;
1526 m_blocks_per_mcu = 0;
1527 m_max_blocks_per_row = 0;
1530 m_expanded_blocks_per_component = 0;
1531 m_expanded_blocks_per_mcu = 0;
1532 m_expanded_blocks_per_row = 0;
1533 m_freq_domain_chroma_upsample = false;
1535 memset(m_mcu_org, 0, sizeof(m_mcu_org));
// Output scanline bookkeeping.
1537 m_total_lines_left = 0;
1538 m_mcu_lines_left = 0;
1539 m_real_dest_bytes_per_scan_line = 0;
1540 m_dest_bytes_per_scan_line = 0;
1541 m_dest_bytes_per_pixel = 0;
1543 memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs));
1545 memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs));
1546 memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs));
1547 memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
// NOTE(review): m_block_y_mcu was already cleared a few lines above; this
// second memset is redundant (harmless).
1551 memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
// Input buffer state, including the padding areas on either side of m_in_buf.
1553 m_pIn_buf_ofs = m_in_buf;
1558 memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start));
1559 memset(m_in_buf, 0, sizeof(m_in_buf));
1560 memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end));
// Restart-interval state.
1562 m_restart_interval = 0;
1563 m_restarts_left = 0;
1564 m_next_restart_num = 0;
1566 m_max_mcus_per_row = 0;
1567 m_max_blocks_per_mcu = 0;
1568 m_max_mcus_per_col = 0;
1570 memset(m_last_dc_val, 0, sizeof(m_last_dc_val));
1571 m_pMCU_coefficients = nullptr;
1572 m_pSample_buf = nullptr;
1574 m_total_bytes_read = 0;
1576 m_pScan_line_0 = nullptr;
1577 m_pScan_line_1 = nullptr;
1579 // Ready the input buffer.
1582 // Prime the bit buffer.
// Every block starts out marked as having all 64 coefficients potentially set.
1589 for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++) {
1590 m_mcu_block_max_zag[i] = 64;
// Fixed-point helpers for the YCbCr -> RGB tables: 16 fractional bits.
1594 #define SCALEBITS 16
// 0.5 in that fixed-point format; added before a >> SCALEBITS to round.
1595 #define ONE_HALF ((int) 1 << (SCALEBITS-1))
// Converts a floating-point constant to the fixed-point format (x * 2^SCALEBITS + 0.5, cast to int).
1596 #define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5f))
1599 // Create a few tables that allow us to quickly convert YCbCr to RGB.
// m_crr and m_cbb hold finished integer contributions (rounded and shifted
// down by SCALEBITS). m_crg and m_cbg stay in fixed point: the converters add
// them together and shift the sum right by 16, and the rounding term ONE_HALF
// is folded into m_cbg only so it is applied exactly once per sum.
// k is presumably the table index re-centred around zero - TODO confirm.
1600 void jpeg_decoder::create_look_ups()
1602 for (int i = 0; i <= 255; i++) {
1604 m_crr[i] = ( FIX(1.40200f) * k + ONE_HALF) >> SCALEBITS;
1605 m_cbb[i] = ( FIX(1.77200f) * k + ONE_HALF) >> SCALEBITS;
1606 m_crg[i] = (-FIX(0.71414f)) * k;
1607 m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF;
1612 // This method throws back into the stream any bytes that were read
1613 // into the bit buffer during initial marker scanning.
// The bytes are pushed back with stuff_char() and the bit buffer is then
// refilled with get_bits_no_markers().
1614 void jpeg_decoder::fix_in_buffer()
1616 // In case any 0xFF's were pulled into the buffer during marker scanning.
// The bit buffer must hold a whole number of bytes at this point.
1617 JPGD_ASSERT((m_bits_left & 7) == 0);
// Push back the low two bytes only if the buffer actually holds them...
1619 if (m_bits_left == 16) stuff_char( (uint8_t)(m_bit_buf & 0xFF));
1620 if (m_bits_left >= 8) stuff_char( (uint8_t)((m_bit_buf >> 8) & 0xFF));
// ...and the upper two bytes unconditionally.
1622 stuff_char((uint8_t)((m_bit_buf >> 16) & 0xFF));
1623 stuff_char((uint8_t)((m_bit_buf >> 24) & 0xFF));
// Two 16-bit reads refill the bit buffer.
1626 get_bits_no_markers(16);
1627 get_bits_no_markers(16);
// Runs the inverse DCT over every block of one MCU. Coefficients are read
// from m_pMCU_coefficients; samples go to the slot of m_pSample_buf belonging
// to mcu_row (m_blocks_per_mcu blocks of 64 bytes each per MCU).
// m_mcu_block_max_zag[] is passed to idct() for each block.
1631 void jpeg_decoder::transform_mcu(int mcu_row)
1633 jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1634 uint8_t* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
1636 for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) {
1637 idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
// Lookup used by transform_mcu_expand(): indexed by (max zigzag position - 1),
// yields the switch selector for the P_Q/R_S specialization to run.
// The values appear to pack two counts as (hi nibble << 4 | lo nibble), e.g.
// 17 = 0x11 and 136 = 0x88, matching the <N, M> template argument pairs used
// there - TODO confirm against the case labels.
1644 static const uint8_t s_max_rc[64] =
1646 17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
1647 102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
1648 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
1649 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
// Variant of transform_mcu() for frequency-domain chroma upsampling.
// The first m_expanded_blocks_per_component blocks go through the regular
// idct(). The two chroma blocks are each expanded into four blocks: P/Q/R/S
// matrices are derived from the coefficients, combined by sums and
// differences, and each combination is run through idct_4x4().
1653 void jpeg_decoder::transform_mcu_expand(int mcu_row)
1655 jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1656 uint8_t* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;
1660 for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++) {
1661 idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
1666 // Chroma IDCT, with upsampling
1667 jpgd_block_t temp_block[64];
1669 for (int i = 0; i < 2; i++) {
1670 DCT_Upsample::Matrix44 P, Q, R, S;
1671 JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] >= 1);
1672 JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] <= 64);
// mcu_block is advanced here, after the asserts above have inspected its entry.
1674 int max_zag = m_mcu_block_max_zag[mcu_block++] - 1;
1675 if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
// Pick the specialization that matches how many coefficients are populated,
// so the calc() routines can skip terms known to be zero.
1677 switch (s_max_rc[max_zag]) {
1679 DCT_Upsample::P_Q<1, 1>::calc(P, Q, pSrc_ptr);
1680 DCT_Upsample::R_S<1, 1>::calc(R, S, pSrc_ptr);
1683 DCT_Upsample::P_Q<1, 2>::calc(P, Q, pSrc_ptr);
1684 DCT_Upsample::R_S<1, 2>::calc(R, S, pSrc_ptr);
1687 DCT_Upsample::P_Q<2, 2>::calc(P, Q, pSrc_ptr);
1688 DCT_Upsample::R_S<2, 2>::calc(R, S, pSrc_ptr);
1691 DCT_Upsample::P_Q<3, 2>::calc(P, Q, pSrc_ptr);
1692 DCT_Upsample::R_S<3, 2>::calc(R, S, pSrc_ptr);
1695 DCT_Upsample::P_Q<3, 3>::calc(P, Q, pSrc_ptr);
1696 DCT_Upsample::R_S<3, 3>::calc(R, S, pSrc_ptr);
1699 DCT_Upsample::P_Q<3, 4>::calc(P, Q, pSrc_ptr);
1700 DCT_Upsample::R_S<3, 4>::calc(R, S, pSrc_ptr);
1703 DCT_Upsample::P_Q<4, 4>::calc(P, Q, pSrc_ptr);
1704 DCT_Upsample::R_S<4, 4>::calc(R, S, pSrc_ptr);
1707 DCT_Upsample::P_Q<5, 4>::calc(P, Q, pSrc_ptr);
1708 DCT_Upsample::R_S<5, 4>::calc(R, S, pSrc_ptr);
1711 DCT_Upsample::P_Q<5, 5>::calc(P, Q, pSrc_ptr);
1712 DCT_Upsample::R_S<5, 5>::calc(R, S, pSrc_ptr);
1715 DCT_Upsample::P_Q<5, 6>::calc(P, Q, pSrc_ptr);
1716 DCT_Upsample::R_S<5, 6>::calc(R, S, pSrc_ptr);
1719 DCT_Upsample::P_Q<6, 6>::calc(P, Q, pSrc_ptr);
1720 DCT_Upsample::R_S<6, 6>::calc(R, S, pSrc_ptr);
1723 DCT_Upsample::P_Q<7, 6>::calc(P, Q, pSrc_ptr);
1724 DCT_Upsample::R_S<7, 6>::calc(R, S, pSrc_ptr);
1727 DCT_Upsample::P_Q<7, 7>::calc(P, Q, pSrc_ptr);
1728 DCT_Upsample::R_S<7, 7>::calc(R, S, pSrc_ptr);
1731 DCT_Upsample::P_Q<7, 8>::calc(P, Q, pSrc_ptr);
1732 DCT_Upsample::R_S<7, 8>::calc(R, S, pSrc_ptr);
1735 DCT_Upsample::P_Q<8, 8>::calc(P, Q, pSrc_ptr);
1736 DCT_Upsample::R_S<8, 8>::calc(R, S, pSrc_ptr);
// a = P + Q, b = P - Q, c = R + S, d = R - S. P and R are reused in place to
// hold the differences, so b and d are references rather than copies.
1741 DCT_Upsample::Matrix44 a(P + Q); P -= Q;
1742 DCT_Upsample::Matrix44& b = P;
1743 DCT_Upsample::Matrix44 c(R + S); R -= S;
1744 DCT_Upsample::Matrix44& d = R;
// Four output blocks per chroma component: a+c, a-c, b+d, b-d.
1746 DCT_Upsample::Matrix44::add_and_store(temp_block, a, c);
1747 idct_4x4(temp_block, pDst_ptr);
1750 DCT_Upsample::Matrix44::sub_and_store(temp_block, a, c);
1751 idct_4x4(temp_block, pDst_ptr);
1754 DCT_Upsample::Matrix44::add_and_store(temp_block, b, d);
1755 idct_4x4(temp_block, pDst_ptr);
1758 DCT_Upsample::Matrix44::sub_and_store(temp_block, b, d);
1759 idct_4x4(temp_block, pDst_ptr);
1766 // Loads and dequantizes the next row of (already decoded) coefficients.
1767 // Progressive images only.
// For each MCU in the row, every block's coefficients are fetched from the
// m_dc_coeffs/m_ac_coeffs buffers, dequantized in place in
// m_pMCU_coefficients, and the MCU is then transformed to samples.
1768 void jpeg_decoder::load_next_row()
1773 int mcu_row, mcu_block, row_block = 0;
1774 int component_num, component_id;
// Horizontal block position per component within the current row.
1775 int block_x_mcu[JPGD_MAX_COMPONENTS];
1777 memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int));
1779 for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) {
1780 int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
1782 for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) {
1783 component_id = m_mcu_org[mcu_block];
1784 q = m_quant[m_comp_quant[component_id]];
1785 p = m_pMCU_coefficients + 64 * mcu_block;
1787 jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
1788 jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
// Entries 1..63 come from the AC buffer (entry 0 is handled separately).
1790 memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t));
// Find the last non-zero coefficient in zigzag order; i stops at 0 if none is found.
1792 for (i = 63; i > 0; i--) {
1793 if (p[g_ZAG[i]]) break;
1796 m_mcu_block_max_zag[mcu_block] = i + 1;
// Dequantize only up to that position; everything beyond it is zero.
1798 for ( ; i >= 0; i--) {
1800 p[g_ZAG[i]] = static_cast<jpgd_block_t>(p[g_ZAG[i]] * q[i]);
// Advance the block position: one block at a time for single-component
// scans, otherwise step through the component's h_samp x v_samp group.
1806 if (m_comps_in_scan == 1) block_x_mcu[component_id]++;
1808 if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) block_x_mcu_ofs = 0;
1809 if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) {
1810 block_y_mcu_ofs = 0;
1811 block_x_mcu[component_id] += m_comp_h_samp[component_id];
1815 if (m_freq_domain_chroma_upsample) transform_mcu_expand(mcu_row);
1816 else transform_mcu(mcu_row);
// Move the per-component vertical block position on to the next row.
1818 if (m_comps_in_scan == 1) m_block_y_mcu[m_comp_list[0]]++;
1820 for (component_num = 0; component_num < m_comps_in_scan; component_num++) {
1821 component_id = m_comp_list[component_num];
1822 m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
1828 // Restart interval processing.
// Scans ahead for the next restart marker, verifies it is the expected one,
// resets the DC predictors and restart counters, and refills the bit buffer.
// Any failure aborts with JPGD_BAD_RESTART_MARKER.
1829 void jpeg_decoder::process_restart()
1834 // Align to a byte boundary
1835 // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
1836 //get_bits_no_markers(m_bits_left & 7);
1838 // Let's scan a little bit to find the marker, but not _too_ far.
1839 // 1536 is a "fudge factor" that determines how much to scan.
1840 for (i = 1536; i > 0; i--) {
1841 if (get_char() == 0xFF) break;
1843 if (i == 0) stop_decoding(JPGD_BAD_RESTART_MARKER);
// Skip any further 0xFF bytes; the same scan budget i keeps counting down.
1845 for ( ; i > 0; i--) {
1846 if ((c = get_char()) != 0xFF) break;
1848 if (i == 0) stop_decoding(JPGD_BAD_RESTART_MARKER);
1850 // Is it the expected marker? If not, something bad happened.
1851 if (c != (m_next_restart_num + M_RST0)) stop_decoding(JPGD_BAD_RESTART_MARKER);
1853 // Reset each component's DC prediction values.
// NOTE(review): the byte count assumes each m_last_dc_val element is the size
// of a uint32_t - confirm against the member's declared type.
1854 memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint32_t));
1857 m_restarts_left = m_restart_interval;
// Restart markers cycle through 8 values.
1858 m_next_restart_num = (m_next_restart_num + 1) & 7;
1860 // Get the bit buffer going again...
1862 get_bits_no_markers(16);
1863 get_bits_no_markers(16);
// Helper used by decode_next_row() to scale a decoded AC coefficient c by its
// quantization table entry q (the call site notes it stands in for s * q[k]).
1867 static inline int dequantize_ac(int c, int q)
1873 // Decodes and dequantizes the next row of coefficients.
// For each MCU in the row: handle a pending restart, Huffman-decode and
// dequantize every block into m_pMCU_coefficients, then transform the MCU to
// samples. Corrupt coefficient runs abort with JPGD_DECODE_ERROR.
1874 void jpeg_decoder::decode_next_row()
1878 for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) {
1879 if ((m_restart_interval) && (m_restarts_left == 0)) process_restart();
1881 jpgd_block_t* p = m_pMCU_coefficients;
1883 for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64) {
1884 int component_id = m_mcu_org[mcu_block];
1885 jpgd_quant_t* q = m_quant[m_comp_quant[component_id]];
// DC coefficient: decoded as a difference from the component's previous DC value.
1888 s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r);
1889 s = JPGD_HUFF_EXTEND(r, s);
1891 m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]);
1893 p[0] = static_cast<jpgd_block_t>(s * q[0]);
// prev_num_set is how far this block's storage was populated last time;
// positions below it that this pass skips over must be cleared explicitly.
1895 int prev_num_set = m_mcu_block_max_zag[mcu_block];
1896 huff_tables *pH = m_pHuff_tabs[m_comp_ac_tab[component_id]];
1898 for (k = 1; k < 64; k++) {
1900 s = huff_decode(pH, extra_bits);
// A zero run of length r followed by a coefficient must stay inside the block.
1906 if ((k + r) > 63) stop_decoding(JPGD_DECODE_ERROR);
1907 if (k < prev_num_set) {
1908 int n = JPGD_MIN(r, prev_num_set - k);
1910 while (n--) p[g_ZAG[kt++]] = 0;
1914 s = JPGD_HUFF_EXTEND(extra_bits, s);
1915 JPGD_ASSERT(k < 64);
1916 p[g_ZAG[k]] = static_cast<jpgd_block_t>(dequantize_ac(s, q[k])); //s * q[k];
// Run of 16 zeros: same bounds check and stale-data clearing as above.
1919 if ((k + 16) > 64) stop_decoding(JPGD_DECODE_ERROR);
1920 if (k < prev_num_set) {
1921 int n = JPGD_MIN(16, prev_num_set - k);
1924 JPGD_ASSERT(kt <= 63);
1928 k += 16 - 1; // - 1 because the loop counter is k
1929 JPGD_ASSERT(p[g_ZAG[k]] == 0);
// Clear whatever remains of the previously populated range past position k.
1934 if (k < prev_num_set) {
1936 while (kt < prev_num_set) p[g_ZAG[kt++]] = 0;
1939 m_mcu_block_max_zag[mcu_block] = k;
1942 if (m_freq_domain_chroma_upsample) transform_mcu_expand(mcu_row);
1943 else transform_mcu(mcu_row);
1949 // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
// Converts one sample row of every MCU into m_pScan_line_0 using the
// m_crr/m_crg/m_cbg/m_cbb tables; results are clamped per channel.
1950 void jpeg_decoder::H1V1Convert()
// Row inside the current MCU row that is being emitted.
1952 int row = m_max_mcu_y_size - m_mcu_lines_left;
1953 uint8_t *d = m_pScan_line_0;
1954 uint8_t *s = m_pSample_buf + row * 8;
1956 for (int i = m_max_mcus_per_row; i > 0; i--) {
1957 for (int j = 0; j < 8; j++) {
// Green combines both fixed-point chroma terms before the shift down by 16.
1962 d[0] = clamp(y + m_crr[cr]);
1963 d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
1964 d[2] = clamp(y + m_cbb[cb]);
1973 // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
// Converts one sample row of every MCU into m_pScan_line_0. Each chroma
// sample is shared by two horizontally adjacent output pixels.
1974 void jpeg_decoder::H2V1Convert()
1976 int row = m_max_mcu_y_size - m_mcu_lines_left;
1977 uint8_t *d0 = m_pScan_line_0;
1978 uint8_t *y = m_pSample_buf + row * 8;
// Chroma samples start after the two luma blocks of the MCU.
1979 uint8_t *c = m_pSample_buf + 2*64 + row * 8;
1981 for (int i = m_max_mcus_per_row; i > 0; i--) {
1982 for (int l = 0; l < 2; l++) {
1983 for (int j = 0; j < 4; j++) {
1988 int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
// First pixel of the pair at byte offsets 0..2, second at 4..6 (the pixel
// stride in the scanline appears to be 4 bytes).
1992 d0[0] = clamp(yy+rc);
1993 d0[1] = clamp(yy+gc);
1994 d0[2] = clamp(yy+bc);
1998 d0[4] = clamp(yy+rc);
1999 d0[5] = clamp(yy+gc);
2000 d0[6] = clamp(yy+bc);
2013 // YCbCr H1V2 (1x2:1:1, 4 m_blocks per MCU) to RGB
// Produces two output scanlines per call (m_pScan_line_0 and m_pScan_line_1);
// each chroma sample is shared by the two vertically adjacent pixels.
2014 void jpeg_decoder::H1V2Convert()
2016 int row = m_max_mcu_y_size - m_mcu_lines_left;
2017 uint8_t *d0 = m_pScan_line_0;
2018 uint8_t *d1 = m_pScan_line_1;
// Rows 0..7 come from the first luma block, rows 8..15 from the second.
2022 if (row < 8) y = m_pSample_buf + row * 8;
2023 else y = m_pSample_buf + 64*1 + (row & 7) * 8;
// Chroma has half the vertical resolution, hence row >> 1.
2025 c = m_pSample_buf + 64*2 + (row >> 1) * 8;
2027 for (int i = m_max_mcus_per_row; i > 0; i--) {
2028 for (int j = 0; j < 8; j++) {
2033 int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
2037 d0[0] = clamp(yy+rc);
2038 d0[1] = clamp(yy+gc);
2039 d0[2] = clamp(yy+bc);
2043 d1[0] = clamp(yy+rc);
2044 d1[1] = clamp(yy+gc);
2045 d1[2] = clamp(yy+bc);
2057 // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
// Produces two output scanlines per call; each chroma sample is shared by a
// 2x2 group of output pixels (two on each scanline).
2058 void jpeg_decoder::H2V2Convert()
2060 int row = m_max_mcu_y_size - m_mcu_lines_left;
2061 uint8_t *d0 = m_pScan_line_0;
2062 uint8_t *d1 = m_pScan_line_1;
// Rows 0..7 come from the upper pair of luma blocks, rows 8..15 from the lower pair.
2066 if (row < 8) y = m_pSample_buf + row * 8;
2067 else y = m_pSample_buf + 64*2 + (row & 7) * 8;
// Chroma blocks follow the four luma blocks and have half the vertical resolution.
2069 c = m_pSample_buf + 64*4 + (row >> 1) * 8;
2071 for (int i = m_max_mcus_per_row; i > 0; i--) {
2072 for (int l = 0; l < 2; l++) {
2073 for (int j = 0; j < 8; j += 2) {
2078 int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
// Two pixels on the first scanline...
2082 d0[0] = clamp(yy+rc);
2083 d0[1] = clamp(yy+gc);
2084 d0[2] = clamp(yy+bc);
2088 d0[4] = clamp(yy+rc);
2089 d0[5] = clamp(yy+gc);
2090 d0[6] = clamp(yy+bc);
// ...and the two pixels below them on the second scanline.
2094 d1[0] = clamp(yy+rc);
2095 d1[1] = clamp(yy+gc);
2096 d1[2] = clamp(yy+bc);
2100 d1[4] = clamp(yy+rc);
2101 d1[5] = clamp(yy+gc);
2102 d1[6] = clamp(yy+bc);
2118 // Y (1 block per MCU) to 8-bit grayscale
// Copies the 8 samples of the current row from each MCU's block straight
// into m_pScan_line_0; no colour conversion is involved.
2119 void jpeg_decoder::gray_convert()
2121 int row = m_max_mcu_y_size - m_mcu_lines_left;
2122 uint8_t *d = m_pScan_line_0;
2123 uint8_t *s = m_pSample_buf + row * 8;
2125 for (int i = m_max_mcus_per_row; i > 0; i--) {
// The 8 bytes are moved as two 32-bit words.
// NOTE(review): accessing uint8_t storage through uint32_t* assumes 4-byte
// alignment of both buffers and is not covered by the aliasing rules; an
// 8-byte memcpy would be the portable equivalent.
2126 *(uint32_t *)d = *(uint32_t *)s;
2127 *(uint32_t *)(&d[4]) = *(uint32_t *)(&s[4]);
// Converts one line of YCbCr samples to RGB for the layout in which every
// component occupies m_expanded_blocks_per_component blocks per MCU.
// Output goes to m_pScan_line_0.
2134 void jpeg_decoder::expanded_convert()
// Index of the current line inside the MCU row, counted from its top.
2136 int row = m_max_mcu_y_size - m_mcu_lines_left;
// Start of the luma line: skip whole block rows (row / 8), then lines inside the block.
2137 uint8_t* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp[0] + (row & 7) * 8;
2138 uint8_t* d = m_pScan_line_0;
2140 for (int i = m_max_mcus_per_row; i > 0; i--) {
// Walk the MCU line in groups of eight pixels; each group lives in its own 64-sample block.
2141 for (int k = 0; k < m_max_mcu_x_size; k += 8) {
2142 const int Y_ofs = k * 8;
// Cb and Cr blocks sit one and two component-strides after the matching Y block.
2143 const int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
2144 const int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
2145 for (int j = 0; j < 8; j++) {
2146 int y = Py[Y_ofs + j];
2147 int cb = Py[Cb_ofs + j];
2148 int cr = Py[Cr_ofs + j];
// Table-driven YCbCr -> RGB; the green tables are 16.16 fixed point.
2150 d[0] = clamp(y + m_crr[cr]);
2151 d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
2152 d[2] = clamp(y + m_cbb[cb]);
// Advance to the next MCU's samples.
2158 Py += 64 * m_expanded_blocks_per_mcu;
2163 // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
2164 void jpeg_decoder::find_eoi()
// The marker search is only performed for non-progressive streams.
2166 if (!m_progressive_flag) {
2167 // Attempt to read the EOI marker.
2168 //get_bits_no_markers(m_bits_left & 7);
2170 // Prime the bit buffer
2175 // The next marker _should_ be EOI
// Bytes still sitting unread in the input buffer do not count as consumed.
2178 m_total_bytes_read -= m_in_buf_left;
// Produces the next output scan line.
// pScan_line receives a pointer to one of the decoder's internal scan line
// buffers; pScan_line_len receives m_real_dest_bytes_per_scan_line.
// Returns JPGD_SUCCESS when a line was produced, JPGD_DONE when no lines are
// left, and JPGD_FAILED on a prior/current error or if decoding was not started.
2182 int jpeg_decoder::decode(const void** pScan_line, uint32_t* pScan_line_len)
2184 if ((m_error_code) || (!m_ready_flag)) return JPGD_FAILED;
2185 if (m_total_lines_left == 0) return JPGD_DONE;
// All lines of the current MCU row have been handed out: fetch the next MCU row.
2186 if (m_mcu_lines_left == 0) {
// A nonzero setjmp() result is reported to the caller as a failure.
2187 if (setjmp(m_jmp_state)) return JPGD_FAILED;
2188 if (m_progressive_flag) load_next_row();
2189 else decode_next_row();
2190 // Find the EOI marker if that was the last row.
2191 if (m_total_lines_left <= m_max_mcu_y_size) find_eoi();
2192 m_mcu_lines_left = m_max_mcu_y_size;
2195 if (m_freq_domain_chroma_upsample) {
2197 *pScan_line = m_pScan_line_0;
// Otherwise the conversion depends on the scan type.
2199 switch (m_scan_type) {
// Two-line case: m_pScan_line_0 is returned on even m_mcu_lines_left, m_pScan_line_1 otherwise.
2201 if ((m_mcu_lines_left & 1) == 0) {
2203 *pScan_line = m_pScan_line_0;
2205 else *pScan_line = m_pScan_line_1;
2210 *pScan_line = m_pScan_line_0;
// Second two-line case, same even/odd selection.
2214 if ((m_mcu_lines_left & 1) == 0) {
2216 *pScan_line = m_pScan_line_0;
2217 } else *pScan_line = m_pScan_line_1;
2222 *pScan_line = m_pScan_line_0;
2225 case JPGD_GRAYSCALE: {
2227 *pScan_line = m_pScan_line_0;
// Report the unpadded line length in bytes.
2233 *pScan_line_len = m_real_dest_bytes_per_scan_line;
2235 m_total_lines_left--;
2237 return JPGD_SUCCESS;
2241 // Creates the tables needed for efficient Huffman decoding.
// index selects the source table (m_huff_num / m_huff_val / m_huff_ac);
// pH is the huff_tables structure that gets filled in.
// Codes of up to 8 bits are resolved through the look_up / look_up2 tables;
// longer codes are resolved through pH->tree.
2242 void jpeg_decoder::make_huff_table(int index, huff_tables *pH)
// Code length and code value per symbol, in table order.
2245 uint8_t huffsize[257];
2246 uint32_t huffcode[257];
2254 pH->ac_table = m_huff_ac[index] != 0;
// Expand the per-length symbol counts (lengths 1..16) into a flat list of code sizes.
2257 for (l = 1; l <= 16; l++) {
2258 for (i = 1; i <= m_huff_num[index][l]; i++) {
2259 huffsize[p++] = static_cast<uint8_t>(l);
// Assign code values: consecutive entries of the same size get consecutive codes.
2269 while (huffsize[p]) {
2270 while (huffsize[p] == si) {
2271 huffcode[p++] = code;
// Start from empty decoding tables.
2278 memset(pH->look_up, 0, sizeof(pH->look_up));
2279 memset(pH->look_up2, 0, sizeof(pH->look_up2));
2280 memset(pH->tree, 0, sizeof(pH->tree));
2281 memset(pH->code_size, 0, sizeof(pH->code_size));
// i is the symbol value for table position p.
2287 i = m_huff_val[index][p];
2289 code_size = huffsize[p];
2290 pH->code_size[i] = static_cast<uint8_t>(code_size);
// Short code: fill every 8-bit table slot whose leading bits equal this code.
2292 if (code_size <= 8) {
2293 code <<= (8 - code_size);
2294 for (l = 1 << (8 - code_size); l > 0; l--) {
2295 JPGD_ASSERT(i < 256);
2296 pH->look_up[code] = i;
2297 bool has_extrabits = false;
// The low nibble of the symbol is the number of extra bits that follow the code.
2299 int num_extra_bits = i & 15;
2300 int bits_to_fetch = code_size;
2302 if (num_extra_bits) {
2303 int total_codesize = code_size + num_extra_bits;
// If code plus extra bits fit in 8 bits, the extra bits are pre-extracted into look_up2.
2304 if (total_codesize <= 8) {
2305 has_extrabits = true;
2306 extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
2307 JPGD_ASSERT(extra_bits <= 0x7FFF);
2308 bits_to_fetch += num_extra_bits;
// look_up2 packing: symbol in bits 0-7, bit count from bit 8, flag 0x8000 and
// the extra bits from bit 16 when they were pre-extracted.
2311 if (!has_extrabits) pH->look_up2[code] = i | (bits_to_fetch << 8);
2312 else pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
// Long code: the first 8 bits select a look_up slot that refers into the tree.
2316 subtree = (code >> (code_size - 8)) & 0xFF;
2317 currententry = pH->look_up[subtree];
// No entry for this prefix yet: claim the next free one.
2319 if (currententry == 0) {
2320 pH->look_up[subtree] = currententry = nextfreeentry;
2321 pH->look_up2[subtree] = currententry = nextfreeentry;
// Left-align the remaining (code_size - 8) bits so bit 15 is the next bit to test.
2325 code <<= (16 - (code_size - 8));
// Walk (and extend where needed) the tree for every remaining bit except the last.
2327 for (l = code_size; l > 9; l--) {
// A 0 bit selects the adjacent entry (currententry - 1).
2328 if ((code & 0x8000) == 0) currententry--;
// Entries are addressed through negative numbers: slot = -currententry - 1.
2329 if (pH->tree[-currententry - 1] == 0) {
2330 pH->tree[-currententry - 1] = nextfreeentry;
2331 currententry = nextfreeentry;
2333 } else currententry = pH->tree[-currententry - 1];
// The last bit selects the slot that stores the symbol itself.
2336 if ((code & 0x8000) == 0) currententry--;
2337 pH->tree[-currententry - 1] = i;
2344 // Verifies the quantization tables needed for this scan are available.
// Calls stop_decoding(JPGD_UNDEFINED_QUANT_TABLE) when a table is missing.
2345 void jpeg_decoder::check_quant_tables()
// Each component in the scan maps to a quantization table via m_comp_quant.
2347 for (int i = 0; i < m_comps_in_scan; i++) {
2348 if (m_quant[m_comp_quant[m_comp_list[i]]] == nullptr) stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
2353 // Verifies that all the Huffman tables needed for this scan are available.
// Calls stop_decoding(JPGD_UNDEFINED_HUFF_TABLE) when one is missing, then
// (re)builds the decoding tables for every defined Huffman table.
2354 void jpeg_decoder::check_huff_tables()
2356 for (int i = 0; i < m_comps_in_scan; i++) {
// A DC table is required when the scan starts at coefficient 0,
// an AC table when the scan extends past coefficient 0.
2357 if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == nullptr)) stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2358 if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == nullptr)) stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2361 for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++) {
2362 if (m_huff_num[i]) {
// Allocate the decoding structure on first use only; it is rebuilt every time.
2363 if (!m_pHuff_tabs[i]) m_pHuff_tabs[i] = (huff_tables *)alloc(sizeof(huff_tables));
2364 make_huff_table(i, m_pHuff_tabs[i]);
2370 // Determines the component order inside each MCU.
2371 // Also calcs how many MCU's are on each row, etc.
// Writes m_comp_h_blocks, m_comp_v_blocks, m_mcus_per_row, m_mcus_per_col,
// m_mcu_org and m_blocks_per_mcu.
2372 void jpeg_decoder::calc_mcu_block_order()
2374 int component_num, component_id;
2375 int max_h_samp = 0, max_v_samp = 0;
// Find the largest horizontal and vertical sampling factors in the frame.
2377 for (component_id = 0; component_id < m_comps_in_frame; component_id++) {
2378 if (m_comp_h_samp[component_id] > max_h_samp) {
2379 max_h_samp = m_comp_h_samp[component_id];
2381 if (m_comp_v_samp[component_id] > max_v_samp) {
2382 max_v_samp = m_comp_v_samp[component_id];
// Per-component size in 8x8 blocks: scale the image size by samp / max_samp
// (rounding up), then round up to whole blocks.
2386 for (component_id = 0; component_id < m_comps_in_frame; component_id++) {
2387 m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
2388 m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
// Single-component scan: the MCU grid is that component's block grid.
2391 if (m_comps_in_scan == 1) {
2392 m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]];
2393 m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]];
// Otherwise an MCU covers max_h_samp x max_v_samp blocks of the image.
2395 m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
2396 m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
// Single-component scan: one block per MCU.
2399 if (m_comps_in_scan == 1) {
2400 m_mcu_org[0] = m_comp_list[0];
2401 m_blocks_per_mcu = 1;
2403 m_blocks_per_mcu = 0;
// Multi-component scan: each component contributes h_samp * v_samp consecutive
// blocks, in scan order.
2405 for (component_num = 0; component_num < m_comps_in_scan; component_num++) {
2407 component_id = m_comp_list[component_num];
2408 num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id];
2409 while (num_blocks--) m_mcu_org[m_blocks_per_mcu++] = component_id;
2415 // Starts a new scan.
// Returns false when no SOS marker could be located.
2416 int jpeg_decoder::init_scan()
2418 if (!locate_sos_marker()) return false;
// Derive the MCU layout for this scan, then make sure its tables exist.
2420 calc_mcu_block_order();
2421 check_huff_tables();
2422 check_quant_tables();
// DC prediction restarts from zero at the beginning of every scan.
2424 memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint32_t));
// Re-arm the restart-interval bookkeeping when restart markers are in use.
2428 if (m_restart_interval) {
2429 m_restarts_left = m_restart_interval;
2430 m_next_restart_num = 0;
2437 // Starts a frame. Determines if the number of components or sampling factors
// Selects m_scan_type and the MCU geometry from the component count and the
// sampling factors, then allocates the scan line, coefficient and sample buffers.
// Unsupported configurations end in stop_decoding().
2439 void jpeg_decoder::init_frame()
// One component: grayscale, which must be sampled 1x1.
2443 if (m_comps_in_frame == 1) {
2444 if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1)) stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2445 m_scan_type = JPGD_GRAYSCALE;
2446 m_max_blocks_per_mcu = 1;
2447 m_max_mcu_x_size = 8;
2448 m_max_mcu_y_size = 8;
// Three components: both chroma components must be sampled 1x1; the luma
// sampling factors pick the scan type.
2449 } else if (m_comps_in_frame == 3) {
2450 if (((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) || ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1)))
2451 stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2453 if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1)) {
2454 m_scan_type = JPGD_YH1V1;
2455 m_max_blocks_per_mcu = 3;
2456 m_max_mcu_x_size = 8;
2457 m_max_mcu_y_size = 8;
2458 } else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1)) {
2459 m_scan_type = JPGD_YH2V1;
2460 m_max_blocks_per_mcu = 4;
2461 m_max_mcu_x_size = 16;
2462 m_max_mcu_y_size = 8;
2463 } else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2)) {
2464 m_scan_type = JPGD_YH1V2;
2465 m_max_blocks_per_mcu = 4;
2466 m_max_mcu_x_size = 8;
2467 m_max_mcu_y_size = 16;
2468 } else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2)) {
2469 m_scan_type = JPGD_YH2V2;
2470 m_max_blocks_per_mcu = 6;
2471 m_max_mcu_x_size = 16;
2472 m_max_mcu_y_size = 16;
2473 } else stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
// Any other component count is rejected.
2474 } else stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
// Number of MCUs needed to cover the image, rounding up in both directions.
2476 m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
2477 m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
2479 // These values are for the *destination* pixels: after conversion.
2480 if (m_scan_type == JPGD_GRAYSCALE) m_dest_bytes_per_pixel = 1;
2481 else m_dest_bytes_per_pixel = 4;
// Buffer stride rounds the width up to a multiple of 16 pixels; the "real"
// length is the exact width.
// NOTE(review): the 0xFFF0 mask also drops every bit above bit 15, so this
// presumably relies on the image width being bounded elsewhere - confirm.
2483 m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
2484 m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
2486 // Initialize two scan line buffers.
2487 m_pScan_line_0 = (uint8_t *)alloc(m_dest_bytes_per_scan_line, true);
// The second buffer is only allocated for the scan types with 16-line MCUs.
2488 if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2)) {
2489 m_pScan_line_1 = (uint8_t *)alloc(m_dest_bytes_per_scan_line, true);
2492 m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
2494 // Should never happen
2495 if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW) stop_decoding(JPGD_ASSERTION_ERROR);
2497 // Allocate the coefficient buffer, enough for one MCU
2498 m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t));
// Initialise the per-block max-zigzag value to 64 for every block of the MCU.
2500 for (i = 0; i < m_max_blocks_per_mcu; i++) {
2501 m_mcu_block_max_zag[i] = 64;
// "Expanded" layout: every component gets as many blocks per MCU as luma.
2504 m_expanded_blocks_per_component = m_comp_h_samp[0] * m_comp_v_samp[0];
2505 m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
2506 m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
2507 // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
2508 m_freq_domain_chroma_upsample = false;
2509 #if JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING
// 4 blocks per component times 3 components.
2510 m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
// The sample buffer holds one MCU row: expanded size when upsampling in the
// frequency domain, regular size otherwise.
2513 if (m_freq_domain_chroma_upsample)
2514 m_pSample_buf = (uint8_t *)alloc(m_expanded_blocks_per_row * 64);
2516 m_pSample_buf = (uint8_t *)alloc(m_max_blocks_per_row * 64);
// Nothing decoded yet: all image lines are pending, no MCU lines are buffered.
2518 m_total_lines_left = m_image_y_size;
2519 m_mcu_lines_left = 0;
2524 // The coeff_buf series of methods originally stored the coefficients
2525 // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2526 // was used to make this process more efficient. Now, we can store the entire
// Allocates a coeff_buf describing a grid of block_num_x * block_num_y blocks,
// each holding block_len_x * block_len_y coefficients, together with its
// data storage.
2528 jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y)
2530 coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf));
2531 cb->block_num_x = block_num_x;
2532 cb->block_num_y = block_num_y;
2533 cb->block_len_x = block_len_x;
2534 cb->block_len_y = block_len_y;
// Size of one block in bytes.
2535 cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t);
// NOTE(review): the total size is a plain product with no overflow check;
// presumably the image dimensions are bounded elsewhere - confirm.
2536 cb->pData = (uint8_t *)alloc(cb->block_size * block_num_x * block_num_y, true);
// Returns a pointer to the coefficients of block (block_x, block_y) in cb.
// Blocks are stored row by row, block_num_x blocks per row.
2541 inline jpgd_block_t *jpeg_decoder::coeff_buf_getp(coeff_buf *cb, int block_x, int block_y)
// Only the upper bounds are asserted here.
2543 JPGD_ASSERT((block_x < cb->block_num_x) && (block_y < cb->block_num_y));
2544 return (jpgd_block_t *)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x));
2548 // The following methods decode the various types of m_blocks encountered
2549 // in progressively encoded images.
// First DC scan: decodes the DC difference for one block, adds it to the
// component's running DC value and stores the result shifted left by
// m_successive_low.
2550 void jpeg_decoder::decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
2553 jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
// s is the decoded Huffman symbol; when nonzero, s further bits are read and sign-extended.
2555 if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0) {
2556 r = pD->get_bits_no_markers(s);
2557 s = JPGD_HUFF_EXTEND(r, s);
// Accumulate the difference into the per-component DC predictor.
2559 pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]);
// The shift is done on an unsigned value.
2560 p[0] = static_cast<jpgd_block_t>(static_cast<unsigned int>(s) << pD->m_successive_low);
// DC refinement scan: reads one bit and, when it is set, sets bit
// m_successive_low of the block's DC coefficient.
2564 void jpeg_decoder::decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
2566 if (pD->get_bits_no_markers(1)) {
2567 jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
2568 p[0] |= (1 << pD->m_successive_low);
// First AC scan: decodes the coefficients in the band
// [m_spectral_start, m_spectral_end] for one block and stores each value
// shifted left by m_successive_low.
2573 void jpeg_decoder::decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
// A pending end-of-band run is checked before anything is decoded.
2577 if (pD->m_eob_run) {
2581 jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
2583 for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++) {
2584 s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
// Skip r coefficients; running past index 63 is a decode error.
2588 if ((k += r) > 63) pD->stop_decoding(JPGD_DECODE_ERROR);
2589 r = pD->get_bits_no_markers(s);
2590 s = JPGD_HUFF_EXTEND(r, s);
// Coefficients arrive in zigzag order; g_ZAG maps k to the position in the block.
2591 p[g_ZAG[k]] = static_cast<jpgd_block_t>(static_cast<unsigned int>(s) << pD->m_successive_low);
// Advance by 15 here (the loop's own k++ then adds one more).
2594 if ((k += 15) > 63) pD->stop_decoding(JPGD_DECODE_ERROR);
// Start an end-of-band run of 2^r, plus r extra bits when r is nonzero.
2596 pD->m_eob_run = 1 << r;
2597 if (r) pD->m_eob_run += pD->get_bits_no_markers(r);
// AC refinement scan: adds one more bit of precision (bit m_successive_low)
// to already-nonzero coefficients of the band and places newly nonzero
// coefficients.
2606 void jpeg_decoder::decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
// p1 / m1: +1 and -1 at the bit position being refined.
2609 int p1 = 1 << pD->m_successive_low;
2610 int m1 = static_cast<unsigned int>(-1) << pD->m_successive_low;
2611 jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
2613 JPGD_ASSERT(pD->m_spectral_end <= 63);
2615 k = pD->m_spectral_start;
// No end-of-band run pending: decode symbols for this block.
2617 if (pD->m_eob_run == 0) {
2618 for ( ; k <= pD->m_spectral_end; k++) {
2619 s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
// Any size other than 1 is rejected here; the following bit then selects p1 for s.
2623 if (s != 1) pD->stop_decoding(JPGD_DECODE_ERROR);
2624 if (pD->get_bits_no_markers(1)) s = p1;
// Start an end-of-band run of 2^r, plus r extra bits when r is nonzero.
2628 pD->m_eob_run = 1 << r;
2629 if (r) pD->m_eob_run += pD->get_bits_no_markers(r);
// The "& 63" keeps the g_ZAG index in range.
2635 jpgd_block_t *this_coef = p + g_ZAG[k & 63];
// Already-nonzero coefficient: a set correction bit moves it away from zero
// by p1, unless the refined bit is already set.
2637 if (*this_coef != 0) {
2638 if (pD->get_bits_no_markers(1)) {
2639 if ((*this_coef & p1) == 0) {
2640 if (*this_coef >= 0) *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
2641 else *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
2648 } while (k <= pD->m_spectral_end);
// Store the new coefficient if there is one and k is still inside the block.
2650 if ((s) && (k < 64)) {
2651 p[g_ZAG[k]] = static_cast<jpgd_block_t>(s);
// Inside an end-of-band run: only correction bits for nonzero coefficients remain.
2656 if (pD->m_eob_run > 0) {
2657 for ( ; k <= pD->m_spectral_end; k++) {
2658 jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis
2660 if (*this_coef != 0) {
2661 if (pD->get_bits_no_markers(1)) {
2662 if ((*this_coef & p1) == 0) {
2663 if (*this_coef >= 0) *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
2664 else *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
2674 // Decode a scan in a progressively encoded image.
// Visits every block of every MCU in the scan and calls decode_block_func
// with the block's coordinates inside its component.
2675 void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func)
2677 int mcu_row, mcu_col, mcu_block;
// Per-component block coordinates of the current MCU's top-left block.
2678 int block_x_mcu[JPGD_MAX_COMPONENTS], m_block_y_mcu[JPGD_MAX_COMPONENTS];
2680 memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
// Note the naming: mcu_col counts up to m_mcus_per_col in the outer loop and
// mcu_row counts up to m_mcus_per_row in the inner loop.
2682 for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++) {
2683 int component_num, component_id;
2684 memset(block_x_mcu, 0, sizeof(block_x_mcu));
2686 for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) {
// Offsets of the current block inside the MCU.
2687 int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
2689 if ((m_restart_interval) && (m_restarts_left == 0)) process_restart();
2691 for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) {
2692 component_id = m_mcu_org[mcu_block];
2693 decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
// Single-component scan: simply step one block to the right.
2695 if (m_comps_in_scan == 1) block_x_mcu[component_id]++;
// Otherwise walk the component's h_samp x v_samp blocks row by row, then move
// that component on to the next MCU.
2697 if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) {
2698 block_x_mcu_ofs = 0;
2700 if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) {
2701 block_y_mcu_ofs = 0;
2702 block_x_mcu[component_id] += m_comp_h_samp[component_id];
// End of an MCU row: advance the vertical block position.
2710 if (m_comps_in_scan == 1) m_block_y_mcu[m_comp_list[0]]++;
2712 for (component_num = 0; component_num < m_comps_in_scan; component_num++) {
2713 component_id = m_comp_list[component_num];
2714 m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
2721 // Decode a progressively encoded image.
// Allocates the DC and AC coefficient buffers for every component, then
// validates and decodes scans until init_scan() reports that there are no more.
2722 void jpeg_decoder::init_progressive()
2726 if (m_comps_in_frame == 4) stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2728 // Allocate the coefficient buffers.
// DC buffers hold one coefficient per block (1x1), AC buffers a full 8x8 block.
2729 for (i = 0; i < m_comps_in_frame; i++) {
2730 m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1);
2731 m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8);
2735 int dc_only_scan, refinement_scan;
2736 pDecode_block_func decode_block_func;
// Stop when no further scan is found.
2738 if (!init_scan()) break;
// A scan starting at coefficient 0 is a DC scan; a nonzero high
// successive-approximation value marks a refinement scan.
2740 dc_only_scan = (m_spectral_start == 0);
2741 refinement_scan = (m_successive_high != 0);
2743 if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63)) stop_decoding(JPGD_BAD_SOS_SPECTRAL);
// A nonzero m_spectral_end is rejected on this path.
2746 if (m_spectral_end) stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2747 } else if (m_comps_in_scan != 1) { /* AC scans can only contain one component */
2748 stop_decoding(JPGD_BAD_SOS_SPECTRAL);
// Each refinement scan must refine exactly one bit.
2751 if ((refinement_scan) && (m_successive_low != m_successive_high - 1)) stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
// Pick the block decoder: DC first/refine ...
2754 if (refinement_scan) decode_block_func = decode_block_dc_refine;
2755 else decode_block_func = decode_block_dc_first;
// ... or AC first/refine.
2757 if (refinement_scan) decode_block_func = decode_block_ac_refine;
2758 else decode_block_func = decode_block_ac_first;
2760 decode_scan(decode_block_func);
// After the scans: treat all frame components as one scan and recompute the
// MCU layout.
2766 m_comps_in_scan = m_comps_in_frame;
2768 for (i = 0; i < m_comps_in_frame; i++) {
2772 calc_mcu_block_order();
// Prepares a sequential image: a scan that cannot be initialised is reported
// through stop_decoding(JPGD_UNEXPECTED_MARKER).
2776 void jpeg_decoder::init_sequential()
2778 if (!init_scan()) stop_decoding(JPGD_UNEXPECTED_MARKER);
// Dispatches to the progressive or the sequential initialisation depending on
// m_progressive_flag.
2782 void jpeg_decoder::decode_start()
2785 if (m_progressive_flag) init_progressive();
2786 else init_sequential();
// Initialises the decoder for the stream pStream; the visible part locates the
// SOF marker.
2790 void jpeg_decoder::decode_init(jpeg_decoder_stream *pStream)
2793 locate_sof_marker();
// Constructs a decoder reading from pStream and runs decode_init() on it.
2797 jpeg_decoder::jpeg_decoder(jpeg_decoder_stream *pStream)
// A nonzero setjmp() result ends construction early; callers in this file
// check get_error_code() afterwards.
2799 if (setjmp(m_jmp_state)) return;
2800 decode_init(pStream);
// Prepares the decoder for decode() calls.
// Returns JPGD_SUCCESS when ready (also when it already was), JPGD_FAILED when
// an error is recorded or a nonzero setjmp() result is seen.
2804 int jpeg_decoder::begin_decoding()
// Calling this again once ready is a no-op.
2806 if (m_ready_flag) return JPGD_SUCCESS;
2807 if (m_error_code) return JPGD_FAILED;
2808 if (setjmp(m_jmp_state)) return JPGD_FAILED;
2811 m_ready_flag = true;
2813 return JPGD_SUCCESS;
// Destructor of the decoder.
2817 jpeg_decoder::~jpeg_decoder()
// Closes the stream; the visible part clears the error flag.
2823 void jpeg_decoder_file_stream::close()
2830 m_error_flag = false;
// Destructor of the file stream.
2834 jpeg_decoder_file_stream::~jpeg_decoder_file_stream()
// Opens the file Pfilename for binary reading.
// Returns true when the file handle was obtained.
2840 bool jpeg_decoder_file_stream::open(const char *Pfilename)
2845 m_error_flag = false;
// fopen_s is used when building with MSVC.
2847 #if defined(_MSC_VER)
2849 fopen_s(&m_pFile, Pfilename, "rb");
2851 m_pFile = fopen(Pfilename, "rb");
2853 return m_pFile != nullptr;
// Reads up to max_bytes_to_read bytes from the file into pBuf.
// Returns -1 when no file is open or the stream is already in the error state.
2857 int jpeg_decoder_file_stream::read(uint8_t *pBuf, int max_bytes_to_read, bool *pEOF_flag)
2859 if (!m_pFile) return -1;
2866 if (m_error_flag) return -1;
2868 int bytes_read = static_cast<int>(fread(pBuf, 1, max_bytes_to_read, m_pFile));
// A short read is checked with ferror(); a read error puts the stream into the
// error state.
2869 if (bytes_read < max_bytes_to_read) {
2870 if (ferror(m_pFile)) {
2871 m_error_flag = true;
// Attaches the stream to the memory block pSrc_data of the given size.
// The pointer is stored as-is; no copy is made in the visible part.
2881 bool jpeg_decoder_mem_stream::open(const uint8_t *pSrc_data, uint32_t size)
2884 m_pSrc_data = pSrc_data;
// Copies up to max_bytes_to_read bytes from the memory block into pBuf and
// advances the read offset.
// Returns the number of bytes copied, or -1 when no source data is attached.
2891 int jpeg_decoder_mem_stream::read(uint8_t *pBuf, int max_bytes_to_read, bool *pEOF_flag)
2894 if (!m_pSrc_data) return -1;
// Never read past the end of the source data.
2896 uint32_t bytes_remaining = m_size - m_ofs;
2897 if ((uint32_t)max_bytes_to_read > bytes_remaining) {
2898 max_bytes_to_read = bytes_remaining;
2901 memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read);
2902 m_ofs += max_bytes_to_read;
2904 return max_bytes_to_read;
2908 /************************************************************************/
2909 /* External Class Implementation */
2910 /************************************************************************/
// Creates a decoder for the JPEG held in the memory block data/size.
// width and height are optional outputs (either may be nullptr) that receive
// the decoder's reported image dimensions.
2913 jpeg_decoder* jpgdHeader(const char* data, int size, int* width, int* height)
// The memory stream is created here and handed straight to the decoder.
2915 auto decoder = new jpeg_decoder(new jpeg_decoder_mem_stream((const uint8_t*)data, size));
// The constructor reports failures through the error code.
2916 if (decoder->get_error_code() != JPGD_SUCCESS) {
2921 if (width) *width = decoder->get_width();
2922 if (height) *height = decoder->get_height();
// Creates a decoder for the JPEG file named filename.
// Returns nullptr when the file cannot be opened.
// width and height are optional outputs (either may be nullptr) that receive
// the decoder's reported image dimensions.
2928 jpeg_decoder* jpgdHeader(const char* filename, int* width, int* height)
2930 auto fileStream = new jpeg_decoder_file_stream();
// NOTE(review): fileStream is allocated with new just above and is not
// deleted on this early return, so it leaks whenever open() fails.
2931 if (!fileStream->open(filename)) return nullptr;
2933 auto decoder = new jpeg_decoder(fileStream);
// The constructor reports failures through the error code.
2934 if (decoder->get_error_code() != JPGD_SUCCESS) {
2939 if (width) *width = decoder->get_width();
2940 if (height) *height = decoder->get_height();
// Releases a decoder (presumably one returned by jpgdHeader() - the body is
// not visible here, confirm).
2946 void jpgdDelete(jpeg_decoder* decoder)
2952 unsigned char* jpgdDecompress(jpeg_decoder* decoder)
2954 if (!decoder) return nullptr;
2956 int req_comps = 4; //TODO: fixed 4 channel components now?
2957 if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4)) return nullptr;
2959 auto image_width = decoder->get_width();
2960 auto image_height = decoder->get_height();
2961 //auto actual_comps = decoder->get_num_components();
2963 if (decoder->begin_decoding() != JPGD_SUCCESS) return nullptr;
2965 const int dst_bpl = image_width * req_comps;
2966 uint8_t *pImage_data = (uint8_t*)malloc(dst_bpl * image_height);
2967 if (!pImage_data) return nullptr;
2969 for (int y = 0; y < image_height; y++) {
2970 const uint8_t* pScan_line;
2971 uint32_t scan_line_len;
2972 if (decoder->decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
2977 uint8_t *pDst = pImage_data + y * dst_bpl;
2980 if ((req_comps == 4) && (decoder->get_num_components() == 3)) {
2981 for (int x = 0; x < image_width; x++) {
2982 pDst[0] = pScan_line[x*4+2];
2983 pDst[1] = pScan_line[x*4+1];
2984 pDst[2] = pScan_line[x*4+0];
2988 } else if (((req_comps == 1) && (decoder->get_num_components() == 1)) || ((req_comps == 4) && (decoder->get_num_components() == 3))) {
2989 memcpy(pDst, pScan_line, dst_bpl);
2990 } else if (decoder->get_num_components() == 1) {
2991 if (req_comps == 3) {
2992 for (int x = 0; x < image_width; x++) {
2993 uint8_t luma = pScan_line[x];
3000 for (int x = 0; x < image_width; x++) {
3001 uint8_t luma = pScan_line[x];
3009 } else if (decoder->get_num_components() == 3) {
3010 if (req_comps == 1) {
3011 const int YR = 19595, YG = 38470, YB = 7471;
3012 for (int x = 0; x < image_width; x++) {
3013 int r = pScan_line[x*4+0];
3014 int g = pScan_line[x*4+1];
3015 int b = pScan_line[x*4+2];
3016 *pDst++ = static_cast<uint8_t>((r * YR + g * YG + b * YB + 32768) >> 16);
3019 for (int x = 0; x < image_width; x++) {
3020 pDst[0] = pScan_line[x*4+0];
3021 pDst[1] = pScan_line[x*4+1];
3022 pDst[2] = pScan_line[x*4+2];