4 * This file was part of the Independent JPEG Group's software:
5 * Copyright (C) 1995-1997, Thomas G. Lane.
6 * libjpeg-turbo Modifications:
7 * Copyright (C) 2011, 2015, 2018, 2021-2022, D. R. Commander.
8 * Copyright (C) 2016, 2018, Matthieu Darbois.
9 * Copyright (C) 2020, Arm Limited.
10 * Copyright (C) 2021, Alex Richardson.
11 * For conditions of distribution and use, see the accompanying README.ijg
14 * This file contains Huffman entropy encoding routines for progressive JPEG.
16 * We do not support output suspension in this module, since the library
17 * currently does not allow multiple-scan files to be written with output
 * suspension.
21 #define JPEG_INTERNALS
25 #include "jconfigint.h"
31 #ifdef HAVE_BITSCANFORWARD64
32 #pragma intrinsic(_BitScanForward64)
34 #ifdef HAVE_BITSCANFORWARD
35 #pragma intrinsic(_BitScanForward)
40 #ifdef C_PROGRESSIVE_SUPPORTED
43 * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
44 * used for bit counting rather than the lookup table. This will reduce the
45 * memory footprint by 64k, which is important for some mobile applications
46 * that create many isolated instances of libjpeg-turbo (web browsers, for
47 * instance.) This may improve performance on some mobile platforms as well.
48 * This feature is enabled by default only on Arm processors, because some x86
49 * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
50 * shown to have a significant performance impact even on the x86 chips that
51 * have a fast implementation of it. When building for Armv6, you can
52 * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
53 * flags (this defines __thumb__).
/* Bit-count helpers used by the encoders below.
 * USE_CLZ_INTRINSIC is defined for GCC-compatible compilers targeting
 * __arm__ or __aarch64__, and for _M_ARM or _M_ARM64, unless __thumb__ is
 * defined without __thumb2__.  With it, JPEG_NBITS_NONZERO(x) is 32 minus the
 * leading-zero count (_CountLeadingZeros for _MSC_VER without __clang__,
 * __builtin_clz otherwise) and JPEG_NBITS(x) additionally maps x == 0 to 0.
 * The remaining definitions look the value up in jpeg_nbits_table[] from
 * jpeg_nbits_table.h.
 * NOTE(review): the #else and #endif lines that separate these alternatives
 * are not present in this listing.
 */
56 /* NOTE: Both GCC and Clang define __GNUC__ */
57 #if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
58 defined(_M_ARM) || defined(_M_ARM64)
59 #if !defined(__thumb__) || defined(__thumb2__)
60 #define USE_CLZ_INTRINSIC
64 #ifdef USE_CLZ_INTRINSIC
65 #if defined(_MSC_VER) && !defined(__clang__)
66 #define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x))
68 #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
70 #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
72 #include "jpeg_nbits_table.h"
73 #define JPEG_NBITS(x) (jpeg_nbits_table[x])
74 #define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x)
/* Private state of the progressive Huffman encoder; cinfo->entropy is cast
 * to phuff_entropy_ptr throughout this file.  It holds, in order: the
 * public method table (pub); the two AC data-preparation function pointers
 * that start_pass_phuff() sets to either the jsimd_* or the C routine; the
 * gather_statistics mode flag; local copies of the destination buffer
 * pointer/count plus the bit accumulator; per-component DC predictions; AC
 * state (table number, EOBRUN, BE, bit_buffer); restart bookkeeping; and the
 * derived-table and statistics-table pointer arrays.
 * NOTE(review): the opening "typedef struct {" line is not present in this
 * listing.
 */
78 /* Expanded entropy encoder object for progressive Huffman encoding. */
81 struct jpeg_entropy_encoder pub; /* public fields */
83 /* Pointer to routine to prepare data for encode_mcu_AC_first() */
84 void (*AC_first_prepare) (const JCOEF *block,
85 const int *jpeg_natural_order_start, int Sl,
86 int Al, JCOEF *values, size_t *zerobits);
87 /* Pointer to routine to prepare data for encode_mcu_AC_refine() */
88 int (*AC_refine_prepare) (const JCOEF *block,
89 const int *jpeg_natural_order_start, int Sl,
90 int Al, JCOEF *absvalues, size_t *bits);
92 /* Mode flag: TRUE for optimization, FALSE for actual data output */
93 boolean gather_statistics;
95 /* Bit-level coding status.
96 * next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
98 JOCTET *next_output_byte; /* => next byte to write in buffer */
99 size_t free_in_buffer; /* # of byte spaces remaining in buffer */
100 size_t put_buffer; /* current bit-accumulation buffer */
101 int put_bits; /* # of bits now in it */
102 j_compress_ptr cinfo; /* link to cinfo (needed for dump_buffer) */
104 /* Coding status for DC components */
105 int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
107 /* Coding status for AC components */
108 int ac_tbl_no; /* the table number of the single component */
109 unsigned int EOBRUN; /* run length of EOBs */
110 unsigned int BE; /* # of buffered correction bits before MCU */
111 char *bit_buffer; /* buffer for correction bits (1 per char) */
112 /* packing correction bits tightly would save some space but cost time... */
114 unsigned int restarts_to_go; /* MCUs left in this restart interval */
115 int next_restart_num; /* next restart number to write (0-7) */
117 /* Pointers to derived tables (these workspaces have image lifespan).
118 * Since any one scan codes only DC or only AC, we only need one set
119 * of tables, not one for DC and one for AC.
121 c_derived_tbl *derived_tbls[NUM_HUFF_TBLS];
123 /* Statistics tables for optimization; again, one set is enough */
124 long *count_ptrs[NUM_HUFF_TBLS];
125 } phuff_entropy_encoder;
127 typedef phuff_entropy_encoder *phuff_entropy_ptr;
129 /* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
130 * buffer can hold. Larger sizes may slightly improve compression, but
131 * 1000 is already well into the realm of overkill.
132 * The minimum safe size is 64 bits.
 * In this file the value sizes the bit_buffer allocation made by
 * start_pass_phuff() (MAX_CORR_BITS chars), and encode_mcu_AC_refine()
 * forces the pending EOBRUN out once BE exceeds
 * MAX_CORR_BITS - DCTSIZE2 + 1.
 * NOTE(review): the line that terminates this comment is not present in
 * this listing.
135 #define MAX_CORR_BITS 1000 /* Max # of correction bits I can buffer */
137 /* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG.
138 * We assume that int right shift is unsigned if JLONG right shift is,
139 * which should be safe.
 * ISHIFT_TEMPS declares the scratch variable ishift_temp that the
 * RIGHT_SHIFT_IS_UNSIGNED form of IRIGHT_SHIFT assigns its operand to.
 * For a negative operand that form ORs (~0) << (16 - shft) into the
 * shifted value (presumably to emulate sign extension for 16-bit
 * coefficient values -- confirm).  The other IRIGHT_SHIFT definition is a
 * plain >> of its arguments.
 * NOTE(review): the comment terminator and the #else / #endif lines are
 * not present in this listing.
142 #ifdef RIGHT_SHIFT_IS_UNSIGNED
143 #define ISHIFT_TEMPS int ishift_temp;
144 #define IRIGHT_SHIFT(x, shft) \
145 ((ishift_temp = (x)) < 0 ? \
146 (ishift_temp >> (shft)) | ((~0) << (16 - (shft))) : \
147 (ishift_temp >> (shft)))
150 #define IRIGHT_SHIFT(x, shft) ((x) >> (shft))
/* Round v up to the next multiple of p, where p must be a power of two.
 * Both arguments are fully parenthesized so that an argument expression
 * containing operators of lower precedence than + (shifts, bitwise
 * operators, ?:) is rounded as a whole.  Each argument may be evaluated more
 * than once (p is), so avoid arguments with side effects.
 */
#define PAD(v, p)  (((v) + (p) - 1) & (~((p) - 1)))
155 /* Forward declarations */
/* The four encode_mcu_DC/AC first/refine routines are the candidates that
 * start_pass_phuff() installs as entropy->pub.encode_mcu.  The two "prepare"
 * routines are the C candidates for entropy->AC_first_prepare and
 * entropy->AC_refine_prepare, and the two finish_pass routines are the
 * candidates for entropy->pub.finish_pass.
 */
156 METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo,
157 JBLOCKROW *MCU_data);
158 METHODDEF(void) encode_mcu_AC_first_prepare
159 (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
160 JCOEF *values, size_t *zerobits);
161 METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo,
162 JBLOCKROW *MCU_data);
163 METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo,
164 JBLOCKROW *MCU_data);
165 METHODDEF(int) encode_mcu_AC_refine_prepare
166 (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
167 JCOEF *absvalues, size_t *bits);
168 METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo,
169 JBLOCKROW *MCU_data);
170 METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo);
171 METHODDEF(void) finish_pass_gather_phuff(j_compress_ptr cinfo);
174 /* Count bit loop zeroes */
/* Takes a pointer to a size_t bit mask.  Depending on which configuration
 * macro is defined, the count comes from __builtin_ctzl(),
 * _BitScanForward64(), _BitScanForward(), or a loop that tests the low bit
 * of *x.  The ENCODE_COEFS_AC_FIRST and ENCODE_COEFS_AC_REFINE macros call
 * it as count_zeroes(&zerobits) and store the return value in r or idx.
 * NOTE(review): the return type, the statements that update *x, the loop
 * body and the return statement are not present in this listing; the
 * by-pointer argument suggests *x is modified -- confirm against the
 * complete file.
 */
177 count_zeroes(size_t *x)
179 #if defined(HAVE_BUILTIN_CTZL)
181 result = __builtin_ctzl(*x);
183 #elif defined(HAVE_BITSCANFORWARD64)
184 unsigned long result;
185 _BitScanForward64(&result, *x);
187 #elif defined(HAVE_BITSCANFORWARD)
188 unsigned long result;
189 _BitScanForward(&result, *x);
193 while ((*x & 1) == 0) {
203 * Initialize for a Huffman-compressed scan using progressive JPEG.
/* Per-scan setup.  Records cinfo and the gather_statistics flag in the
 * entropy object and computes is_DC_band from cinfo->Ss == 0.  It then
 * installs one of the four encode_mcu routines: the "first" routines under
 * cinfo->Ah == 0, the "refine" routines otherwise.  For AC scans it picks
 * the jsimd_* prepare routine when the matching jsimd_can_*() call succeeds,
 * else the C one.  The AC refinement path allocates bit_buffer
 * (MAX_CORR_BITS chars, JPOOL_IMAGE) the first time it is needed.
 * finish_pass is chosen by gather_statistics.
 *
 * For every component in the scan it zeroes the DC prediction and selects
 * the table index (dc_tbl_no, or ac_tbl_no which is also saved in
 * entropy->ac_tbl_no).  In statistics mode an out-of-range index raises
 * JERR_NO_HUFF_TABLE and the table's 257 long counters are allocated if
 * necessary and zeroed; otherwise jpeg_make_c_derived_tbl() builds the
 * derived table.  Finally the bit accumulator and restart state are reset.
 *
 * NOTE(review): several lines (the return type, some local declarations,
 * the is_DC_band tests, else keywords, the AC-state initializers and closing
 * braces) are not present in this listing.
 */
207 start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics)
209 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
212 jpeg_component_info *compptr;
214 entropy->cinfo = cinfo;
215 entropy->gather_statistics = gather_statistics;
217 is_DC_band = (cinfo->Ss == 0);
219 /* We assume jcmaster.c already validated the scan parameters. */
221 /* Select execution routines */
222 if (cinfo->Ah == 0) {
224 entropy->pub.encode_mcu = encode_mcu_DC_first;
226 entropy->pub.encode_mcu = encode_mcu_AC_first;
227 if (jsimd_can_encode_mcu_AC_first_prepare())
228 entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare;
230 entropy->AC_first_prepare = encode_mcu_AC_first_prepare;
233 entropy->pub.encode_mcu = encode_mcu_DC_refine;
235 entropy->pub.encode_mcu = encode_mcu_AC_refine;
236 if (jsimd_can_encode_mcu_AC_refine_prepare())
237 entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare;
239 entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare;
240 /* AC refinement needs a correction bit buffer */
241 if (entropy->bit_buffer == NULL)
242 entropy->bit_buffer = (char *)
243 (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
244 MAX_CORR_BITS * sizeof(char));
247 if (gather_statistics)
248 entropy->pub.finish_pass = finish_pass_gather_phuff;
250 entropy->pub.finish_pass = finish_pass_phuff;
252 /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1
253 * for AC coefficients.
255 for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
256 compptr = cinfo->cur_comp_info[ci];
257 /* Initialize DC predictions to 0 */
258 entropy->last_dc_val[ci] = 0;
259 /* Get table index */
261 if (cinfo->Ah != 0) /* DC refinement needs no table */
263 tbl = compptr->dc_tbl_no;
265 entropy->ac_tbl_no = tbl = compptr->ac_tbl_no;
267 if (gather_statistics) {
268 /* Check for invalid table index */
269 /* (make_c_derived_tbl does this in the other path) */
270 if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
271 ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
272 /* Allocate and zero the statistics tables */
273 /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
274 if (entropy->count_ptrs[tbl] == NULL)
275 entropy->count_ptrs[tbl] = (long *)
276 (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
278 memset(entropy->count_ptrs[tbl], 0, 257 * sizeof(long));
280 /* Compute derived values for Huffman table */
281 /* We may do this more than once for a table, but it's not expensive */
282 jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl,
283 &entropy->derived_tbls[tbl]);
287 /* Initialize AC stuff */
291 /* Initialize bit buffer to empty */
292 entropy->put_buffer = 0;
293 entropy->put_bits = 0;
295 /* Initialize restart stuff */
296 entropy->restarts_to_go = cinfo->restart_interval;
297 entropy->next_restart_num = 0;
301 /* Outputting bytes to the file.
302 * NB: these must be called only when actually outputting,
303 * that is, entropy->gather_statistics == FALSE.
 * emit_byte() stores val (converted to JOCTET) through
 * entropy->next_output_byte, advances that pointer, decrements
 * entropy->free_in_buffer, and calls dump_buffer() when the count reaches
 * zero.  The entropy argument is expanded several times, so it should be
 * a side-effect-free expression.
 * NOTE(review): the comment terminator and the macro's closing brace are
 * not present in this listing.
307 #define emit_byte(entropy, val) { \
308 *(entropy)->next_output_byte++ = (JOCTET)(val); \
309 if (--(entropy)->free_in_buffer == 0) \
310 dump_buffer(entropy); \
315 dump_buffer(phuff_entropy_ptr entropy)
316 /* Empty the output buffer; we do not support suspension in this module. */
/* Calls the destination manager's empty_output_buffer() method; a false
 * return raises JERR_CANT_SUSPEND.  Afterwards the entropy object's local
 * next_output_byte and free_in_buffer copies are reloaded from dest.
 * NOTE(review): the return type and braces are not present in this listing.
 */
318 struct jpeg_destination_mgr *dest = entropy->cinfo->dest;
320 if (!(*dest->empty_output_buffer) (entropy->cinfo))
321 ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);
322 /* After a successful buffer dump, must reset buffer pointers */
323 entropy->next_output_byte = dest->next_output_byte;
324 entropy->free_in_buffer = dest->free_in_buffer;
/* emit_bits(entropy, code, size): append the low "size" bits of "code" to
 * the bit accumulator and write every completed byte with emit_byte(),
 * following each 0xFF byte with a stuffed zero byte.  When
 * entropy->gather_statistics is set, nothing is written.  The accumulator
 * and bit count are stored back into the entropy object at the end.
 * JERR_HUFF_MISSING_CODE is raised on the size == 0 path.
 * NOTE(review): the return type, the "if" line guarding that ERREXIT, and
 * the statements that shift put_buffer and reduce put_bits inside the output
 * loop are not present in this listing.
 */
328 /* Outputting bits to the file */
330 /* Only the right 24 bits of put_buffer are used; the valid bits are
331 * left-justified in this part. At most 16 bits can be passed to emit_bits
332 * in one call, and we never retain more than 7 bits in put_buffer
333 * between calls, so 24 bits are sufficient.
337 emit_bits(phuff_entropy_ptr entropy, unsigned int code, int size)
338 /* Emit some bits, unless we are in gather mode */
340 /* This routine is heavily used, so it's worth coding tightly. */
341 register size_t put_buffer = (size_t)code;
342 register int put_bits = entropy->put_bits;
344 /* if size is 0, caller used an invalid Huffman table entry */
346 ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
348 if (entropy->gather_statistics)
349 return; /* do nothing if we're only getting stats */
351 put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */
353 put_bits += size; /* new number of bits in buffer */
355 put_buffer <<= 24 - put_bits; /* align incoming bits */
357 put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */
359 while (put_bits >= 8) {
360 int c = (int)((put_buffer >> 16) & 0xFF);
362 emit_byte(entropy, c);
363 if (c == 0xFF) { /* need to stuff a zero byte? */
364 emit_byte(entropy, 0);
370 entropy->put_buffer = put_buffer; /* update variables */
371 entropy->put_bits = put_bits;
/* Complete the current partial output byte by emitting 0x7F as a 7-bit code
 * (i.e. seven one bits) through emit_bits(), then clear the saved bit
 * accumulator and bit count.
 * NOTE(review): the return type and braces are not present in this listing.
 */
376 flush_bits(phuff_entropy_ptr entropy)
378 emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */
379 entropy->put_buffer = 0; /* and reset bit-buffer to empty */
380 entropy->put_bits = 0;
385 * Emit (or just count) a Huffman symbol.
/* In statistics mode, increments the counter for "symbol" in
 * count_ptrs[tbl_no].  The remaining two statements fetch the derived table
 * derived_tbls[tbl_no] and pass the symbol's code (ehufco) and code length
 * (ehufsi) to emit_bits().
 * NOTE(review): the return type, the "else" that presumably separates the
 * two paths, and the braces are not present in this listing.
 */
389 emit_symbol(phuff_entropy_ptr entropy, int tbl_no, int symbol)
391 if (entropy->gather_statistics)
392 entropy->count_ptrs[tbl_no][symbol]++;
394 c_derived_tbl *tbl = entropy->derived_tbls[tbl_no];
395 emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
401 * Emit bits from a correction bit buffer.
/* Returns immediately in statistics mode.  Otherwise the char at *bufstart
 * is emitted as a one-bit code via emit_bits().  Callers in this file pass a
 * buffer pointer followed by a bit count (entropy->BE or BR).
 * NOTE(review): the rest of the parameter list, the loop that walks the
 * buffer, the return type and the braces are not present in this listing.
 */
405 emit_buffered_bits(phuff_entropy_ptr entropy, char *bufstart,
408 if (entropy->gather_statistics)
409 return; /* no real work */
412 emit_bits(entropy, (unsigned int)(*bufstart), 1);
420 * Emit any pending EOBRUN symbol.
/* When entropy->EOBRUN is nonzero: nbits is one less than the bit length of
 * EOBRUN, the symbol (nbits << 4) is emitted with the AC table
 * entropy->ac_tbl_no, the low nbits bits of EOBRUN are passed to
 * emit_bits(), and the entropy->BE buffered correction bits in
 * entropy->bit_buffer are emitted.  JERR_HUFF_MISSING_CODE is raised by a
 * safety check on nbits.
 * NOTE(review): the return type, the conditions guarding the ERREXIT and
 * the emit_bits() call, and any statements that reset EOBRUN and BE are not
 * present in this listing.
 */
424 emit_eobrun(phuff_entropy_ptr entropy)
426 register int temp, nbits;
428 if (entropy->EOBRUN > 0) { /* if there is any pending EOBRUN */
429 temp = entropy->EOBRUN;
430 nbits = JPEG_NBITS_NONZERO(temp) - 1;
431 /* safety check: shouldn't happen given limited correction-bit buffer */
433 ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
435 emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4);
437 emit_bits(entropy, entropy->EOBRUN, nbits);
441 /* Emit any buffered correction bits */
442 emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
449 * Emit a restart marker & resynchronize predictions.
/* First emits any pending EOBRUN.  When not gathering statistics, writes
 * the marker bytes 0xFF and JPEG_RST0 + restart_num with emit_byte().  For
 * a scan with cinfo->Ss == 0 the DC prediction of every component in the
 * scan is then reset to 0; the other branch is commented as resetting the
 * AC-related fields.
 * NOTE(review): the return type, the declaration of ci, the statement at
 * the start of the output branch, the AC reset statements and the braces
 * are not present in this listing.
 */
453 emit_restart(phuff_entropy_ptr entropy, int restart_num)
457 emit_eobrun(entropy);
459 if (!entropy->gather_statistics) {
461 emit_byte(entropy, 0xFF);
462 emit_byte(entropy, JPEG_RST0 + restart_num);
465 if (entropy->cinfo->Ss == 0) {
466 /* Re-initialize DC predictions to 0 */
467 for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
468 entropy->last_dc_val[ci] = 0;
470 /* Re-initialize all AC-related fields to 0 */
478 * MCU encoding for DC initial scan (either spectral selection,
479 * or first pass of successive approximation).
/* Encodes the DC coefficient of every block in one MCU for a DC first scan.
 *
 * The destination pointer and free count are copied into the entropy object
 * on entry and copied back at the end.  A restart marker is emitted first
 * when restart_interval is nonzero and restarts_to_go has reached 0.
 *
 * For each block: the DC coefficient is shifted right by Al with
 * IRIGHT_SHIFT, the difference from the component's previous value
 * (last_dc_val[ci]) is formed, and last_dc_val[ci] is updated.  The sign
 * mask temp3 is used to obtain the absolute value (temp) and the bits to
 * emit (temp2 = temp ^ temp3).  nbits = JPEG_NBITS(temp); a value above
 * MAX_COEF_BITS + 1 raises JERR_BAD_DCT_COEF.  The symbol nbits is emitted
 * with the component's dc_tbl_no table, followed by nbits bits of temp2
 * when nbits is nonzero.
 *
 * The restart counter is reloaded (and next_restart_num advanced modulo 8)
 * when it was 0, then decremented.
 *
 * NOTE(review): the return type (declared boolean above), several local
 * declarations including Al, one statement of the absolute-value sequence,
 * the return statement and the braces are not present in this listing.
 */
483 encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
485 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
486 register int temp, temp2, temp3;
491 jpeg_component_info *compptr;
494 entropy->next_output_byte = cinfo->dest->next_output_byte;
495 entropy->free_in_buffer = cinfo->dest->free_in_buffer;
497 /* Emit restart marker if needed */
498 if (cinfo->restart_interval)
499 if (entropy->restarts_to_go == 0)
500 emit_restart(entropy, entropy->next_restart_num);
502 /* Encode the MCU data blocks */
503 for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
504 block = MCU_data[blkn];
505 ci = cinfo->MCU_membership[blkn];
506 compptr = cinfo->cur_comp_info[ci];
508 /* Compute the DC value after the required point transform by Al.
509 * This is simply an arithmetic right shift.
511 temp2 = IRIGHT_SHIFT((int)((*block)[0]), Al);
513 /* DC differences are figured on the point-transformed values. */
514 temp = temp2 - entropy->last_dc_val[ci];
515 entropy->last_dc_val[ci] = temp2;
517 /* Encode the DC coefficient difference per section G.1.2.1 */
519 /* This is a well-known technique for obtaining the absolute value without
520 * a branch. It is derived from an assembly language technique presented
521 * in "How to Optimize for the Pentium Processors", Copyright (c) 1996,
524 temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
526 temp -= temp3; /* temp is abs value of input */
527 /* For a negative input, want temp2 = bitwise complement of abs(input) */
528 temp2 = temp ^ temp3;
530 /* Find the number of bits needed for the magnitude of the coefficient */
531 nbits = JPEG_NBITS(temp);
532 /* Check for out-of-range coefficient values.
533 * Since we're encoding a difference, the range limit is twice as much.
535 if (nbits > MAX_COEF_BITS + 1)
536 ERREXIT(cinfo, JERR_BAD_DCT_COEF);
538 /* Count/emit the Huffman-coded symbol for the number of bits */
539 emit_symbol(entropy, compptr->dc_tbl_no, nbits);
541 /* Emit that number of bits of the value, if positive, */
542 /* or the complement of its magnitude, if negative. */
543 if (nbits) /* emit_bits rejects calls with size 0 */
544 emit_bits(entropy, (unsigned int)temp2, nbits);
547 cinfo->dest->next_output_byte = entropy->next_output_byte;
548 cinfo->dest->free_in_buffer = entropy->free_in_buffer;
550 /* Update restart-interval state too */
551 if (cinfo->restart_interval) {
552 if (entropy->restarts_to_go == 0) {
553 entropy->restarts_to_go = cinfo->restart_interval;
554 entropy->next_restart_num++;
555 entropy->next_restart_num &= 7;
557 entropy->restarts_to_go--;
565 * Data preparation for encode_mcu_AC_first().
/* Loop body shared by encode_mcu_AC_first_prepare().  For k in [0, Sl) it
 * reads block[jpeg_natural_order_start[k]], derives a sign mask (temp2) and
 * the absolute value (temp), and shifts the absolute value right by Al.  It
 * stores the result in values[k] and temp2 in values[k + DCTSIZE2], and sets
 * bit k of zerobits.  It relies on k, temp, temp2, zerobits, block, values,
 * jpeg_natural_order_start and Al being in scope at the expansion site.
 * NOTE(review): several continuation lines (the zero tests mentioned by the
 * "Watch out" comment, one statement of the absolute-value sequence, the
 * statement computing the complemented output bits, and closing braces) are
 * not present in this listing.
 */
568 #define COMPUTE_ABSVALUES_AC_FIRST(Sl) { \
569 for (k = 0; k < Sl; k++) { \
570 temp = block[jpeg_natural_order_start[k]]; \
573 /* We must apply the point transform by Al. For AC coefficients this \
574 * is an integer division with rounding towards 0. To do this portably \
575 * in C, we shift after obtaining the absolute value; so the code is \
576 * interwoven with finding the abs value (temp) and output bits (temp2). \
578 temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
580 temp -= temp2; /* temp is abs value of input */ \
581 temp >>= Al; /* apply the point transform */ \
582 /* Watch out for case that nonzero coef is zero after point transform */ \
585 /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \
587 values[k] = (JCOEF)temp; \
588 values[k + DCTSIZE2] = (JCOEF)temp2; \
589 zerobits |= ((size_t)1U) << k; \
/* C implementation of the AC_first_prepare hook.  It expands
 * COMPUTE_ABSVALUES_AC_FIRST once with Sl0, advances
 * jpeg_natural_order_start by 32 entries, and expands it again with Sl.
 * The code is conditional on SIZEOF_SIZE_T == 4 in two places.
 * NOTE(review): the return type (declared void above), the declaration of
 * Sl0, the statements that store zerobits into bits[], the preprocessor
 * #else / #endif lines and the braces are not present in this listing, so
 * the exact split between the two passes cannot be confirmed here.
 */
594 encode_mcu_AC_first_prepare(const JCOEF *block,
595 const int *jpeg_natural_order_start, int Sl,
596 int Al, JCOEF *values, size_t *bits)
598 register int k, temp, temp2;
599 size_t zerobits = 0U;
602 #if SIZEOF_SIZE_T == 4
607 COMPUTE_ABSVALUES_AC_FIRST(Sl0);
610 #if SIZEOF_SIZE_T == 4
615 jpeg_natural_order_start += 32;
618 COMPUTE_ABSVALUES_AC_FIRST(Sl);
625 * MCU encoding for AC initial scan (either spectral selection,
626 * or first pass of successive approximation).
/* Coefficient-emission loop body used by encode_mcu_AC_first().  It stores
 * the result of count_zeroes(&zerobits) in r, reads the output bits from
 * cvalue[DCTSIZE2] into temp2, emits the ZRL symbol 0xF0 for runs longer
 * than 15, and computes nbits = JPEG_NBITS_NONZERO(temp), raising
 * JERR_BAD_DCT_COEF if it exceeds MAX_COEF_BITS.  It then emits the symbol
 * (r << 4) + nbits with the AC table, followed by nbits bits of temp2.  The
 * "label" argument is supplied by the caller as either "(void)0;" or a goto
 * label.  entropy, cinfo, cvalue, zerobits, r, temp, temp2 and nbits must be
 * in scope at the expansion site.
 * NOTE(review): the loop header, the statements that advance cvalue and
 * load temp, the run-length loop around the 0xF0 emission, and the closing
 * lines are not present in this listing.
 */
629 #define ENCODE_COEFS_AC_FIRST(label) { \
631 r = count_zeroes(&zerobits); \
635 temp2 = cvalue[DCTSIZE2]; \
637 /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
639 emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
643 /* Find the number of bits needed for the magnitude of the coefficient */ \
644 nbits = JPEG_NBITS_NONZERO(temp); /* there must be at least one 1 bit */ \
645 /* Check for out-of-range coefficient values */ \
646 if (nbits > MAX_COEF_BITS) \
647 ERREXIT(cinfo, JERR_BAD_DCT_COEF); \
649 /* Count/emit Huffman symbol for run length / number of bits */ \
650 emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \
652 /* Emit that number of bits of the value, if positive, */ \
653 /* or the complement of its magnitude, if negative. */ \
654 emit_bits(entropy, (unsigned int)temp2, nbits); \
/* Encodes one block for an AC first scan.
 *
 * Sl = Se - Ss + 1 is the number of coefficients in the spectral band.  The
 * destination pointers are copied into the entropy object and a restart
 * marker is emitted when due.  A work array of 2 * DCTSIZE2 + 15 JCOEFs is
 * used either through a 16-byte aligned pointer (PAD) or directly.  The
 * AC_first_prepare hook fills it from MCU_data[0][0] starting at
 * jpeg_natural_order + Ss and reports nonzero positions through bits[].
 *
 * A pending EOBRUN is emitted before any nonzero coefficient is coded, then
 * the coefficients are coded with ENCODE_COEFS_AC_FIRST.  If cvalue has not
 * reached values + Sl afterwards, one EOB is counted, and the run is forced
 * out when EOBRUN reaches 0x7FFF.  Finally the destination pointers are
 * written back and the restart counters updated.
 *
 * NOTE(review): the return type (declared boolean above), the declarations
 * of Al, values, cvalue and zerobits, the preprocessor conditions choosing
 * between the aligned and unaligned buffer, the statements that load
 * zerobits from bits[], the second-half handling for SIZEOF_SIZE_T == 4, the
 * return statement and the braces are not present in this listing.
 */
662 encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
664 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
665 register int temp, temp2;
666 register int nbits, r;
667 int Sl = cinfo->Se - cinfo->Ss + 1;
669 JCOEF values_unaligned[2 * DCTSIZE2 + 15];
673 size_t bits[8 / SIZEOF_SIZE_T];
675 entropy->next_output_byte = cinfo->dest->next_output_byte;
676 entropy->free_in_buffer = cinfo->dest->free_in_buffer;
678 /* Emit restart marker if needed */
679 if (cinfo->restart_interval)
680 if (entropy->restarts_to_go == 0)
681 emit_restart(entropy, entropy->next_restart_num);
684 cvalue = values = (JCOEF *)PAD((JUINTPTR)values_unaligned, 16);
686 /* Not using SIMD, so alignment is not needed */
687 cvalue = values = values_unaligned;
691 entropy->AC_first_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
692 Sl, Al, values, bits);
695 #if SIZEOF_SIZE_T == 4
699 /* Emit any pending EOBRUN */
700 if (zerobits && (entropy->EOBRUN > 0))
701 emit_eobrun(entropy);
703 #if SIZEOF_SIZE_T == 4
707 /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
709 ENCODE_COEFS_AC_FIRST((void)0;);
711 #if SIZEOF_SIZE_T == 4
714 int diff = ((values + DCTSIZE2 / 2) - cvalue);
715 r = count_zeroes(&zerobits);
718 goto first_iter_ac_first;
721 ENCODE_COEFS_AC_FIRST(first_iter_ac_first:);
724 if (cvalue < (values + Sl)) { /* If there are trailing zeroes, */
725 entropy->EOBRUN++; /* count an EOB */
726 if (entropy->EOBRUN == 0x7FFF)
727 emit_eobrun(entropy); /* force it out to avoid overflow */
730 cinfo->dest->next_output_byte = entropy->next_output_byte;
731 cinfo->dest->free_in_buffer = entropy->free_in_buffer;
733 /* Update restart-interval state too */
734 if (cinfo->restart_interval) {
735 if (entropy->restarts_to_go == 0) {
736 entropy->restarts_to_go = cinfo->restart_interval;
737 entropy->next_restart_num++;
738 entropy->next_restart_num &= 7;
740 entropy->restarts_to_go--;
748 * MCU encoding for DC successive approximation refinement scan.
749 * Note: we assume such scans can be multi-component, although the spec
750 * is not very clear on the point.
/* Encodes one MCU for a DC refinement scan.  After loading the destination
 * pointers and emitting a restart marker when due, it emits exactly one bit
 * per block: emit_bits() is called with (temp >> Al) and size 1, and
 * emit_bits() keeps only the low "size" bits of its code argument.  No
 * Huffman symbol is emitted here.  The destination pointers are then
 * written back and the restart counters updated.
 * NOTE(review): the return type (declared boolean above), the local
 * declarations, the statement that loads temp from the block, the return
 * statement and the braces are not present in this listing.
 */
754 encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
756 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
762 entropy->next_output_byte = cinfo->dest->next_output_byte;
763 entropy->free_in_buffer = cinfo->dest->free_in_buffer;
765 /* Emit restart marker if needed */
766 if (cinfo->restart_interval)
767 if (entropy->restarts_to_go == 0)
768 emit_restart(entropy, entropy->next_restart_num);
770 /* Encode the MCU data blocks */
771 for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
772 block = MCU_data[blkn];
774 /* We simply emit the Al'th bit of the DC coefficient value. */
776 emit_bits(entropy, (unsigned int)(temp >> Al), 1);
779 cinfo->dest->next_output_byte = entropy->next_output_byte;
780 cinfo->dest->free_in_buffer = entropy->free_in_buffer;
782 /* Update restart-interval state too */
783 if (cinfo->restart_interval) {
784 if (entropy->restarts_to_go == 0) {
785 entropy->restarts_to_go = cinfo->restart_interval;
786 entropy->next_restart_num++;
787 entropy->next_restart_num &= 7;
789 entropy->restarts_to_go--;
797 * Data preparation for encode_mcu_AC_refine().
/* Loop body shared by encode_mcu_AC_refine_prepare().  For k in [0, Sl) it
 * reads block[jpeg_natural_order_start[k]], derives a sign mask (temp2) and
 * the absolute value (temp), and shifts the absolute value right by Al.  It
 * sets bit k of zerobits, ORs (temp2 + 1) << k into signbits, saves the
 * value in absvalues[k], and records EOB = k + koffset.  k, temp, temp2,
 * zerobits, signbits, EOB, block, absvalues, jpeg_natural_order_start and Al
 * must be in scope at the expansion site.
 * NOTE(review): the conditions that guard the zerobits/signbits updates and
 * the EOB assignment, one statement of the absolute-value sequence, and the
 * closing braces are not present in this listing.
 */
800 #define COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) { \
801 /* It is convenient to make a pre-pass to determine the transformed \
802 * coefficients' absolute values and the EOB position. \
804 for (k = 0; k < Sl; k++) { \
805 temp = block[jpeg_natural_order_start[k]]; \
806 /* We must apply the point transform by Al. For AC coefficients this \
807 * is an integer division with rounding towards 0. To do this portably \
808 * in C, we shift after obtaining the absolute value. \
810 temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
812 temp -= temp2; /* temp is abs value of input */ \
813 temp >>= Al; /* apply the point transform */ \
815 zerobits |= ((size_t)1U) << k; \
816 signbits |= ((size_t)(temp2 + 1)) << k; \
818 absvalues[k] = (JCOEF)temp; /* save abs value for main pass */ \
820 EOB = k + koffset; /* EOB = index of last newly-nonzero coef */ \
/* C implementation of the AC_refine_prepare hook.  It expands
 * COMPUTE_ABSVALUES_AC_REFINE with (Sl0, 0), advances
 * jpeg_natural_order_start by 32 entries, and expands it again with
 * (Sl, 32).  zerobits and signbits start at zero, and there are sections
 * conditional on SIZEOF_SIZE_T being 4 or 8.
 * NOTE(review): the return type (declared int above), the declarations of
 * Sl0 and EOB, the statements that store zerobits/signbits into bits[], the
 * return statement, the preprocessor #else / #endif lines and the braces
 * are not present in this listing.
 */
825 encode_mcu_AC_refine_prepare(const JCOEF *block,
826 const int *jpeg_natural_order_start, int Sl,
827 int Al, JCOEF *absvalues, size_t *bits)
829 register int k, temp, temp2;
831 size_t zerobits = 0U, signbits = 0U;
834 #if SIZEOF_SIZE_T == 4
839 COMPUTE_ABSVALUES_AC_REFINE(Sl0, 0);
842 #if SIZEOF_SIZE_T == 8
852 jpeg_natural_order_start += 32;
855 COMPUTE_ABSVALUES_AC_REFINE(Sl, 32);
867 * MCU encoding for AC successive approximation refinement scan.
/* Coefficient-emission loop body used by encode_mcu_AC_refine().  It stores
 * the result of count_zeroes(&zerobits) in idx.  While the run r exceeds 15
 * and cabsvalue has not passed EOBPTR, it emits the pending EOBRUN, a ZRL
 * symbol (0xF0) and the BR buffered correction bits, then resets BR_buffer
 * to entropy->bit_buffer.  For the current value (temp = *cabsvalue++) it
 * either appends the low bit of temp to BR_buffer as a correction bit, or
 * emits the pending EOBRUN, the symbol (r << 4) + 1, one bit taken from the
 * low bit of signbits, and the buffered correction bits, and resets r to 0.
 * The "label" argument is supplied by the caller as either "(void)0;" or a
 * goto label.
 * NOTE(review): the loop header, the updates of r, BR, signbits and
 * cabsvalue between these statements, the test that selects between the
 * correction-bit path and the newly-nonzero path, and the closing lines are
 * not present in this listing.
 */
870 #define ENCODE_COEFS_AC_REFINE(label) { \
872 idx = count_zeroes(&zerobits); \
877 /* Emit any required ZRLs, but not if they can be folded into EOB */ \
878 while (r > 15 && (cabsvalue <= EOBPTR)) { \
879 /* emit any pending EOBRUN and the BE correction bits */ \
880 emit_eobrun(entropy); \
882 emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
884 /* Emit buffered correction bits that must be associated with ZRL */ \
885 emit_buffered_bits(entropy, BR_buffer, BR); \
886 BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
890 temp = *cabsvalue++; \
892 /* If the coef was previously nonzero, it only needs a correction bit. \
893 * NOTE: a straight translation of the spec's figure G.7 would suggest \
894 * that we also need to test r > 15. But if r > 15, we can only get here \
895 * if k > EOB, which implies that this coefficient is not 1. \
898 /* The correction bit is the next bit of the absolute value. */ \
899 BR_buffer[BR++] = (char)(temp & 1); \
905 /* Emit any pending EOBRUN and the BE correction bits */ \
906 emit_eobrun(entropy); \
908 /* Count/emit Huffman symbol for run length / number of bits */ \
909 emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); \
911 /* Emit output bit for newly-nonzero coef */ \
912 temp = signbits & 1; /* ((*block)[jpeg_natural_order_start[k]] < 0) ? 0 : 1 */ \
913 emit_bits(entropy, (unsigned int)temp, 1); \
915 /* Emit buffered correction bits that must be associated with this code */ \
916 emit_buffered_bits(entropy, BR_buffer, BR); \
917 BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
919 r = 0; /* reset zero run length */ \
/* Encodes one block for an AC refinement scan.
 *
 * Sl = Se - Ss + 1 is the number of coefficients in the spectral band.  The
 * destination pointers are copied into the entropy object and a restart
 * marker is emitted when due.  A work array of DCTSIZE2 + 15 JCOEFs is used
 * either through a 16-byte aligned pointer (PAD) or directly.  The
 * AC_refine_prepare hook fills it from MCU_data[0][0] starting at
 * jpeg_natural_order + Ss and reports bit masks through bits[].
 *
 * New correction bits are appended to bit_buffer after the BE bits already
 * buffered (BR_buffer = bit_buffer + BE, BR = 0), and the coefficients are
 * coded with ENCODE_COEFS_AC_REFINE.  Afterwards the count of values not
 * consumed, (absvalues + Sl) - cabsvalue, is ORed into r.  If r or BR is
 * positive, one EOB is counted and BR is added to BE.  The EOBRUN is forced
 * out when it reaches 0x7FFF or when BE exceeds
 * MAX_CORR_BITS - DCTSIZE2 + 1.  Finally the destination pointers are
 * written back and the restart counters updated.
 *
 * NOTE(review): the return type (declared boolean above), the declarations
 * of Al, BR, BR_buffer and absvalues, the preprocessor conditions choosing
 * between the aligned and unaligned buffer, the use made of the hook's
 * return value, the statements that load zerobits, signbits and EOBPTR, the
 * second-half handling for SIZEOF_SIZE_T == 4, the return statement and the
 * braces are not present in this listing.
 */
926 encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
928 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
929 register int temp, r, idx;
932 int Sl = cinfo->Se - cinfo->Ss + 1;
934 JCOEF absvalues_unaligned[DCTSIZE2 + 15];
936 const JCOEF *cabsvalue, *EOBPTR;
937 size_t zerobits, signbits;
938 size_t bits[16 / SIZEOF_SIZE_T];
940 entropy->next_output_byte = cinfo->dest->next_output_byte;
941 entropy->free_in_buffer = cinfo->dest->free_in_buffer;
943 /* Emit restart marker if needed */
944 if (cinfo->restart_interval)
945 if (entropy->restarts_to_go == 0)
946 emit_restart(entropy, entropy->next_restart_num);
949 cabsvalue = absvalues = (JCOEF *)PAD((JUINTPTR)absvalues_unaligned, 16);
951 /* Not using SIMD, so alignment is not needed */
952 cabsvalue = absvalues = absvalues_unaligned;
957 entropy->AC_refine_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
958 Sl, Al, absvalues, bits);
960 /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
962 r = 0; /* r = run length of zeros */
963 BR = 0; /* BR = count of buffered bits added now */
964 BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
967 #if SIZEOF_SIZE_T == 8
972 ENCODE_COEFS_AC_REFINE((void)0;);
974 #if SIZEOF_SIZE_T == 4
979 int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue);
980 idx = count_zeroes(&zerobits);
985 goto first_iter_ac_refine;
988 ENCODE_COEFS_AC_REFINE(first_iter_ac_refine:);
991 r |= (int)((absvalues + Sl) - cabsvalue);
993 if (r > 0 || BR > 0) { /* If there are trailing zeroes, */
994 entropy->EOBRUN++; /* count an EOB */
995 entropy->BE += BR; /* concat my correction bits to older ones */
996 /* We force out the EOB if we risk either:
997 * 1. overflow of the EOB counter;
998 * 2. overflow of the correction bit buffer during the next MCU.
1000 if (entropy->EOBRUN == 0x7FFF ||
1001 entropy->BE > (MAX_CORR_BITS - DCTSIZE2 + 1))
1002 emit_eobrun(entropy);
1005 cinfo->dest->next_output_byte = entropy->next_output_byte;
1006 cinfo->dest->free_in_buffer = entropy->free_in_buffer;
1008 /* Update restart-interval state too */
1009 if (cinfo->restart_interval) {
1010 if (entropy->restarts_to_go == 0) {
1011 entropy->restarts_to_go = cinfo->restart_interval;
1012 entropy->next_restart_num++;
1013 entropy->next_restart_num &= 7;
1015 entropy->restarts_to_go--;
1023 * Finish up at the end of a Huffman-compressed progressive scan.
/* End-of-scan handler for output mode (installed by start_pass_phuff() when
 * gather_statistics is false).  It loads the destination pointers into the
 * entropy object, emits any pending EOBRUN and its correction bits, pads
 * the last partial byte with flush_bits(), and writes the pointers back to
 * cinfo->dest.
 * NOTE(review): the return type (declared void above) and the braces are
 * not present in this listing.
 */
1027 finish_pass_phuff(j_compress_ptr cinfo)
1029 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
1031 entropy->next_output_byte = cinfo->dest->next_output_byte;
1032 entropy->free_in_buffer = cinfo->dest->free_in_buffer;
1034 /* Flush out any buffered data */
1035 emit_eobrun(entropy);
1036 flush_bits(entropy);
1038 cinfo->dest->next_output_byte = entropy->next_output_byte;
1039 cinfo->dest->free_in_buffer = entropy->free_in_buffer;
1044 * Finish up a statistics-gathering pass and create the new Huffman tables.
/* End-of-scan handler for statistics mode (installed by start_pass_phuff()
 * when gather_statistics is true).  It calls emit_eobrun() so that a
 * pending EOB run is counted, recomputes is_DC_band from cinfo->Ss == 0, and
 * clears the did[] array.  For each component in the scan it selects the DC
 * or AC table index and takes the matching slot in cinfo->dc_huff_tbl_ptrs
 * or cinfo->ac_huff_tbl_ptrs, allocating the table with
 * jpeg_alloc_huff_table() if the slot is NULL.  It then calls
 * jpeg_gen_optimal_table() with the counts gathered in
 * entropy->count_ptrs[tbl].
 * NOTE(review): the return type (declared void above), some local
 * declarations, the is_DC_band tests, the did[] test and update that the
 * comment below refers to, and the braces are not present in this listing.
 */
1048 finish_pass_gather_phuff(j_compress_ptr cinfo)
1050 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
1053 jpeg_component_info *compptr;
1054 JHUFF_TBL **htblptr;
1055 boolean did[NUM_HUFF_TBLS];
1057 /* Flush out buffered data (all we care about is counting the EOB symbol) */
1058 emit_eobrun(entropy);
1060 is_DC_band = (cinfo->Ss == 0);
1062 /* It's important not to apply jpeg_gen_optimal_table more than once
1063 * per table, because it clobbers the input frequency counts!
1065 memset(did, 0, sizeof(did));
1067 for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
1068 compptr = cinfo->cur_comp_info[ci];
1070 if (cinfo->Ah != 0) /* DC refinement needs no table */
1072 tbl = compptr->dc_tbl_no;
1074 tbl = compptr->ac_tbl_no;
1078 htblptr = &cinfo->dc_huff_tbl_ptrs[tbl];
1080 htblptr = &cinfo->ac_huff_tbl_ptrs[tbl];
1081 if (*htblptr == NULL)
1082 *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
1083 jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]);
1091 * Module initialization routine for progressive Huffman entropy encoding.
/* Allocates the phuff_entropy_encoder object from the JPOOL_IMAGE pool,
 * installs it as cinfo->entropy, and sets its start_pass method to
 * start_pass_phuff.  All derived-table and statistics-table pointers and
 * the correction-bit buffer pointer start out NULL; start_pass_phuff()
 * allocates them on demand.
 * NOTE(review): the return type, the declaration of i and the braces are
 * not present in this listing.
 */
1095 jinit_phuff_encoder(j_compress_ptr cinfo)
1097 phuff_entropy_ptr entropy;
1100 entropy = (phuff_entropy_ptr)
1101 (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
1102 sizeof(phuff_entropy_encoder));
1103 cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;
1104 entropy->pub.start_pass = start_pass_phuff;
1106 /* Mark tables unallocated */
1107 for (i = 0; i < NUM_HUFF_TBLS; i++) {
1108 entropy->derived_tbls[i] = NULL;
1109 entropy->count_ptrs[i] = NULL;
1111 entropy->bit_buffer = NULL; /* needed only in AC refinement scan */
1114 #endif /* C_PROGRESSIVE_SUPPORTED */