3 * @author Mathis Rosenhauer, Deutsches Klimarechenzentrum
6 * Adaptive Entropy Encoder
7 * Based on CCSDS documents 121.0-B-2 and 120.0-G-2
24 #include "encode_accessors.h"
26 /* Marker for Remainder Of Segment condition in zero block encoding */
29 static int m_get_block(struct aec_stream *strm);
30 static int m_get_block_cautious(struct aec_stream *strm);
31 static int m_check_zero_block(struct aec_stream *strm);
32 static int m_select_code_option(struct aec_stream *strm);
33 static int m_flush_block(struct aec_stream *strm);
34 static int m_flush_block_cautious(struct aec_stream *strm);
35 static int m_encode_splitting(struct aec_stream *strm);
36 static int m_encode_uncomp(struct aec_stream *strm);
37 static int m_encode_se(struct aec_stream *strm);
38 static int m_encode_zero(struct aec_stream *strm);
40 static inline void emit(struct internal_state *state,
41 uint32_t data, int bits)
44 Emit sequence of bits.
47 if (bits <= state->bit_p) {
49 *state->cds_p += data << state->bit_p;
52 *state->cds_p++ += (uint64_t)data >> bits;
56 *state->cds_p++ = data >> bits;
59 state->bit_p = 8 - bits;
60 *state->cds_p = data << state->bit_p;
64 static inline void emitfs(struct internal_state *state, int fs)
67 Emits a fundamental sequence.
69 fs zero bits followed by one 1 bit.
73 if (fs < state->bit_p) {
74 state->bit_p -= fs + 1;
75 *state->cds_p += 1 << state->bit_p;
85 #define EMITBLOCK(ref) \
86 static inline void emitblock_##ref(struct aec_stream *strm, \
91 struct internal_state *state = strm->state; \
92 uint32_t *in = state->block_p + ref; \
93 uint32_t *in_end = state->block_p + strm->block_size; \
94 uint64_t mask = (1ULL << k) - 1; \
95 uint8_t *o = state->cds_p; \
96 int p = state->bit_p; \
100 while(in < in_end) { \
104 while (p > k && in < in_end) { \
106 a += ((uint64_t)(*in++) & mask) << p; \
109 for (b = 56; b > (p & ~7); b -= 8) \
116 state->bit_p = p % 8; \
122 static void preprocess_unsigned(struct aec_stream *strm)
125 struct internal_state *state = strm->state;
126 uint32_t *x = state->block_buf;
128 uint32_t xmax = state->xmax;
129 uint32_t rsi = strm->rsi * strm->block_size - 1;
142 if (d <= xmax - x1) {
154 static void preprocess_signed(struct aec_stream *strm)
158 struct internal_state *state = strm->state;
159 uint32_t *buf = state->block_buf;
160 uint32_t m = 1ULL << (strm->bit_per_sample - 1);
161 int64_t x1 = (((int64_t)*buf++) ^ m) - m;
162 int64_t xmax = state->xmax;
163 int64_t xmin = state->xmin;
164 uint32_t rsi = strm->rsi * strm->block_size - 1;
167 x = (((int64_t)*buf) ^ m) - m;
192 static int m_get_block(struct aec_stream *strm)
194 struct internal_state *state = strm->state;
196 if (strm->avail_out > state->cds_len) {
197 if (!state->direct_out) {
198 state->direct_out = 1;
199 *strm->next_out = *state->cds_p;
200 state->cds_p = strm->next_out;
203 if (state->zero_blocks == 0 || state->direct_out) {
204 /* copy leftover from last block */
205 *state->cds_buf = *state->cds_p;
206 state->cds_p = state->cds_buf;
208 state->direct_out = 0;
211 if (state->blocks_avail == 0) {
213 state->block_p = state->block_buf;
215 if (strm->avail_in >= state->block_len * strm->rsi) {
216 state->get_rsi(strm);
217 state->blocks_avail = strm->rsi - 1;
219 if (strm->flags & AEC_DATA_PREPROCESS)
220 state->preprocess(strm);
222 return m_check_zero_block(strm);
225 state->mode = m_get_block_cautious;
229 state->block_p += strm->block_size;
230 state->blocks_avail--;
231 return m_check_zero_block(strm);
236 static int m_get_block_cautious(struct aec_stream *strm)
239 struct internal_state *state = strm->state;
242 if (strm->avail_in > 0) {
243 state->block_buf[state->i] = state->get_sample(strm);
245 if (state->flush == AEC_FLUSH) {
247 for (j = state->i; j < strm->rsi * strm->block_size; j++)
248 state->block_buf[j] = state->block_buf[state->i - 1];
249 state->i = strm->rsi * strm->block_size;
251 if (state->zero_blocks) {
252 state->mode = m_encode_zero;
256 emit(state, 0, state->bit_p);
257 if (state->direct_out == 0)
258 *strm->next_out++ = *state->cds_p;
268 } while (++state->i < strm->rsi * strm->block_size);
270 state->blocks_avail = strm->rsi - 1;
271 if (strm->flags & AEC_DATA_PREPROCESS)
272 state->preprocess(strm);
274 return m_check_zero_block(strm);
277 static int m_check_zero_block(struct aec_stream *strm)
279 struct internal_state *state = strm->state;
280 uint32_t *p = state->block_p + state->ref;
281 uint32_t *end = state->block_p + strm->block_size;
283 while(*p == 0 && p < end)
287 if (state->zero_blocks) {
288 /* The current block isn't zero but we have to emit a
289 * previous zero block first. The current block will be
292 state->block_p -= strm->block_size;
293 state->blocks_avail++;
294 state->mode = m_encode_zero;
297 state->mode = m_select_code_option;
300 state->zero_blocks++;
301 if (state->zero_blocks == 1) {
302 state->zero_ref = state->ref;
303 state->zero_ref_sample = state->block_p[0];
305 if (state->blocks_avail == 0
306 || (strm->rsi - state->blocks_avail) % 64 == 0) {
307 if (state->zero_blocks > 4)
308 state->zero_blocks = ROS;
309 state->mode = m_encode_zero;
312 state->mode = m_get_block;
317 static uint64_t block_fs(struct aec_stream *strm, int k)
321 struct internal_state *state = strm->state;
323 fs = (uint64_t)(state->block_p[1] >> k)
324 + (uint64_t)(state->block_p[2] >> k)
325 + (uint64_t)(state->block_p[3] >> k)
326 + (uint64_t)(state->block_p[4] >> k)
327 + (uint64_t)(state->block_p[5] >> k)
328 + (uint64_t)(state->block_p[6] >> k)
329 + (uint64_t)(state->block_p[7] >> k);
331 if (strm->block_size > 8)
332 for (j = 8; j < strm->block_size; j += 8)
334 (uint64_t)(state->block_p[j + 0] >> k)
335 + (uint64_t)(state->block_p[j + 1] >> k)
336 + (uint64_t)(state->block_p[j + 2] >> k)
337 + (uint64_t)(state->block_p[j + 3] >> k)
338 + (uint64_t)(state->block_p[j + 4] >> k)
339 + (uint64_t)(state->block_p[j + 5] >> k)
340 + (uint64_t)(state->block_p[j + 6] >> k)
341 + (uint64_t)(state->block_p[j + 7] >> k);
344 fs += (uint64_t)(state->block_p[0] >> k);
349 static int count_splitting_option(struct aec_stream *strm)
352 Find the best point for splitting samples in a block.
354 In Rice coding each sample in a block of samples is split at
355 the same position into k LSB and bit_per_sample - k MSB. The
356 LSB part is left binary and the MSB part is coded as a
357 fundamental sequence a.k.a. unary (see CCSDS 121.0-B-2). The
358 function of the length of the Coded Data Set (CDS) depending on
359 k has exactly one minimum (see A. Kiely, IPN Progress Report
362 To find that minimum with only a few costly evaluations of the
363 CDS length, we start with the k of the previous CDS. K is
364 increased and the CDS length evaluated. If the CDS length gets
365 smaller, then we are moving towards the minimum. If the length
366 increases, then the minimum will be found with smaller k.
368 For increasing k we know that we will gain block_size bits in
369 length through the larger binary part. If the FS lenth is less
370 than the block size then a reduced FS part can't compensate the
371 larger binary part. So we know that the CDS for k+1 will be
372 larger than for k without actually computing the length. An
373 analogue check can be done for decreasing k.
377 int this_bs; /* Block size of current block */
378 int no_turn; /* 1 if we shouldn't reverse */
379 int dir; /* Direction, 1 means increasing k, 0 decreasing k */
380 uint64_t len; /* CDS length for current k */
381 uint64_t len_min; /* CDS length minimum so far */
382 uint64_t fs_len; /* Length of FS part (not including 1s) */
384 struct internal_state *state = strm->state;
386 this_bs = strm->block_size - state->ref;
387 len_min = UINT64_MAX;
388 k = k_min = state->k;
389 no_turn = (k == 0) ? 1 : 0;
393 fs_len = block_fs(strm, k);
394 len = fs_len + this_bs * (k + 1);
397 if (len_min < UINT64_MAX)
404 if (fs_len < this_bs || k >= state->kmax) {
414 if (fs_len >= this_bs || k == 0)
431 static int count_se_option(uint64_t limit, struct aec_stream *strm)
435 struct internal_state *state = strm->state;
439 for (i = 0; i < strm->block_size; i+= 2) {
440 d = (uint64_t)state->block_p[i]
441 + (uint64_t)state->block_p[i + 1];
442 /* we have to worry about overflow here */
447 len += d * (d + 1) / 2
448 + (uint64_t)state->block_p[i + 1];
454 static int m_select_code_option(struct aec_stream *strm)
456 uint64_t uncomp_len, split_len, se_len;
457 struct internal_state *state = strm->state;
459 uncomp_len = (strm->block_size - state->ref)
460 * strm->bit_per_sample;
461 split_len = count_splitting_option(strm);
462 se_len = count_se_option(split_len, strm);
464 if (split_len < uncomp_len) {
465 if (split_len < se_len)
466 return m_encode_splitting(strm);
468 return m_encode_se(strm);
470 if (uncomp_len <= se_len)
471 return m_encode_uncomp(strm);
473 return m_encode_se(strm);
477 static int m_encode_splitting(struct aec_stream *strm)
480 struct internal_state *state = strm->state;
483 emit(state, k + 1, state->id_len);
487 emit(state, state->block_p[0], strm->bit_per_sample);
488 for (i = 1; i < strm->block_size; i++)
489 emitfs(state, state->block_p[i] >> k);
491 emitblock_1(strm, k);
495 for (i = 0; i < strm->block_size; i++)
496 emitfs(state, state->block_p[i] >> k);
498 emitblock_0(strm, k);
501 return m_flush_block(strm);
504 static int m_encode_uncomp(struct aec_stream *strm)
506 struct internal_state *state = strm->state;
508 emit(state, (1U << state->id_len) - 1, state->id_len);
509 emitblock_0(strm, strm->bit_per_sample);
511 return m_flush_block(strm);
514 static int m_encode_se(struct aec_stream *strm)
518 struct internal_state *state = strm->state;
520 emit(state, 1, state->id_len + 1);
522 emit(state, state->block_p[0], strm->bit_per_sample);
524 for (i = 0; i < strm->block_size; i+= 2) {
525 d = state->block_p[i] + state->block_p[i + 1];
526 emitfs(state, d * (d + 1) / 2 + state->block_p[i + 1]);
529 return m_flush_block(strm);
532 static int m_encode_zero(struct aec_stream *strm)
534 struct internal_state *state = strm->state;
536 emit(state, 0, state->id_len + 1);
539 emit(state, state->zero_ref_sample, strm->bit_per_sample);
541 if (state->zero_blocks == ROS)
543 else if (state->zero_blocks >= 5)
544 emitfs(state, state->zero_blocks);
546 emitfs(state, state->zero_blocks - 1);
548 state->zero_blocks = 0;
549 return m_flush_block(strm);
552 static int m_flush_block(struct aec_stream *strm)
555 Flush block in direct_out mode by updating counters.
557 Fall back to slow flushing if in buffered mode.
560 struct internal_state *state = strm->state;
562 if (state->direct_out) {
563 n = state->cds_p - strm->next_out;
565 strm->avail_out -= n;
566 strm->total_out += n;
567 state->mode = m_get_block;
572 state->mode = m_flush_block_cautious;
576 static int m_flush_block_cautious(struct aec_stream *strm)
579 Slow and restartable flushing
581 struct internal_state *state = strm->state;
583 while(state->cds_buf + state->i < state->cds_p) {
584 if (strm->avail_out == 0)
587 *strm->next_out++ = state->cds_buf[state->i];
592 state->mode = m_get_block;
602 int aec_encode_init(struct aec_stream *strm)
604 struct internal_state *state;
606 if (strm->bit_per_sample > 32 || strm->bit_per_sample == 0)
607 return AEC_CONF_ERROR;
609 if (strm->block_size != 8
610 && strm->block_size != 16
611 && strm->block_size != 32
612 && strm->block_size != 64)
613 return AEC_CONF_ERROR;
615 if (strm->rsi > 4096)
616 return AEC_CONF_ERROR;
618 state = (struct internal_state *)malloc(sizeof(struct internal_state));
620 return AEC_MEM_ERROR;
622 memset(state, 0, sizeof(struct internal_state));
625 if (strm->bit_per_sample > 16) {
626 /* 24/32 input bit settings */
629 if (strm->bit_per_sample <= 24
630 && strm->flags & AEC_DATA_3BYTE) {
631 state->block_len = 3 * strm->block_size;
632 if (strm->flags & AEC_DATA_MSB) {
633 state->get_sample = get_msb_24;
634 state->get_rsi = get_rsi_msb_24;
636 state->get_sample = get_lsb_24;
637 state->get_rsi = get_rsi_lsb_24;
640 state->block_len = 4 * strm->block_size;
641 if (strm->flags & AEC_DATA_MSB) {
642 state->get_sample = get_msb_32;
643 state->get_rsi = get_rsi_msb_32;
645 state->get_sample = get_lsb_32;
646 state->get_rsi = get_rsi_lsb_32;
650 else if (strm->bit_per_sample > 8) {
651 /* 16 bit settings */
653 state->block_len = 2 * strm->block_size;
655 if (strm->flags & AEC_DATA_MSB) {
656 state->get_sample = get_msb_16;
657 state->get_rsi = get_rsi_msb_16;
659 state->get_sample = get_lsb_16;
660 state->get_rsi = get_rsi_lsb_16;
665 state->block_len = strm->block_size;
667 state->get_sample = get_8;
668 state->get_rsi = get_rsi_8;
671 if (strm->flags & AEC_DATA_SIGNED) {
672 state->xmin = -(1ULL << (strm->bit_per_sample - 1));
673 state->xmax = (1ULL << (strm->bit_per_sample - 1)) - 1;
674 state->preprocess = preprocess_signed;
677 state->xmax = (1ULL << strm->bit_per_sample) - 1;
678 state->preprocess = preprocess_unsigned;
681 state->kmax = (1U << state->id_len) - 3;
683 state->block_buf = (uint32_t *)malloc(strm->rsi
686 if (state->block_buf == NULL)
687 return AEC_MEM_ERROR;
689 state->block_p = state->block_buf;
691 /* Largest possible CDS according to specs */
692 state->cds_len = (5 + 64 * 32) / 8 + 3;
693 state->cds_buf = (uint8_t *)malloc(state->cds_len);
694 if (state->cds_buf == NULL)
695 return AEC_MEM_ERROR;
700 state->cds_p = state->cds_buf;
703 state->mode = m_get_block;
708 int aec_encode(struct aec_stream *strm, int flush)
711 Finite-state machine implementation of the adaptive entropy
715 struct internal_state *state;
717 state->flush = flush;
719 while (state->mode(strm) == M_CONTINUE);
721 if (state->direct_out) {
722 n = state->cds_p - strm->next_out;
724 strm->avail_out -= n;
725 strm->total_out += n;
727 *state->cds_buf = *state->cds_p;
728 state->cds_p = state->cds_buf;
729 state->direct_out = 0;
734 int aec_encode_end(struct aec_stream *strm)
736 struct internal_state *state = strm->state;
738 free(state->block_buf);
739 free(state->cds_buf);
744 int aec_buffer_encode(struct aec_stream *strm)
748 status = aec_encode_init(strm);
749 if (status != AEC_OK)
751 status = aec_encode(strm, AEC_FLUSH);
752 if (strm->avail_in > 0 || strm->avail_out == 0)
753 status = AEC_DATA_ERROR;
755 aec_encode_end(strm);