4 * @author Mathis Rosenhauer, Deutsches Klimarechenzentrum
5 * @author Moritz Hanke, Deutsches Klimarechenzentrum
6 * @author Joerg Behrens, Deutsches Klimarechenzentrum
7 * @author Luis Kornblueh, Max-Planck-Institut fuer Meteorologie
10 * Copyright 2012 - 2014
12 * Mathis Rosenhauer, Luis Kornblueh
16 * Deutsches Klimarechenzentrum GmbH Max-Planck-Institut fuer Meteorologie
17 * Bundesstr. 45a Bundesstr. 53
18 * 20146 Hamburg 20146 Hamburg
21 * All rights reserved.
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
27 * 1. Redistributions of source code must retain the above copyright
28 * notice, this list of conditions and the following disclaimer.
29 * 2. Redistributions in binary form must reproduce the above
30 * copyright notice, this list of conditions and the following
31 * disclaimer in the documentation and/or other materials provided
32 * with the distribution.
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
37 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
38 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
39 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
40 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
41 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
42 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
43 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
44 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
45 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * @section DESCRIPTION
49 * Adaptive Entropy Encoder
50 * Based on CCSDS documents 121.0-B-2 and 120.0-G-3
60 #include "encode_accessors.h"
62 static int m_get_block(struct aec_stream *strm);
64 static inline void emit(struct internal_state *state,
65 uint32_t data, int bits)
68 Emit sequence of bits.
71 if (bits <= state->bits) {
73 *state->cds += data << state->bits;
76 *state->cds++ += (uint64_t)data >> bits;
80 *state->cds++ = data >> bits;
83 state->bits = 8 - bits;
84 *state->cds = data << state->bits;
88 static inline void emitfs(struct internal_state *state, int fs)
91 Emits a fundamental sequence.
93 fs zero bits followed by one 1 bit.
97 if (fs < state->bits) {
98 state->bits -= fs + 1;
99 *state->cds += 1U << state->bits;
109 static inline void copy64(uint8_t *dst, uint64_t src)
121 static inline void emitblock_fs(struct aec_stream *strm, int k, int ref)
124 int used; /* used bits in 64 bit accumulator */
125 uint64_t acc; /* accumulator */
126 struct internal_state *state = strm->state;
128 acc = (uint64_t)*state->cds << 56;
129 used = 7 - state->bits;
131 for (i = ref; i < strm->block_size; i++) {
132 used += (state->block[i] >> k) + 1;
134 copy64(state->cds, acc);
139 acc |= 1ULL << (63 - used);
142 copy64(state->cds, acc);
143 state->cds += used >> 3;
144 state->bits = 7 - (used & 7);
147 static inline void emitblock(struct aec_stream *strm, int k, int ref)
150 Emit the k LSB of a whole block of input data.
154 struct internal_state *state = strm->state;
155 uint32_t *in = state->block + ref;
156 uint32_t *in_end = state->block + strm->block_size;
157 uint64_t mask = (1ULL << k) - 1;
158 uint8_t *o = state->cds;
167 while (p > k && in < in_end) {
169 a += ((uint64_t)(*in++) & mask) << p;
238 static void preprocess_unsigned(struct aec_stream *strm)
241 Preprocess RSI of unsigned samples.
243 Combining preprocessing and converting to uint32_t in one loop
244 is slower due to the data dependance on x_i-1.
248 struct internal_state *state = strm->state;
249 const uint32_t *restrict x = state->data_raw;
250 uint32_t *restrict d = state->data_pp;
251 uint32_t xmax = state->xmax;
252 uint32_t rsi = strm->rsi * strm->block_size - 1;
256 for (i = 0; i < rsi; i++) {
257 if (x[i + 1] >= x[i]) {
265 if (D <= xmax - x[i])
266 d[i + 1] = 2 * D - 1;
268 d[i + 1] = xmax - x[i + 1];
272 state->uncomp_len = (strm->block_size - 1) * strm->bits_per_sample;
275 static void preprocess_signed(struct aec_stream *strm)
278 Preprocess RSI of signed samples.
282 struct internal_state *state = strm->state;
283 uint32_t *restrict d = state->data_pp;
284 int32_t *restrict x = (int32_t *)state->data_raw;
285 uint64_t m = 1ULL << (strm->bits_per_sample - 1);
286 int64_t xmax = state->xmax;
287 int64_t xmin = state->xmin;
288 uint32_t rsi = strm->rsi * strm->block_size - 1;
291 d[0] = (uint32_t)x[0];
292 x[0] = (x[0] ^ m) - m;
294 for (i = 0; i < rsi; i++) {
295 x[i + 1] = (x[i + 1] ^ m) - m;
296 if (x[i + 1] < x[i]) {
297 D = (int64_t)x[i] - x[i + 1];
298 if (D <= xmax - x[i])
299 d[i + 1] = 2 * D - 1;
301 d[i + 1] = xmax - x[i + 1];
303 D = (int64_t)x[i + 1] - x[i];
304 if (D <= x[i] - xmin)
307 d[i + 1] = x[i + 1] - xmin;
311 state->uncomp_len = (strm->block_size - 1) * strm->bits_per_sample;
314 static inline uint64_t block_fs(struct aec_stream *strm, int k)
317 Sum FS of all samples in block for given splitting position.
322 struct internal_state *state = strm->state;
324 for (i = 0; i < strm->block_size; i++)
325 fs += (uint64_t)(state->block[i] >> k);
328 fs -= (uint64_t)(state->block[0] >> k);
333 static uint32_t assess_splitting_option(struct aec_stream *strm)
336 Length of CDS encoded with splitting option and optimal k.
338 In Rice coding each sample in a block of samples is split at
339 the same position into k LSB and bits_per_sample - k MSB. The
340 LSB part is left binary and the MSB part is coded as a
341 fundamental sequence a.k.a. unary (see CCSDS 121.0-B-2). The
342 function of the length of the Coded Data Set (CDS) depending on
343 k has exactly one minimum (see A. Kiely, IPN Progress Report
346 To find that minimum with only a few costly evaluations of the
347 CDS length, we start with the k of the previous CDS. K is
348 increased and the CDS length evaluated. If the CDS length gets
349 smaller, then we are moving towards the minimum. If the length
350 increases, then the minimum will be found with smaller k.
352 For increasing k we know that we will gain block_size bits in
353 length through the larger binary part. If the FS lenth is less
354 than the block size then a reduced FS part can't compensate the
355 larger binary part. So we know that the CDS for k+1 will be
356 larger than for k without actually computing the length. An
357 analogue check can be done for decreasing k.
362 int this_bs; /* Block size of current block */
363 int no_turn; /* 1 if we shouldn't reverse */
364 int dir; /* Direction, 1 means increasing k, 0 decreasing k */
365 uint64_t len; /* CDS length for current k */
366 uint64_t len_min; /* CDS length minimum so far */
367 uint64_t fs_len; /* Length of FS part (not including 1s) */
369 struct internal_state *state = strm->state;
371 this_bs = strm->block_size - state->ref;
372 len_min = UINT64_MAX;
373 k = k_min = state->k;
378 fs_len = block_fs(strm, k);
379 len = fs_len + this_bs * (k + 1);
382 if (len_min < UINT64_MAX)
389 if (fs_len < this_bs || k >= state->kmax) {
399 if (fs_len >= this_bs || k == 0)
416 static uint32_t assess_se_option(struct aec_stream *strm)
419 Length of CDS encoded with Second Extension option.
421 If length is above limit just return UINT32_MAX.
427 struct internal_state *state = strm->state;
431 for (i = 0; i < strm->block_size; i+= 2) {
432 d = (uint64_t)state->block[i]
433 + (uint64_t)state->block[i + 1];
434 /* we have to worry about overflow here */
435 if (d > state->uncomp_len) {
439 len += d * (d + 1) / 2 + state->block[i + 1] + 1;
445 static void init_output(struct aec_stream *strm)
448 Direct output to next_out if next_out can hold a Coded Data
449 Set, use internal buffer otherwise.
452 struct internal_state *state = strm->state;
454 if (strm->avail_out > CDSLEN) {
455 if (!state->direct_out) {
456 state->direct_out = 1;
457 *strm->next_out = *state->cds;
458 state->cds = strm->next_out;
461 if (state->zero_blocks == 0 || state->direct_out) {
462 /* copy leftover from last block */
463 *state->cds_buf = *state->cds;
464 state->cds = state->cds_buf;
466 state->direct_out = 0;
476 static int m_flush_block_resumable(struct aec_stream *strm)
479 Slow and restartable flushing
481 struct internal_state *state = strm->state;
483 int n = MIN(state->cds - state->cds_buf - state->i, strm->avail_out);
484 memcpy(strm->next_out, state->cds_buf + state->i, n);
486 strm->avail_out -= n;
489 if (strm->avail_out == 0) {
492 state->mode = m_get_block;
497 static int m_flush_block(struct aec_stream *strm)
500 Flush block in direct_out mode by updating counters.
502 Fall back to slow flushing if in buffered mode.
505 struct internal_state *state = strm->state;
507 #ifdef ENABLE_RSI_PADDING
508 if (state->blocks_avail == 0
509 && strm->flags & AEC_PAD_RSI
510 && state->block_nonzero == 0
512 emit(state, 0, state->bits % 8);
515 if (state->direct_out) {
516 n = state->cds - strm->next_out;
518 strm->avail_out -= n;
519 state->mode = m_get_block;
524 state->mode = m_flush_block_resumable;
528 static int m_encode_splitting(struct aec_stream *strm)
530 struct internal_state *state = strm->state;
533 emit(state, k + 1, state->id_len);
536 emit(state, state->block[0], strm->bits_per_sample);
538 emitblock_fs(strm, k, state->ref);
540 emitblock(strm, k, state->ref);
542 return m_flush_block(strm);
545 static int m_encode_uncomp(struct aec_stream *strm)
547 struct internal_state *state = strm->state;
549 emit(state, (1U << state->id_len) - 1, state->id_len);
550 emitblock(strm, strm->bits_per_sample, 0);
552 return m_flush_block(strm);
555 static int m_encode_se(struct aec_stream *strm)
559 struct internal_state *state = strm->state;
561 emit(state, 1, state->id_len + 1);
563 emit(state, state->block[0], strm->bits_per_sample);
565 for (i = 0; i < strm->block_size; i+= 2) {
566 d = state->block[i] + state->block[i + 1];
567 emitfs(state, d * (d + 1) / 2 + state->block[i + 1]);
570 return m_flush_block(strm);
573 static int m_encode_zero(struct aec_stream *strm)
575 struct internal_state *state = strm->state;
577 emit(state, 0, state->id_len + 1);
580 emit(state, state->zero_ref_sample, strm->bits_per_sample);
582 if (state->zero_blocks == ROS)
584 else if (state->zero_blocks >= 5)
585 emitfs(state, state->zero_blocks);
587 emitfs(state, state->zero_blocks - 1);
589 state->zero_blocks = 0;
590 return m_flush_block(strm);
593 static int m_select_code_option(struct aec_stream *strm)
596 Decide which code option to use.
601 struct internal_state *state = strm->state;
603 if (state->id_len > 1)
604 split_len = assess_splitting_option(strm);
606 split_len = UINT32_MAX;
607 se_len = assess_se_option(strm);
609 if (split_len < state->uncomp_len) {
610 if (split_len < se_len)
611 return m_encode_splitting(strm);
613 return m_encode_se(strm);
615 if (state->uncomp_len <= se_len)
616 return m_encode_uncomp(strm);
618 return m_encode_se(strm);
622 static int m_check_zero_block(struct aec_stream *strm)
625 Check if input block is all zero.
627 Aggregate consecutive zero blocks until we find !0 or reach the
628 end of a segment or RSI.
632 struct internal_state *state = strm->state;
633 uint32_t *p = state->block;
635 for (i = state->ref; i < strm->block_size; i++)
639 if (i < strm->block_size) {
640 if (state->zero_blocks) {
641 /* The current block isn't zero but we have to emit a
642 * previous zero block first. The current block will be
643 * flagged and handled later.
645 state->block_nonzero = 1;
646 state->mode = m_encode_zero;
649 state->mode = m_select_code_option;
652 state->zero_blocks++;
653 if (state->zero_blocks == 1) {
654 state->zero_ref = state->ref;
655 state->zero_ref_sample = state->block[0];
657 if (state->blocks_avail == 0
658 || (strm->rsi - state->blocks_avail) % 64 == 0) {
659 if (state->zero_blocks > 4)
660 state->zero_blocks = ROS;
661 state->mode = m_encode_zero;
664 state->mode = m_get_block;
669 static int m_get_rsi_resumable(struct aec_stream *strm)
672 Get RSI while input buffer is short.
674 Let user provide more input. Once we got all input pad buffer
678 struct internal_state *state = strm->state;
681 if (strm->avail_in >= state->bytes_per_sample) {
682 state->data_raw[state->i] = state->get_sample(strm);
684 if (state->flush == AEC_FLUSH) {
686 state->blocks_avail = state->i / strm->block_size - 1;
687 if (state->i % strm->block_size)
688 state->blocks_avail++;
690 state->data_raw[state->i] =
691 state->data_raw[state->i - 1];
692 while(++state->i < strm->rsi * strm->block_size);
694 emit(state, 0, state->bits);
695 if (strm->avail_out > 0) {
696 if (!state->direct_out)
697 *strm->next_out++ = *state->cds;
706 } while (++state->i < strm->rsi * strm->block_size);
708 if (strm->flags & AEC_DATA_PREPROCESS)
709 state->preprocess(strm);
711 return m_check_zero_block(strm);
714 static int m_get_block(struct aec_stream *strm)
717 Provide the next block of preprocessed input data.
719 Pull in a whole Reference Sample Interval (RSI) of data if
720 block buffer is empty.
723 struct internal_state *state = strm->state;
727 if (state->block_nonzero) {
728 state->block_nonzero = 0;
729 state->mode = m_select_code_option;
733 if (state->blocks_avail == 0) {
734 state->blocks_avail = strm->rsi - 1;
735 state->block = state->data_pp;
737 if (strm->avail_in >= state->rsi_len) {
738 state->get_rsi(strm);
739 if (strm->flags & AEC_DATA_PREPROCESS)
740 state->preprocess(strm);
742 return m_check_zero_block(strm);
745 state->mode = m_get_rsi_resumable;
750 state->uncomp_len = strm->block_size * strm->bits_per_sample;
752 state->block += strm->block_size;
753 state->blocks_avail--;
754 return m_check_zero_block(strm);
765 int aec_encode_init(struct aec_stream *strm)
767 struct internal_state *state;
769 if (strm->bits_per_sample > 32 || strm->bits_per_sample == 0)
770 return AEC_CONF_ERROR;
772 if (strm->block_size != 8
773 && strm->block_size != 16
774 && strm->block_size != 32
775 && strm->block_size != 64)
776 return AEC_CONF_ERROR;
778 if (strm->rsi > 4096)
779 return AEC_CONF_ERROR;
781 state = malloc(sizeof(struct internal_state));
783 return AEC_MEM_ERROR;
785 memset(state, 0, sizeof(struct internal_state));
788 if (strm->bits_per_sample > 16) {
789 /* 24/32 input bit settings */
792 if (strm->bits_per_sample <= 24
793 && strm->flags & AEC_DATA_3BYTE) {
794 state->bytes_per_sample = 3;
795 if (strm->flags & AEC_DATA_MSB) {
796 state->get_sample = aec_get_msb_24;
797 state->get_rsi = aec_get_rsi_msb_24;
799 state->get_sample = aec_get_lsb_24;
800 state->get_rsi = aec_get_rsi_lsb_24;
803 state->bytes_per_sample = 4;
804 if (strm->flags & AEC_DATA_MSB) {
805 state->get_sample = aec_get_msb_32;
806 state->get_rsi = aec_get_rsi_msb_32;
808 state->get_sample = aec_get_lsb_32;
809 state->get_rsi = aec_get_rsi_lsb_32;
813 else if (strm->bits_per_sample > 8) {
814 /* 16 bit settings */
816 state->bytes_per_sample = 2;
818 if (strm->flags & AEC_DATA_MSB) {
819 state->get_sample = aec_get_msb_16;
820 state->get_rsi = aec_get_rsi_msb_16;
822 state->get_sample = aec_get_lsb_16;
823 state->get_rsi = aec_get_rsi_lsb_16;
827 if (strm->flags & AEC_RESTRICTED) {
828 if (strm->bits_per_sample <= 4) {
829 if (strm->bits_per_sample <= 2)
834 return AEC_CONF_ERROR;
839 state->bytes_per_sample = 1;
841 state->get_sample = aec_get_8;
842 state->get_rsi = aec_get_rsi_8;
844 state->rsi_len = strm->rsi * strm->block_size * state->bytes_per_sample;
846 if (strm->flags & AEC_DATA_SIGNED) {
847 state->xmin = -(1ULL << (strm->bits_per_sample - 1));
848 state->xmax = (1ULL << (strm->bits_per_sample - 1)) - 1;
849 state->preprocess = preprocess_signed;
852 state->xmax = (1ULL << strm->bits_per_sample) - 1;
853 state->preprocess = preprocess_unsigned;
856 state->kmax = (1U << state->id_len) - 3;
858 state->data_pp = malloc(strm->rsi
861 if (state->data_pp == NULL)
862 return AEC_MEM_ERROR;
864 if (strm->flags & AEC_DATA_PREPROCESS) {
865 state->data_raw = malloc(strm->rsi
868 if (state->data_raw == NULL)
869 return AEC_MEM_ERROR;
871 state->data_raw = state->data_pp;
874 state->block = state->data_pp;
879 state->cds = state->cds_buf;
882 state->mode = m_get_block;
887 int aec_encode(struct aec_stream *strm, int flush)
890 Finite-state machine implementation of the adaptive entropy
894 struct internal_state *state = strm->state;
896 state->flush = flush;
897 strm->total_in += strm->avail_in;
898 strm->total_out += strm->avail_out;
900 while (state->mode(strm) == M_CONTINUE);
902 if (state->direct_out) {
903 n = state->cds - strm->next_out;
905 strm->avail_out -= n;
907 *state->cds_buf = *state->cds;
908 state->cds = state->cds_buf;
909 state->direct_out = 0;
911 strm->total_in -= strm->avail_in;
912 strm->total_out -= strm->avail_out;
916 int aec_encode_end(struct aec_stream *strm)
918 struct internal_state *state = strm->state;
920 if (strm->flags & AEC_DATA_PREPROCESS)
921 free(state->data_raw);
922 free(state->data_pp);
927 int aec_buffer_encode(struct aec_stream *strm)
931 status = aec_encode_init(strm);
932 if (status != AEC_OK)
934 status = aec_encode(strm, AEC_FLUSH);
935 if (strm->avail_in > 0)
936 status = AEC_DATA_ERROR;
938 aec_encode_end(strm);