4 * @author Mathis Rosenhauer, Deutsches Klimarechenzentrum
5 * @author Moritz Hanke, Deutsches Klimarechenzentrum
6 * @author Joerg Behrens, Deutsches Klimarechenzentrum
7 * @author Luis Kornblueh, Max-Planck-Institut fuer Meteorologie
10 * Copyright 2012 - 2014
12 * Mathis Rosenhauer, Luis Kornblueh
16 * Deutsches Klimarechenzentrum GmbH Max-Planck-Institut fuer Meteorologie
17 * Bundesstr. 45a Bundesstr. 53
18 * 20146 Hamburg 20146 Hamburg
21 * All rights reserved.
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
27 * 1. Redistributions of source code must retain the above copyright
28 * notice, this list of conditions and the following disclaimer.
29 * 2. Redistributions in binary form must reproduce the above
30 * copyright notice, this list of conditions and the following
31 * disclaimer in the documentation and/or other materials provided
32 * with the distribution.
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
37 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
38 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
39 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
40 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
41 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
42 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
43 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
44 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
45 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * @section DESCRIPTION
49 * Adaptive Entropy Encoder
50 * Based on CCSDS documents 121.0-B-2 and 120.0-G-2
67 #include "encode_accessors.h"
69 static int m_get_block(struct aec_stream *strm);
71 static inline void emit(struct internal_state *state,
72 uint32_t data, int bits)
75 Emit sequence of bits.
78 if (bits <= state->bits) {
80 *state->cds += data << state->bits;
83 *state->cds++ += (uint64_t)data >> bits;
87 *state->cds++ = data >> bits;
90 state->bits = 8 - bits;
91 *state->cds = data << state->bits;
95 static inline void emitfs(struct internal_state *state, int fs)
98 Emits a fundamental sequence.
100 fs zero bits followed by one 1 bit.
104 if (fs < state->bits) {
105 state->bits -= fs + 1;
106 *state->cds += 1U << state->bits;
116 static inline void copy64(uint8_t *dst, uint64_t src)
128 static inline void emitblock_fs(struct aec_stream *strm, int k, int ref)
131 int used; /* used bits in 64 bit accumulator */
132 uint64_t acc; /* accumulator */
133 struct internal_state *state = strm->state;
135 acc = (uint64_t)*state->cds << 56;
136 used = 7 - state->bits;
138 for (i = ref; i < strm->block_size; i++) {
139 used += (state->block[i] >> k) + 1;
141 copy64(state->cds, acc);
146 acc |= 1ULL << (63 - used);
149 copy64(state->cds, acc);
150 state->cds += used >> 3;
151 state->bits = 7 - (used & 7);
154 static inline void emitblock(struct aec_stream *strm, int k, int ref)
157 Emit the k LSB of a whole block of input data.
161 struct internal_state *state = strm->state;
162 uint32_t *in = state->block + ref;
163 uint32_t *in_end = state->block + strm->block_size;
164 uint64_t mask = (1ULL << k) - 1;
165 uint8_t *o = state->cds;
174 while (p > k && in < in_end) {
176 a += ((uint64_t)(*in++) & mask) << p;
245 static void preprocess_unsigned(struct aec_stream *strm)
248 Preprocess RSI of unsigned samples.
250 Combining preprocessing and converting to uint32_t in one loop
251 is slower due to the data dependance on x_i-1.
255 struct internal_state *state = strm->state;
256 const uint32_t *restrict x = state->data_raw;
257 uint32_t *restrict d = state->data_pp;
258 uint32_t xmax = state->xmax;
259 uint32_t rsi = strm->rsi * strm->block_size - 1;
263 for (i = 0; i < rsi; i++) {
264 if (x[i + 1] >= x[i]) {
272 if (D <= xmax - x[i])
273 d[i + 1] = 2 * D - 1;
275 d[i + 1] = xmax - x[i + 1];
279 state->uncomp_len = (strm->block_size - 1) * strm->bits_per_sample;
282 static void preprocess_signed(struct aec_stream *strm)
285 Preprocess RSI of signed samples.
289 struct internal_state *state = strm->state;
290 uint32_t *restrict d = state->data_pp;
291 int32_t *restrict x = (int32_t *)state->data_raw;
292 uint64_t m = 1ULL << (strm->bits_per_sample - 1);
293 int64_t xmax = state->xmax;
294 int64_t xmin = state->xmin;
295 uint32_t rsi = strm->rsi * strm->block_size - 1;
298 d[0] = (uint32_t)x[0];
299 x[0] = (x[0] ^ m) - m;
301 for (i = 0; i < rsi; i++) {
302 x[i + 1] = (x[i + 1] ^ m) - m;
303 if (x[i + 1] < x[i]) {
304 D = (int64_t)x[i] - x[i + 1];
305 if (D <= xmax - x[i])
306 d[i + 1] = 2 * D - 1;
308 d[i + 1] = xmax - x[i + 1];
310 D = (int64_t)x[i + 1] - x[i];
311 if (D <= x[i] - xmin)
314 d[i + 1] = x[i + 1] - xmin;
318 state->uncomp_len = (strm->block_size - 1) * strm->bits_per_sample;
321 static inline uint64_t block_fs(struct aec_stream *strm, int k)
324 Sum FS of all samples in block for given splitting position.
329 struct internal_state *state = strm->state;
331 for (i = 0; i < strm->block_size; i++)
332 fs += (uint64_t)(state->block[i] >> k);
335 fs -= (uint64_t)(state->block[0] >> k);
340 static uint32_t assess_splitting_option(struct aec_stream *strm)
343 Length of CDS encoded with splitting option and optimal k.
345 In Rice coding each sample in a block of samples is split at
346 the same position into k LSB and bits_per_sample - k MSB. The
347 LSB part is left binary and the MSB part is coded as a
348 fundamental sequence a.k.a. unary (see CCSDS 121.0-B-2). The
349 function of the length of the Coded Data Set (CDS) depending on
350 k has exactly one minimum (see A. Kiely, IPN Progress Report
353 To find that minimum with only a few costly evaluations of the
354 CDS length, we start with the k of the previous CDS. K is
355 increased and the CDS length evaluated. If the CDS length gets
356 smaller, then we are moving towards the minimum. If the length
357 increases, then the minimum will be found with smaller k.
359 For increasing k we know that we will gain block_size bits in
360 length through the larger binary part. If the FS lenth is less
361 than the block size then a reduced FS part can't compensate the
362 larger binary part. So we know that the CDS for k+1 will be
363 larger than for k without actually computing the length. An
364 analogue check can be done for decreasing k.
369 int this_bs; /* Block size of current block */
370 int no_turn; /* 1 if we shouldn't reverse */
371 int dir; /* Direction, 1 means increasing k, 0 decreasing k */
372 uint64_t len; /* CDS length for current k */
373 uint64_t len_min; /* CDS length minimum so far */
374 uint64_t fs_len; /* Length of FS part (not including 1s) */
376 struct internal_state *state = strm->state;
378 this_bs = strm->block_size - state->ref;
379 len_min = UINT64_MAX;
380 k = k_min = state->k;
385 fs_len = block_fs(strm, k);
386 len = fs_len + this_bs * (k + 1);
389 if (len_min < UINT64_MAX)
396 if (fs_len < this_bs || k >= state->kmax) {
406 if (fs_len >= this_bs || k == 0)
423 static uint32_t assess_se_option(struct aec_stream *strm)
426 Length of CDS encoded with Second Extension option.
428 If length is above limit just return UINT32_MAX.
434 struct internal_state *state = strm->state;
438 for (i = 0; i < strm->block_size; i+= 2) {
439 d = (uint64_t)state->block[i]
440 + (uint64_t)state->block[i + 1];
441 /* we have to worry about overflow here */
442 if (d > state->uncomp_len) {
446 len += d * (d + 1) / 2 + state->block[i + 1];
452 static void init_output(struct aec_stream *strm)
455 Direct output to next_out if next_out can hold a Coded Data
456 Set, use internal buffer otherwise.
459 struct internal_state *state = strm->state;
461 if (strm->avail_out > CDSLEN) {
462 if (!state->direct_out) {
463 state->direct_out = 1;
464 *strm->next_out = *state->cds;
465 state->cds = strm->next_out;
468 if (state->zero_blocks == 0 || state->direct_out) {
469 /* copy leftover from last block */
470 *state->cds_buf = *state->cds;
471 state->cds = state->cds_buf;
473 state->direct_out = 0;
483 static int m_flush_block_resumable(struct aec_stream *strm)
486 Slow and restartable flushing
488 struct internal_state *state = strm->state;
490 int n = MIN(state->cds - state->cds_buf - state->i, strm->avail_out);
491 memcpy(strm->next_out, state->cds_buf + state->i, n);
493 strm->avail_out -= n;
496 if (strm->avail_out == 0) {
499 state->mode = m_get_block;
504 static int m_flush_block(struct aec_stream *strm)
507 Flush block in direct_out mode by updating counters.
509 Fall back to slow flushing if in buffered mode.
512 struct internal_state *state = strm->state;
514 #ifdef ENABLE_RSI_PADDING
515 if (state->blocks_avail == 0
516 && strm->flags & AEC_PAD_RSI
517 && state->block_nonzero == 0
519 emit(state, 0, state->bits % 8);
522 if (state->direct_out) {
523 n = state->cds - strm->next_out;
525 strm->avail_out -= n;
526 state->mode = m_get_block;
531 state->mode = m_flush_block_resumable;
535 static int m_encode_splitting(struct aec_stream *strm)
537 struct internal_state *state = strm->state;
540 emit(state, k + 1, state->id_len);
543 emit(state, state->block[0], strm->bits_per_sample);
545 emitblock_fs(strm, k, state->ref);
547 emitblock(strm, k, state->ref);
549 return m_flush_block(strm);
552 static int m_encode_uncomp(struct aec_stream *strm)
554 struct internal_state *state = strm->state;
556 emit(state, (1U << state->id_len) - 1, state->id_len);
557 emitblock(strm, strm->bits_per_sample, 0);
559 return m_flush_block(strm);
562 static int m_encode_se(struct aec_stream *strm)
566 struct internal_state *state = strm->state;
568 emit(state, 1, state->id_len + 1);
570 emit(state, state->block[0], strm->bits_per_sample);
572 for (i = 0; i < strm->block_size; i+= 2) {
573 d = state->block[i] + state->block[i + 1];
574 emitfs(state, d * (d + 1) / 2 + state->block[i + 1]);
577 return m_flush_block(strm);
580 static int m_encode_zero(struct aec_stream *strm)
582 struct internal_state *state = strm->state;
584 emit(state, 0, state->id_len + 1);
587 emit(state, state->zero_ref_sample, strm->bits_per_sample);
589 if (state->zero_blocks == ROS)
591 else if (state->zero_blocks >= 5)
592 emitfs(state, state->zero_blocks);
594 emitfs(state, state->zero_blocks - 1);
596 state->zero_blocks = 0;
597 return m_flush_block(strm);
600 static int m_select_code_option(struct aec_stream *strm)
603 Decide which code option to use.
608 struct internal_state *state = strm->state;
610 split_len = assess_splitting_option(strm);
611 se_len = assess_se_option(strm);
613 if (split_len < state->uncomp_len) {
614 if (split_len < se_len)
615 return m_encode_splitting(strm);
617 return m_encode_se(strm);
619 if (state->uncomp_len <= se_len)
620 return m_encode_uncomp(strm);
622 return m_encode_se(strm);
626 static int m_check_zero_block(struct aec_stream *strm)
629 Check if input block is all zero.
631 Aggregate consecutive zero blocks until we find !0 or reach the
632 end of a segment or RSI.
636 struct internal_state *state = strm->state;
637 uint32_t *p = state->block;
639 for (i = state->ref; i < strm->block_size; i++)
643 if (i < strm->block_size) {
644 if (state->zero_blocks) {
645 /* The current block isn't zero but we have to emit a
646 * previous zero block first. The current block will be
647 * flagged and handled later.
649 state->block_nonzero = 1;
650 state->mode = m_encode_zero;
653 state->mode = m_select_code_option;
656 state->zero_blocks++;
657 if (state->zero_blocks == 1) {
658 state->zero_ref = state->ref;
659 state->zero_ref_sample = state->block[0];
661 if (state->blocks_avail == 0
662 || (strm->rsi - state->blocks_avail) % 64 == 0) {
663 if (state->zero_blocks > 4)
664 state->zero_blocks = ROS;
665 state->mode = m_encode_zero;
668 state->mode = m_get_block;
673 static int m_get_rsi_resumable(struct aec_stream *strm)
676 Get RSI while input buffer is short.
678 Let user provide more input. Once we got all input pad buffer
682 struct internal_state *state = strm->state;
685 if (strm->avail_in >= state->bytes_per_sample) {
686 state->data_raw[state->i] = state->get_sample(strm);
688 if (state->flush == AEC_FLUSH) {
691 state->data_raw[state->i] =
692 state->data_raw[state->i - 1];
693 while(++state->i < strm->rsi * strm->block_size);
695 emit(state, 0, state->bits);
696 if (strm->avail_out > 0) {
697 if (!state->direct_out)
698 *strm->next_out++ = *state->cds;
707 } while (++state->i < strm->rsi * strm->block_size);
709 if (strm->flags & AEC_DATA_PREPROCESS)
710 state->preprocess(strm);
712 return m_check_zero_block(strm);
715 static int m_get_block(struct aec_stream *strm)
718 Provide the next block of preprocessed input data.
720 Pull in a whole Reference Sample Interval (RSI) of data if
721 block buffer is empty.
724 struct internal_state *state = strm->state;
728 if (state->block_nonzero) {
729 state->block_nonzero = 0;
730 state->mode = m_select_code_option;
734 if (state->blocks_avail == 0) {
735 state->blocks_avail = strm->rsi - 1;
736 state->block = state->data_pp;
738 if (strm->avail_in >= state->rsi_len) {
739 state->get_rsi(strm);
740 if (strm->flags & AEC_DATA_PREPROCESS)
741 state->preprocess(strm);
743 return m_check_zero_block(strm);
746 state->mode = m_get_rsi_resumable;
751 state->uncomp_len = strm->block_size * strm->bits_per_sample;
753 state->block += strm->block_size;
754 state->blocks_avail--;
755 return m_check_zero_block(strm);
766 int aec_encode_init(struct aec_stream *strm)
768 struct internal_state *state;
770 if (strm->bits_per_sample > 32 || strm->bits_per_sample == 0)
771 return AEC_CONF_ERROR;
773 if (strm->block_size != 8
774 && strm->block_size != 16
775 && strm->block_size != 32
776 && strm->block_size != 64)
777 return AEC_CONF_ERROR;
779 if (strm->rsi > 4096)
780 return AEC_CONF_ERROR;
782 state = malloc(sizeof(struct internal_state));
784 return AEC_MEM_ERROR;
786 memset(state, 0, sizeof(struct internal_state));
789 if (strm->bits_per_sample > 16) {
790 /* 24/32 input bit settings */
793 if (strm->bits_per_sample <= 24
794 && strm->flags & AEC_DATA_3BYTE) {
795 state->bytes_per_sample = 3;
796 if (strm->flags & AEC_DATA_MSB) {
797 state->get_sample = aec_get_msb_24;
798 state->get_rsi = aec_get_rsi_msb_24;
800 state->get_sample = aec_get_lsb_24;
801 state->get_rsi = aec_get_rsi_lsb_24;
804 state->bytes_per_sample = 4;
805 if (strm->flags & AEC_DATA_MSB) {
806 state->get_sample = aec_get_msb_32;
807 state->get_rsi = aec_get_rsi_msb_32;
809 state->get_sample = aec_get_lsb_32;
810 state->get_rsi = aec_get_rsi_lsb_32;
814 else if (strm->bits_per_sample > 8) {
815 /* 16 bit settings */
817 state->bytes_per_sample = 2;
819 if (strm->flags & AEC_DATA_MSB) {
820 state->get_sample = aec_get_msb_16;
821 state->get_rsi = aec_get_rsi_msb_16;
823 state->get_sample = aec_get_lsb_16;
824 state->get_rsi = aec_get_rsi_lsb_16;
828 if (strm->flags & AEC_RESTRICTED) {
829 if (strm->bits_per_sample <= 4) {
830 if (strm->bits_per_sample <= 2)
835 return AEC_CONF_ERROR;
840 state->bytes_per_sample = 1;
842 state->get_sample = aec_get_8;
843 state->get_rsi = aec_get_rsi_8;
845 state->rsi_len = strm->rsi * strm->block_size * state->bytes_per_sample;
847 if (strm->flags & AEC_DATA_SIGNED) {
848 state->xmin = -(1ULL << (strm->bits_per_sample - 1));
849 state->xmax = (1ULL << (strm->bits_per_sample - 1)) - 1;
850 state->preprocess = preprocess_signed;
853 state->xmax = (1ULL << strm->bits_per_sample) - 1;
854 state->preprocess = preprocess_unsigned;
857 state->kmax = (1U << state->id_len) - 3;
859 state->data_pp = malloc(strm->rsi
862 if (state->data_pp == NULL)
863 return AEC_MEM_ERROR;
865 if (strm->flags & AEC_DATA_PREPROCESS) {
866 state->data_raw = malloc(strm->rsi
869 if (state->data_raw == NULL)
870 return AEC_MEM_ERROR;
872 state->data_raw = state->data_pp;
875 state->block = state->data_pp;
880 state->cds = state->cds_buf;
883 state->mode = m_get_block;
888 int aec_encode(struct aec_stream *strm, int flush)
891 Finite-state machine implementation of the adaptive entropy
895 struct internal_state *state = strm->state;
897 state->flush = flush;
898 strm->total_in += strm->avail_in;
899 strm->total_out += strm->avail_out;
901 while (state->mode(strm) == M_CONTINUE);
903 if (state->direct_out) {
904 n = state->cds - strm->next_out;
906 strm->avail_out -= n;
908 *state->cds_buf = *state->cds;
909 state->cds = state->cds_buf;
910 state->direct_out = 0;
912 strm->total_in -= strm->avail_in;
913 strm->total_out -= strm->avail_out;
917 int aec_encode_end(struct aec_stream *strm)
919 struct internal_state *state = strm->state;
921 if (strm->flags & AEC_DATA_PREPROCESS)
922 free(state->data_raw);
923 free(state->data_pp);
928 int aec_buffer_encode(struct aec_stream *strm)
932 status = aec_encode_init(strm);
933 if (status != AEC_OK)
935 status = aec_encode(strm, AEC_FLUSH);
936 if (strm->avail_in > 0)
937 status = AEC_DATA_ERROR;
939 aec_encode_end(strm);