Obvious performance enhancements for encoding
author Mathis Rosenhauer <rosenhauer@dkrz.de>
Wed, 15 Aug 2012 15:43:50 +0000 (17:43 +0200)
committer Mathis Rosenhauer <rosenhauer@dkrz.de>
Fri, 24 Aug 2012 11:54:26 +0000 (13:54 +0200)
src/Makefile
src/aee.c
src/aee.h [new file with mode: 0644]
src/aee_mutators.c [new file with mode: 0644]
src/aee_mutators.h [new file with mode: 0644]
src/decode.c
src/encode.c
src/libae.h

index 7a90bbf..c3877cd 100644 (file)
@@ -1,9 +1,8 @@
 CC = gcc
-#CFLAGS = -g -pg -lc -O2 -Wall -fprofile-arcs -ftest-coverage -DPROFILE
-#CFLAGS = -g -pg -lc -O2 -Wall -fprofile-arcs -ftest-coverage -DUNROLL_BLOCK_8
-CFLAGS = -g -O3 -Wall
+#CFLAGS = -g -pg -lc -O0 -Wall -fprofile-arcs -ftest-coverage
+CFLAGS = -g -O3 -Wall -std=c99
 
-OBJS = aee.o aed.o sz_compat.o
+OBJS = aee.o aee_mutators.o aed.o sz_compat.o
 
 .PHONY : all clean check
 
@@ -23,7 +22,9 @@ libae.a: $(OBJS)
        -@ ($(RANLIB) $@ || true) >/dev/null 2>&1
 
 aed.o: libae.h
-aee.o: libae.h
+aee.o: aee_mutators.h aee.h libae.h
+aee_mutators.o: aee.h libae.h
+sz_compat.o: libae.h
 
 install: libae.a
        mkdir -p ../lib ../include
index 7cac51c..a404ec0 100644 (file)
--- a/src/aee.c
+++ b/src/aee.c
@@ -1,5 +1,12 @@
-/* Adaptive Entropy Encoder            */
-/* CCSDS 121.0-B-1 and CCSDS 120.0-G-2 */
+/**
+ * @file aee.c
+ * @author Mathis Rosenhauer, Deutsches Klimarechenzentrum
+ * @section DESCRIPTION
+ *
+ * Adaptive Entropy Encoder
+ * Based on CCSDS documents 121.0-B-1 and 120.0-G-2
+ *
+ */
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "libae.h"
+#include "aee.h"
+#include "aee_mutators.h"
 
+/* Marker for Remainder Of Segment condition in zero block encoding */
 #define ROS -1
 
 #define MIN(a, b) (((a) < (b))? (a): (b))
 #define MAX(a, b) (((a) > (b))? (a): (b))
 
-enum
-{
-    M_NEW_BLOCK,
-    M_GET_BLOCK,
-    M_CHECK_ZERO_BLOCK,
-    M_SELECT_CODE_OPTION,
-    M_ENCODE_SPLIT,
-    M_FLUSH_BLOCK,
-    M_FLUSH_BLOCK_LOOP,
-    M_ENCODE_UNCOMP,
-    M_ENCODE_SE,
-    M_ENCODE_ZERO,
-};
-
-typedef struct internal_state {
-    int id_len;             /* bit length of code option identification key */
-    int64_t last_in;        /* previous input for preprocessing */
-    int64_t (*get_sample)(ae_streamp);
-    int64_t xmin;           /* minimum integer for preprocessing */
-    int64_t xmax;           /* maximum integer for preprocessing */
-    int mode;               /* current mode of FSM */
-    int i;                  /* counter for samples */
-    int64_t *block_in;      /* input block buffer */
-    uint8_t *block_out;     /* output block buffer */
-    uint8_t *bp_out;        /* pointer to current output */
-    int64_t total_blocks;
-    int bitp;               /* bit pointer to the next unused bit in accumulator */
-    int block_deferred;     /* there is a block in the input buffer
-                               but we first have to emit a zero block */
-    int ref;                /* length of reference sample in current block
-                               i.e. 0 or 1 depending on whether the block has
-                               a reference sample or not */
-    int zero_ref;           /* current zero block has a reference sample */
-    int64_t zero_ref_sample;/* reference sample of zero block */
-    int zero_blocks;        /* number of contiguous zero blocks */
-#ifdef PROFILE
-    int *prof;
-#endif
-} encode_state;
-
-static int64_t get_lsb_32(ae_streamp strm)
-{
-    int64_t data;
-
-    data = (strm->next_in[3] << 24)
-        | (strm->next_in[2] << 16)
-        | (strm->next_in[1] << 8)
-        | strm->next_in[0];
-
-    strm->next_in += 4;
-    strm->total_in += 4;
-    strm->avail_in -= 4;
-    return data;
-}
-
-static int64_t get_lsb_16(ae_streamp strm)
-{
-    int64_t data;
-
-    data = (strm->next_in[1] << 8) | strm->next_in[0];
-
-    strm->next_in += 2;
-    strm->total_in += 2;
-    strm->avail_in -= 2;
-    return data;
-}
+static int m_get_block(ae_streamp strm);
+static int m_get_block_cautious(ae_streamp strm);
+static int m_check_zero_block(ae_streamp strm);
+static int m_select_code_option(ae_streamp strm);
+static int m_flush_block(ae_streamp strm);
+static int m_flush_block_cautious(ae_streamp strm);
+static int m_encode_splitting(ae_streamp strm);
+static int m_encode_uncomp(ae_streamp strm);
+static int m_encode_se(ae_streamp strm);
+static int m_encode_zero(ae_streamp strm);
+
+/*
+ *
+ * Bit emitters
+ *
+ */
 
-static int64_t get_msb_32(ae_streamp strm)
+static inline void emit(encode_state *state, uint64_t data, int bits)
 {
-    int64_t data;
-
-    data = (strm->next_in[0] << 24)
-        | (strm->next_in[1] << 16)
-        | (strm->next_in[2] << 8)
-        | strm->next_in[3];
-
-    strm->next_in += 4;
-    strm->total_in += 4;
-    strm->avail_in -= 4;
-    return data;
+    for(;;)
+    {
+        data &= ((1ULL << bits) - 1);
+        if (bits <= state->bitp)
+        {
+            state->bitp -= bits;
+            *state->out_bp += data << state->bitp;
+            break;
+        }
+        else
+        {
+            bits -= state->bitp;
+            *state->out_bp += data >> bits;
+            *++state->out_bp = 0;
+            state->bitp = 8;
+        }
+    }
 }
 
-static int64_t get_msb_16(ae_streamp strm)
+static inline void emitfs(encode_state *state, int fs)
 {
-    int64_t data;
-
-    data = (strm->next_in[0] << 8) | strm->next_in[1];
+    /**
+       Emits a fundamental sequence.
 
-    strm->next_in += 2;
-    strm->total_in += 2;
-    strm->avail_in -= 2;
-    return data;
-}
+       fs zero bits followed by one 1 bit.
+     */
 
-static int64_t get_8(ae_streamp strm)
-{
-    strm->avail_in--;
-    strm->total_in++;
-    return *strm->next_in++;
+    fs++;
+    for(;;)
+    {
+        if (fs <= state->bitp)
+        {
+            state->bitp -= fs;
+            *state->out_bp += 1 << state->bitp;
+            break;
+        }
+        else
+        {
+            fs -= state->bitp;
+            *++state->out_bp = 0;
+            state->bitp = 8;
+        }
+    }
 }
 
-int ae_encode_init(ae_streamp strm)
+static inline void preprocess(ae_streamp strm)
 {
-    int blklen;
-    encode_state *state;
+    int i;
+    int64_t theta, d, Delta;
+    encode_state *state = strm->state;
 
-    /* Some sanity checks */
-    if (strm->bit_per_sample > 32 || strm->bit_per_sample == 0)
+    /* If this is the first block between reference 
+       samples then we need to insert one.
+    */
+    if(state->in_total_blocks % strm->rsi == 0)
     {
-        return AE_ERRNO;
+        state->ref = 1;
+        state->last_in = state->in_block[0];
     }
-
-    /* Internal state for encoder */
-    state = (encode_state *) malloc(sizeof(encode_state));
-    if (state == NULL)
+    else
     {
-        return AE_MEM_ERROR;
+        state->ref = 0;
     }
-    strm->state = state;
 
-    if (strm->bit_per_sample > 16)
-    {
-        state->id_len = 5;
-        if (strm->flags & AE_DATA_MSB)
-            state->get_sample = get_msb_32;
-        else
-            state->get_sample = get_lsb_32;
-    }
-    else if (strm->bit_per_sample > 8)
+    for (i = state->ref; i < strm->block_size; i++)
     {
-        state->id_len = 4;
-        if (strm->flags & AE_DATA_MSB)
-            state->get_sample = get_msb_16;
+        theta = MIN(state->last_in - state->xmin,
+                    state->xmax - state->last_in);
+        Delta = state->in_block[i] - state->last_in;
+        state->last_in = state->in_block[i];
+        if (0 <= Delta && Delta <= theta)
+        {
+            state->in_block[i] = 2 * Delta;
+        }
+        else if (-theta <= Delta && Delta < 0)
+        {
+            d = Delta < 0 ? -(uint64_t)Delta : Delta;
+            state->in_block[i] = 2 * d - 1;
+        }
         else
-            state->get_sample = get_lsb_16;
-    }
-    else
-    {
-        state->id_len = 3;
-        state->get_sample = get_8;
+        {
+            state->in_block[i] = theta +
+                (Delta < 0 ? -(uint64_t)Delta : Delta);
+        }
     }
+}
 
-    if (strm->flags & AE_DATA_SIGNED)
+/*
+ *
+ * FSM functions
+ *
+ */
+
+static int m_get_block(ae_streamp strm)
+{
+    encode_state *state = strm->state;
+
+    if (strm->avail_out > state->out_blklen)
     {
-        state->xmin = -(1ULL << (strm->bit_per_sample - 1));
-        state->xmax = (1ULL << (strm->bit_per_sample - 1)) - 1;
+        if (!state->out_direct)
+        {
+            state->out_direct = 1;
+            *strm->next_out = *state->out_bp;
+            state->out_bp = strm->next_out;
+        }
     }
     else
     {
-        state->xmin = 0;
-        state->xmax = (1ULL << strm->bit_per_sample) - 1;
+        if (state->zero_blocks == 0 || state->out_direct)
+        {
+            /* copy leftover from last block */
+            *state->out_block = *state->out_bp;
+            state->out_bp = state->out_block;
+        }
+        state->out_direct = 0;
     }
 
-#ifdef PROFILE
-    state->prof = (int *)malloc((strm->bit_per_sample + 2) * sizeof(int));
-    if (state->prof == NULL)
+    if(state->block_deferred)
     {
-        return AE_MEM_ERROR;
+        state->block_deferred = 0;
+        state->mode = m_select_code_option;
+        return M_CONTINUE;
     }
-    memset(state->prof, 0, (strm->bit_per_sample + 2) * sizeof(int));
-#endif
 
-    state->block_in = (int64_t *)malloc(strm->block_size * sizeof(int64_t));
-    if (state->block_in == NULL)
+    if (strm->avail_in >= state->in_blklen)
     {
-        return AE_MEM_ERROR;
-    }
+        state->get_block(strm);
 
-    /* Largest possible block according to specs */
-    blklen = (5 + 16 * 32) / 8 + 3;
-    state->block_out = (uint8_t *)malloc(blklen);
-    if (state->block_out == NULL)
+        if (strm->flags & AE_DATA_PREPROCESS)
+            preprocess(strm);
+
+        state->in_total_blocks++;
+        return m_check_zero_block(strm);
+    }
+    else
     {
-        return AE_MEM_ERROR;
+        state->i = 0;
+        state->mode = m_get_block_cautious;
     }
-    state->bp_out = state->block_out;
-    *state->bp_out = 0;
-    state->bitp = 8;
-
-    strm->total_in = 0;
-    strm->total_out = 0;
-
-    state->mode = M_NEW_BLOCK;
-
-    state->total_blocks = 0;
-    state->block_deferred = 0;
-    state->zero_blocks = 0;
-    state->zero_ref = 0;
-    state->ref = 0;
-
-    return AE_OK;
+    return M_CONTINUE;
 }
 
-int ae_encode_end(ae_streamp strm)
+static int m_get_block_cautious(ae_streamp strm)
 {
-    encode_state *state;
-
-    state = strm->state;
-#ifdef PROFILE
-    free(state->prof);
-#endif
-    free(state->block_in);
-    free(state->block_out);
-    free(state);
-    return AE_OK;
-}
+    encode_state *state = strm->state;
 
-static inline void emit(encode_state *state, uint64_t data, int bits)
-{
-    while(bits)
+    do
     {
-        data &= ((1ULL << bits) - 1);
-        if (bits <= state->bitp)
+        if (strm->avail_in == 0)
         {
-            state->bitp -= bits;
-            *state->bp_out += data << state->bitp;
-            bits = 0;
+            if (state->flush == AE_FLUSH)
+            {
+                if (state->i > 0)
+                {
+                    /* pad block with last sample if we have
+                       a partial block */
+                    state->in_block[state->i] = state->in_block[state->i - 1];
+                }
+                else
+                {
+                    if (state->zero_blocks)
+                    {
+                        /* Output any remaining zero blocks */
+                        state->mode = m_encode_zero;
+                        return M_CONTINUE;
+                    }
+                    /* Pad last output byte with 0 bits
+                       if user wants to flush, i.e. we got
+                       all input there is.
+                    */
+                    emit(state, 0, state->bitp);
+                    if (state->out_direct == 0)
+                        *strm->next_out++ = *state->out_bp;
+                    strm->avail_out--;
+                    strm->total_out++;
+                    return M_EXIT;
+                }
+            }
+            else
+            {
+                return M_EXIT;
+            }
         }
         else
         {
-            bits -= state->bitp;
-            *state->bp_out += data >> bits;
-            *++state->bp_out = 0;
-            state->bitp = 8;
+            state->in_block[state->i] = state->get_sample(strm);
         }
     }
+    while (++state->i < strm->block_size);
+
+    if (strm->flags & AE_DATA_PREPROCESS)
+        preprocess(strm);
+
+    state->in_total_blocks++;
+    return m_check_zero_block(strm);
 }
 
-static inline void emitfs(encode_state *state, int fs)
+static inline int m_check_zero_block(ae_streamp strm)
 {
-    if (fs < 63)
-    {
-        emit(state, 1, fs + 1);
-    }
-    else
+    int i;
+    encode_state *state = strm->state;
+
+    i = state->ref;
+    while(i < strm->block_size && state->in_block[i] == 0)
+        i++;
+
+    if (i == strm->block_size)
     {
-        emit(state, 0, fs);
-        emit(state, 1, 1);
-    }
-}
+        /* remember ref on first zero block */
+        if (state->zero_blocks == 0)
+        {
+            state->zero_ref = state->ref;
+            state->zero_ref_sample = state->in_block[0];
+        }
 
-#ifdef PROFILE
-static inline void profile_print(ae_streamp strm)
-{
-    int i, total;
-    encode_state *state;
+        state->zero_blocks++;
 
-    state = strm->state;
-    fprintf(stderr, "Blocks encoded by each coding option\n");
-    fprintf(stderr, "Zero blocks:  %i\n", state->prof[0]);
-    total = state->prof[0];
-    fprintf(stderr, "Second Ext.:  %i\n", state->prof[strm->bit_per_sample+1]);
-    total += state->prof[strm->bit_per_sample+1];
-    fprintf(stderr, "FS:           %i\n", state->prof[1]);
-    total += state->prof[1];
-    for (i = 2; i < strm->bit_per_sample - 1; i++)
+        if (state->in_total_blocks % strm->rsi % 64 == 0)
+        {
+            if (state->zero_blocks > 4)
+                state->zero_blocks = ROS;
+            state->mode = m_encode_zero;
+            return M_CONTINUE;
+        }
+        state->mode = m_get_block;
+        return M_CONTINUE;
+    }
+    else if (state->zero_blocks)
     {
-        fprintf(stderr, "k = %02i:       %i\n", i-1, state->prof[i]);
-        total += state->prof[i];
+        /* The current block isn't zero but we have to
+           emit a previous zero block first. The
+           current block will be handled later.
+        */
+        state->block_deferred = 1;
+        state->mode = m_encode_zero;
+        return M_CONTINUE;
     }
-    fprintf(stderr, "Uncompressed: %i\n", state->prof[strm->bit_per_sample]);
-    total += state->prof[strm->bit_per_sample];
-    fprintf(stderr, "Total blocks: %i\n", total);
+    state->mode = m_select_code_option;
+    return M_CONTINUE;
 }
-#endif
 
-int ae_encode(ae_streamp strm, int flush)
+static inline int m_select_code_option(ae_streamp strm)
 {
-    /**
-       Finite-state machine implementation of the adaptive entropy
-       encoder.
-    */
-
-    int i, j, k, zb, this_bs;
-    int64_t split_len;
+    int i, k, this_bs, looked_bothways, direction;
+    int64_t d, split_len, uncomp_len;
     int64_t split_len_min, se_len, fs_len;
-    int64_t d;
-    int64_t theta, Delta;
-    size_t avail_out, total_out;
+    encode_state *state = strm->state;
 
-    encode_state *state;
+    /* Length of this block minus reference sample (if present) */
+    this_bs = strm->block_size - state->ref;
 
-    state = strm->state;
-    total_out = strm->total_out;
-    avail_out = strm->avail_out;
+    split_len_min = INT64_MAX;
+    i = state->k;
+    direction = 1;
+    looked_bothways = 0;
 
+    /* Starting with splitting position of last block look left
+       and possibly right to find new minimum.*/
     for (;;)
     {
-        switch(state->mode)
+        fs_len = (state->in_block[1] >> i)
+            + (state->in_block[2] >> i)
+            + (state->in_block[3] >> i)
+            + (state->in_block[4] >> i)
+            + (state->in_block[5] >> i)
+            + (state->in_block[6] >> i)
+            + (state->in_block[7] >> i);
+
+        if (state->ref == 0)
+            fs_len += (state->in_block[0] >> i);
+
+        if (strm->block_size == 16)
+            fs_len += (state->in_block[8] >> i)
+                + (state->in_block[9] >> i)
+                + (state->in_block[10] >> i)
+                + (state->in_block[11] >> i)
+                + (state->in_block[12] >> i)
+                + (state->in_block[13] >> i)
+                + (state->in_block[14] >> i)
+                + (state->in_block[15] >> i);
+
+        split_len = fs_len + this_bs * (i + 1);
+
+        if (split_len < split_len_min)
         {
-        case M_NEW_BLOCK:
-            if (state->zero_blocks == 0)
+            if (split_len_min < INT64_MAX)
             {
-                /* copy leftover from last block */
-                *state->block_out = *state->bp_out;
-                state->bp_out = state->block_out;
+                /* We are moving towards the minimum so it can't be in
+                   the other direction.*/
+                looked_bothways = 1;
             }
+            split_len_min = split_len;
+            k = i;
 
-            if(state->block_deferred)
+            if (direction == 1)
             {
-                state->block_deferred = 0;
-                state->mode = M_SELECT_CODE_OPTION;
-                break;
-            }
-
-            state->i = 0;
-            state->mode = M_GET_BLOCK;
-
-        case M_GET_BLOCK:
-            do
-            {
-                if (strm->avail_in == 0)
+                if (fs_len < this_bs)
                 {
-                    if (flush == AE_FLUSH)
+                    /* Next can't get better because what we lose by
+                       additional uncompressed bits isn't compensated by a
+                       smaller FS part. Vice versa if we are coming from
+                       the other direction.*/
+                    if (looked_bothways)
                     {
-                        if (state->i > 0)
-                        {
-                            /* pad block with last sample if we have
-                               a partial block */
-                            state->block_in[state->i] = state->block_in[state->i - 1];
-                        }
-                        else
-                        {
-                            if (state->zero_blocks)
-                            {
-                                /* Output any remaining zero blocks */
-                                state->mode=M_ENCODE_ZERO;
-                                break;
-                            }
-                            /* Pad last output byte with 0 bits
-                               if user wants to flush, i.e. we got
-                               all input there is.
-                            */
-                            emit(state, 0, state->bitp);
-                            *strm->next_out++ = *state->bp_out;
-                            avail_out--;
-                            total_out++;
-#ifdef PROFILE
-                            profile_print(strm);
-#endif
-                            goto req_buffer;
-                        }
+                        break;
                     }
                     else
                     {
-                        goto req_buffer;
+                        direction = -direction;
+                        looked_bothways = 1;
+                        i = state->k;
                     }
                 }
                 else
                 {
-                    state->block_in[state->i] = state->get_sample(strm);
+                    while (fs_len > 5 * this_bs)
+                    {
+                        i++;
+                        fs_len /= 5;
+                    }
                 }
             }
-            while (++state->i < strm->block_size);
-
-            state->total_blocks++;
-
-            /* preprocess block if needed */
-            if (strm->flags & AE_DATA_PREPROCESS)
+            else if (fs_len > this_bs)
             {
-                /* If this is the first block in a segment
-                   then we need to insert a reference sample.
-                */
-                if(state->total_blocks % strm->rsi == 1)
+                /* Since we started looking the other way there is no
+                   need to turn back.*/
+                break;
+            }
+        }
+        else
+        {
+            /* Stop looking for better option if we
+               don't see any improvement. */
+                if (looked_bothways)
                 {
-                    state->ref = 1;
-                    state->last_in = state->block_in[0];
+                    break;
                 }
                 else
                 {
-                    state->ref = 0;
+                    direction = -direction;
+                    looked_bothways = 1;
+                    i = state->k;
                 }
+        }
+        if (i + direction < 0
+            || i + direction >= strm->bit_per_sample - 2)
+        {
+            if (looked_bothways)
+                break;
 
-                for (i = state->ref; i < strm->block_size; i++)
-                {
-                    theta = MIN(state->last_in - state->xmin,
-                                state->xmax - state->last_in);
-                    Delta = state->block_in[i] - state->last_in;
-                    state->last_in = state->block_in[i];
-                    if (0 <= Delta && Delta <= theta)
-                    {
-                        state->block_in[i] = 2 * Delta;
-                    }
-                    else if (-theta <= Delta && Delta < 0)
-                    {
-                        d = Delta < 0 ? -(uint64_t)Delta : Delta;
-                        state->block_in[i] = 2 * d - 1;
-                    }
-                    else
-                    {
-                        state->block_in[i] = theta +
-                            (Delta < 0 ? -(uint64_t)Delta : Delta);
-                    }
-                }
-            }
-            state->mode = M_CHECK_ZERO_BLOCK;
+            direction = -direction;
+            looked_bothways = 1;
+            i = state->k;
+        }
 
-        case M_CHECK_ZERO_BLOCK:
-            zb = 1;
-            for (i = state->ref; i < strm->block_size && zb; i++)
-                if (state->block_in[i] != 0) zb = 0;
+        i += direction;
+    }
+    state->k = k;
 
-            if (zb)
-            {
-                /* remember ref on first zero block */
-                if (state->zero_blocks == 0)
-                {
-                    state->zero_ref = state->ref;
-                    state->zero_ref_sample = state->block_in[0];
-                }
+    /* Count bits for 2nd extension */
+    se_len = 1;
+    for (i = 0; i < strm->block_size; i+= 2)
+    {
+        d = state->in_block[i] + state->in_block[i + 1];
+        /* we have to worry about overflow here */
+        if (d > split_len_min)
+        {
+            se_len = d;
+            break;
+        }
+        else
+        {
+            se_len += d * (d + 1) / 2 + state->in_block[i + 1];
+        }
+    }
 
-                state->zero_blocks++;
+    /* Length of uncompressed block */
+    uncomp_len = this_bs * strm->bit_per_sample;
 
-                if (state->total_blocks % strm->rsi % 64 == 0)
-                {
-#ifdef PROFILE
-                    state->prof[0] += state->zero_blocks;
-#endif
-                    if (state->zero_blocks > 4)
-                        state->zero_blocks = ROS;
-                    state->mode = M_ENCODE_ZERO;
-                    break;
-                }
-                state->mode = M_NEW_BLOCK;
-                break;
-            }
-            else if (state->zero_blocks)
-            {
-#ifdef PROFILE
-                state->prof[0] += state->zero_blocks;
-#endif
-                state->mode = M_ENCODE_ZERO;
-                /* The current block isn't zero but we have to
-                   emit a previous zero block first. The
-                   current block has to be handled later.
-                */
-                state->block_deferred = 1;
-                break;
-            }
+    /* Decide which option to use */
+    if (split_len_min < uncomp_len)
+    {
+        if (split_len_min <= se_len)
+        {
+            /* Splitting won - the most common case. */
+            return m_encode_splitting(strm);
+        }
+        else
+        {
+            return m_encode_se(strm);
+        }
+    }
+    else
+    {
+        if (uncomp_len <= se_len)
+        {
+            return m_encode_uncomp(strm);
+        }
+        else
+        {
+            return m_encode_se(strm);
+        }
+    }
+}
 
-            state->mode = M_SELECT_CODE_OPTION;
+static inline int m_encode_splitting(ae_streamp strm)
+{
+    int i;
+    encode_state *state = strm->state;
+    int k = state->k;
 
-        case M_SELECT_CODE_OPTION:
-            /* If zero block isn't an option then count length of
-               sample splitting options */
+    emit(state, k + 1, state->id_len);
+    if (state->ref)
+        emit(state, state->in_block[0], strm->bit_per_sample);
 
-            /* Baseline is the size of an uncompressed block */
-            split_len_min = (strm->block_size - state->ref) * strm->bit_per_sample;
-            k = strm->bit_per_sample;
+    for (i = state->ref; i < strm->block_size; i++)
+        emitfs(state, state->in_block[i] >> k);
 
-            /* Length of this block minus reference sample if present */
-            this_bs = strm->block_size - state->ref;
+    for (i = state->ref; i < strm->block_size; i++)
+        emit(state, state->in_block[i], k);
 
-            /* Add FS encoded to unencoded parts */
-            for (j = 0; j < strm->bit_per_sample - 2; j++)
-            {
-#ifdef UNROLL_BLOCK_8
-                fs_len = (state->block_in[1] >> j)
-                    + (state->block_in[2] >> j)
-                    + (state->block_in[3] >> j)
-                    + (state->block_in[4] >> j)
-                    + (state->block_in[5] >> j)
-                    + (state->block_in[6] >> j)
-                    + (state->block_in[7] >> j);
-                if (state->ref == 0)
-                    fs_len += (state->block_in[0] >> j);
-#else
-                fs_len = 0;
-                for (i = state->ref; i < strm->block_size; i++)
-                    fs_len += state->block_in[i] >> j;
-#endif
-                split_len = fs_len + this_bs * (j + 1);
-                if (split_len < split_len_min)
-                {
-                    split_len_min = split_len;
-                    k = j;
-
-                    /* if (fs_len < this_bs) */
-                    /* { */
-                    /*     /\* Next can't get better because what we lose */
-                    /*        by additional uncompressed bits isn't */
-                    /*        compensated by a smaller FS part. *\/ */
-                    /*     break; */
-                    /* } */
-                }
-                /* else */
-                /*     break; */
-            }
+    return m_flush_block(strm);
+}
 
-            /* Count bits for 2nd extension */
-            se_len = 1;
-            for (i = 0; i < strm->block_size && split_len_min > se_len; i+= 2)
-            {
-                d = state->block_in[i] + state->block_in[i + 1];
-                /* we have to worry about overflow here */
-                if (d > split_len_min)
-                    se_len = d;
-                else
-                    se_len += d * (d + 1) / 2 + state->block_in[i + 1];
-            }
+static inline int m_encode_uncomp(ae_streamp strm)
+{
+    int i;
+    encode_state *state = strm->state;
 
-            /* Decide which option to use */
-            if (split_len_min <= se_len)
-            {
-                if (k == strm->bit_per_sample)
-                {
-#ifdef PROFILE
-                    state->prof[k]++;
-#endif
-                    state->mode = M_ENCODE_UNCOMP;
-                    break;
-                }
-                else
-                {
-#ifdef PROFILE
-                    state->prof[k + 1]++;
-#endif
-                    state->mode = M_ENCODE_SPLIT;
-                }
-            }
-            else
-            {
-#ifdef PROFILE
-                state->prof[strm->bit_per_sample + 1]++;
-#endif
-                state->mode = M_ENCODE_SE;
-                break;
-            }
+    emit(state, 0x1f, state->id_len);
+    for (i = 0; i < strm->block_size; i++)
+        emit(state, state->in_block[i], strm->bit_per_sample);
 
-            emit(state, k + 1, state->id_len);
-            if (state->ref)
-                emit(state, state->block_in[0], strm->bit_per_sample);
+    return m_flush_block(strm);
+}
 
-            for (i = state->ref; i < strm->block_size; i++)
-                emitfs(state, state->block_in[i] >> k);
+static inline int m_encode_se(ae_streamp strm)
+{
+    int i;
+    int64_t d;
+    encode_state *state = strm->state;
 
-            for (i = state->ref; i < strm->block_size; i++)
-                emit(state, state->block_in[i], k);
+    emit(state, 1, state->id_len + 1);
+    if (state->ref)
+        emit(state, state->in_block[0], strm->bit_per_sample);
 
-            state->mode = M_FLUSH_BLOCK;
+    for (i = 0; i < strm->block_size; i+= 2)
+    {
+        d = state->in_block[i] + state->in_block[i + 1];
+        emitfs(state, d * (d + 1) / 2 + state->in_block[i + 1]);
+    }
 
-        case M_FLUSH_BLOCK:
-            state->i = 0;
-            state->mode = M_FLUSH_BLOCK_LOOP;
+    return m_flush_block(strm);
+}
 
-        case M_FLUSH_BLOCK_LOOP:
-            while(state->block_out + state->i < state->bp_out)
-            {
-                if (avail_out == 0)
-                {
-#ifdef PROFILE
-                    profile_print(strm);
-#endif
-                    goto req_buffer;
-                }
+static inline int m_encode_zero(ae_streamp strm)
+{
+    encode_state *state = strm->state;
 
-                *strm->next_out++ = state->block_out[state->i];
-                avail_out--;
-                total_out++;
-                state->i++;
-            }
-            state->mode = M_NEW_BLOCK;
-            break;
+    emit(state, 0, state->id_len + 1);
 
-        case M_ENCODE_UNCOMP:
-            emit(state, 0x1f, state->id_len);
-            for (i = 0; i < strm->block_size; i++)
-                emit(state, state->block_in[i], strm->bit_per_sample);
+    if (state->zero_ref)
+        emit(state, state->zero_ref_sample, strm->bit_per_sample);
 
-            state->mode = M_FLUSH_BLOCK;
-            break;
+    if (state->zero_blocks == ROS)
+        emitfs(state, 4);
+    else if (state->zero_blocks >= 5)
+        emitfs(state, state->zero_blocks);
+    else
+        emitfs(state, state->zero_blocks - 1);
 
-        case M_ENCODE_SE:
-            emit(state, 1, state->id_len + 1);
-            if (state->ref)
-                emit(state, state->block_in[0], strm->bit_per_sample);
+    state->zero_blocks = 0;
+    return m_flush_block(strm);
+}
 
-            for (i = 0; i < strm->block_size; i+= 2)
-            {
-                d = state->block_in[i] + state->block_in[i + 1];
-                emitfs(state, d * (d + 1) / 2 + state->block_in[i + 1]);
-            }
+static inline int m_flush_block(ae_streamp strm)
+{
+    /* Finish the current output block. In direct mode the bytes are
+       already in the caller's buffer, so only the stream counters are
+       advanced up to out_bp. Otherwise the byte-wise copy out of
+       out_block is scheduled as the next state. */
+    encode_state *state = strm->state;
 
-            state->mode = M_FLUSH_BLOCK;
-            break;
+    if (!state->out_direct)
+    {
+        state->i = 0;
+        state->mode = m_flush_block_cautious;
+    }
+    else
+    {
+        int written = state->out_bp - strm->next_out;
+
+        strm->next_out += written;
+        strm->avail_out -= written;
+        strm->total_out += written;
+        state->mode = m_get_block;
+    }
 
-        case M_ENCODE_ZERO:
-            emit(state, 0, state->id_len + 1);
-            if (state->zero_ref)
-            {
-                emit(state, state->zero_ref_sample, strm->bit_per_sample);
-            }
-            if (state->zero_blocks == ROS)
-            {
-                emitfs(state, 4);
-            }
-            else if (state->zero_blocks >= 5)
-                emitfs(state, state->zero_blocks);
-            else
-                emitfs(state, state->zero_blocks - 1);
-            state->zero_blocks = 0;
-            state->mode = M_FLUSH_BLOCK;
-            break;
+    return M_CONTINUE;
+}
+
+static inline int m_flush_block_cautious(ae_streamp strm)
+{
+    /* Restartable byte-wise flush of out_block into strm->next_out.
+       state->i remembers how many bytes have been copied, so the
+       copy resumes where it stopped after an M_EXIT caused by a full
+       output buffer. */
+    encode_state *state = strm->state;
+
+    for (; state->out_block + state->i < state->out_bp; state->i++)
+    {
+        if (strm->avail_out == 0)
+        {
+            return M_EXIT;
+        }
+
+        *strm->next_out = state->out_block[state->i];
+        strm->next_out++;
+        strm->avail_out--;
+        strm->total_out++;
+    }
+
+    /* Everything up to out_bp has been handed over */
+    state->mode = m_get_block;
+    return M_CONTINUE;
+}
+
+/*
+ *
+ * API functions
+ *
+ */
 
-        default:
-            return AE_STREAM_ERROR;
+/**
+ * Allocate and initialise the encoder state of a stream.
+ *
+ * Selects the option id length, the input block length in bytes and
+ * the sample/block readers from bit_per_sample, block_size and flags,
+ * computes the sample range used for preprocessing and allocates the
+ * input and output block buffers.
+ *
+ * @param strm stream whose bit_per_sample, block_size and flags have
+ *             been set by the caller
+ * @return AE_OK on success, AE_ERRNO if bit_per_sample is 0 or larger
+ *         than 32, AE_MEM_ERROR if an allocation fails. On
+ *         AE_MEM_ERROR everything allocated here has been released
+ *         again and strm->state is NULL.
+ */
+int ae_encode_init(ae_streamp strm)
+{
+    encode_state *state;
+
+    /* Some sanity checks */
+    if (strm->bit_per_sample > 32 || strm->bit_per_sample == 0)
+    {
+        return AE_ERRNO;
+    }
+
+    /* Internal state for encoder, zero-filled so that every field
+       not set below starts out as 0 */
+    state = calloc(1, sizeof *state);
+    if (state == NULL)
+    {
+        strm->state = NULL;
+        return AE_MEM_ERROR;
+    }
+    strm->state = state;
+
+    /* NOTE(review): a whole-block reader (get_block) is only installed
+       for 16 bit MSB and for 8 bit input; in the other cases it stays
+       zeroed - confirm that the block reading state checks for that. */
+    if (strm->bit_per_sample > 16)
+    {
+        /* 32 bit settings */
+        state->id_len = 5;
+        state->in_blklen = 4 * strm->block_size;
+
+        if (strm->flags & AE_DATA_MSB)
+            state->get_sample = get_msb_32;
+        else
+            state->get_sample = get_lsb_32;
+    }
+    else if (strm->bit_per_sample > 8)
+    {
+        /* 16 bit settings */
+        state->id_len = 4;
+        state->in_blklen = 2 * strm->block_size;
+
+        if (strm->flags & AE_DATA_MSB)
+        {
+            state->get_sample = get_msb_16;
+
+            if (strm->block_size == 8)
+                state->get_block = get_block_msb_16_bs_8;
+            else
+                state->get_block = get_block_msb_16_bs_16;
         }
+        else
+            state->get_sample = get_lsb_16;
+    }
+    else
+    {
+        /* 8 bit settings */
+        state->in_blklen = strm->block_size;
+        state->id_len = 3;
+
+        state->get_sample = get_8;
+
+        if (strm->block_size == 8)
+            state->get_block = get_block_8_bs_8;
+        else
+            state->get_block = get_block_8_bs_16;
+    }
+
+    /* Smallest and largest representable sample; the shifts are done
+       in 64 bit so that bit_per_sample == 32 does not overflow */
+    if (strm->flags & AE_DATA_SIGNED)
+    {
+        state->xmin = -(1ULL << (strm->bit_per_sample - 1));
+        state->xmax = (1ULL << (strm->bit_per_sample - 1)) - 1;
+    }
+    else
+    {
+        state->xmin = 0;
+        state->xmax = (1ULL << strm->bit_per_sample) - 1;
+    }
+
+    state->in_block = malloc(strm->block_size * sizeof *state->in_block);
+    if (state->in_block == NULL)
+    {
+        /* Do not leave a half-built state behind */
+        free(state);
+        strm->state = NULL;
+        return AE_MEM_ERROR;
+    }
+
+    /* Largest possible block according to specs */
+    state->out_blklen = (5 + 16 * 32) / 8 + 3;
+    /* Output buffer */
+    state->out_block = malloc(state->out_blklen);
+    if (state->out_block == NULL)
+    {
+        free(state->in_block);
+        free(state);
+        strm->state = NULL;
+        return AE_MEM_ERROR;
     }
 
-req_buffer:
-    strm->total_out = total_out;
-    strm->avail_out = avail_out;
+    strm->total_in = 0;
+    strm->total_out = 0;
+
+    /* Start with an empty output byte: all 8 bits unused */
+    state->out_bp = state->out_block;
+    *state->out_bp = 0;
+    state->bitp = 8;
+    state->mode = m_get_block;
+
+    return AE_OK;
+}
+
+/**
+ * Run the encoder's finite-state machine on a stream.
+ *
+ * The handler stored in state->mode is called repeatedly until one
+ * of them returns something other than M_CONTINUE.
+ *
+ * @param strm  stream initialised by ae_encode_init
+ * @param flush flush request, stored in the state for the handlers
+ * @return always AE_OK
+ */
+int ae_encode(ae_streamp strm, int flush)
+{
+    encode_state *state = strm->state;
+
+    state->flush = flush;
+
+    /* state->mode is re-read on every pass because the handlers
+       replace it */
+    for (;;)
+    {
+        if (state->mode(strm) != M_CONTINUE)
+            break;
+    }
+
+    if (!state->out_direct)
+    {
+        return AE_OK;
+    }
+
+    /* Output went directly to strm->next_out: account for it, then
+       carry the byte at out_bp over to the start of out_block and
+       fall back to buffered output. */
+    m_flush_block(strm);
+    state->out_block[0] = *state->out_bp;
+    state->out_bp = state->out_block;
+    state->out_direct = 0;
+
+    return AE_OK;
+}
+
+/**
+ * Release the encoder state of a stream.
+ *
+ * Frees both block buffers and the state itself and resets
+ * strm->state to NULL, so that a stream without state (never
+ * initialised to a state, or already ended) is left alone instead
+ * of being freed twice.
+ *
+ * @param strm stream to tear down
+ * @return always AE_OK
+ */
+int ae_encode_end(ae_streamp strm)
+{
+    encode_state *state = strm->state;
+
+    if (state != NULL)
+    {
+        free(state->in_block);
+        free(state->out_block);
+        free(state);
+        strm->state = NULL;
+    }
     return AE_OK;
 }
diff --git a/src/aee.h b/src/aee.h
new file mode 100644 (file)
index 0000000..5d5a3fe
--- /dev/null
+++ b/src/aee.h
@@ -0,0 +1,44 @@
+#ifndef AAE_H
+#define AAE_H
+
+#include <inttypes.h>
+#include "libae.h"
+
+#define M_CONTINUE 1
+#define M_EXIT 0
+
+typedef struct internal_state { /* encoder state, attached to a stream as strm->state */
+    int (*mode)(ae_streamp); /* current state machine handler; returns M_CONTINUE or M_EXIT */
+    void (*get_block)(ae_streamp); /* reads one whole input block into in_block */
+    int64_t (*get_sample)(ae_streamp); /* reads a single sample from strm->next_in */
+
+    int id_len;             /* bit length of code option identification key */
+    int64_t last_in;        /* previous input for preprocessing */
+    int64_t xmin;           /* minimum integer for preprocessing */
+    int64_t xmax;           /* maximum integer for preprocessing */
+    int i;                  /* counter for samples; also the byte index
+                               while out_block is being flushed */
+    int64_t *in_block;      /* input block buffer */
+    int in_blklen;          /* input block length in bytes */
+    int64_t in_total_blocks;/* total blocks in */
+    uint8_t *out_block;     /* output block buffer */
+    int out_blklen;         /* output block length in bytes */
+    uint8_t *out_bp;        /* pointer to current output */
+    int out_direct;         /* output to strm->next_out (1)
+                               or out_block (0) */
+    int bitp;               /* bit pointer to the next unused bit in accumulator */
+    int block_deferred;     /* there is a block in the input buffer
+                               but we first have to emit a zero block */
+    int ref;                /* length of reference sample in current block
+                               i.e. 0 or 1 depending on whether the block has
+                               a reference sample or not */
+    int zero_ref;           /* current zero block has a reference sample */
+    int64_t zero_ref_sample;/* reference sample of zero block */
+    int zero_blocks;        /* number of contiguous zero blocks */
+    int k;                  /* splitting position */
+    int flush;              /* flush argument of the latest ae_encode() call */
+#ifdef PROFILE
+    int *prof;
+#endif
+} encode_state;
+
+#endif
diff --git a/src/aee_mutators.c b/src/aee_mutators.c
new file mode 100644 (file)
index 0000000..f6a88e8
--- /dev/null
@@ -0,0 +1,142 @@
+#include <inttypes.h>
+#include "libae.h"
+#include "aee.h"
+#include "aee_mutators.h"
+
+/**
+ * Read one 32 bit sample stored least significant byte first and
+ * advance the input side of the stream by 4 bytes.
+ *
+ * avail_in is not checked; the caller has to make sure that 4 bytes
+ * are available.
+ *
+ * @param strm stream to read from
+ * @return the sample, zero-extended (0 .. 2^32 - 1)
+ */
+int64_t get_lsb_32(ae_streamp strm)
+{
+    uint32_t data;
+
+    /* Shift in uint32_t: a plain byte is promoted to int, and moving
+       a byte >= 0x80 up by 24 bits would overflow int and come back
+       as a negative, sign-extended sample. */
+    data = ((uint32_t)strm->next_in[3] << 24)
+        | ((uint32_t)strm->next_in[2] << 16)
+        | ((uint32_t)strm->next_in[1] << 8)
+        | (uint32_t)strm->next_in[0];
+
+    strm->next_in += 4;
+    strm->total_in += 4;
+    strm->avail_in -= 4;
+    return data;
+}
+
+/**
+ * Read one 16 bit sample stored least significant byte first and
+ * advance the input side of the stream by 2 bytes. avail_in is not
+ * checked here.
+ */
+int64_t get_lsb_16(ae_streamp strm)
+{
+    const int64_t low = strm->next_in[0];
+    const int64_t high = strm->next_in[1];
+
+    strm->avail_in -= 2;
+    strm->total_in += 2;
+    strm->next_in += 2;
+
+    return (high << 8) | low;
+}
+
+/**
+ * Read one 32 bit sample stored most significant byte first and
+ * advance the input side of the stream by 4 bytes.
+ *
+ * avail_in is not checked; the caller has to make sure that 4 bytes
+ * are available.
+ *
+ * @param strm stream to read from
+ * @return the sample, zero-extended (0 .. 2^32 - 1)
+ */
+int64_t get_msb_32(ae_streamp strm)
+{
+    uint32_t data;
+
+    /* Shift in uint32_t: a plain byte is promoted to int, and moving
+       a byte >= 0x80 up by 24 bits would overflow int and come back
+       as a negative, sign-extended sample. */
+    data = ((uint32_t)strm->next_in[0] << 24)
+        | ((uint32_t)strm->next_in[1] << 16)
+        | ((uint32_t)strm->next_in[2] << 8)
+        | (uint32_t)strm->next_in[3];
+
+    strm->next_in += 4;
+    strm->total_in += 4;
+    strm->avail_in -= 4;
+    return data;
+}
+
+/**
+ * Read one 16 bit sample stored most significant byte first and
+ * advance the input side of the stream by 2 bytes. avail_in is not
+ * checked here.
+ */
+int64_t get_msb_16(ae_streamp strm)
+{
+    const int64_t high = strm->next_in[0];
+    const int64_t low = strm->next_in[1];
+
+    strm->avail_in -= 2;
+    strm->total_in += 2;
+    strm->next_in += 2;
+
+    return (high << 8) | low;
+}
+
+/**
+ * Read one single-byte sample and advance the input side of the
+ * stream by 1 byte. avail_in is not checked here.
+ */
+int64_t get_8(ae_streamp strm)
+{
+    const int64_t sample = strm->next_in[0];
+
+    strm->next_in++;
+    strm->total_in++;
+    strm->avail_in--;
+
+    return sample;
+}
+
+/**
+ * Fill in_block with 8 samples of 16 bit, most significant byte
+ * first, and advance the input side of the stream by 16 bytes.
+ * avail_in is not checked here.
+ */
+void get_block_msb_16_bs_8(ae_streamp strm)
+{
+    int s;
+    int64_t *dst = strm->state->in_block;
+
+    /* Constant trip count, so the compiler is free to unroll this */
+    for (s = 0; s < 8; s++)
+    {
+        dst[s] = (strm->next_in[2 * s] << 8) | strm->next_in[2 * s + 1];
+    }
+
+    strm->avail_in -= 16;
+    strm->total_in += 16;
+    strm->next_in += 16;
+}
+
+/**
+ * Fill in_block with block_size samples of 16 bit, most significant
+ * byte first, and advance the input side of the stream by the number
+ * of bytes consumed (2 per sample; 32 for the block size of 16 this
+ * reader is named after). avail_in is not checked here.
+ *
+ * @param strm stream to read from; strm->state->in_block must hold
+ *             at least block_size samples
+ */
+void get_block_msb_16_bs_16(ae_streamp strm)
+{
+    int i;
+    /* Derived from the loop bound so that the bytes read and the
+       bytes accounted for cannot drift apart */
+    const int nbytes = 2 * strm->block_size;
+    int64_t *block = strm->state->in_block;
+
+    for (i = 0; i < strm->block_size; i++)
+    {
+        block[i] = (strm->next_in[2 * i] << 8)
+            | strm->next_in[2 * i + 1];
+    }
+    strm->next_in += nbytes;
+    strm->total_in += nbytes;
+    strm->avail_in -= nbytes;
+}
+
+/**
+ * Fill in_block with 8 single-byte samples and advance the input
+ * side of the stream by 8 bytes. avail_in is not checked here.
+ */
+void get_block_8_bs_8(ae_streamp strm)
+{
+    int s;
+    int64_t *dst = strm->state->in_block;
+
+    /* Constant trip count, so the compiler is free to unroll this */
+    for (s = 0; s < 8; s++)
+    {
+        dst[s] = strm->next_in[s];
+    }
+
+    strm->avail_in -= 8;
+    strm->total_in += 8;
+    strm->next_in += 8;
+}
+
+/**
+ * Fill in_block with 16 single-byte samples and advance the input
+ * side of the stream by 16 bytes. avail_in is not checked here.
+ */
+void get_block_8_bs_16(ae_streamp strm)
+{
+    int s;
+    int64_t *dst = strm->state->in_block;
+
+    /* Constant trip count, so the compiler is free to unroll this */
+    for (s = 0; s < 16; s++)
+    {
+        dst[s] = strm->next_in[s];
+    }
+
+    strm->avail_in -= 16;
+    strm->total_in += 16;
+    strm->next_in += 16;
+}
diff --git a/src/aee_mutators.h b/src/aee_mutators.h
new file mode 100644 (file)
index 0000000..84ae370
--- /dev/null
@@ -0,0 +1,18 @@
+#ifndef AEE_MUTATORS_H
+#define AEE_MUTATORS_H
+
+#include <inttypes.h>
+#include "libae.h"
+
+/* Single sample readers: return the next sample of the stream and
+   advance next_in / total_in / avail_in by the size of one sample. */
+int64_t get_lsb_32(ae_streamp strm);
+int64_t get_lsb_16(ae_streamp strm);
+int64_t get_msb_32(ae_streamp strm);
+int64_t get_msb_16(ae_streamp strm);
+int64_t get_8(ae_streamp strm);
+
+/* Whole block readers: fill strm->state->in_block and advance the
+   input side of the stream by one block. */
+void get_block_msb_16_bs_8(ae_streamp strm);
+void get_block_msb_16_bs_16(ae_streamp strm);
+void get_block_8_bs_8(ae_streamp strm);
+void get_block_8_bs_16(ae_streamp strm);
+
+#endif
index d187fa6..b76015d 100644 (file)
@@ -4,6 +4,7 @@
 #include <stdlib.h>
 #include <inttypes.h>
 #include <string.h>
+#include <getopt.h>
 #include "libae.h"
 
 #define CHUNK 1024
index 42c6a8c..d574fbb 100644 (file)
@@ -4,6 +4,7 @@
 #include <unistd.h>
 #include <inttypes.h>
 #include <string.h>
+#include <getopt.h>
 #include "libae.h"
 
 #define CHUNK 1024
index 44eb6f7..349cdd9 100644 (file)
@@ -2,6 +2,7 @@
 #define LIBAE_H
 
 #include <inttypes.h>
+#include <stddef.h>
 
 struct internal_state;
 
@@ -21,7 +22,7 @@ typedef struct _ae_stream
     uint32_t rsi;            /* Reference sample interval, the number of
                                 blocks between consecutive reference
                                 samples. */
-    uint32_t flags; 
+    uint32_t flags;
 
     struct internal_state *state;
 } ae_stream;