Buffer complete RSI of preprocessed input data for substantial speedup
author Mathis Rosenhauer <rosenhauer@dkrz.de>
Tue, 4 Sep 2012 09:31:50 +0000 (11:31 +0200)
committer Mathis Rosenhauer <rosenhauer@dkrz.de>
Tue, 4 Sep 2012 09:32:24 +0000 (11:32 +0200)
src/Makefile
src/aee.c
src/aee.h
src/aee_mutators.c
src/libae.h
src/sz_compat.c

index 8da34dc..6abdc33 100644 (file)
--- a/src/Makefile
+++ b/src/Makefile
@@ -43,7 +43,7 @@ check: encode decode test_szcomp
         ./encode -c -b1 -B8 -R128 -J8 ../data/example_data > ../data/test.aee
         ./decode -b1 -B8 -R128 -J8 ../data/test.aee
         diff ../data/test ../data/example_data
-        ./encode -c -b1024 -B32 -R128 -J64 ../data/example_data > ../data/test.aee
-        ./decode -b1024 -B32 -R128 -J64 ../data/test.aee
+        ./encode -c -b 512 -B8 -R128 -J8 ../data/example_data > ../data/test.aee
+        ./decode -b 512 -B8 -R128 -J8 ../data/test.aee
         diff ../data/test ../data/example_data
         ./test_szcomp 65536 ../data/example_data_16
index 818d1e9..454018c 100644 (file)
--- a/src/aee.c
+++ b/src/aee.c
@@ -42,24 +42,24 @@ static int m_encode_zero(ae_streamp strm);
 
 static inline void emit(encode_state *state, uint64_t data, int bits)
 {
-    if (bits <= state->bitp)
+    if (bits <= state->bit_p)
     {
-        state->bitp -= bits;
-        *state->out_bp += data << state->bitp;
+        state->bit_p -= bits;
+        *state->cds_p += data << state->bit_p;
     }
     else
     {
-        bits -= state->bitp;
-        *state->out_bp++ += data >> bits;
+        bits -= state->bit_p;
+        *state->cds_p++ += data >> bits;
 
         while (bits & ~7)
         {
             bits -= 8;
-            *state->out_bp++ = data >> bits;
+            *state->cds_p++ = data >> bits;
         }
 
-        state->bitp = 8 - bits;
-        *state->out_bp = data << state->bitp;
+        state->bit_p = 8 - bits;
+        *state->cds_p = data << state->bit_p;
     }
 }
 
@@ -73,17 +73,17 @@ static inline void emitfs(encode_state *state, int fs)
 
     for(;;)
     {
-        if (fs < state->bitp)
+        if (fs < state->bit_p)
         {
-            state->bitp -= fs + 1;
-            *state->out_bp += 1 << state->bitp;
+            state->bit_p -= fs + 1;
+            *state->cds_p += 1 << state->bit_p;
             break;
         }
         else
         {
-            fs -= state->bitp;
-            *++state->out_bp = 0;
-            state->bitp = 8;
+            fs -= state->bit_p;
+            *++state->cds_p = 0;
+            state->bit_p = 8;
         }
     }
 }
@@ -93,72 +93,64 @@ static inline void emitblock(ae_streamp strm, int k, int skip)
     int i;
     uint64_t acc;
     encode_state *state = strm->state;
-    int64_t *in = state->in_block + skip;
-    int64_t *in_end = state->in_block + strm->block_size;
+    int64_t *in = state->block_p + skip;
+    int64_t *in_end = state->block_p + strm->block_size;
     int64_t mask = (1ULL << k) - 1;
-    uint8_t *out = state->out_bp;
+    uint8_t *out = state->cds_p;
 
     acc = *out;
 
     while(in < in_end)
     {
         acc <<= 56;
-        state->bitp = (state->bitp % 8) + 56;
+        state->bit_p = (state->bit_p % 8) + 56;
 
-        while (state->bitp > k && in < in_end)
+        while (state->bit_p > k && in < in_end)
         {
-            state->bitp -= k;
-            acc += (*in++ & mask) << state->bitp;
+            state->bit_p -= k;
+            acc += (*in++ & mask) << state->bit_p;
         }
 
-        for (i = 56; i > (state->bitp & ~7); i -= 8)
+        for (i = 56; i > (state->bit_p & ~7); i -= 8)
             *out++ = acc >> i;
         acc >>= i;
     }
 
     *out = acc;
-    state->out_bp = out;
-    state->bitp %= 8;
+    state->cds_p = out;
+    state->bit_p %= 8;
 }
 
 static inline void preprocess(ae_streamp strm)
 {
     int i;
-    int64_t theta, d, Delta;
+    int64_t theta, Delta, last_in;
     encode_state *state = strm->state;
 
     /* Insert reference samples into first block of Reference Sample
      * Interval.
      */
-    if(state->in_total_blocks % strm->rsi == 0)
-    {
-        state->ref = 1;
-        state->last_in = state->in_block[0];
-    }
-    else
-    {
-        state->ref = 0;
-    }
+    last_in = state->block_buf[0];
 
-    for (i = state->ref; i < strm->block_size; i++)
+    for (i = 1; i < strm->rsi * strm->block_size; i++)
     {
-        theta = MIN(state->last_in - state->xmin,
-                    state->xmax - state->last_in);
-        Delta = state->in_block[i] - state->last_in;
-        state->last_in = state->in_block[i];
+        theta = MIN(last_in - state->xmin,
+                    state->xmax - last_in);
+        Delta = state->block_buf[i] - last_in;
+        last_in = state->block_buf[i];
         if (0 <= Delta && Delta <= theta)
         {
-            state->in_block[i] = 2 * Delta;
+            state->block_buf[i] = 2 * Delta;
         }
         else if (-theta <= Delta && Delta < 0)
         {
-            d = Delta < 0 ? -(uint64_t)Delta : Delta;
-            state->in_block[i] = 2 * d - 1;
+            state->block_buf[i] = 2
+                * (Delta < 0 ? -(uint64_t)Delta : Delta) - 1;
         }
         else
         {
-            state->in_block[i] = theta +
-                (Delta < 0 ? -(uint64_t)Delta : Delta);
+            state->block_buf[i] = theta
+                + (Delta < 0 ? -(uint64_t)Delta : Delta);
         }
     }
 }
@@ -173,59 +165,69 @@ static int m_get_block(ae_streamp strm)
 {
     encode_state *state = strm->state;
 
-    if (strm->avail_out > state->out_blklen)
+    if (strm->avail_out > state->cds_len)
     {
-        if (!state->out_direct)
+        if (!state->direct_out)
         {
-            state->out_direct = 1;
-            *strm->next_out = *state->out_bp;
-            state->out_bp = strm->next_out;
+            state->direct_out = 1;
+            *strm->next_out = *state->cds_p;
+            state->cds_p = strm->next_out;
         }
     }
     else
     {
-        if (state->zero_blocks == 0 || state->out_direct)
+        if (state->zero_blocks == 0 || state->direct_out)
         {
             /* copy leftover from last block */
-            *state->out_block = *state->out_bp;
-            state->out_bp = state->out_block;
+            *state->cds_buf = *state->cds_p;
+            state->cds_p = state->cds_buf;
         }
-        state->out_direct = 0;
+        state->direct_out = 0;
     }
 
-    if(state->block_deferred)
+    if (state->blocks_avail == 0)
     {
-        state->block_deferred = 0;
-        state->mode = m_select_code_option;
-        return M_CONTINUE;
-    }
+        state->ref = 1;
+        state->blocks_avail = strm->rsi - 1;
+        state->block_p = state->block_buf;
 
-    if (strm->avail_in >= state->in_blklen)
-    {
-        state->get_block(strm);
+        if (strm->avail_in >= state->block_len * strm->rsi)
+        {
+            state->get_block(strm);
 
-        if (strm->flags & AE_DATA_PREPROCESS)
-            preprocess(strm);
+            if (strm->flags & AE_DATA_PREPROCESS)
+                preprocess(strm);
 
-        state->in_total_blocks++;
-        return m_check_zero_block(strm);
+            return m_check_zero_block(strm);
+        }
+        else
+        {
+            state->i = 0;
+            state->mode = m_get_block_cautious;
+        }
     }
     else
     {
-        state->i = 0;
-        state->mode = m_get_block_cautious;
+        state->ref = 0;
+        state->block_p += strm->block_size;
+        state->blocks_avail--;
+        return m_check_zero_block(strm);
     }
     return M_CONTINUE;
 }
 
 static int m_get_block_cautious(ae_streamp strm)
 {
-    int pad;
+    int j;
     encode_state *state = strm->state;
 
     do
     {
-        if (strm->avail_in == 0)
+        if (strm->avail_in > 0)
+        {
+            state->block_buf[state->i] = state->get_sample(strm);
+        }
+        else
         {
             if (state->flush == AE_FLUSH)
             {
@@ -234,7 +236,9 @@ static int m_get_block_cautious(ae_streamp strm)
                     /* Pad block with last sample if we have a partial
                      * block.
                      */
-                    state->in_block[state->i] = state->in_block[state->i - 1];
+                    for (j = state->i; j < strm->rsi * strm->block_size; j++)
+                        state->block_buf[j] = state->block_buf[state->i - 1];
+                    state->i = strm->rsi * strm->block_size;
                 }
                 else
                 {
@@ -245,26 +249,12 @@ static int m_get_block_cautious(ae_streamp strm)
                         return M_CONTINUE;
                     }
 
-                    if ((strm->flags & AE_DATA_SZ_COMPAT)
-                        && (state->in_total_blocks % strm->rsi != 0))
-                    {
-                        /* If user wants szip copatibility then we
-                         * have to pad until but not including the
-                         * next reference sample.
-                         */
-                        pad = 64 - (state->in_total_blocks % strm->rsi % 64);
-                        state->in_total_blocks += pad;
-                        state->zero_blocks = (pad > 4)? ROS: pad;
-                        state->mode = m_encode_zero;
-                        return M_CONTINUE;
-                    }
-
                     /* Pad last output byte with 0 bits if user wants
                      * to flush, i.e. we got all input there is.
                      */
-                    emit(state, 0, state->bitp);
-                    if (state->out_direct == 0)
-                        *strm->next_out++ = *state->out_bp;
+                    emit(state, 0, state->bit_p);
+                    if (state->direct_out == 0)
+                        *strm->next_out++ = *state->cds_p;
                     strm->avail_out--;
                     strm->total_out++;
 
@@ -276,17 +266,12 @@ static int m_get_block_cautious(ae_streamp strm)
                 return M_EXIT;
             }
         }
-        else
-        {
-            state->in_block[state->i] = state->get_sample(strm);
-        }
     }
-    while (++state->i < strm->block_size);
+    while (++state->i < strm->rsi * strm->block_size);
 
     if (strm->flags & AE_DATA_PREPROCESS)
         preprocess(strm);
 
-    state->in_total_blocks++;
     return m_check_zero_block(strm);
 }
 
@@ -296,7 +281,7 @@ static inline int m_check_zero_block(ae_streamp strm)
     encode_state *state = strm->state;
 
     i = state->ref;
-    while(i < strm->block_size && state->in_block[i] == 0)
+    while(i < strm->block_size && state->block_p[i] == 0)
         i++;
 
     if (i == strm->block_size)
@@ -305,12 +290,12 @@ static inline int m_check_zero_block(ae_streamp strm)
         if (state->zero_blocks == 0)
         {
             state->zero_ref = state->ref;
-            state->zero_ref_sample = state->in_block[0];
+            state->zero_ref_sample = state->block_p[0];
         }
 
         state->zero_blocks++;
 
-        if (state->in_total_blocks % strm->rsi % 64 == 0)
+        if ((strm->rsi - state->blocks_avail) % 64 == 0)
         {
             if (state->zero_blocks > 4)
                 state->zero_blocks = ROS;
@@ -326,7 +311,8 @@ static inline int m_check_zero_block(ae_streamp strm)
          * zero block first. The current block will be handled
          * later.
          */
-        state->block_deferred = 1;
+        state->block_p -= strm->block_size;
+        state->blocks_avail++;
         state->mode = m_encode_zero;
         return M_CONTINUE;
     }
@@ -354,20 +340,20 @@ static inline int m_select_code_option(ae_streamp strm)
      */
     for (;;)
     {
-        fs_len = (state->in_block[1] >> i)
-            + (state->in_block[2] >> i)
-            + (state->in_block[3] >> i)
-            + (state->in_block[4] >> i)
-            + (state->in_block[5] >> i)
-            + (state->in_block[6] >> i)
-            + (state->in_block[7] >> i);
+        fs_len = (state->block_p[1] >> i)
+            + (state->block_p[2] >> i)
+            + (state->block_p[3] >> i)
+            + (state->block_p[4] >> i)
+            + (state->block_p[5] >> i)
+            + (state->block_p[6] >> i)
+            + (state->block_p[7] >> i);
 
         if (state->ref == 0)
-            fs_len += (state->in_block[0] >> i);
+            fs_len += (state->block_p[0] >> i);
 
         if (strm->block_size > 8)
             for (j = 8; j < strm->block_size; j++)
-                fs_len += state->in_block[j] >> i;
+                fs_len += state->block_p[j] >> i;
 
         split_len = fs_len + this_bs * (i + 1);
 
@@ -455,7 +441,7 @@ static inline int m_select_code_option(ae_streamp strm)
     se_len = 1;
     for (i = 0; i < strm->block_size; i+= 2)
     {
-        d = state->in_block[i] + state->in_block[i + 1];
+        d = state->block_p[i] + state->block_p[i + 1];
         /* we have to worry about overflow here */
         if (d > split_len_min)
         {
@@ -464,7 +450,7 @@ static inline int m_select_code_option(ae_streamp strm)
         }
         else
         {
-            se_len += d * (d + 1) / 2 + state->in_block[i + 1];
+            se_len += d * (d + 1) / 2 + state->block_p[i + 1];
         }
     }
 
@@ -504,11 +490,12 @@ static inline int m_encode_splitting(ae_streamp strm)
     int k = state->k;
 
     emit(state, k + 1, state->id_len);
+
     if (state->ref)
-        emit(state, state->in_block[0], strm->bit_per_sample);
+        emit(state, state->block_p[0], strm->bit_per_sample);
 
     for (i = state->ref; i < strm->block_size; i++)
-        emitfs(state, state->in_block[i] >> k);
+        emitfs(state, state->block_p[i] >> k);
 
     if (k)
         emitblock(strm, k, state->ref);
@@ -534,12 +521,12 @@ static inline int m_encode_se(ae_streamp strm)
 
     emit(state, 1, state->id_len + 1);
     if (state->ref)
-        emit(state, state->in_block[0], strm->bit_per_sample);
+        emit(state, state->block_p[0], strm->bit_per_sample);
 
     for (i = 0; i < strm->block_size; i+= 2)
     {
-        d = state->in_block[i] + state->in_block[i + 1];
-        emitfs(state, d * (d + 1) / 2 + state->in_block[i + 1]);
+        d = state->block_p[i] + state->block_p[i + 1];
+        emitfs(state, d * (d + 1) / 2 + state->block_p[i + 1]);
     }
 
     return m_flush_block(strm);
@@ -570,9 +557,9 @@ static inline int m_flush_block(ae_streamp strm)
     int n;
     encode_state *state = strm->state;
 
-    if (state->out_direct)
+    if (state->direct_out)
     {
-        n = state->out_bp - strm->next_out;
+        n = state->cds_p - strm->next_out;
         strm->next_out += n;
         strm->avail_out -= n;
         strm->total_out += n;
@@ -590,12 +577,12 @@ static inline int m_flush_block_cautious(ae_streamp strm)
     encode_state *state = strm->state;
 
     /* Slow restartable flushing */
-    while(state->out_block + state->i < state->out_bp)
+    while(state->cds_buf + state->i < state->cds_p)
     {
         if (strm->avail_out == 0)
             return M_EXIT;
 
-        *strm->next_out++ = state->out_block[state->i];
+        *strm->next_out++ = state->cds_buf[state->i];
         strm->avail_out--;
         strm->total_out++;
         state->i++;
@@ -640,7 +627,7 @@ int ae_encode_init(ae_streamp strm)
     {
         /* 32 bit settings */
         state->id_len = 5;
-        state->in_blklen = 4 * strm->block_size;
+        state->block_len = 4 * strm->block_size;
 
         if (strm->flags & AE_DATA_MSB)
         {
@@ -654,7 +641,7 @@ int ae_encode_init(ae_streamp strm)
     {
         /* 16 bit settings */
         state->id_len = 4;
-        state->in_blklen = 2 * strm->block_size;
+        state->block_len = 2 * strm->block_size;
 
         if (strm->flags & AE_DATA_MSB)
         {
@@ -671,7 +658,7 @@ int ae_encode_init(ae_streamp strm)
     else
     {
         /* 8 bit settings */
-        state->in_blklen = strm->block_size;
+        state->block_len = strm->block_size;
         state->id_len = 3;
 
         state->get_sample = get_8;
@@ -693,16 +680,17 @@ int ae_encode_init(ae_streamp strm)
         state->xmax = (1ULL << strm->bit_per_sample) - 1;
     }
 
-    state->in_block = (int64_t *)malloc(strm->block_size * sizeof(int64_t));
-    if (state->in_block == NULL)
+    state->block_buf = (int64_t *)malloc(strm->rsi * strm->block_size * sizeof(int64_t));
+    if (state->block_buf == NULL)
     {
         return AE_MEM_ERROR;
     }
+    state->block_p = state->block_buf;
 
     /* Largest possible block according to specs */
-    state->out_blklen = (5 + 64 * 32) / 8 + 3;
-    state->out_block = (uint8_t *)malloc(state->out_blklen);
-    if (state->out_block == NULL)
+    state->cds_len = (5 + 64 * 32) / 8 + 3;
+    state->cds_buf = (uint8_t *)malloc(state->cds_len);
+    if (state->cds_buf == NULL)
     {
         return AE_MEM_ERROR;
     }
@@ -710,9 +698,9 @@ int ae_encode_init(ae_streamp strm)
     strm->total_in = 0;
     strm->total_out = 0;
 
-    state->out_bp = state->out_block;
-    *state->out_bp = 0;
-    state->bitp = 8;
+    state->cds_p = state->cds_buf;
+    *state->cds_p = 0;
+    state->bit_p = 8;
     state->mode = m_get_block;
 
     return AE_OK;
@@ -724,19 +712,23 @@ int ae_encode(ae_streamp strm, int flush)
        Finite-state machine implementation of the adaptive entropy
        encoder.
     */
-
+    int n;
     encode_state *state;
     state = strm->state;
     state->flush = flush;
 
     while (state->mode(strm) == M_CONTINUE);
 
-    if (state->out_direct)
+    if (state->direct_out)
     {
-        m_flush_block(strm);
-        *state->out_block = *state->out_bp;
-        state->out_bp = state->out_block;
-        state->out_direct = 0;
+        n = state->cds_p - strm->next_out;
+        strm->next_out += n;
+        strm->avail_out -= n;
+        strm->total_out += n;
+
+        *state->cds_buf = *state->cds_p;
+        state->cds_p = state->cds_buf;
+        state->direct_out = 0;
     }
     return AE_OK;
 }
@@ -745,8 +737,8 @@ int ae_encode_end(ae_streamp strm)
 {
     encode_state *state = strm->state;
 
-    free(state->in_block);
-    free(state->out_block);
+    free(state->block_buf);
+    free(state->cds_buf);
     free(state);
     return AE_OK;
 }
index 560470e..e2afe08 100644 (file)
--- a/src/aee.h
+++ b/src/aee.h
@@ -13,21 +13,19 @@ typedef struct internal_state {
     int64_t (*get_sample)(ae_streamp);
 
     int id_len;             /* bit length of code option identification key */
-    int64_t last_in;        /* previous input for preprocessing */
     int64_t xmin;           /* minimum integer for preprocessing */
     int64_t xmax;           /* maximum integer for preprocessing */
-    int i;                  /* counter for samples */
-    int64_t *in_block;      /* input block buffer */
-    int in_blklen;          /* input block length in byte */
-    int64_t in_total_blocks;/* total blocks in */
-    uint8_t *out_block;     /* output block buffer */
-    int out_blklen;         /* output block length in byte */
-    uint8_t *out_bp;        /* pointer to current output */
-    int out_direct;         /* output to strm->next_out (1)
-                               or out_block (0) */
-    int bitp;               /* bit pointer to the next unused bit in accumulator */
-    int block_deferred;     /* there is a block in the input buffer
-                               but we first have to emit a zero block */
+    int i;                  /* counter */
+    int64_t *block_buf;     /* RSI blocks of input */
+    int blocks_avail;       /* remaining blocks in buffer */
+    int64_t *block_p;       /* pointer to current block */
+    int block_len;          /* input block length in byte */
+    uint8_t *cds_buf;       /* Buffer for one Coded Data Set */
+    int cds_len;            /* max cds length in byte */
+    uint8_t *cds_p;         /* pointer to current output */
+    int direct_out;         /* output to strm->next_out (1)
+                               or cds_buf (0) */
+    int bit_p;              /* bit pointer to the next unused bit in accumulator */
     int ref;                /* length of reference sample in current block
                                i.e. 0 or 1 depending on whether the block has
                                a reference sample or not */
index aecf01d..8ac4af2 100644 (file)
--- a/src/aee_mutators.c
+++ b/src/aee_mutators.c
@@ -68,81 +68,89 @@ int64_t get_8(ae_streamp strm)
 
 void get_block_msb_16_bs_8(ae_streamp strm)
 {
-    int64_t *block = strm->state->in_block;
-
-    block[0] = ((int64_t)strm->next_in[0] << 8) | (int64_t)strm->next_in[1];
-    block[1] = ((int64_t)strm->next_in[2] << 8) | (int64_t)strm->next_in[3];
-    block[2] = ((int64_t)strm->next_in[4] << 8) | (int64_t)strm->next_in[5];
-    block[3] = ((int64_t)strm->next_in[6] << 8) | (int64_t)strm->next_in[7];
-    block[4] = ((int64_t)strm->next_in[8] << 8) | (int64_t)strm->next_in[9];
-    block[5] = ((int64_t)strm->next_in[10] << 8) | (int64_t)strm->next_in[11];
-    block[6] = ((int64_t)strm->next_in[12] << 8) | (int64_t)strm->next_in[13];
-    block[7] = ((int64_t)strm->next_in[14] << 8) | (int64_t)strm->next_in[15];
-
-    strm->next_in += 16;
-    strm->total_in += 16;
-    strm->avail_in -= 16;
+    int i;
+    int64_t *block = strm->state->block_buf;
+
+    for (i = 0; i < 8 * strm->rsi; i += 8)
+    {
+        block[i + 0] = ((int64_t)strm->next_in[0] << 8) | (int64_t)strm->next_in[1];
+        block[i + 1] = ((int64_t)strm->next_in[2] << 8) | (int64_t)strm->next_in[3];
+        block[i + 2] = ((int64_t)strm->next_in[4] << 8) | (int64_t)strm->next_in[5];
+        block[i + 3] = ((int64_t)strm->next_in[6] << 8) | (int64_t)strm->next_in[7];
+        block[i + 4] = ((int64_t)strm->next_in[8] << 8) | (int64_t)strm->next_in[9];
+        block[i + 5] = ((int64_t)strm->next_in[10] << 8) | (int64_t)strm->next_in[11];
+        block[i + 6] = ((int64_t)strm->next_in[12] << 8) | (int64_t)strm->next_in[13];
+        block[i + 7] = ((int64_t)strm->next_in[14] << 8) | (int64_t)strm->next_in[15];
+
+        strm->next_in += 16;
+    }
+    strm->total_in += 16 * strm->rsi;
+    strm->avail_in -= 16 * strm->rsi;
 }
 
 void get_block_msb_16(ae_streamp strm)
 {
     int i;
-    int64_t *block = strm->state->in_block;
+    int64_t *block = strm->state->block_buf;
 
-    for (i = 0; i < strm->block_size; i++)
+    for (i = 0; i < strm->block_size * strm->rsi; i++)
     {
         block[i] = ((int64_t)strm->next_in[2 * i] << 8)
             | (int64_t)strm->next_in[2 * i + 1];
     }
-    strm->next_in += 2 * strm->block_size;
-    strm->total_in += 2 * strm->block_size;
-    strm->avail_in -= 2 * strm->block_size;
+    strm->next_in += 2 * strm->block_size * strm->rsi;
+    strm->total_in += 2 * strm->block_size * strm->rsi;
+    strm->avail_in -= 2 * strm->block_size * strm->rsi;
 }
 
 void get_block_msb_32(ae_streamp strm)
 {
     int i;
-    int64_t *block = strm->state->in_block;
+    int64_t *block = strm->state->block_buf;
 
-    for (i = 0; i < strm->block_size; i++)
+    for (i = 0; i < strm->block_size * strm->rsi; i++)
     {
         block[i] = ((int64_t)strm->next_in[4 * i] << 24)
             | ((int64_t)strm->next_in[4 * i + 1] << 16)
             | ((int64_t)strm->next_in[4 * i + 2] << 8)
             | (int64_t)strm->next_in[4 * i + 3];
     }
-    strm->next_in += 4 * strm->block_size;
-    strm->total_in += 4 * strm->block_size;
-    strm->avail_in -= 4 * strm->block_size;
+    strm->next_in += 4 * strm->block_size * strm->rsi;
+    strm->total_in += 4 * strm->block_size * strm->rsi;
+    strm->avail_in -= 4 * strm->block_size * strm->rsi;
 }
 
 void get_block_8_bs_8(ae_streamp strm)
 {
-    int64_t *block = strm->state->in_block;
-
-    block[0] = strm->next_in[0];
-    block[1] = strm->next_in[1];
-    block[2] = strm->next_in[2];
-    block[3] = strm->next_in[3];
-    block[4] = strm->next_in[4];
-    block[5] = strm->next_in[5];
-    block[6] = strm->next_in[6];
-    block[7] = strm->next_in[7];
-
-    strm->next_in += 8;
-    strm->total_in += 8;
-    strm->avail_in -= 8;
+    int i;
+    int64_t *block = strm->state->block_buf;
+
+    for (i = 0; i < 8 * strm->rsi; i += 8)
+    {
+        block[i + 0] = strm->next_in[0];
+        block[i + 1] = strm->next_in[1];
+        block[i + 2] = strm->next_in[2];
+        block[i + 3] = strm->next_in[3];
+        block[i + 4] = strm->next_in[4];
+        block[i + 5] = strm->next_in[5];
+        block[i + 6] = strm->next_in[6];
+        block[i + 7] = strm->next_in[7];
+
+        strm->next_in += 8;
+        strm->total_in += 8;
+        strm->avail_in -= 8;
+    }
 }
 
 void get_block_8(ae_streamp strm)
 {
     int i;
-    int64_t *block = strm->state->in_block;
+    int64_t *block = strm->state->block_buf;
 
-    for (i = 0; i < strm->block_size; i++)
+    for (i = 0; i < strm->block_size * strm->rsi; i++)
         block[i] = strm->next_in[i];
 
-    strm->next_in += strm->block_size;
-    strm->total_in += strm->block_size;
-    strm->avail_in -= strm->block_size;
+    strm->next_in += strm->block_size * strm->rsi;
+    strm->total_in += strm->block_size * strm->rsi;
+    strm->avail_in -= strm->block_size * strm->rsi;
 }
index 4e02ae8..f454868 100644 (file)
--- a/src/libae.h
+++ b/src/libae.h
@@ -35,10 +35,6 @@ typedef ae_stream *ae_streamp;
 #define AE_DATA_LSB          8
 #define AE_DATA_MSB         16
 #define AE_DATA_PREPROCESS  32  /* Set if preprocessor should be used */
-#define AE_DATA_SZ_COMPAT  256  /* Set this if you want szip to decode
-                                 * our output. Increases output
-                                 * slightly.
-                                 */
 
 /* Return codes of library functions */
 #define AE_OK            0
index 947985f..ea6ec03 100644 (file)
--- a/src/sz_compat.c
+++ b/src/sz_compat.c
@@ -10,7 +10,7 @@ int SZ_BufftoBuffCompress(void *dest, size_t *destLen, const void *source, size_
     strm.bit_per_sample = param->bits_per_pixel;
     strm.block_size = param->pixels_per_block;
     strm.rsi = param->pixels_per_scanline / param->pixels_per_block;
-    strm.flags = param->options_mask | AE_DATA_SZ_COMPAT;
+    strm.flags = param->options_mask;
     strm.avail_in = sourceLen;
     strm.avail_out = *destLen;
     strm.next_out = dest;