Fixed zero block handling somewhat. Problem with zero blocks at end of buffer remains...
author Mathis Rosenhauer <rosenhauer@dkrz.de>
Sun, 12 Aug 2012 19:38:16 +0000 (21:38 +0200)
committer Mathis Rosenhauer <rosenhauer@dkrz.de>
Sun, 12 Aug 2012 19:44:26 +0000 (21:44 +0200)
src/Makefile
src/aed.c
src/aee.c
src/sz_compat.c

index 820b943..9e66e12 100644 (file)
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,7 +1,7 @@
 CC = gcc
 #CFLAGS = -g -pg -lc -O2 -Wall -fprofile-arcs -ftest-coverage -DPROFILE
 #CFLAGS = -g -pg -lc -O2 -Wall -fprofile-arcs -ftest-coverage -DUNROLL_BLOCK_8
-CFLAGS = -g -O0 -Wall -DUNROLL_BLOCK_8
+CFLAGS = -g -O3 -Wall
 
 OBJS = aee.o aed.o sz_compat.o
 
@@ -46,5 +46,6 @@ check: test_encode test_decode test_szcomp
        ./test_decode 101 101 < ../data/test.ae > ../data/test
        diff ../data/test ../data/example_data
         ./test_szcomp 65536 < ../data/example_data_16 > ../data/test
-        diff ../data/test ../data/example_data_16
-
+       diff ../data/test ../data/example_data_16
+       ./test_szcomp 2097257 < ../data/zero_test > ../data/test
+       diff ../data/test ../data/zero_test
index acef5ad..9d4214d 100644 (file)
--- a/src/aed.c
+++ b/src/aed.c
@@ -37,6 +37,7 @@ typedef struct internal_state {
     int fs;            /* last fundamental sequence in accumulator */
     int ref;           /* 1 if current block has reference sample */
     int pp;            /* 1 if postprocessor has to be used */
+    int byte_per_sample;
     size_t samples_out;
 } decode_state;
 
@@ -263,6 +264,7 @@ int ae_decode_init(ae_streamp strm)
 
     if (strm->bit_per_sample > 16)
     {
+        state->byte_per_sample = 4;
         state->id_len = 5;
         state->out_blklen = strm->block_size * 4;
         if (strm->flags & AE_DATA_MSB)
@@ -272,6 +274,7 @@ int ae_decode_init(ae_streamp strm)
     }
     else if (strm->bit_per_sample > 8)
     {
+        state->byte_per_sample = 2;
         state->id_len = 4;
         state->out_blklen = strm->block_size * 2;
         if (strm->flags & AE_DATA_MSB)
@@ -281,6 +284,7 @@ int ae_decode_init(ae_streamp strm)
     }
     else
     {
+        state->byte_per_sample = 1;
         state->id_len = 3;
         state->out_blklen = strm->block_size;
         state->put_sample = put_8;
@@ -498,6 +502,10 @@ int ae_decode(ae_streamp strm, int flush)
                     (state->samples_out / strm->block_size)
                     % strm->segment_size);
             }
+            else if (zero_blocks > ROS)
+            {
+                zero_blocks--;
+            }
 
 
             if (state->ref)
@@ -505,7 +513,7 @@ int ae_decode(ae_streamp strm, int flush)
             else
                 state->i = zero_blocks * strm->block_size;
 
-            if (strm->avail_out >= state->i)
+            if (strm->avail_out >= state->i * state->byte_per_sample)
             {
                 fast_zero(strm);
                 state->mode = M_ID;
index 9943d9e..aa718e7 100644 (file)
--- a/src/aee.c
+++ b/src/aee.c
@@ -9,9 +9,10 @@
 
 #include "libae.h"
 
-#define ROS 5
+#define ROS -1
 
 #define MIN(a, b) (((a) < (b))? (a): (b))
+#define MAX(a, b) (((a) > (b))? (a): (b))
 
 enum
 {
@@ -38,7 +39,7 @@ typedef struct internal_state {
     int64_t *block_in;      /* input block buffer */
     uint8_t *block_out;     /* output block buffer */
     uint8_t *bp_out;        /* pointer to current output */
-    size_t total_blocks;
+    int64_t total_blocks;
     int bitp;               /* bit pointer to the next unused bit in accumulator */
     int block_deferred;     /* there is a block in the input buffer
                                but we first have to emit a zero block */
@@ -181,9 +182,11 @@ int ae_encode_init(ae_streamp strm)
         return AE_MEM_ERROR;
     }
 
-    blklen = (strm->block_size * strm->bit_per_sample
-              + state->id_len) / 8 + 16;
-
+    /* Zero blocks can span a segment and thus need up to segment_size
+       bits in encoded block */
+    blklen = MAX(strm->block_size * strm->bit_per_sample,
+                 strm->segment_size + 10);
+    blklen = (blklen + state->id_len) / 8 + 3;
     state->block_out = (uint8_t *)malloc(blklen);
     if (state->block_out == NULL)
     {
@@ -414,11 +417,11 @@ int ae_encode(ae_streamp strm, int flush)
 
                 if (state->total_blocks % strm->segment_size == 0)
                 {
-                    if (state->zero_blocks > ROS)
-                        state->zero_blocks = ROS;
 #ifdef PROFILE
                     state->prof[0] += state->zero_blocks;
 #endif
+                    if (state->zero_blocks > 4)
+                        state->zero_blocks = ROS;
                     state->mode = M_ENCODE_ZERO;
                     break;
                 }
@@ -476,6 +479,7 @@ int ae_encode(ae_streamp strm, int flush)
                     split_len_min = split_len;
                     k = j;
 
+#if 0
                     if (fs_len < this_bs)
                     {
                         /* Next can't get better because what we lose
@@ -486,6 +490,9 @@ int ae_encode(ae_streamp strm, int flush)
                 }
                 else
                     break;
+#else
+            }
+#endif
             }
 
             /* Count bits for 2nd extension */
@@ -596,7 +603,14 @@ int ae_encode(ae_streamp strm, int flush)
             {
                 emit(state, state->zero_ref_sample, strm->bit_per_sample);
             }
-            emitfs(state, state->zero_blocks - 1);
+            if (state->zero_blocks == ROS)
+            {
+                emitfs(state, 4);
+            }
+            else if (state->zero_blocks >= 5)
+                emitfs(state, state->zero_blocks);
+            else
+                emitfs(state, state->zero_blocks - 1);
             state->zero_blocks = 0;
             state->mode = M_FLUSH_BLOCK;
             break;
index a8b594c..7409828 100644 (file)
--- a/src/sz_compat.c
+++ b/src/sz_compat.c
@@ -8,7 +8,8 @@ int SZ_BufftoBuffCompress(void *dest, size_t *destLen, const void *source, size_
 
     strm.bit_per_sample = param->bits_per_pixel;
     strm.block_size = param->pixels_per_block;
-    strm.segment_size = param->pixels_per_scanline / param->pixels_per_block;
+//    strm.segment_size = param->pixels_per_scanline / param->pixels_per_block;
+    strm.segment_size = 8;
     strm.flags = param->options_mask;
     strm.avail_in = sourceLen;
     strm.avail_out = *destLen;
@@ -36,7 +37,8 @@ int SZ_BufftoBuffDecompress(void *dest, size_t *destLen, const void *source, siz
 
     strm.bit_per_sample = param->bits_per_pixel;
     strm.block_size = param->pixels_per_block;
-    strm.segment_size = param->pixels_per_scanline / param->pixels_per_block;
+//    strm.segment_size = param->pixels_per_scanline / param->pixels_per_block;
+    strm.segment_size = 8;
     strm.flags = param->options_mask;
     strm.avail_in = sourceLen;
     strm.avail_out = *destLen;