From 082c9c46564bf0003e3d5b945502ac2a8b7718c9 Mon Sep 17 00:00:00 2001 From: Mathis Rosenhauer Date: Tue, 11 Sep 2012 23:06:18 +0200 Subject: [PATCH] Reduce sample buffer to 32bit for 10% speed increase --- src/Makefile | 8 ++--- src/aee.c | 49 +++++++++++++------------ src/aee.h | 6 ++-- src/aee_mutators.c | 103 ++++++++++++++++++++++++++--------------------------- src/aee_mutators.h | 20 +++++------ 5 files changed, 94 insertions(+), 92 deletions(-) diff --git a/src/Makefile b/src/Makefile index 6abdc33..d0e4b2c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -40,10 +40,10 @@ clean: *.gcno *.gcda *.gcov gmon.out check: encode decode test_szcomp - ./encode -c -b1 -B8 -R128 -J8 ../data/example_data > ../data/test.aee - ./decode -b1 -B8 -R128 -J8 ../data/test.aee - diff ../data/test ../data/example_data + ./encode -c -b4 -B32 -R128 -J8 ../data/example_data > ../data/test.aee + ./decode -b4 -B32 -R128 -J8 ../data/test.aee + cmp ../data/test ../data/example_data ./encode -c -b 512 -B8 -R128 -J8 ../data/example_data > ../data/test.aee ./decode -b 512 -B8 -R128 -J8 ../data/test.aee - diff ../data/test ../data/example_data + cmp ../data/test ../data/example_data ./test_szcomp 65536 ../data/example_data_16 diff --git a/src/aee.c b/src/aee.c index 454018c..f2e37b6 100644 --- a/src/aee.c +++ b/src/aee.c @@ -40,7 +40,7 @@ static int m_encode_zero(ae_streamp strm); * */ -static inline void emit(encode_state *state, uint64_t data, int bits) +static inline void emit(encode_state *state, uint32_t data, int bits) { if (bits <= state->bit_p) { @@ -50,7 +50,7 @@ static inline void emit(encode_state *state, uint64_t data, int bits) else { bits -= state->bit_p; - *state->cds_p++ += data >> bits; + *state->cds_p++ += (uint64_t)data >> bits; while (bits & ~7) { @@ -93,9 +93,9 @@ static inline void emitblock(ae_streamp strm, int k, int skip) int i; uint64_t acc; encode_state *state = strm->state; - int64_t *in = state->block_p + skip; - int64_t *in_end = state->block_p + strm->block_size; - int64_t mask = (1ULL << k) - 1; + uint32_t *in = state->block_p + skip; + uint32_t *in_end = state->block_p + strm->block_size; + uint64_t mask = (1ULL << k) - 1; uint8_t *out = state->cds_p; acc = *out; @@ -108,7 +108,7 @@ static inline void emitblock(ae_streamp strm, int k, int skip) while (state->bit_p > k && in < in_end) { state->bit_p -= k; - acc += (*in++ & mask) << state->bit_p; + acc += ((uint64_t)(*in++) & mask) << state->bit_p; } for (i = 56; i > (state->bit_p & ~7); i -= 8) @@ -136,7 +136,7 @@ static inline void preprocess(ae_streamp strm) { theta = MIN(last_in - state->xmin, state->xmax - last_in); - Delta = state->block_buf[i] - last_in; + Delta = (int64_t)state->block_buf[i] - last_in; last_in = state->block_buf[i]; if (0 <= Delta && Delta <= theta) { @@ -323,8 +323,9 @@ static inline int m_check_zero_block(ae_streamp strm) static inline int m_select_code_option(ae_streamp strm) { int i, j, k, this_bs, looked_bothways, direction; - int64_t d, split_len, uncomp_len; - int64_t split_len_min, se_len, fs_len; + int64_t split_len, uncomp_len; + int64_t split_len_min, fs_len; + int64_t d, se_len; encode_state *state = strm->state; /* Length of this block minus reference sample (if present) */ @@ -340,20 +341,20 @@ static inline int m_select_code_option(ae_streamp strm) */ for (;;) { - fs_len = (state->block_p[1] >> i) - + (state->block_p[2] >> i) - + (state->block_p[3] >> i) - + (state->block_p[4] >> i) - + (state->block_p[5] >> i) - + (state->block_p[6] >> i) - + (state->block_p[7] >> i); + fs_len = (int64_t)(state->block_p[1] >> i) + + (int64_t)(state->block_p[2] >> i) + + (int64_t)(state->block_p[3] >> i) + + (int64_t)(state->block_p[4] >> i) + + (int64_t)(state->block_p[5] >> i) + + (int64_t)(state->block_p[6] >> i) + + (int64_t)(state->block_p[7] >> i); if (state->ref == 0) - fs_len += (state->block_p[0] >> i); + fs_len += (int64_t)(state->block_p[0] >> i); if (strm->block_size > 8) for (j = 8; j < strm->block_size; j++) - fs_len += state->block_p[j] >> i; + fs_len += (int64_t)(state->block_p[j] >> i); split_len = fs_len + this_bs * (i + 1); @@ -441,16 +442,16 @@ static inline int m_select_code_option(ae_streamp strm) se_len = 1; for (i = 0; i < strm->block_size; i+= 2) { - d = state->block_p[i] + state->block_p[i + 1]; + d = (int64_t)state->block_p[i] + (int64_t)state->block_p[i + 1]; /* we have to worry about overflow here */ if (d > split_len_min) { - se_len = d; + se_len = INT64_MAX; break; } else { - se_len += d * (d + 1) / 2 + state->block_p[i + 1]; + se_len += d * (d + 1) / 2 + (int64_t)state->block_p[i + 1]; } } @@ -516,7 +517,7 @@ static inline int m_encode_uncomp(ae_streamp strm) static inline int m_encode_se(ae_streamp strm) { int i; - int64_t d; + uint32_t d; encode_state *state = strm->state; emit(state, 1, state->id_len + 1); @@ -680,7 +681,9 @@ int ae_encode_init(ae_streamp strm) state->xmax = (1ULL << strm->bit_per_sample) - 1; } - state->block_buf = (int64_t *)malloc(strm->rsi * strm->block_size * sizeof(int64_t)); + state->block_buf = (uint32_t *)malloc(strm->rsi + * strm->block_size + * sizeof(uint32_t)); if (state->block_buf == NULL) { return AE_MEM_ERROR; diff --git a/src/aee.h b/src/aee.h index e2afe08..259ab7d 100644 --- a/src/aee.h +++ b/src/aee.h @@ -10,15 +10,15 @@ typedef struct internal_state { int (*mode)(ae_streamp); void (*get_block)(ae_streamp); - int64_t (*get_sample)(ae_streamp); + uint32_t (*get_sample)(ae_streamp); int id_len; /* bit length of code option identification key */ int64_t xmin; /* minimum integer for preprocessing */ int64_t xmax; /* maximum integer for preprocessing */ int i; /* counter */ - int64_t *block_buf; /* RSI blocks of input */ + uint32_t *block_buf; /* RSI blocks of input */ int blocks_avail; /* remaining blocks in buffer */ - int64_t *block_p; /* pointer to current block */ + uint32_t *block_p; /* pointer to current block */ int block_len; /* input block length in byte */ uint8_t *cds_buf; /* Buffer for one Coded Data Set */ int cds_len; /* max cds length in byte */ diff --git a/src/aee_mutators.c b/src/aee_mutators.c index 8ac4af2..a94b9e8 100644 --- a/src/aee_mutators.c +++ b/src/aee_mutators.c @@ -3,14 +3,14 @@ #include "aee.h" #include "aee_mutators.h" -int64_t get_lsb_32(ae_streamp strm) +uint32_t get_lsb_32(ae_streamp strm) { - int64_t data; + uint32_t data; - data = ((int64_t)strm->next_in[3] << 24) - | ((int64_t)strm->next_in[2] << 16) - | ((int64_t)strm->next_in[1] << 8) - | (int64_t)strm->next_in[0]; + data = ((uint32_t)strm->next_in[3] << 24) + | ((uint32_t)strm->next_in[2] << 16) + | ((uint32_t)strm->next_in[1] << 8) + | (uint32_t)strm->next_in[0]; strm->next_in += 4; strm->total_in += 4; @@ -18,12 +18,12 @@ int64_t get_lsb_32(ae_streamp strm) return data; } -int64_t get_lsb_16(ae_streamp strm) +uint32_t get_lsb_16(ae_streamp strm) { - int64_t data; + uint32_t data; - data = ((int64_t)strm->next_in[1] << 8) - | (int64_t)strm->next_in[0]; + data = ((uint32_t)strm->next_in[1] << 8) + | (uint32_t)strm->next_in[0]; strm->next_in += 2; strm->total_in += 2; @@ -31,14 +31,14 @@ int64_t get_lsb_16(ae_streamp strm) return data; } -int64_t get_msb_32(ae_streamp strm) +uint32_t get_msb_32(ae_streamp strm) { - int64_t data; + uint32_t data; - data = ((int64_t)strm->next_in[0] << 24) - | ((int64_t)strm->next_in[1] << 16) - | ((int64_t)strm->next_in[2] << 8) - | (int64_t)strm->next_in[3]; + data = ((uint32_t)strm->next_in[0] << 24) + | ((uint32_t)strm->next_in[1] << 16) + | ((uint32_t)strm->next_in[2] << 8) + | (uint32_t)strm->next_in[3]; strm->next_in += 4; strm->total_in += 4; @@ -46,12 +46,12 @@ int64_t get_msb_32(ae_streamp strm) return data; } -int64_t get_msb_16(ae_streamp strm) +uint32_t get_msb_16(ae_streamp strm) { - int64_t data; + uint32_t data; - data = ((int64_t)strm->next_in[0] << 8) - | (int64_t)strm->next_in[1]; + data = ((uint32_t)strm->next_in[0] << 8) + | (uint32_t)strm->next_in[1]; strm->next_in += 2; strm->total_in += 2; @@ -59,7 +59,7 @@ int64_t get_msb_16(ae_streamp strm) return data; } -int64_t get_8(ae_streamp strm) +uint32_t get_8(ae_streamp strm) { strm->avail_in--; strm->total_in++; @@ -69,18 +69,18 @@ int64_t get_8(ae_streamp strm) void get_block_msb_16_bs_8(ae_streamp strm) { int i; - int64_t *block = strm->state->block_buf; + uint32_t *block = strm->state->block_buf; for (i = 0; i < 8 * strm->rsi; i += 8) { - block[i + 0] = ((int64_t)strm->next_in[0] << 8) | (int64_t)strm->next_in[1]; - block[i + 1] = ((int64_t)strm->next_in[2] << 8) | (int64_t)strm->next_in[3]; - block[i + 2] = ((int64_t)strm->next_in[4] << 8) | (int64_t)strm->next_in[5]; - block[i + 3] = ((int64_t)strm->next_in[6] << 8) | (int64_t)strm->next_in[7]; - block[i + 4] = ((int64_t)strm->next_in[8] << 8) | (int64_t)strm->next_in[9]; - block[i + 5] = ((int64_t)strm->next_in[10] << 8) | (int64_t)strm->next_in[11]; - block[i + 6] = ((int64_t)strm->next_in[12] << 8) | (int64_t)strm->next_in[13]; - block[i + 7] = ((int64_t)strm->next_in[14] << 8) | (int64_t)strm->next_in[15]; + block[i + 0] = ((uint32_t)strm->next_in[0] << 8) | (uint32_t)strm->next_in[1]; + block[i + 1] = ((uint32_t)strm->next_in[2] << 8) | (uint32_t)strm->next_in[3]; + block[i + 2] = ((uint32_t)strm->next_in[4] << 8) | (uint32_t)strm->next_in[5]; + block[i + 3] = ((uint32_t)strm->next_in[6] << 8) | (uint32_t)strm->next_in[7]; + block[i + 4] = ((uint32_t)strm->next_in[8] << 8) | (uint32_t)strm->next_in[9]; + block[i + 5] = ((uint32_t)strm->next_in[10] << 8) | (uint32_t)strm->next_in[11]; + block[i + 6] = ((uint32_t)strm->next_in[12] << 8) | (uint32_t)strm->next_in[13]; + block[i + 7] = ((uint32_t)strm->next_in[14] << 8) | (uint32_t)strm->next_in[15]; strm->next_in += 16; } @@ -91,12 +91,12 @@ void get_block_msb_16_bs_8(ae_streamp strm) void get_block_msb_16(ae_streamp strm) { int i; - int64_t *block = strm->state->block_buf; + uint32_t *block = strm->state->block_buf; for (i = 0; i < strm->block_size * strm->rsi; i++) { - block[i] = ((int64_t)strm->next_in[2 * i] << 8) - | (int64_t)strm->next_in[2 * i + 1]; + block[i] = ((uint32_t)strm->next_in[2 * i] << 8) + | (uint32_t)strm->next_in[2 * i + 1]; } strm->next_in += 2 * strm->block_size * strm->rsi; strm->total_in += 2 * strm->block_size * strm->rsi; @@ -106,14 +106,14 @@ void get_block_msb_16(ae_streamp strm) void get_block_msb_32(ae_streamp strm) { int i; - int64_t *block = strm->state->block_buf; + uint32_t *block = strm->state->block_buf; for (i = 0; i < strm->block_size * strm->rsi; i++) { - block[i] = ((int64_t)strm->next_in[4 * i] << 24) - | ((int64_t)strm->next_in[4 * i + 1] << 16) - | ((int64_t)strm->next_in[4 * i + 2] << 8) - | (int64_t)strm->next_in[4 * i + 3]; + block[i] = ((uint32_t)strm->next_in[4 * i] << 24) + | ((uint32_t)strm->next_in[4 * i + 1] << 16) + | ((uint32_t)strm->next_in[4 * i + 2] << 8) + | (uint32_t)strm->next_in[4 * i + 3]; } strm->next_in += 4 * strm->block_size * strm->rsi; strm->total_in += 4 * strm->block_size * strm->rsi; @@ -123,29 +123,28 @@ void get_block_msb_32(ae_streamp strm) void get_block_8_bs_8(ae_streamp strm) { int i; - int64_t *block = strm->state->block_buf; + uint32_t *block = strm->state->block_buf; for (i = 0; i < 8 * strm->rsi; i += 8) { - block[i + 0] = strm->next_in[0]; - block[i + 1] = strm->next_in[1]; - block[i + 2] = strm->next_in[2]; - block[i + 3] = strm->next_in[3]; - block[i + 4] = strm->next_in[4]; - block[i + 5] = strm->next_in[5]; - block[i + 6] = strm->next_in[6]; - block[i + 7] = strm->next_in[7]; - - strm->next_in += 8; - strm->total_in += 8; - strm->avail_in -= 8; + block[i + 0] = strm->next_in[i + 0]; + block[i + 1] = strm->next_in[i + 1]; + block[i + 2] = strm->next_in[i + 2]; + block[i + 3] = strm->next_in[i + 3]; + block[i + 4] = strm->next_in[i + 4]; + block[i + 5] = strm->next_in[i + 5]; + block[i + 6] = strm->next_in[i + 6]; + block[i + 7] = strm->next_in[i + 7]; } + strm->next_in += 8 * strm->rsi; + strm->total_in += 8 * strm->rsi; + strm->avail_in -= 8 * strm->rsi; } void get_block_8(ae_streamp strm) { int i; - int64_t *block = strm->state->block_buf; + uint32_t *block = strm->state->block_buf; for (i = 0; i < strm->block_size * strm->rsi; i++) block[i] = strm->next_in[i]; diff --git a/src/aee_mutators.h b/src/aee_mutators.h index 5139170..8220183 100644 --- a/src/aee_mutators.h +++ b/src/aee_mutators.h @@ -4,16 +4,16 @@ #include #include "libae.h" -int64_t get_lsb_32(ae_streamp); -int64_t get_lsb_16(ae_streamp); -int64_t get_msb_32(ae_streamp); -int64_t get_msb_16(ae_streamp); -int64_t get_8(ae_streamp); +uint32_t get_lsb_32(ae_streamp strm); +uint32_t get_lsb_16(ae_streamp strm); +uint32_t get_msb_32(ae_streamp strm); +uint32_t get_msb_16(ae_streamp strm); +uint32_t get_8(ae_streamp strm); -void get_block_msb_32(ae_streamp); -void get_block_msb_16_bs_8(ae_streamp); -void get_block_msb_16(ae_streamp); -void get_block_8_bs_8(ae_streamp); -void get_block_8(ae_streamp); +void get_block_msb_32(ae_streamp strm); +void get_block_msb_16_bs_8(ae_streamp strm); +void get_block_msb_16(ae_streamp strm); +void get_block_8_bs_8(ae_streamp strm); +void get_block_8(ae_streamp strm); #endif -- 2.7.4