rearrange some of the latest optimizations
authorJosh Coalson <jcoalson@users.sourceforce.net>
Tue, 24 Apr 2001 01:25:42 +0000 (01:25 +0000)
committerJosh Coalson <jcoalson@users.sourceforce.net>
Tue, 24 Apr 2001 01:25:42 +0000 (01:25 +0000)
src/libFLAC/encoder.c

index c50ab93..48e27ad 100644 (file)
@@ -61,7 +61,7 @@ typedef struct FLAC__EncoderPrivate {
        unsigned best_subframe_bits_mid_side[2];
        uint32 *abs_residual;                       /* workspace where abs(candidate residual) is stored */
        uint32 *abs_residual_partition_sums;        /* workspace where the sum of abs(candidate residual) for each partition is stored */
-       unsigned *bits_per_residual_sample;         /* workspace where silog2(candidate residual) is stored */
+       unsigned *raw_bits_per_partition;           /* workspace where the sum of silog2(candidate residual) for each partition is stored */
        FLAC__BitBuffer frame;                      /* the current frame being worked on */
        bool current_frame_can_do_mid_side;         /* encoder sets this false when any given sample of a frame's side channel exceeds 16 bits */
        double loose_mid_side_stereo_frames_exact;  /* exact number of frames the encoder will use before trying both independent and mid/side frames again */
@@ -84,14 +84,14 @@ static bool encoder_process_subframes_(FLAC__Encoder *encoder, bool is_last_fram
 static bool encoder_process_subframe_(FLAC__Encoder *encoder, unsigned max_partition_order, bool verbatim_only, const FLAC__FrameHeader *frame_header, unsigned subframe_bps, const int32 integer_signal[], const real real_signal[], FLAC__Subframe *subframe[2], int32 *residual[2], unsigned *best_subframe, unsigned *best_bits);
 static bool encoder_add_subframe_(FLAC__Encoder *encoder, const FLAC__FrameHeader *frame_header, unsigned subframe_bps, const FLAC__Subframe *subframe, FLAC__BitBuffer *frame);
 static unsigned encoder_evaluate_constant_subframe_(const int32 signal, unsigned subframe_bps, FLAC__Subframe *subframe);
-static unsigned encoder_evaluate_fixed_subframe_(const int32 signal[], int32 residual[], uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned bits_per_residual_sample[], unsigned blocksize, unsigned subframe_bps, unsigned order, unsigned rice_parameter, unsigned max_partition_order, FLAC__Subframe *subframe);
-static unsigned encoder_evaluate_lpc_subframe_(const int32 signal[], int32 residual[], uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned bits_per_residual_sample[], const real lp_coeff[], unsigned blocksize, unsigned subframe_bps, unsigned order, unsigned qlp_coeff_precision, unsigned rice_parameter, unsigned max_partition_order, FLAC__Subframe *subframe);
+static unsigned encoder_evaluate_fixed_subframe_(const int32 signal[], int32 residual[], uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned raw_bits_per_partition[], unsigned blocksize, unsigned subframe_bps, unsigned order, unsigned rice_parameter, unsigned max_partition_order, FLAC__Subframe *subframe);
+static unsigned encoder_evaluate_lpc_subframe_(const int32 signal[], int32 residual[], uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned raw_bits_per_partition[], const real lp_coeff[], unsigned blocksize, unsigned subframe_bps, unsigned order, unsigned qlp_coeff_precision, unsigned rice_parameter, unsigned max_partition_order, FLAC__Subframe *subframe);
 static unsigned encoder_evaluate_verbatim_subframe_(const int32 signal[], unsigned blocksize, unsigned subframe_bps, FLAC__Subframe *subframe);
-static unsigned encoder_find_best_partition_order_(const int32 residual[], uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned bits_per_residual_sample[], unsigned residual_samples, unsigned predictor_order, unsigned rice_parameter, unsigned max_partition_order, unsigned *best_partition_order, unsigned best_parameters[], unsigned best_raw_bits[]);
-#ifdef PRECOMPUTE_PARTITION_SUMS
-static unsigned encoder_precompute_partition_sums_(uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned residual_samples, unsigned predictor_order, unsigned max_partition_order);
+static unsigned encoder_find_best_partition_order_(const int32 residual[], uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned raw_bits_per_partition[], unsigned residual_samples, unsigned predictor_order, unsigned rice_parameter, unsigned max_partition_order, unsigned *best_partition_order, unsigned best_parameters[], unsigned best_raw_bits[]);
+#if (defined PRECOMPUTE_PARTITION_SUMS) || (defined SEARCH_FOR_ESCAPES)
+static unsigned encoder_precompute_partition_info_(const int32 residual[], uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned raw_bits_per_partition[], unsigned residual_samples, unsigned predictor_order, unsigned max_partition_order);
 #endif
-static bool encoder_set_partitioned_rice_(const uint32 abs_residual[], const uint32 abs_residual_partition_sums[], const unsigned bits_per_residual_sample[], const unsigned residual_samples, const unsigned predictor_order, unsigned rice_parameter, const unsigned partition_order, unsigned parameters[], unsigned raw_bits[], unsigned *bits);
+static bool encoder_set_partitioned_rice_(const uint32 abs_residual[], const uint32 abs_residual_partition_sums[], const unsigned raw_bits_per_partition[], const unsigned residual_samples, const unsigned predictor_order, unsigned rice_parameter, const unsigned partition_order, unsigned parameters[], unsigned raw_bits[], unsigned *bits);
 static unsigned encoder_get_wasted_bits_(int32 signal[], unsigned samples);
 
 const char *FLAC__EncoderWriteStatusString[] = {
@@ -127,7 +127,7 @@ bool encoder_resize_buffers_(FLAC__Encoder *encoder, unsigned new_size)
        real *previous_rs, *current_rs;
        int32 *residual;
        uint32 *abs_residual;
-       unsigned *bits_per_residual_sample;
+       unsigned *raw_bits_per_partition;
 
        assert(new_size > 0);
        assert(encoder->state == FLAC__ENCODER_OK);
@@ -243,15 +243,15 @@ bool encoder_resize_buffers_(FLAC__Encoder *encoder, unsigned new_size)
                                free(encoder->guts->abs_residual_partition_sums);
                        encoder->guts->abs_residual_partition_sums = abs_residual;
                }
-               bits_per_residual_sample = (unsigned*)malloc(sizeof(unsigned) * new_size);
-               if(0 == bits_per_residual_sample) {
+               raw_bits_per_partition = (unsigned*)malloc(sizeof(unsigned) * (new_size * 2));
+               if(0 == raw_bits_per_partition) {
                        encoder->state = FLAC__ENCODER_MEMORY_ALLOCATION_ERROR;
                        ok = 0;
                }
                else {
-                       if(encoder->guts->bits_per_residual_sample != 0)
-                               free(encoder->guts->bits_per_residual_sample);
-                       encoder->guts->bits_per_residual_sample = bits_per_residual_sample;
+                       if(encoder->guts->raw_bits_per_partition != 0)
+                               free(encoder->guts->raw_bits_per_partition);
+                       encoder->guts->raw_bits_per_partition = raw_bits_per_partition;
                }
        }
        if(ok)
@@ -383,7 +383,7 @@ FLAC__EncoderState FLAC__encoder_init(FLAC__Encoder *encoder, FLAC__EncoderWrite
        }
        encoder->guts->abs_residual = 0;
        encoder->guts->abs_residual_partition_sums = 0;
-       encoder->guts->bits_per_residual_sample = 0;
+       encoder->guts->raw_bits_per_partition = 0;
        encoder->guts->current_frame_can_do_mid_side = true;
        encoder->guts->loose_mid_side_stereo_frames_exact = (double)encoder->sample_rate * 0.4 / (double)encoder->blocksize;
        encoder->guts->loose_mid_side_stereo_frames = (unsigned)(encoder->guts->loose_mid_side_stereo_frames_exact + 0.5);
@@ -523,9 +523,9 @@ void FLAC__encoder_finish(FLAC__Encoder *encoder)
                        free(encoder->guts->abs_residual_partition_sums);
                        encoder->guts->abs_residual_partition_sums = 0;
                }
-               if(encoder->guts->bits_per_residual_sample != 0) {
-                       free(encoder->guts->bits_per_residual_sample);
-                       encoder->guts->bits_per_residual_sample = 0;
+               if(encoder->guts->raw_bits_per_partition != 0) {
+                       free(encoder->guts->raw_bits_per_partition);
+                       encoder->guts->raw_bits_per_partition = 0;
                }
                FLAC__bitbuffer_free(&encoder->guts->frame);
                free(encoder->guts);
@@ -964,7 +964,7 @@ bool encoder_process_subframe_(FLAC__Encoder *encoder, unsigned max_partition_or
 #endif
                                if(rice_parameter >= (1u << FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN))
                                        rice_parameter = (1u << FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN) - 1;
-                               _candidate_bits = encoder_evaluate_fixed_subframe_(integer_signal, residual[!_best_subframe], encoder->guts->abs_residual, encoder->guts->abs_residual_partition_sums, encoder->guts->bits_per_residual_sample, frame_header->blocksize, subframe_bps, fixed_order, rice_parameter, max_partition_order, subframe[!_best_subframe]);
+                               _candidate_bits = encoder_evaluate_fixed_subframe_(integer_signal, residual[!_best_subframe], encoder->guts->abs_residual, encoder->guts->abs_residual_partition_sums, encoder->guts->raw_bits_per_partition, frame_header->blocksize, subframe_bps, fixed_order, rice_parameter, max_partition_order, subframe[!_best_subframe]);
                                if(_candidate_bits < _best_bits) {
                                        _best_subframe = !_best_subframe;
                                        _best_bits = _candidate_bits;
@@ -1007,7 +1007,7 @@ bool encoder_process_subframe_(FLAC__Encoder *encoder, unsigned max_partition_or
                                                        if(rice_parameter >= (1u << FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN))
                                                                rice_parameter = (1u << FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN) - 1;
                                                        for(qlp_coeff_precision = min_qlp_coeff_precision; qlp_coeff_precision <= max_qlp_coeff_precision; qlp_coeff_precision++) {
-                                                               _candidate_bits = encoder_evaluate_lpc_subframe_(integer_signal, residual[!_best_subframe], encoder->guts->abs_residual, encoder->guts->abs_residual_partition_sums, encoder->guts->bits_per_residual_sample, lp_coeff[lpc_order-1], frame_header->blocksize, subframe_bps, lpc_order, qlp_coeff_precision, rice_parameter, max_partition_order, subframe[!_best_subframe]);
+                                                               _candidate_bits = encoder_evaluate_lpc_subframe_(integer_signal, residual[!_best_subframe], encoder->guts->abs_residual, encoder->guts->abs_residual_partition_sums, encoder->guts->raw_bits_per_partition, lp_coeff[lpc_order-1], frame_header->blocksize, subframe_bps, lpc_order, qlp_coeff_precision, rice_parameter, max_partition_order, subframe[!_best_subframe]);
                                                                if(_candidate_bits > 0) { /* if == 0, there was a problem quantizing the lpcoeffs */
                                                                        if(_candidate_bits < _best_bits) {
                                                                                _best_subframe = !_best_subframe;
@@ -1070,7 +1070,7 @@ unsigned encoder_evaluate_constant_subframe_(const int32 signal, unsigned subfra
        return FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + subframe_bps;
 }
 
-unsigned encoder_evaluate_fixed_subframe_(const int32 signal[], int32 residual[], uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned bits_per_residual_sample[], unsigned blocksize, unsigned subframe_bps, unsigned order, unsigned rice_parameter, unsigned max_partition_order, FLAC__Subframe *subframe)
+unsigned encoder_evaluate_fixed_subframe_(const int32 signal[], int32 residual[], uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned raw_bits_per_partition[], unsigned blocksize, unsigned subframe_bps, unsigned order, unsigned rice_parameter, unsigned max_partition_order, FLAC__Subframe *subframe)
 {
        unsigned i, residual_bits;
        const unsigned residual_samples = blocksize - order;
@@ -1082,7 +1082,7 @@ unsigned encoder_evaluate_fixed_subframe_(const int32 signal[], int32 residual[]
        subframe->data.fixed.entropy_coding_method.type = FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE;
        subframe->data.fixed.residual = residual;
 
-       residual_bits = encoder_find_best_partition_order_(residual, abs_residual, abs_residual_partition_sums, bits_per_residual_sample, residual_samples, order, rice_parameter, max_partition_order, &subframe->data.fixed.entropy_coding_method.data.partitioned_rice.order, subframe->data.fixed.entropy_coding_method.data.partitioned_rice.parameters, subframe->data.fixed.entropy_coding_method.data.partitioned_rice.raw_bits);
+       residual_bits = encoder_find_best_partition_order_(residual, abs_residual, abs_residual_partition_sums, raw_bits_per_partition, residual_samples, order, rice_parameter, max_partition_order, &subframe->data.fixed.entropy_coding_method.data.partitioned_rice.order, subframe->data.fixed.entropy_coding_method.data.partitioned_rice.parameters, subframe->data.fixed.entropy_coding_method.data.partitioned_rice.raw_bits);
 
        subframe->data.fixed.order = order;
        for(i = 0; i < order; i++)
@@ -1091,7 +1091,7 @@ unsigned encoder_evaluate_fixed_subframe_(const int32 signal[], int32 residual[]
        return FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + (order * subframe_bps) + residual_bits;
 }
 
-unsigned encoder_evaluate_lpc_subframe_(const int32 signal[], int32 residual[], uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned bits_per_residual_sample[], const real lp_coeff[], unsigned blocksize, unsigned subframe_bps, unsigned order, unsigned qlp_coeff_precision, unsigned rice_parameter, unsigned max_partition_order, FLAC__Subframe *subframe)
+unsigned encoder_evaluate_lpc_subframe_(const int32 signal[], int32 residual[], uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned raw_bits_per_partition[], const real lp_coeff[], unsigned blocksize, unsigned subframe_bps, unsigned order, unsigned qlp_coeff_precision, unsigned rice_parameter, unsigned max_partition_order, FLAC__Subframe *subframe)
 {
        int32 qlp_coeff[FLAC__MAX_LPC_ORDER];
        unsigned i, residual_bits;
@@ -1109,7 +1109,7 @@ unsigned encoder_evaluate_lpc_subframe_(const int32 signal[], int32 residual[],
        subframe->data.lpc.entropy_coding_method.type = FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE;
        subframe->data.lpc.residual = residual;
 
-       residual_bits = encoder_find_best_partition_order_(residual, abs_residual, abs_residual_partition_sums, bits_per_residual_sample, residual_samples, order, rice_parameter, max_partition_order, &subframe->data.lpc.entropy_coding_method.data.partitioned_rice.order, subframe->data.lpc.entropy_coding_method.data.partitioned_rice.parameters, subframe->data.lpc.entropy_coding_method.data.partitioned_rice.raw_bits);
+       residual_bits = encoder_find_best_partition_order_(residual, abs_residual, abs_residual_partition_sums, raw_bits_per_partition, residual_samples, order, rice_parameter, max_partition_order, &subframe->data.lpc.entropy_coding_method.data.partitioned_rice.order, subframe->data.lpc.entropy_coding_method.data.partitioned_rice.parameters, subframe->data.lpc.entropy_coding_method.data.partitioned_rice.raw_bits);
 
        subframe->data.lpc.order = order;
        subframe->data.lpc.qlp_coeff_precision = qlp_coeff_precision;
@@ -1130,12 +1130,12 @@ unsigned encoder_evaluate_verbatim_subframe_(const int32 signal[], unsigned bloc
        return FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + (blocksize * subframe_bps);
 }
 
-unsigned encoder_find_best_partition_order_(const int32 residual[], uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned bits_per_residual_sample[], unsigned residual_samples, unsigned predictor_order, unsigned rice_parameter, unsigned max_partition_order, unsigned *best_partition_order, unsigned best_parameters[], unsigned best_raw_bits[])
+unsigned encoder_find_best_partition_order_(const int32 residual[], uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned raw_bits_per_partition[], unsigned residual_samples, unsigned predictor_order, unsigned rice_parameter, unsigned max_partition_order, unsigned *best_partition_order, unsigned best_parameters[], unsigned best_raw_bits[])
 {
        int32 r;
-#ifdef PRECOMPUTE_PARTITION_SUMS
-       int partition_order;
+#if (defined PRECOMPUTE_PARTITION_SUMS) || (defined SEARCH_FOR_ESCAPES)
        unsigned sum;
+       int partition_order;
 #else
        unsigned partition_order;
 #endif
@@ -1149,20 +1149,12 @@ unsigned encoder_find_best_partition_order_(const int32 residual[], uint32 abs_r
                abs_residual[residual_sample] = (uint32)(r<0? -r : r);
        }
 
-#ifdef SEARCH_FOR_ESCAPES
-       /* compute silog2(residual) for use later */
-       for(residual_sample = 0; residual_sample < residual_samples; residual_sample++) {
-               bits_per_residual_sample[residual_sample] = FLAC__bitmath_silog2(residual[residual_sample]);
-       }
-#endif
-
-#ifdef PRECOMPUTE_PARTITION_SUMS
-       max_partition_order = encoder_precompute_partition_sums_(abs_residual, abs_residual_partition_sums, residual_samples, predictor_order, max_partition_order);
+#if (defined PRECOMPUTE_PARTITION_SUMS) || (defined SEARCH_FOR_ESCAPES)
+       max_partition_order = encoder_precompute_partition_info_(residual, abs_residual, abs_residual_partition_sums, raw_bits_per_partition, residual_samples, predictor_order, max_partition_order);
 
        for(partition_order = (int)max_partition_order, sum = 0; partition_order >= 0; partition_order--) {
-               if(!encoder_set_partitioned_rice_(abs_residual, abs_residual_partition_sums+sum, bits_per_residual_sample, residual_samples, predictor_order, rice_parameter, (unsigned)partition_order, parameters[!best_parameters_index], raw_bits[!best_parameters_index], &residual_bits)) {
-                       assert(best_residual_bits != 0);
-                       break;
+               if(!encoder_set_partitioned_rice_(abs_residual, abs_residual_partition_sums+sum, raw_bits_per_partition+sum, residual_samples, predictor_order, rice_parameter, (unsigned)partition_order, parameters[!best_parameters_index], raw_bits[!best_parameters_index], &residual_bits)) {
+                       assert(0); /* encoder_precompute_partition_info_ should keep this from ever happening */
                }
                sum += 1u << partition_order;
                if(best_residual_bits == 0 || residual_bits < best_residual_bits) {
@@ -1173,7 +1165,7 @@ unsigned encoder_find_best_partition_order_(const int32 residual[], uint32 abs_r
        }
 #else
        for(partition_order = 0; partition_order <= max_partition_order; partition_order++) {
-               if(!encoder_set_partitioned_rice_(abs_residual, abs_residual_partition_sums, bits_per_residual_sample, residual_samples, predictor_order, rice_parameter, partition_order, parameters[!best_parameters_index], raw_bits[!best_parameters_index], &residual_bits)) {
+               if(!encoder_set_partitioned_rice_(abs_residual, 0, 0, residual_samples, predictor_order, rice_parameter, partition_order, parameters[!best_parameters_index], raw_bits[!best_parameters_index], &residual_bits)) {
                        assert(best_residual_bits != 0);
                        break;
                }
@@ -1190,17 +1182,24 @@ unsigned encoder_find_best_partition_order_(const int32 residual[], uint32 abs_r
        return best_residual_bits;
 }
 
-#ifdef PRECOMPUTE_PARTITION_SUMS
-unsigned encoder_precompute_partition_sums_(uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned residual_samples, unsigned predictor_order, unsigned max_partition_order)
+#if (defined PRECOMPUTE_PARTITION_SUMS) || (defined SEARCH_FOR_ESCAPES)
+unsigned encoder_precompute_partition_info_(const int32 residual[], uint32 abs_residual[], uint32 abs_residual_partition_sums[], unsigned raw_bits_per_partition[], unsigned residual_samples, unsigned predictor_order, unsigned max_partition_order)
 {
        int partition_order;
-       unsigned sum, merged_sum = 0;
+       unsigned from_partition, to_partition = 0;
        const unsigned blocksize = residual_samples + predictor_order;
 
-       /* first do the sums for max_partition_order */
+       /* first do max_partition_order */
        for(partition_order = (int)max_partition_order; partition_order >= 0; partition_order--) {
-               unsigned partition, partition_sample, partition_samples, residual_sample;
+#ifdef PRECOMPUTE_PARTITION_SUMS
                uint32 abs_residual_partition_sum;
+#endif
+#ifdef SEARCH_FOR_ESCAPES
+               uint32 abs_residual_partition_max;
+               unsigned abs_residual_partition_max_index = 0; /* initialized to silence superfluous compiler warning */
+#endif
+               uint32 abs_r;
+               unsigned partition, partition_sample, partition_samples, residual_sample;
                const unsigned partitions = 1u << partition_order;
                const unsigned default_partition_samples = blocksize >> partition_order;
 
@@ -1213,25 +1212,66 @@ unsigned encoder_precompute_partition_sums_(uint32 abs_residual[], uint32 abs_re
                                partition_samples = default_partition_samples;
                                if(partition == 0)
                                        partition_samples -= predictor_order;
+#ifdef PRECOMPUTE_PARTITION_SUMS
                                abs_residual_partition_sum = 0;
-                               for(partition_sample = 0; partition_sample < partition_samples; partition_sample++)
-                                       abs_residual_partition_sum += abs_residual[residual_sample++]; /* @@@ this can overflow with small max_partition_order and (large blocksizes or bits-per-sample), FIX! */
+#endif
+#ifdef SEARCH_FOR_ESCAPES
+                               abs_residual_partition_max = 0;
+#endif
+                               for(partition_sample = 0; partition_sample < partition_samples; partition_sample++) {
+                                       abs_r = abs_residual[residual_sample];
+#ifdef PRECOMPUTE_PARTITION_SUMS
+                                       abs_residual_partition_sum += abs_r; /* @@@ this can overflow with small max_partition_order and (large blocksizes or bits-per-sample), FIX! */
+#endif
+#ifdef SEARCH_FOR_ESCAPES
+                                       if(abs_r > abs_residual_partition_max) {
+                                               abs_residual_partition_max = abs_r;
+                                               abs_residual_partition_max_index = residual_sample;
+                                       }
+#endif
+                                       residual_sample++;
+                               }
+#ifdef PRECOMPUTE_PARTITION_SUMS
                                abs_residual_partition_sums[partition] = abs_residual_partition_sum;
+#endif
+#ifdef SEARCH_FOR_ESCAPES
+                               if(abs_residual_partition_max > 0)
+                                       raw_bits_per_partition[partition] = FLAC__bitmath_silog2(residual[abs_residual_partition_max_index]);
+                               else
+                                       raw_bits_per_partition[partition] = FLAC__bitmath_silog2(0);
+#endif
                        }
-                       merged_sum = partitions;
+                       to_partition = partitions;
                        break;
                }
        }
 
-       /* now merge partition sums for lower orders */
-       for(sum = 0; partition_order > 0; partition_order--) {
-               unsigned i;
+       /* now merge for lower orders */
+       for(from_partition = 0; partition_order >= 0; partition_order--) {
+#ifdef PRECOMPUTE_PARTITION_SUMS
                uint32 s;
+#endif
+#ifdef SEARCH_FOR_ESCAPES
+               unsigned m;
+#endif
+               unsigned i;
                const unsigned partitions = 1u << partition_order;
                for(i = 0; i < partitions; i++) {
-                       s = abs_residual_partition_sums[sum++];
-                       s += abs_residual_partition_sums[sum++];
-                       abs_residual_partition_sums[merged_sum++] = s;
+#ifdef PRECOMPUTE_PARTITION_SUMS
+                       s = abs_residual_partition_sums[from_partition];
+#endif
+#ifdef SEARCH_FOR_ESCAPES
+                       m = raw_bits_per_partition[from_partition];
+#endif
+                       from_partition++;
+#ifdef PRECOMPUTE_PARTITION_SUMS
+                       abs_residual_partition_sums[to_partition] = s + abs_residual_partition_sums[from_partition];
+#endif
+#ifdef SEARCH_FOR_ESCAPES
+                       raw_bits_per_partition[to_partition] = max(m, raw_bits_per_partition[from_partition]);
+#endif
+                       from_partition++;
+                       to_partition++;
                }
        }
 
@@ -1244,11 +1284,11 @@ unsigned encoder_precompute_partition_sums_(uint32 abs_residual[], uint32 abs_re
 #endif
 #define VARIABLE_RICE_BITS(value, parameter) ((value) >> (parameter))
 
-bool encoder_set_partitioned_rice_(const uint32 abs_residual[], const uint32 abs_residual_partition_sums[], const unsigned bits_per_residual_sample[], const unsigned residual_samples, const unsigned predictor_order, unsigned rice_parameter, const unsigned partition_order, unsigned parameters[], unsigned raw_bits[], unsigned *bits)
+bool encoder_set_partitioned_rice_(const uint32 abs_residual[], const uint32 abs_residual_partition_sums[], const unsigned raw_bits_per_partition[], const unsigned residual_samples, const unsigned predictor_order, unsigned rice_parameter, const unsigned partition_order, unsigned parameters[], unsigned raw_bits[], unsigned *bits)
 {
        unsigned partition_bits;
 #ifdef SEARCH_FOR_ESCAPES
-       unsigned flat_bits, partition_max_bits_per_residual_sample;
+       unsigned flat_bits;
 #endif
        unsigned bits_ = FLAC__ENTROPY_CODING_METHOD_TYPE_LEN + FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN;
 
@@ -1271,9 +1311,6 @@ bool encoder_set_partitioned_rice_(const uint32 abs_residual[], const uint32 abs
 #endif
                        parameters[0] = rice_parameter;
                        partition_bits += FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN;
-#ifdef SEARCH_FOR_ESCAPES
-                       partition_max_bits_per_residual_sample = 0;
-#endif
                        for(i = 0; i < residual_samples; i++) {
 #ifdef VARIABLE_RICE_BITS
 #ifdef SYMMETRIC_RICE
@@ -1284,21 +1321,17 @@ bool encoder_set_partitioned_rice_(const uint32 abs_residual[], const uint32 abs
 #else
                                partition_bits += FLAC__bitbuffer_rice_bits(residual[i], rice_parameter); /* NOTE: we will need to pass in residual[] instead of abs_residual[] */
 #endif
-#ifdef SEARCH_FOR_ESCAPES
-                               if(bits_per_residual_sample[i] > partition_max_bits_per_residual_sample)
-                                       partition_max_bits_per_residual_sample = bits_per_residual_sample[i];
-#endif
                        }
 #ifdef SEARCH_FOR_ESCAPES
-                       flat_bits = partition_max_bits_per_residual_sample * residual_samples + FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN + FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_RAW_LEN;
+                       flat_bits = raw_bits_per_partition[0] * residual_samples;
                        if(flat_bits < partition_bits) {
                                parameters[0] = FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER;
-                               raw_bits[0] = partition_max_bits_per_residual_sample;
+                               raw_bits[0] = raw_bits_per_partition[0];
                                partition_bits = flat_bits;
                        }
 #endif
+                       bits_ += partition_bits;
                }
-               bits_ += partition_bits;
        }
        else {
                unsigned i, j, k;
@@ -1315,7 +1348,13 @@ bool encoder_set_partitioned_rice_(const uint32 abs_residual[], const uint32 abs
                                        partition_samples -= predictor_order;
                        }
                        mean = partition_samples >> 1;
+#ifdef PRECOMPUTE_PARTITION_SUMS
                        mean += abs_residual_partition_sums[i];
+#else
+                       for(k = 0; k < partition_samples; j++, k++)
+                               mean += abs_residual[j];
+                       j -= k;
+#endif
                        mean /= partition_samples;
 #ifdef SYMMETRIC_RICE
                        /* calc parameter = floor(log2(mean)) */
@@ -1347,9 +1386,6 @@ mean>>=1;
                        --parameter;
 #endif
 #endif
-#ifdef SEARCH_FOR_ESCAPES
-                       partition_max_bits_per_residual_sample = 0;
-#endif
                        for(k = 0; k < partition_samples; j++, k++) {
 #ifdef VARIABLE_RICE_BITS
 #ifdef SYMMETRIC_RICE
@@ -1360,16 +1396,12 @@ mean>>=1;
 #else
                                partition_bits += FLAC__bitbuffer_rice_bits(residual[j], parameter); /* NOTE: we will need to pass in residual[] instead of abs_residual[] */
 #endif
-#ifdef SEARCH_FOR_ESCAPES
-                               if(bits_per_residual_sample[j] > partition_max_bits_per_residual_sample)
-                                       partition_max_bits_per_residual_sample = bits_per_residual_sample[j];
-#endif
                        }
 #ifdef SEARCH_FOR_ESCAPES
-                       flat_bits = partition_max_bits_per_residual_sample * partition_samples + FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN + FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_RAW_LEN;
+                       flat_bits = raw_bits_per_partition[i] * partition_samples;
                        if(flat_bits < partition_bits) {
                                parameters[i] = FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER;
-                               raw_bits[i] = partition_max_bits_per_residual_sample;
+                               raw_bits[i] = raw_bits_per_partition[i];
                                partition_bits = flat_bits;
                        }
 #endif