Add 2003 to copyright notice
[platform/upstream/flac.git] / src / plugin_common / replaygain_synthesis.c
1 /* plugin_common - Routines common to several plugins
2  * Copyright (C) 2002,2003  Josh Coalson
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
17  */
18 /*
19  * This is an aggregation of pieces of code from John Edwards' WaveGain
20  * program.  Mostly cosmetic changes were made; otherwise, the dithering
21  * code is almost untouched and the gain processing was converted from
22  * processing a whole file to processing chunks of samples.
23  *
24  * The original copyright notices for WaveGain's dither.c and wavegain.c
25  * appear below:
26  */
27 /*
28  * (c) 2002 John Edwards
29  * mostly lifted from work by Frank Klemm
30  * random functions for dithering.
31  */
32 /*
33  * Copyright (C) 2002 John Edwards
34  * Additional code by Magnus Holmgren and Gian-Carlo Pascutto
35  */
36
37 #include <string.h> /* for memset() */
38 #include <math.h>
39 #include "private/fast_float_math_hack.h"
40 #include "replaygain_synthesis.h"
41 #include "FLAC/assert.h"
42
43 #if defined _MSC_VER
44 #define FLAC__INLINE __inline
45 #else
46 #define FLAC__INLINE
47 #endif
48
49
50 /*
51  * the following is based on parts of dither.c
52  */
53
54
55 /*
56  *  This is a simple random number generator with good quality for audio purposes.
57  *  It consists of two polycounters with opposite rotation direction and different
58  *  periods. The periods are coprime, so the total period is the product of both.
59  *
60  *     -------------------------------------------------------------------------------------------------
61  * +-> |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0|
62  * |   -------------------------------------------------------------------------------------------------
63  * |                                                                          |  |  |  |     |        |
64  * |                                                                          +--+--+--+-XOR-+--------+
65  * |                                                                                      |
66  * +--------------------------------------------------------------------------------------+
67  *
68  *     -------------------------------------------------------------------------------------------------
69  *     |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0| <-+
70  *     -------------------------------------------------------------------------------------------------   |
71  *       |  |           |  |                                                                               |
72  *       +--+----XOR----+--+                                                                               |
73  *                |                                                                                        |
74  *                +----------------------------------------------------------------------------------------+
75  *
76  *
77  *  The first has an period of 3*5*17*257*65537, the second of 7*47*73*178481,
78  *  which gives a period of 18.410.713.077.675.721.215. The result is the
79  *  XORed values of both generators.
80  */
81
82 static unsigned int random_int_()
83 {
84         static const unsigned char parity_[256] = {
85                 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
86                 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
87                 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
88                 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
89                 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
90                 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
91                 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
92                 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0
93         };
94         static unsigned int r1_ = 1;
95         static unsigned int r2_ = 1;
96
97         unsigned int t1, t2, t3, t4;
98
99         /* Parity calculation is done via table lookup, this is also available
100          * on CPUs without parity, can be implemented in C and avoid unpredictable
101          * jumps and slow rotate through the carry flag operations.
102          */
103         t3   = t1 = r1_;    t4   = t2 = r2_;
104         t1  &= 0xF5;        t2 >>= 25;
105         t1   = parity_[t1]; t2  &= 0x63;
106         t1 <<= 31;          t2   = parity_[t2];
107
108         return (r1_ = (t3 >> 1) | t1 ) ^ (r2_ = (t4 + t4) | t2 );
109 }
110
111 /* gives a equal distributed random number */
112 /* between -2^31*mult and +2^31*mult */
113 static double random_equi_(double mult)
114 {
115         return mult * (int) random_int_();
116 }
117
118 /* gives a triangular distributed random number */
119 /* between -2^32*mult and +2^32*mult */
120 static double random_triangular_(double mult)
121 {
122         return mult * ( (double) (int) random_int_() + (double) (int) random_int_() );
123 }
124
125
126 static const float  F44_0 [16 + 32] = {
127         (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
128         (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
129
130         (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
131         (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
132
133         (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
134         (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0
135 };
136
137
138 static const float  F44_1 [16 + 32] = {  /* SNR(w) = 4.843163 dB, SNR = -3.192134 dB */
139         (float) 0.85018292704024355931, (float) 0.29089597350995344721, (float)-0.05021866022121039450, (float)-0.23545456294599161833,
140         (float)-0.58362726442227032096, (float)-0.67038978965193036429, (float)-0.38566861572833459221, (float)-0.15218663390367969967,
141         (float)-0.02577543084864530676, (float) 0.14119295297688728127, (float) 0.22398848581628781612, (float) 0.15401727203382084116,
142         (float) 0.05216161232906000929, (float)-0.00282237820999675451, (float)-0.03042794608323867363, (float)-0.03109780942998826024,
143
144         (float) 0.85018292704024355931, (float) 0.29089597350995344721, (float)-0.05021866022121039450, (float)-0.23545456294599161833,
145         (float)-0.58362726442227032096, (float)-0.67038978965193036429, (float)-0.38566861572833459221, (float)-0.15218663390367969967,
146         (float)-0.02577543084864530676, (float) 0.14119295297688728127, (float) 0.22398848581628781612, (float) 0.15401727203382084116,
147         (float) 0.05216161232906000929, (float)-0.00282237820999675451, (float)-0.03042794608323867363, (float)-0.03109780942998826024,
148
149         (float) 0.85018292704024355931, (float) 0.29089597350995344721, (float)-0.05021866022121039450, (float)-0.23545456294599161833,
150         (float)-0.58362726442227032096, (float)-0.67038978965193036429, (float)-0.38566861572833459221, (float)-0.15218663390367969967,
151         (float)-0.02577543084864530676, (float) 0.14119295297688728127, (float) 0.22398848581628781612, (float) 0.15401727203382084116,
152         (float) 0.05216161232906000929, (float)-0.00282237820999675451, (float)-0.03042794608323867363, (float)-0.03109780942998826024,
153 };
154
155
156 static const float  F44_2 [16 + 32] = {  /* SNR(w) = 10.060213 dB, SNR = -12.766730 dB */
157         (float) 1.78827593892108555290, (float) 0.95508210637394326553, (float)-0.18447626783899924429, (float)-0.44198126506275016437,
158         (float)-0.88404052492547413497, (float)-1.42218907262407452967, (float)-1.02037566838362314995, (float)-0.34861755756425577264,
159         (float)-0.11490230170431934434, (float) 0.12498899339968611803, (float) 0.38065885268563131927, (float) 0.31883491321310506562,
160         (float) 0.10486838686563442765, (float)-0.03105361685110374845, (float)-0.06450524884075370758, (float)-0.02939198261121969816,
161
162         (float) 1.78827593892108555290, (float) 0.95508210637394326553, (float)-0.18447626783899924429, (float)-0.44198126506275016437,
163         (float)-0.88404052492547413497, (float)-1.42218907262407452967, (float)-1.02037566838362314995, (float)-0.34861755756425577264,
164         (float)-0.11490230170431934434, (float) 0.12498899339968611803, (float) 0.38065885268563131927, (float) 0.31883491321310506562,
165         (float) 0.10486838686563442765, (float)-0.03105361685110374845, (float)-0.06450524884075370758, (float)-0.02939198261121969816,
166
167         (float) 1.78827593892108555290, (float) 0.95508210637394326553, (float)-0.18447626783899924429, (float)-0.44198126506275016437,
168         (float)-0.88404052492547413497, (float)-1.42218907262407452967, (float)-1.02037566838362314995, (float)-0.34861755756425577264,
169         (float)-0.11490230170431934434, (float) 0.12498899339968611803, (float) 0.38065885268563131927, (float) 0.31883491321310506562,
170         (float) 0.10486838686563442765, (float)-0.03105361685110374845, (float)-0.06450524884075370758, (float)-0.02939198261121969816,
171 };
172
173
174 static const float  F44_3 [16 + 32] = {  /* SNR(w) = 15.382598 dB, SNR = -29.402334 dB */
175         (float) 2.89072132015058161445, (float) 2.68932810943698754106, (float) 0.21083359339410251227, (float)-0.98385073324997617515,
176         (float)-1.11047823227097316719, (float)-2.18954076314139673147, (float)-2.36498032881953056225, (float)-0.95484132880101140785,
177         (float)-0.23924057925542965158, (float)-0.13865235703915925642, (float) 0.43587843191057992846, (float) 0.65903257226026665927,
178         (float) 0.24361815372443152787, (float)-0.00235974960154720097, (float) 0.01844166574603346289, (float) 0.01722945988740875099,
179
180         (float) 2.89072132015058161445, (float) 2.68932810943698754106, (float) 0.21083359339410251227, (float)-0.98385073324997617515,
181         (float)-1.11047823227097316719, (float)-2.18954076314139673147, (float)-2.36498032881953056225, (float)-0.95484132880101140785,
182         (float)-0.23924057925542965158, (float)-0.13865235703915925642, (float) 0.43587843191057992846, (float) 0.65903257226026665927,
183         (float) 0.24361815372443152787, (float)-0.00235974960154720097, (float) 0.01844166574603346289, (float) 0.01722945988740875099,
184
185         (float) 2.89072132015058161445, (float) 2.68932810943698754106, (float) 0.21083359339410251227, (float)-0.98385073324997617515,
186         (float)-1.11047823227097316719, (float)-2.18954076314139673147, (float)-2.36498032881953056225, (float)-0.95484132880101140785,
187         (float)-0.23924057925542965158, (float)-0.13865235703915925642, (float) 0.43587843191057992846, (float) 0.65903257226026665927,
188         (float) 0.24361815372443152787, (float)-0.00235974960154720097, (float) 0.01844166574603346289, (float) 0.01722945988740875099
189 };
190
191
192 static double scalar16_(const float* x, const float* y)
193 {
194         return
195                 x[ 0]*y[ 0] + x[ 1]*y[ 1] + x[ 2]*y[ 2] + x[ 3]*y[ 3] +
196                 x[ 4]*y[ 4] + x[ 5]*y[ 5] + x[ 6]*y[ 6] + x[ 7]*y[ 7] +
197                 x[ 8]*y[ 8] + x[ 9]*y[ 9] + x[10]*y[10] + x[11]*y[11] +
198                 x[12]*y[12] + x[13]*y[13] + x[14]*y[14] + x[15]*y[15];
199 }
200
201
202 void FLAC__plugin_common__init_dither_context(DitherContext *d, int bits, int shapingtype)
203 {
204         static unsigned char default_dither [] = { 92, 92, 88, 84, 81, 78, 74, 67,  0,  0 };
205         static const float*               F [] = { F44_0, F44_1, F44_2, F44_3 };
206
207         int index;
208
209         if (shapingtype < 0) shapingtype = 0;
210         if (shapingtype > 3) shapingtype = 3;
211         index = bits - 11 - shapingtype;
212         if (index < 0) index = 0;
213         if (index > 9) index = 9;
214
215         memset ( d->ErrorHistory , 0, sizeof (d->ErrorHistory ) );
216         memset ( d->DitherHistory, 0, sizeof (d->DitherHistory) );
217
218         d->FilterCoeff = F [shapingtype];
219         d->Mask   = ((FLAC__uint64)-1) << (32 - bits);
220         d->Add    = 0.5     * ((1L << (32 - bits)) - 1);
221         d->Dither = 0.01*default_dither[index] / (((FLAC__int64)1) << bits);
222 }
223
224 /*
225  * the following is based on parts of wavegain.c
226  */
227
228 static FLAC__INLINE FLAC__int64 dither_output_(DitherContext *d, FLAC__bool do_dithering, int shapingtype, int i, double Sum, int k)
229 {
230         double doubletmp, Sum2;
231         FLAC__int64 val;
232
233 #define ROUND64(x)   ( doubletmp = (x) + d->Add + (FLAC__int64)0x001FFFFD80000000L, *(FLAC__int64*)(&doubletmp) - (FLAC__int64)0x433FFFFD80000000L )
234
235         if(do_dithering) {
236                 if(shapingtype == 0) {
237                         double  tmp = random_equi_(d->Dither);
238                         Sum2 = tmp - d->LastRandomNumber [k];
239                         d->LastRandomNumber [k] = tmp;
240                         Sum2 = Sum += Sum2;
241                         val = ROUND64(Sum2) & d->Mask;
242                 }
243                 else {
244                         Sum2 = random_triangular_(d->Dither) - scalar16_(d->DitherHistory[k], d->FilterCoeff + i);
245                         Sum += d->DitherHistory [k] [(-1-i)&15] = Sum2;
246                         Sum2 = Sum + scalar16_(d->ErrorHistory [k], d->FilterCoeff + i);
247                         val = ROUND64(Sum2) & d->Mask;
248                         d->ErrorHistory [k] [(-1-i)&15] = (float)(Sum - val);
249                 }
250                 return val;
251         }
252         else
253                 return ROUND64(Sum);
254
255 #undef ROUND64
256 }
257
258 #if 0
259         float        peak = 0.f,
260                      new_peak,
261                      factor_clip
262         double       scale,
263                      dB;
264
265         ...
266
267         peak is in the range -32768.0 .. 32767.0
268
269         /* calculate factors for ReplayGain and ClippingPrevention */
270         *track_gain = GetTitleGain() + settings->man_gain;
271         scale = (float) pow(10., *track_gain * 0.05);
272         if(settings->clip_prev) {
273                 factor_clip  = (float) (32767./( peak + 1));
274                 if(scale < factor_clip)
275                         factor_clip = 1.f;
276                 else
277                         factor_clip /= scale;
278                 scale *= factor_clip;
279         }
280         new_peak = (float) peak * scale;
281
282         dB = 20. * log10(scale);
283         *track_gain = (float) dB;
284
285         const double scale = (float) pow(10., (double)gain * 0.05); /*@@@@ why downcast pow() output to float? */
286 #endif
287
288
289 int FLAC__plugin_common__apply_gain(FLAC__byte *data_out, FLAC__int32 *input, unsigned wide_samples, unsigned channels, const unsigned source_bps, const unsigned target_bps, const float scale, const FLAC__bool hard_limit, FLAC__bool do_dithering, NoiseShaping noise_shaping, DitherContext *dither_context)
290 {
291         static const FLAC__int32 conv_factors_[33] = {
292                 -1, /* 0 bits-per-sample (not supported) */
293                 -1, /* 1 bits-per-sample (not supported) */
294                 -1, /* 2 bits-per-sample (not supported) */
295                 -1, /* 3 bits-per-sample (not supported) */
296                 268435456, /* 4 bits-per-sample */
297                 134217728, /* 5 bits-per-sample */
298                 67108864, /* 6 bits-per-sample */
299                 33554432, /* 7 bits-per-sample */
300                 16777216, /* 8 bits-per-sample */
301                 8388608, /* 9 bits-per-sample */
302                 4194304, /* 10 bits-per-sample */
303                 2097152, /* 11 bits-per-sample */
304                 1048576, /* 12 bits-per-sample */
305                 524288, /* 13 bits-per-sample */
306                 262144, /* 14 bits-per-sample */
307                 131072, /* 15 bits-per-sample */
308                 65536, /* 16 bits-per-sample */
309                 32768, /* 17 bits-per-sample */
310                 16384, /* 18 bits-per-sample */
311                 8192, /* 19 bits-per-sample */
312                 4096, /* 20 bits-per-sample */
313                 2048, /* 21 bits-per-sample */
314                 1024, /* 22 bits-per-sample */
315                 512, /* 23 bits-per-sample */
316                 256, /* 24 bits-per-sample */
317                 128, /* 25 bits-per-sample */
318                 64, /* 26 bits-per-sample */
319                 32, /* 27 bits-per-sample */
320                 16, /* 28 bits-per-sample */
321                 8, /* 29 bits-per-sample */
322                 4, /* 30 bits-per-sample */
323                 2, /* 31 bits-per-sample */
324                 1 /* 32 bits-per-sample */
325         };
326         static const FLAC__int64 hard_clip_factors_[33] = {
327                 0, /* 0 bits-per-sample (not supported) */
328                 0, /* 1 bits-per-sample (not supported) */
329                 0, /* 2 bits-per-sample (not supported) */
330                 0, /* 3 bits-per-sample (not supported) */
331                 -8, /* 4 bits-per-sample */
332                 -16, /* 5 bits-per-sample */
333                 -32, /* 6 bits-per-sample */
334                 -64, /* 7 bits-per-sample */
335                 -128, /* 8 bits-per-sample */
336                 -256, /* 9 bits-per-sample */
337                 -512, /* 10 bits-per-sample */
338                 -1024, /* 11 bits-per-sample */
339                 -2048, /* 12 bits-per-sample */
340                 -4096, /* 13 bits-per-sample */
341                 -8192, /* 14 bits-per-sample */
342                 -16384, /* 15 bits-per-sample */
343                 -32768, /* 16 bits-per-sample */
344                 -65536, /* 17 bits-per-sample */
345                 -131072, /* 18 bits-per-sample */
346                 -262144, /* 19 bits-per-sample */
347                 -524288, /* 20 bits-per-sample */
348                 -1048576, /* 21 bits-per-sample */
349                 -2097152, /* 22 bits-per-sample */
350                 -4194304, /* 23 bits-per-sample */
351                 -8388608, /* 24 bits-per-sample */
352                 -16777216, /* 25 bits-per-sample */
353                 -33554432, /* 26 bits-per-sample */
354                 -67108864, /* 27 bits-per-sample */
355                 -134217728, /* 28 bits-per-sample */
356                 -268435456, /* 29 bits-per-sample */
357                 -536870912, /* 30 bits-per-sample */
358                 -1073741824, /* 31 bits-per-sample */
359                 (FLAC__int64)(-1073741824) * 2 /* 32 bits-per-sample */
360         };
361         const FLAC__int32 conv_factor = conv_factors_[source_bps];
362         const FLAC__int64 hard_clip_factor = hard_clip_factors_[source_bps];
363         /*
364          * The integer input coming in has a varying range based on the
365          * source_bps.  We want to normalize it to [-1.0, 1.0) so instead
366          * of doing two multiplies on each sample, we just multiple
367          * 'scale' by 1/(2^(source_bps-1))
368          */
369         const double multi_scale = scale / (double)(1u << (source_bps-1));
370
371         FLAC__byte * const start = data_out;
372         const unsigned samples = wide_samples * channels;
373 #ifdef FLAC__PLUGIN_COMMON__DONT_UNROLL
374         const unsigned dither_twiggle = channels - 1;
375         unsigned dither_source = 0;
376 #endif
377         unsigned i;
378         int coeff;
379         double sample;
380
381         FLAC__ASSERT(FLAC_PLUGIN__MAX_SUPPORTED_CHANNELS == 2);
382         FLAC__ASSERT(channels > 0 && channels <= FLAC_PLUGIN__MAX_SUPPORTED_CHANNELS);
383         FLAC__ASSERT(source_bps >= 4);
384         FLAC__ASSERT(target_bps >= 4);
385         FLAC__ASSERT(source_bps <= 32);
386         FLAC__ASSERT(target_bps < 32);
387         FLAC__ASSERT((target_bps & 7) == 0);
388
389 #ifdef FLAC__PLUGIN_COMMON__DONT_UNROLL
390         /*
391          * This flavor handles 1 or 2 channels with the same code
392          */
393         coeff = 0;
394         for(i = 0; i < samples; i++, coeff++) {
395                 sample = (double)input[i] * multi_scale;
396
397                 if(hard_limit) {
398                         /* hard 6dB limiting */
399                         if(sample < -0.5)
400                                 sample = tanh((sample + 0.5) / (1-0.5)) * (1-0.5) - 0.5;
401                         else if(sample > 0.5)
402                                 sample = tanh((sample - 0.5) / (1-0.5)) * (1-0.5) + 0.5;
403                 }
404                 sample *= 2147483647.f;
405
406                 {
407                         FLAC__int64 val64;
408                         FLAC__int32 val32;
409
410                         if(coeff >= (32<<dither_twiggle))
411                                 coeff = 0;
412
413                         /* 'coeff>>dither_twiggle' is the same as 'coeff/channels' */
414                         val64 = dither_output_(dither_context, do_dithering, noise_shaping, coeff>>dither_twiggle, sample, dither_source) / conv_factor;
415
416                         dither_source ^= dither_twiggle;
417
418                         val32 = (FLAC__int32)val64;
419                         if(val64 >= -hard_clip_factor)
420                                 val32 = (FLAC__int32)(-(hard_clip_factor+1));
421                         else if(val64 < hard_clip_factor)
422                                 val32 = (FLAC__int32)hard_clip_factor;
423
424                         switch(target_bps) {
425                                 case 8:
426                                         data_out[0] = val32 ^ 0x80;
427                                         break;
428                                 case 24:
429                                         data_out[2] = (FLAC__byte)(val32 >> 16);
430                                         /* fall through */
431                                 case 16:
432                                         data_out[1] = (FLAC__byte)(val32 >> 8);
433                                         data_out[0] = (FLAC__byte)val32;
434                         }
435                 }
436
437                 data_out += target_bps/8;
438         }
439 #else
440         /*
441          * This flavor has optimized versions for 1 or 2 channels
442          */
443         if(channels == 2) {
444                 FLAC__int64 val64;
445                 FLAC__int32 val32;
446
447                 coeff = 0;
448                 for(i = 0; i < samples; ) {
449                         sample = (double)input[i] * multi_scale;
450
451                         if(hard_limit) {
452                                 /* hard 6dB limiting */
453                                 if(sample < -0.5)
454                                         sample = tanh((sample + 0.5) / (1-0.5)) * (1-0.5) - 0.5;
455                                 else if(sample > 0.5)
456                                         sample = tanh((sample - 0.5) / (1-0.5)) * (1-0.5) + 0.5;
457                         }
458                         sample *= 2147483647.f;
459
460                         val64 = dither_output_(dither_context, do_dithering, noise_shaping, coeff, sample, 0) / conv_factor;
461
462                         val32 = (FLAC__int32)val64;
463                         if(val64 >= -hard_clip_factor)
464                                 val32 = (FLAC__int32)(-(hard_clip_factor+1));
465                         else if(val64 < hard_clip_factor)
466                                 val32 = (FLAC__int32)hard_clip_factor;
467
468                         switch(target_bps) {
469                                 case 8:
470                                         data_out[0] = val32 ^ 0x80;
471                                         break;
472                                 case 24:
473                                         data_out[2] = (FLAC__byte)(val32 >> 16);
474                                         /* fall through */
475                                 case 16:
476                                         data_out[1] = (FLAC__byte)(val32 >> 8);
477                                         data_out[0] = (FLAC__byte)val32;
478                         }
479
480                         data_out += target_bps/8;
481
482                         i++;
483
484                         sample = (double)input[i] * multi_scale;
485
486                         if(hard_limit) {
487                                 /* hard 6dB limiting */
488                                 if(sample < -0.5)
489                                         sample = tanh((sample + 0.5) / (1-0.5)) * (1-0.5) - 0.5;
490                                 else if(sample > 0.5)
491                                         sample = tanh((sample - 0.5) / (1-0.5)) * (1-0.5) + 0.5;
492                         }
493                         sample *= 2147483647.f;
494
495                         val64 = dither_output_(dither_context, do_dithering, noise_shaping, coeff, sample, 1) / conv_factor;
496
497                         val32 = (FLAC__int32)val64;
498                         if(val64 >= -hard_clip_factor)
499                                 val32 = (FLAC__int32)(-(hard_clip_factor+1));
500                         else if(val64 < hard_clip_factor)
501                                 val32 = (FLAC__int32)hard_clip_factor;
502
503                         switch(target_bps) {
504                                 case 8:
505                                         data_out[0] = val32 ^ 0x80;
506                                         break;
507                                 case 24:
508                                         data_out[2] = (FLAC__byte)(val32 >> 16);
509                                         /* fall through */
510                                 case 16:
511                                         data_out[1] = (FLAC__byte)(val32 >> 8);
512                                         data_out[0] = (FLAC__byte)val32;
513                         }
514
515                         data_out += target_bps/8;
516
517                         i++;
518                         coeff++;
519                         if(coeff >= 32)
520                                 coeff = 0;
521                 }
522         }
523         else {
524                 FLAC__int64 val64;
525                 FLAC__int32 val32;
526
527                 coeff = 0;
528                 for(i = 0; i < samples; i++, coeff++) {
529                         if(coeff >= 32)
530                                 coeff = 0;
531
532                         sample = (double)input[i] * multi_scale;
533
534                         if(hard_limit) {
535                                 /* hard 6dB limiting */
536                                 if(sample < -0.5)
537                                         sample = tanh((sample + 0.5) / (1-0.5)) * (1-0.5) - 0.5;
538                                 else if(sample > 0.5)
539                                         sample = tanh((sample - 0.5) / (1-0.5)) * (1-0.5) + 0.5;
540                         }
541                         sample *= 2147483647.f;
542
543                         val64 = dither_output_(dither_context, do_dithering, noise_shaping, coeff, sample, 0) / conv_factor;
544
545                         val32 = (FLAC__int32)val64;
546                         if(val64 >= -hard_clip_factor)
547                                 val32 = (FLAC__int32)(-(hard_clip_factor+1));
548                         else if(val64 < hard_clip_factor)
549                                 val32 = (FLAC__int32)hard_clip_factor;
550
551                         switch(target_bps) {
552                                 case 8:
553                                         data_out[0] = val32 ^ 0x80;
554                                         break;
555                                 case 24:
556                                         data_out[2] = (FLAC__byte)(val32 >> 16);
557                                         /* fall through */
558                                 case 16:
559                                         data_out[1] = (FLAC__byte)(val32 >> 8);
560                                         data_out[0] = (FLAC__byte)val32;
561                         }
562
563                         data_out += target_bps/8;
564                 }
565         }
566 #endif
567
568         return data_out - start;
569 }