src/plugin_common/replaygain_synthesis.c

   1 /* plugin_common - Routines common to several plugins
   2  * Copyright (C) 2002,2003  Josh Coalson
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public License
   6  * as published by the Free Software Foundation; either version 2
   7  * of the License, or (at your option) any later version.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write to the Free Software
  16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  17  */
  18 /*
  19  * This is an aggregation of pieces of code from John Edwards' WaveGain
  20  * program.  Mostly cosmetic changes were made; otherwise, the dithering
  21  * code is almost untouched and the gain processing was converted from
  22  * processing a whole file to processing chunks of samples.
  23  *
  24  * The original copyright notices for WaveGain's dither.c and wavegain.c
  25  * appear below:
  26  */
  27 /*
  28  * (c) 2002 John Edwards
  29  * mostly lifted from work by Frank Klemm
  30  * random functions for dithering.
  31  */
  32 /*
  33  * Copyright (C) 2002 John Edwards
  34  * Additional code by Magnus Holmgren and Gian-Carlo Pascutto
  35  */
  36
  37 #include <string.h> /* for memset() */
  38 #include <math.h>
  39 #include "private/fast_float_math_hack.h"
  40 #include "replaygain_synthesis.h"
  41 #include "FLAC/assert.h"
  42
  43 #if defined _MSC_VER
  44 #define FLAC__INLINE __inline
  45 #else
  46 #define FLAC__INLINE
  47 #endif
  48
  49
  50 /*
  51  * the following is based on parts of dither.c
  52  */
  53
  54
  55 /*
  56  *  This is a simple random number generator with good quality for audio purposes.
  57  *  It consists of two polycounters with opposite rotation direction and different
  58  *  periods. The periods are coprime, so the total period is the product of both.
  59  *
  60  *     -------------------------------------------------------------------------------------------------
  61  * +-> |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0|
  62  * |   -------------------------------------------------------------------------------------------------
  63  * |                                                                          |  |  |  |     |        |
  64  * |                                                                          +--+--+--+-XOR-+--------+
  65  * |                                                                                      |
  66  * +--------------------------------------------------------------------------------------+
  67  *
  68  *     -------------------------------------------------------------------------------------------------
  69  *     |31:30:29:28:27:26:25:24:23:22:21:20:19:18:17:16:15:14:13:12:11:10: 9: 8: 7: 6: 5: 4: 3: 2: 1: 0| <-+
  70  *     -------------------------------------------------------------------------------------------------   |
  71  *       |  |           |  |                                                                               |
  72  *       +--+----XOR----+--+                                                                               |
  73  *                |                                                                                        |
  74  *                +----------------------------------------------------------------------------------------+
  75  *
  76  *
  77  *  The first has a period of 3*5*17*257*65537, the second of 7*47*73*178481,
  78  *  which gives a period of 18.410.713.077.675.721.215. The result is the
  79  *  XORed values of both generators.
  80  */
  81
  82 static unsigned int random_int_()
  83 {
  84         static const unsigned char parity_[256] = {
  85                 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
  86                 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
  87                 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
  88                 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
  89                 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
  90                 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
  91                 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
  92                 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0
  93         };
  94         static unsigned int r1_ = 1;
  95         static unsigned int r2_ = 1;
  96
  97         unsigned int t1, t2, t3, t4;
  98
  99         /* Parity calculation is done via table lookup, this is also available
 100          * on CPUs without parity, can be implemented in C and avoid unpredictable
 101          * jumps and slow rotate through the carry flag operations.
 102          */
 103         t3   = t1 = r1_;    t4   = t2 = r2_;
 104         t1  &= 0xF5;        t2 >>= 25;
 105         t1   = parity_[t1]; t2  &= 0x63;
 106         t1 <<= 31;          t2   = parity_[t2];
 107
 108         return (r1_ = (t3 >> 1) | t1 ) ^ (r2_ = (t4 + t4) | t2 );
 109 }
 110
 111 /* gives a equal distributed random number */
 112 /* between -2^31*mult and +2^31*mult */
 113 static double random_equi_(double mult)
 114 {
 115         return mult * (int) random_int_();
 116 }
 117
 118 /* gives a triangular distributed random number */
 119 /* between -2^32*mult and +2^32*mult */
 120 static double random_triangular_(double mult)
 121 {
 122         return mult * ( (double) (int) random_int_() + (double) (int) random_int_() );
 123 }
 124
 125
 126 static const float  F44_0 [16 + 32] = {
 127         (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
 128         (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
 129
 130         (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
 131         (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
 132
 133         (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0,
 134         (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0, (float)0
 135 };
 136
 137
 138 static const float  F44_1 [16 + 32] = {  /* SNR(w) = 4.843163 dB, SNR = -3.192134 dB */
 139         (float) 0.85018292704024355931, (float) 0.29089597350995344721, (float)-0.05021866022121039450, (float)-0.23545456294599161833,
 140         (float)-0.58362726442227032096, (float)-0.67038978965193036429, (float)-0.38566861572833459221, (float)-0.15218663390367969967,
 141         (float)-0.02577543084864530676, (float) 0.14119295297688728127, (float) 0.22398848581628781612, (float) 0.15401727203382084116,
 142         (float) 0.05216161232906000929, (float)-0.00282237820999675451, (float)-0.03042794608323867363, (float)-0.03109780942998826024,
 143
 144         (float) 0.85018292704024355931, (float) 0.29089597350995344721, (float)-0.05021866022121039450, (float)-0.23545456294599161833,
 145         (float)-0.58362726442227032096, (float)-0.67038978965193036429, (float)-0.38566861572833459221, (float)-0.15218663390367969967,
 146         (float)-0.02577543084864530676, (float) 0.14119295297688728127, (float) 0.22398848581628781612, (float) 0.15401727203382084116,
 147         (float) 0.05216161232906000929, (float)-0.00282237820999675451, (float)-0.03042794608323867363, (float)-0.03109780942998826024,
 148
 149         (float) 0.85018292704024355931, (float) 0.29089597350995344721, (float)-0.05021866022121039450, (float)-0.23545456294599161833,
 150         (float)-0.58362726442227032096, (float)-0.67038978965193036429, (float)-0.38566861572833459221, (float)-0.15218663390367969967,
 151         (float)-0.02577543084864530676, (float) 0.14119295297688728127, (float) 0.22398848581628781612, (float) 0.15401727203382084116,
 152         (float) 0.05216161232906000929, (float)-0.00282237820999675451, (float)-0.03042794608323867363, (float)-0.03109780942998826024,
 153 };
 154
 155
 156 static const float  F44_2 [16 + 32] = {  /* SNR(w) = 10.060213 dB, SNR = -12.766730 dB */
 157         (float) 1.78827593892108555290, (float) 0.95508210637394326553, (float)-0.18447626783899924429, (float)-0.44198126506275016437,
 158         (float)-0.88404052492547413497, (float)-1.42218907262407452967, (float)-1.02037566838362314995, (float)-0.34861755756425577264,
 159         (float)-0.11490230170431934434, (float) 0.12498899339968611803, (float) 0.38065885268563131927, (float) 0.31883491321310506562,
 160         (float) 0.10486838686563442765, (float)-0.03105361685110374845, (float)-0.06450524884075370758, (float)-0.02939198261121969816,
 161
 162         (float) 1.78827593892108555290, (float) 0.95508210637394326553, (float)-0.18447626783899924429, (float)-0.44198126506275016437,
 163         (float)-0.88404052492547413497, (float)-1.42218907262407452967, (float)-1.02037566838362314995, (float)-0.34861755756425577264,
 164         (float)-0.11490230170431934434, (float) 0.12498899339968611803, (float) 0.38065885268563131927, (float) 0.31883491321310506562,
 165         (float) 0.10486838686563442765, (float)-0.03105361685110374845, (float)-0.06450524884075370758, (float)-0.02939198261121969816,
 166
 167         (float) 1.78827593892108555290, (float) 0.95508210637394326553, (float)-0.18447626783899924429, (float)-0.44198126506275016437,
 168         (float)-0.88404052492547413497, (float)-1.42218907262407452967, (float)-1.02037566838362314995, (float)-0.34861755756425577264,
 169         (float)-0.11490230170431934434, (float) 0.12498899339968611803, (float) 0.38065885268563131927, (float) 0.31883491321310506562,
 170         (float) 0.10486838686563442765, (float)-0.03105361685110374845, (float)-0.06450524884075370758, (float)-0.02939198261121969816,
 171 };
 172
 173
 174 static const float  F44_3 [16 + 32] = {  /* SNR(w) = 15.382598 dB, SNR = -29.402334 dB */
 175         (float) 2.89072132015058161445, (float) 2.68932810943698754106, (float) 0.21083359339410251227, (float)-0.98385073324997617515,
 176         (float)-1.11047823227097316719, (float)-2.18954076314139673147, (float)-2.36498032881953056225, (float)-0.95484132880101140785,
 177         (float)-0.23924057925542965158, (float)-0.13865235703915925642, (float) 0.43587843191057992846, (float) 0.65903257226026665927,
 178         (float) 0.24361815372443152787, (float)-0.00235974960154720097, (float) 0.01844166574603346289, (float) 0.01722945988740875099,
 179
 180         (float) 2.89072132015058161445, (float) 2.68932810943698754106, (float) 0.21083359339410251227, (float)-0.98385073324997617515,
 181         (float)-1.11047823227097316719, (float)-2.18954076314139673147, (float)-2.36498032881953056225, (float)-0.95484132880101140785,
 182         (float)-0.23924057925542965158, (float)-0.13865235703915925642, (float) 0.43587843191057992846, (float) 0.65903257226026665927,
 183         (float) 0.24361815372443152787, (float)-0.00235974960154720097, (float) 0.01844166574603346289, (float) 0.01722945988740875099,
 184
 185         (float) 2.89072132015058161445, (float) 2.68932810943698754106, (float) 0.21083359339410251227, (float)-0.98385073324997617515,
 186         (float)-1.11047823227097316719, (float)-2.18954076314139673147, (float)-2.36498032881953056225, (float)-0.95484132880101140785,
 187         (float)-0.23924057925542965158, (float)-0.13865235703915925642, (float) 0.43587843191057992846, (float) 0.65903257226026665927,
 188         (float) 0.24361815372443152787, (float)-0.00235974960154720097, (float) 0.01844166574603346289, (float) 0.01722945988740875099
 189 };
 190
 191
 192 static double scalar16_(const float* x, const float* y)
 193 {
 194         return
 195                 x[ 0]*y[ 0] + x[ 1]*y[ 1] + x[ 2]*y[ 2] + x[ 3]*y[ 3] +
 196                 x[ 4]*y[ 4] + x[ 5]*y[ 5] + x[ 6]*y[ 6] + x[ 7]*y[ 7] +
 197                 x[ 8]*y[ 8] + x[ 9]*y[ 9] + x[10]*y[10] + x[11]*y[11] +
 198                 x[12]*y[12] + x[13]*y[13] + x[14]*y[14] + x[15]*y[15];
 199 }
 200
 201
 202 void FLAC__plugin_common__init_dither_context(DitherContext *d, int bits, int shapingtype)
 203 {
 204         static unsigned char default_dither [] = { 92, 92, 88, 84, 81, 78, 74, 67,  0,  0 };
 205         static const float*               F [] = { F44_0, F44_1, F44_2, F44_3 };
 206
 207         int index;
 208
 209         if (shapingtype < 0) shapingtype = 0;
 210         if (shapingtype > 3) shapingtype = 3;
 211         index = bits - 11 - shapingtype;
 212         if (index < 0) index = 0;
 213         if (index > 9) index = 9;
 214
 215         memset ( d->ErrorHistory , 0, sizeof (d->ErrorHistory ) );
 216         memset ( d->DitherHistory, 0, sizeof (d->DitherHistory) );
 217
 218         d->FilterCoeff = F [shapingtype];
 219         d->Mask   = ((FLAC__uint64)-1) << (32 - bits);
 220         d->Add    = 0.5     * ((1L << (32 - bits)) - 1);
 221         d->Dither = 0.01*default_dither[index] / (((FLAC__int64)1) << bits);
 222 }
 223
 224 /*
 225  * the following is based on parts of wavegain.c
 226  */
 227
 228 static FLAC__INLINE FLAC__int64 dither_output_(DitherContext *d, FLAC__bool do_dithering, int shapingtype, int i, double Sum, int k)
 229 {
 230         double doubletmp, Sum2;
 231         FLAC__int64 val;
 232
 233 #define ROUND64(x)   ( doubletmp = (x) + d->Add + (FLAC__int64)0x001FFFFD80000000L, *(FLAC__int64*)(&doubletmp) - (FLAC__int64)0x433FFFFD80000000L )
 234
 235         if(do_dithering) {
 236                 if(shapingtype == 0) {
 237                         double  tmp = random_equi_(d->Dither);
 238                         Sum2 = tmp - d->LastRandomNumber [k];
 239                         d->LastRandomNumber [k] = tmp;
 240                         Sum2 = Sum += Sum2;
 241                         val = ROUND64(Sum2) & d->Mask;
 242                 }
 243                 else {
 244                         Sum2 = random_triangular_(d->Dither) - scalar16_(d->DitherHistory[k], d->FilterCoeff + i);
 245                         Sum += d->DitherHistory [k] [(-1-i)&15] = Sum2;
 246                         Sum2 = Sum + scalar16_(d->ErrorHistory [k], d->FilterCoeff + i);
 247                         val = ROUND64(Sum2) & d->Mask;
 248                         d->ErrorHistory [k] [(-1-i)&15] = (float)(Sum - val);
 249                 }
 250                 return val;
 251         }
 252         else
 253                 return ROUND64(Sum);
 254
 255 #undef ROUND64
 256 }
 257
 258 #if 0
 259         float        peak = 0.f,
 260                      new_peak,
 261                      factor_clip
 262         double       scale,
 263                      dB;
 264
 265         ...
 266
 267         peak is in the range -32768.0 .. 32767.0
 268
 269         /* calculate factors for ReplayGain and ClippingPrevention */
 270         *track_gain = GetTitleGain() + settings->man_gain;
 271         scale = (float) pow(10., *track_gain * 0.05);
 272         if(settings->clip_prev) {
 273                 factor_clip  = (float) (32767./( peak + 1));
 274                 if(scale < factor_clip)
 275                         factor_clip = 1.f;
 276                 else
 277                         factor_clip /= scale;
 278                 scale *= factor_clip;
 279         }
 280         new_peak = (float) peak * scale;
 281
 282         dB = 20. * log10(scale);
 283         *track_gain = (float) dB;
 284
 285         const double scale = (float) pow(10., (double)gain * 0.05); /*@@@@ why downcast pow() output to float? */
 286 #endif
 287
 288
 289 int FLAC__plugin_common__apply_gain(FLAC__byte *data_out, FLAC__int32 *input, unsigned wide_samples, unsigned channels, const unsigned source_bps, const unsigned target_bps, const float scale, const FLAC__bool hard_limit, FLAC__bool do_dithering, NoiseShaping noise_shaping, DitherContext *dither_context)
 290 {
 291         static const FLAC__int32 conv_factors_[33] = {
 292                 -1, /* 0 bits-per-sample (not supported) */
 293                 -1, /* 1 bits-per-sample (not supported) */
 294                 -1, /* 2 bits-per-sample (not supported) */
 295                 -1, /* 3 bits-per-sample (not supported) */
 296                 268435456, /* 4 bits-per-sample */
 297                 134217728, /* 5 bits-per-sample */
 298                 67108864, /* 6 bits-per-sample */
 299                 33554432, /* 7 bits-per-sample */
 300                 16777216, /* 8 bits-per-sample */
 301                 8388608, /* 9 bits-per-sample */
 302                 4194304, /* 10 bits-per-sample */
 303                 2097152, /* 11 bits-per-sample */
 304                 1048576, /* 12 bits-per-sample */
 305                 524288, /* 13 bits-per-sample */
 306                 262144, /* 14 bits-per-sample */
 307                 131072, /* 15 bits-per-sample */
 308                 65536, /* 16 bits-per-sample */
 309                 32768, /* 17 bits-per-sample */
 310                 16384, /* 18 bits-per-sample */
 311                 8192, /* 19 bits-per-sample */
 312                 4096, /* 20 bits-per-sample */
 313                 2048, /* 21 bits-per-sample */
 314                 1024, /* 22 bits-per-sample */
 315                 512, /* 23 bits-per-sample */
 316                 256, /* 24 bits-per-sample */
 317                 128, /* 25 bits-per-sample */
 318                 64, /* 26 bits-per-sample */
 319                 32, /* 27 bits-per-sample */
 320                 16, /* 28 bits-per-sample */
 321                 8, /* 29 bits-per-sample */
 322                 4, /* 30 bits-per-sample */
 323                 2, /* 31 bits-per-sample */
 324                 1 /* 32 bits-per-sample */
 325         };
 326         static const FLAC__int64 hard_clip_factors_[33] = {
 327                 0, /* 0 bits-per-sample (not supported) */
 328                 0, /* 1 bits-per-sample (not supported) */
 329                 0, /* 2 bits-per-sample (not supported) */
 330                 0, /* 3 bits-per-sample (not supported) */
 331                 -8, /* 4 bits-per-sample */
 332                 -16, /* 5 bits-per-sample */
 333                 -32, /* 6 bits-per-sample */
 334                 -64, /* 7 bits-per-sample */
 335                 -128, /* 8 bits-per-sample */
 336                 -256, /* 9 bits-per-sample */
 337                 -512, /* 10 bits-per-sample */
 338                 -1024, /* 11 bits-per-sample */
 339                 -2048, /* 12 bits-per-sample */
 340                 -4096, /* 13 bits-per-sample */
 341                 -8192, /* 14 bits-per-sample */
 342                 -16384, /* 15 bits-per-sample */
 343                 -32768, /* 16 bits-per-sample */
 344                 -65536, /* 17 bits-per-sample */
 345                 -131072, /* 18 bits-per-sample */
 346                 -262144, /* 19 bits-per-sample */
 347                 -524288, /* 20 bits-per-sample */
 348                 -1048576, /* 21 bits-per-sample */
 349                 -2097152, /* 22 bits-per-sample */
 350                 -4194304, /* 23 bits-per-sample */
 351                 -8388608, /* 24 bits-per-sample */
 352                 -16777216, /* 25 bits-per-sample */
 353                 -33554432, /* 26 bits-per-sample */
 354                 -67108864, /* 27 bits-per-sample */
 355                 -134217728, /* 28 bits-per-sample */
 356                 -268435456, /* 29 bits-per-sample */
 357                 -536870912, /* 30 bits-per-sample */
 358                 -1073741824, /* 31 bits-per-sample */
 359                 (FLAC__int64)(-1073741824) * 2 /* 32 bits-per-sample */
 360         };
 361         const FLAC__int32 conv_factor = conv_factors_[source_bps];
 362         const FLAC__int64 hard_clip_factor = hard_clip_factors_[source_bps];
 363         /*
 364          * The integer input coming in has a varying range based on the
 365          * source_bps.  We want to normalize it to [-1.0, 1.0) so instead
 366          * of doing two multiplies on each sample, we just multiple
 367          * 'scale' by 1/(2^(source_bps-1))
 368          */
 369         const double multi_scale = scale / (double)(1u << (source_bps-1));
 370
 371         FLAC__byte * const start = data_out;
 372         const unsigned samples = wide_samples * channels;
 373 #ifdef FLAC__PLUGIN_COMMON__DONT_UNROLL
 374         const unsigned dither_twiggle = channels - 1;
 375         unsigned dither_source = 0;
 376 #endif
 377         unsigned i;
 378         int coeff;
 379         double sample;
 380
 381         FLAC__ASSERT(FLAC_PLUGIN__MAX_SUPPORTED_CHANNELS == 2);
 382         FLAC__ASSERT(channels > 0 && channels <= FLAC_PLUGIN__MAX_SUPPORTED_CHANNELS);
 383         FLAC__ASSERT(source_bps >= 4);
 384         FLAC__ASSERT(target_bps >= 4);
 385         FLAC__ASSERT(source_bps <= 32);
 386         FLAC__ASSERT(target_bps < 32);
 387         FLAC__ASSERT((target_bps & 7) == 0);
 388
 389 #ifdef FLAC__PLUGIN_COMMON__DONT_UNROLL
 390         /*
 391          * This flavor handles 1 or 2 channels with the same code
 392          */
 393         coeff = 0;
 394         for(i = 0; i < samples; i++, coeff++) {
 395                 sample = (double)input[i] * multi_scale;
 396
 397                 if(hard_limit) {
 398                         /* hard 6dB limiting */
 399                         if(sample < -0.5)
 400                                 sample = tanh((sample + 0.5) / (1-0.5)) * (1-0.5) - 0.5;
 401                         else if(sample > 0.5)
 402                                 sample = tanh((sample - 0.5) / (1-0.5)) * (1-0.5) + 0.5;
 403                 }
 404                 sample *= 2147483647.f;
 405
 406                 {
 407                         FLAC__int64 val64;
 408                         FLAC__int32 val32;
 409
 410                         if(coeff >= (32<<dither_twiggle))
 411                                 coeff = 0;
 412
 413                         /* 'coeff>>dither_twiggle' is the same as 'coeff/channels' */
 414                         val64 = dither_output_(dither_context, do_dithering, noise_shaping, coeff>>dither_twiggle, sample, dither_source) / conv_factor;
 415
 416                         dither_source ^= dither_twiggle;
 417
 418                         val32 = (FLAC__int32)val64;
 419                         if(val64 >= -hard_clip_factor)
 420                                 val32 = (FLAC__int32)(-(hard_clip_factor+1));
 421                         else if(val64 < hard_clip_factor)
 422                                 val32 = (FLAC__int32)hard_clip_factor;
 423
 424                         switch(target_bps) {
 425                                 case 8:
 426                                         data_out[0] = val32 ^ 0x80;
 427                                         break;
 428                                 case 24:
 429                                         data_out[2] = (FLAC__byte)(val32 >> 16);
 430                                         /* fall through */
 431                                 case 16:
 432                                         data_out[1] = (FLAC__byte)(val32 >> 8);
 433                                         data_out[0] = (FLAC__byte)val32;
 434                         }
 435                 }
 436
 437                 data_out += target_bps/8;
 438         }
 439 #else
 440         /*
 441          * This flavor has optimized versions for 1 or 2 channels
 442          */
 443         if(channels == 2) {
 444                 FLAC__int64 val64;
 445                 FLAC__int32 val32;
 446
 447                 coeff = 0;
 448                 for(i = 0; i < samples; ) {
 449                         sample = (double)input[i] * multi_scale;
 450
 451                         if(hard_limit) {
 452                                 /* hard 6dB limiting */
 453                                 if(sample < -0.5)
 454                                         sample = tanh((sample + 0.5) / (1-0.5)) * (1-0.5) - 0.5;
 455                                 else if(sample > 0.5)
 456                                         sample = tanh((sample - 0.5) / (1-0.5)) * (1-0.5) + 0.5;
 457                         }
 458                         sample *= 2147483647.f;
 459
 460                         val64 = dither_output_(dither_context, do_dithering, noise_shaping, coeff, sample, 0) / conv_factor;
 461
 462                         val32 = (FLAC__int32)val64;
 463                         if(val64 >= -hard_clip_factor)
 464                                 val32 = (FLAC__int32)(-(hard_clip_factor+1));
 465                         else if(val64 < hard_clip_factor)
 466                                 val32 = (FLAC__int32)hard_clip_factor;
 467
 468                         switch(target_bps) {
 469                                 case 8:
 470                                         data_out[0] = val32 ^ 0x80;
 471                                         break;
 472                                 case 24:
 473                                         data_out[2] = (FLAC__byte)(val32 >> 16);
 474                                         /* fall through */
 475                                 case 16:
 476                                         data_out[1] = (FLAC__byte)(val32 >> 8);
 477                                         data_out[0] = (FLAC__byte)val32;
 478                         }
 479
 480                         data_out += target_bps/8;
 481
 482                         i++;
 483
 484                         sample = (double)input[i] * multi_scale;
 485
 486                         if(hard_limit) {
 487                                 /* hard 6dB limiting */
 488                                 if(sample < -0.5)
 489                                         sample = tanh((sample + 0.5) / (1-0.5)) * (1-0.5) - 0.5;
 490                                 else if(sample > 0.5)
 491                                         sample = tanh((sample - 0.5) / (1-0.5)) * (1-0.5) + 0.5;
 492                         }
 493                         sample *= 2147483647.f;
 494
 495                         val64 = dither_output_(dither_context, do_dithering, noise_shaping, coeff, sample, 1) / conv_factor;
 496
 497                         val32 = (FLAC__int32)val64;
 498                         if(val64 >= -hard_clip_factor)
 499                                 val32 = (FLAC__int32)(-(hard_clip_factor+1));
 500                         else if(val64 < hard_clip_factor)
 501                                 val32 = (FLAC__int32)hard_clip_factor;
 502
 503                         switch(target_bps) {
 504                                 case 8:
 505                                         data_out[0] = val32 ^ 0x80;
 506                                         break;
 507                                 case 24:
 508                                         data_out[2] = (FLAC__byte)(val32 >> 16);
 509                                         /* fall through */
 510                                 case 16:
 511                                         data_out[1] = (FLAC__byte)(val32 >> 8);
 512                                         data_out[0] = (FLAC__byte)val32;
 513                         }
 514
 515                         data_out += target_bps/8;
 516
 517                         i++;
 518                         coeff++;
 519                         if(coeff >= 32)
 520                                 coeff = 0;
 521                 }
 522         }
 523         else {
 524                 FLAC__int64 val64;
 525                 FLAC__int32 val32;
 526
 527                 coeff = 0;
 528                 for(i = 0; i < samples; i++, coeff++) {
 529                         if(coeff >= 32)
 530                                 coeff = 0;
 531
 532                         sample = (double)input[i] * multi_scale;
 533
 534                         if(hard_limit) {
 535                                 /* hard 6dB limiting */
 536                                 if(sample < -0.5)
 537                                         sample = tanh((sample + 0.5) / (1-0.5)) * (1-0.5) - 0.5;
 538                                 else if(sample > 0.5)
 539                                         sample = tanh((sample - 0.5) / (1-0.5)) * (1-0.5) + 0.5;
 540                         }
 541                         sample *= 2147483647.f;
 542
 543                         val64 = dither_output_(dither_context, do_dithering, noise_shaping, coeff, sample, 0) / conv_factor;
 544
 545                         val32 = (FLAC__int32)val64;
 546                         if(val64 >= -hard_clip_factor)
 547                                 val32 = (FLAC__int32)(-(hard_clip_factor+1));
 548                         else if(val64 < hard_clip_factor)
 549                                 val32 = (FLAC__int32)hard_clip_factor;
 550
 551                         switch(target_bps) {
 552                                 case 8:
 553                                         data_out[0] = val32 ^ 0x80;
 554                                         break;
 555                                 case 24:
 556                                         data_out[2] = (FLAC__byte)(val32 >> 16);
 557                                         /* fall through */
 558                                 case 16:
 559                                         data_out[1] = (FLAC__byte)(val32 >> 8);
 560                                         data_out[0] = (FLAC__byte)val32;
 561                         }
 562
 563                         data_out += target_bps/8;
 564                 }
 565         }
 566 #endif
 567
 568         return data_out - start;
 569 }