libavcodec/ilbcdec.c

   1 /*
   2  * Copyright (c) 2013, The WebRTC project authors. All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are
   6  * met:
   7  *
   8  *   * Redistributions of source code must retain the above copyright
   9  *     notice, this list of conditions and the following disclaimer.
  10  *
  11  *   * Redistributions in binary form must reproduce the above copyright
  12  *     notice, this list of conditions and the following disclaimer in
  13  *     the documentation and/or other materials provided with the
  14  *     distribution.
  15  *
  16  *   * Neither the name of Google nor the names of its contributors may
  17  *     be used to endorse or promote products derived from this software
  18  *     without specific prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  */
  32
  33 #include "libavutil/channel_layout.h"
  34 #include "avcodec.h"
  35 #include "codec_internal.h"
  36 #include "decode.h"
  37 #include "get_bits.h"
  38 #include "ilbcdata.h"
  39
  40 #define LPC_N_20MS            1
  41 #define LPC_N_30MS            2
  42 #define LPC_N_MAX             2
  43 #define LSF_NSPLIT            3
  44 #define NASUB_MAX             4
  45 #define LPC_FILTERORDER       10
  46 #define NSUB_MAX              6
  47 #define SUBL                  40
  48
  49 #define ST_MEM_L_TBL          85
  50 #define MEM_LF_TBL            147
  51 #define STATE_SHORT_LEN_20MS  57
  52 #define STATE_SHORT_LEN_30MS  58
  53
  54 #define BLOCKL_MAX            240
  55 #define CB_MEML               147
  56 #define CB_NSTAGES            3
  57 #define CB_HALFFILTERLEN      4
  58 #define CB_FILTERLEN          8
  59
  60 #define ENH_NBLOCKS_TOT 8
  61 #define ENH_BLOCKL     80
  62 #define ENH_BUFL     (ENH_NBLOCKS_TOT)*ENH_BLOCKL
  63 #define ENH_BUFL_FILTEROVERHEAD  3
  64 #define BLOCKL_MAX      240
  65 #define NSUB_20MS         4
  66 #define NSUB_30MS         6
  67 #define NSUB_MAX          6
  68 #define NASUB_20MS        2
  69 #define NASUB_30MS        4
  70 #define NASUB_MAX         4
  71 #define STATE_LEN        80
  72 #define STATE_SHORT_LEN_30MS  58
  73 #define STATE_SHORT_LEN_20MS  57
  74
  75 #define SPL_MUL_16_16(a, b) ((int32_t) (((int16_t)(a)) * ((int16_t)(b))))
  76 #define SPL_MUL_16_16_RSFT(a, b, c) (SPL_MUL_16_16(a, b) >> (c))
  77
  78 typedef struct ILBCFrame {
  79     int16_t  lsf[LSF_NSPLIT*LPC_N_MAX];
  80     int16_t  cb_index[CB_NSTAGES*(NASUB_MAX + 1)];
  81     int16_t  gain_index[CB_NSTAGES*(NASUB_MAX + 1)];
  82     int16_t  ifm;
  83     int16_t  state_first;
  84     int16_t  idx[STATE_SHORT_LEN_30MS];
  85     int16_t  firstbits;
  86     int16_t  start;
  87 } ILBCFrame;
  88
  89 typedef struct ILBCContext {
  90     AVClass         *class;
  91     int              enhancer;
  92
  93     int              mode;
  94     ILBCFrame        frame;
  95
  96     int              prev_enh_pl;
  97     int              consPLICount;
  98     int              last_lag;
  99     int              state_short_len;
 100     int              lpc_n;
 101     int16_t          nasub;
 102     int16_t          nsub;
 103     int              block_samples;
 104     int16_t          no_of_words;
 105     int16_t          no_of_bytes;
 106     int16_t          lsfdeq[LPC_FILTERORDER*LPC_N_MAX];
 107     int16_t          lsfold[LPC_FILTERORDER];
 108     int16_t          syntMem[LPC_FILTERORDER];
 109     int16_t          lsfdeqold[LPC_FILTERORDER];
 110     int16_t          weightdenum[(LPC_FILTERORDER + 1) * NSUB_MAX];
 111     int16_t          syntdenum[NSUB_MAX * (LPC_FILTERORDER + 1)];
 112     int16_t          old_syntdenum[NSUB_MAX * (LPC_FILTERORDER + 1)];
 113     int16_t          enh_buf[ENH_BUFL+ENH_BUFL_FILTEROVERHEAD];
 114     int16_t          enh_period[ENH_NBLOCKS_TOT];
 115     int16_t          prevResidual[NSUB_MAX*SUBL];
 116     int16_t          decresidual[BLOCKL_MAX];
 117     int16_t          plc_residual[BLOCKL_MAX + LPC_FILTERORDER];
 118     int16_t          seed;
 119     int16_t          prevPLI;
 120     int16_t          prevScale;
 121     int16_t          prevLag;
 122     int16_t          per_square;
 123     int16_t          prev_lpc[LPC_FILTERORDER + 1];
 124     int16_t          plc_lpc[LPC_FILTERORDER + 1];
 125     int16_t          hpimemx[2];
 126     int16_t          hpimemy[4];
 127 } ILBCContext;
 128
 129 static int unpack_frame(ILBCContext *s, const uint8_t *buf, int size)
 130 {
 131     ILBCFrame *frame = &s->frame;
 132     GetBitContext gb0, *const gb = &gb0;
 133     int j, ret;
 134
 135     if ((ret = init_get_bits8(gb, buf, size)) < 0)
 136         return ret;
 137
 138     frame->lsf[0] = get_bits(gb, 6);
 139     frame->lsf[1] = get_bits(gb, 7);
 140     frame->lsf[2] = get_bits(gb, 7);
 141
 142     if (s->mode == 20) {
 143         frame->start          = get_bits(gb, 2);
 144         frame->state_first    = get_bits1(gb);
 145         frame->ifm            = get_bits(gb, 6);
 146         frame->cb_index[0]    = get_bits(gb, 6) << 1;
 147         frame->gain_index[0]  = get_bits(gb, 2) << 3;
 148         frame->gain_index[1]  = get_bits1(gb) << 3;
 149         frame->cb_index[3]    = get_bits(gb, 7) << 1;
 150         frame->gain_index[3]  = get_bits1(gb) << 4;
 151         frame->gain_index[4]  = get_bits1(gb) << 3;
 152         frame->gain_index[6]  = get_bits1(gb) << 4;
 153     } else {
 154         frame->lsf[3]         = get_bits(gb, 6);
 155         frame->lsf[4]         = get_bits(gb, 7);
 156         frame->lsf[5]         = get_bits(gb, 7);
 157         frame->start          = get_bits(gb, 3);
 158         frame->state_first    = get_bits1(gb);
 159         frame->ifm            = get_bits(gb, 6);
 160         frame->cb_index[0]    = get_bits(gb, 4) << 3;
 161         frame->gain_index[0]  = get_bits1(gb) << 4;
 162         frame->gain_index[1]  = get_bits1(gb) << 3;
 163         frame->cb_index[3]    = get_bits(gb, 6) << 2;
 164         frame->gain_index[3]  = get_bits1(gb) << 4;
 165         frame->gain_index[4]  = get_bits1(gb) << 3;
 166     }
 167
 168     for (j = 0; j < 48; j++)
 169         frame->idx[j] = get_bits1(gb) << 2;
 170
 171     if (s->mode == 20) {
 172         for (; j < 57; j++)
 173             frame->idx[j] = get_bits1(gb) << 2;
 174
 175         frame->gain_index[1] |= get_bits1(gb) << 2;
 176         frame->gain_index[3] |= get_bits(gb, 2) << 2;
 177         frame->gain_index[4] |= get_bits1(gb) << 2;
 178         frame->gain_index[6] |= get_bits1(gb) << 3;
 179         frame->gain_index[7]  = get_bits(gb, 2) << 2;
 180     } else {
 181         for (; j < 58; j++)
 182             frame->idx[j] = get_bits1(gb) << 2;
 183
 184         frame->cb_index[0]    |= get_bits(gb, 2) << 1;
 185         frame->gain_index[0]  |= get_bits1(gb) << 3;
 186         frame->gain_index[1]  |= get_bits1(gb) << 2;
 187         frame->cb_index[3]    |= get_bits1(gb) << 1;
 188         frame->cb_index[6]     = get_bits1(gb) << 7;
 189         frame->cb_index[6]    |= get_bits(gb, 6) << 1;
 190         frame->cb_index[9]     = get_bits(gb, 7) << 1;
 191         frame->cb_index[12]    = get_bits(gb, 3) << 5;
 192         frame->cb_index[12]   |= get_bits(gb, 4) << 1;
 193         frame->gain_index[3]  |= get_bits(gb, 2) << 2;
 194         frame->gain_index[4]  |= get_bits(gb, 2) << 1;
 195         frame->gain_index[6]   = get_bits(gb, 2) << 3;
 196         frame->gain_index[7]   = get_bits(gb, 2) << 2;
 197         frame->gain_index[9]   = get_bits1(gb) << 4;
 198         frame->gain_index[10]  = get_bits1(gb) << 3;
 199         frame->gain_index[12]  = get_bits1(gb) << 4;
 200         frame->gain_index[13]  = get_bits1(gb) << 3;
 201     }
 202
 203     for (j = 0; j < 56; j++)
 204         frame->idx[j] |= get_bits(gb, 2);
 205
 206     if (s->mode == 20) {
 207         frame->idx[56]        |= get_bits(gb, 2);
 208         frame->cb_index[0]    |= get_bits1(gb);
 209         frame->cb_index[1]     = get_bits(gb, 7);
 210         frame->cb_index[2]     = get_bits(gb, 6) << 1;
 211         frame->cb_index[2]    |= get_bits1(gb);
 212         frame->gain_index[0]  |= get_bits(gb, 3);
 213         frame->gain_index[1]  |= get_bits(gb, 2);
 214         frame->gain_index[2]   = get_bits(gb, 3);
 215         frame->cb_index[3]    |= get_bits1(gb);
 216         frame->cb_index[4]     = get_bits(gb, 6) << 1;
 217         frame->cb_index[4]    |= get_bits1(gb);
 218         frame->cb_index[5]     = get_bits(gb, 7);
 219         frame->cb_index[6]     = get_bits(gb, 8);
 220         frame->cb_index[7]     = get_bits(gb, 8);
 221         frame->cb_index[8]     = get_bits(gb, 8);
 222         frame->gain_index[3]  |= get_bits(gb, 2);
 223         frame->gain_index[4]  |= get_bits(gb, 2);
 224         frame->gain_index[5]   = get_bits(gb, 3);
 225         frame->gain_index[6]  |= get_bits(gb, 3);
 226         frame->gain_index[7]  |= get_bits(gb, 2);
 227         frame->gain_index[8]   = get_bits(gb, 3);
 228     } else {
 229         frame->idx[56]        |= get_bits(gb, 2);
 230         frame->idx[57]        |= get_bits(gb, 2);
 231         frame->cb_index[0]    |= get_bits1(gb);
 232         frame->cb_index[1]     = get_bits(gb, 7);
 233         frame->cb_index[2]     = get_bits(gb, 4) << 3;
 234         frame->cb_index[2]    |= get_bits(gb, 3);
 235         frame->gain_index[0]  |= get_bits(gb, 3);
 236         frame->gain_index[1]  |= get_bits(gb, 2);
 237         frame->gain_index[2]   = get_bits(gb, 3);
 238         frame->cb_index[3]    |= get_bits1(gb);
 239         frame->cb_index[4]     = get_bits(gb, 4) << 3;
 240         frame->cb_index[4]    |= get_bits(gb, 3);
 241         frame->cb_index[5]     = get_bits(gb, 7);
 242         frame->cb_index[6]    |= get_bits1(gb);
 243         frame->cb_index[7]     = get_bits(gb, 5) << 3;
 244         frame->cb_index[7]    |= get_bits(gb, 3);
 245         frame->cb_index[8]     = get_bits(gb, 8);
 246         frame->cb_index[9]    |= get_bits1(gb);
 247         frame->cb_index[10]    = get_bits(gb, 4) << 4;
 248         frame->cb_index[10]   |= get_bits(gb, 4);
 249         frame->cb_index[11]    = get_bits(gb, 8);
 250         frame->cb_index[12]   |= get_bits1(gb);
 251         frame->cb_index[13]    = get_bits(gb, 3) << 5;
 252         frame->cb_index[13]   |= get_bits(gb, 5);
 253         frame->cb_index[14]    = get_bits(gb, 8);
 254         frame->gain_index[3]  |= get_bits(gb, 2);
 255         frame->gain_index[4]  |= get_bits1(gb);
 256         frame->gain_index[5]   = get_bits(gb, 3);
 257         frame->gain_index[6]  |= get_bits(gb, 3);
 258         frame->gain_index[7]  |= get_bits(gb, 2);
 259         frame->gain_index[8]   = get_bits(gb, 3);
 260         frame->gain_index[9]  |= get_bits(gb, 4);
 261         frame->gain_index[10] |= get_bits1(gb) << 2;
 262         frame->gain_index[10] |= get_bits(gb, 2);
 263         frame->gain_index[11]  = get_bits(gb, 3);
 264         frame->gain_index[12] |= get_bits(gb, 4);
 265         frame->gain_index[13] |= get_bits(gb, 3);
 266         frame->gain_index[14]  = get_bits(gb, 3);
 267     }
 268
 269     return get_bits1(gb);
 270 }
 271
 272 static void index_conv(int16_t *index)
 273 {
 274     int k;
 275
 276     for (k = 4; k < 6; k++) {
 277         if (index[k] >= 44 && index[k] < 108) {
 278             index[k] += 64;
 279         } else if (index[k] >= 108 && index[k] < 128) {
 280             index[k] += 128;
 281         }
 282     }
 283 }
 284
 285 static void lsf_dequantization(int16_t *lsfdeq, int16_t *index, int16_t lpc_n)
 286 {
 287     int i, j, pos = 0, cb_pos = 0;
 288
 289     for (i = 0; i < LSF_NSPLIT; i++) {
 290         for (j = 0; j < lsf_dim_codebook[i]; j++) {
 291             lsfdeq[pos + j] = lsf_codebook[cb_pos + index[i] * lsf_dim_codebook[i] + j];
 292         }
 293
 294         pos    += lsf_dim_codebook[i];
 295         cb_pos += lsf_size_codebook[i] * lsf_dim_codebook[i];
 296     }
 297
 298     if (lpc_n > 1) {
 299         pos = 0;
 300         cb_pos = 0;
 301         for (i = 0; i < LSF_NSPLIT; i++) {
 302             for (j = 0; j < lsf_dim_codebook[i]; j++) {
 303                 lsfdeq[LPC_FILTERORDER + pos + j] = lsf_codebook[cb_pos +
 304                     index[LSF_NSPLIT + i] * lsf_dim_codebook[i] + j];
 305             }
 306
 307             pos    += lsf_dim_codebook[i];
 308             cb_pos += lsf_size_codebook[i] * lsf_dim_codebook[i];
 309         }
 310     }
 311 }
 312
 313 static void lsf_check_stability(int16_t *lsf, int dim, int nb_vectors)
 314 {
 315     for (int n = 0; n < 2; n++) {
 316         for (int m = 0; m < nb_vectors; m++) {
 317             for (int k = 0; k < dim - 1; k++) {
 318                 int i = m * dim + k;
 319
 320                 if ((lsf[i + 1] - lsf[i]) < 319) {
 321                     if (lsf[i + 1] < lsf[i]) {
 322                         lsf[i + 1] = lsf[i] + 160;
 323                         lsf[i]     = lsf[i + 1] - 160;
 324                     } else {
 325                         lsf[i]     -= 160;
 326                         lsf[i + 1] += 160;
 327                     }
 328                 }
 329
 330                 lsf[i] = av_clip(lsf[i], 82, 25723);
 331             }
 332         }
 333     }
 334 }
 335
 336 static void lsf_interpolate(int16_t *out, const int16_t *in1,
 337                             const int16_t *in2, int16_t coef,
 338                             int size)
 339 {
 340     int invcoef = 16384 - coef, i;
 341
 342     for (i = 0; i < size; i++)
 343         out[i] = (coef * in1[i] + invcoef * in2[i] + 8192) >> 14;
 344 }
 345
 346 static void lsf2lsp(const int16_t *lsf, int16_t *lsp, int order)
 347 {
 348     int16_t diff, freq;
 349     int32_t tmp;
 350     int i, k;
 351
 352     for (i = 0; i < order; i++) {
 353         freq = (lsf[i] * 20861) >> 15;
 354         /* 20861: 1.0/(2.0*PI) in Q17 */
 355         /*
 356            Upper 8 bits give the index k and
 357            Lower 8 bits give the difference, which needs
 358            to be approximated linearly
 359          */
 360         k = FFMIN(freq >> 8, 63);
 361         diff = freq & 0xFF;
 362
 363         /* Calculate linear approximation */
 364         tmp = cos_derivative_tbl[k] * diff;
 365         lsp[i] = cos_tbl[k] + (tmp >> 12);
 366     }
 367 }
 368
 369 static void get_lsp_poly(const int16_t *lsp, int32_t *f)
 370 {
 371     int16_t high, low;
 372     int i, j, k, l;
 373     int32_t tmp;
 374
 375     f[0] = 16777216;
 376     f[1] = lsp[0] * -1024;
 377
 378     for (i = 2, k = 2, l = 2; i <= 5; i++, k += 2) {
 379         f[l] = f[l - 2];
 380
 381         for (j = i; j > 1; j--, l--) {
 382             high = f[l - 1] >> 16;
 383             low = (f[l - 1] - (high * (1 << 16))) >> 1;
 384
 385             tmp = ((high * lsp[k]) * 4) + (((low * lsp[k]) >> 15) * 4);
 386
 387             f[l] += f[l - 2];
 388             f[l] -= (unsigned)tmp;
 389         }
 390
 391         f[l] -= lsp[k] * (1 << 10);
 392         l += i;
 393     }
 394 }
 395
 396 static void lsf2poly(int16_t *a, const int16_t *lsf)
 397 {
 398     int32_t f[2][6];
 399     int16_t lsp[10];
 400     int32_t tmp;
 401     int i;
 402
 403     lsf2lsp(lsf, lsp, LPC_FILTERORDER);
 404
 405     get_lsp_poly(&lsp[0], f[0]);
 406     get_lsp_poly(&lsp[1], f[1]);
 407
 408     for (i = 5; i > 0; i--) {
 409         f[0][i] += (unsigned)f[0][i - 1];
 410         f[1][i] -= (unsigned)f[1][i - 1];
 411     }
 412
 413     a[0] = 4096;
 414     for (i = 5; i > 0; i--) {
 415         tmp = f[0][6 - i] + (unsigned)f[1][6 - i] + 4096;
 416         a[6 - i] = tmp >> 13;
 417
 418         tmp = f[0][6 - i] - (unsigned)f[1][6 - i] + 4096;
 419         a[5 + i] = tmp >> 13;
 420     }
 421 }
 422
 423 static void lsp_interpolate2polydec(int16_t *a, const int16_t *lsf1,
 424                                     const int16_t *lsf2, int coef, int length)
 425 {
 426     int16_t lsftmp[LPC_FILTERORDER];
 427
 428     lsf_interpolate(lsftmp, lsf1, lsf2, coef, length);
 429     lsf2poly(a, lsftmp);
 430 }
 431
 432 static void bw_expand(int16_t *out, const int16_t *in, const int16_t *coef, int length)
 433 {
 434     int i;
 435
 436     out[0] = in[0];
 437     for (i = 1; i < length; i++)
 438         out[i] = (coef[i] * in[i] + 16384) >> 15;
 439 }
 440
 441 static void lsp_interpolate(int16_t *syntdenum, int16_t *weightdenum,
 442                             const int16_t *lsfdeq, int16_t length,
 443                             ILBCContext *s)
 444 {
 445     int16_t lp[LPC_FILTERORDER + 1];
 446     const int16_t *const lsfdeq2 = lsfdeq + length;
 447     int i, pos, lp_length;
 448
 449     lp_length = length + 1;
 450
 451     if (s->mode == 30) {
 452         lsp_interpolate2polydec(lp, (*s).lsfdeqold, lsfdeq, lsf_weight_30ms[0], length);
 453         memcpy(syntdenum, lp, lp_length * 2);
 454         bw_expand(weightdenum, lp, kLpcChirpSyntDenum, lp_length);
 455
 456         pos = lp_length;
 457         for (i = 1; i < 6; i++) {
 458             lsp_interpolate2polydec(lp, lsfdeq, lsfdeq2,
 459                                                  lsf_weight_30ms[i],
 460                                                  length);
 461             memcpy(syntdenum + pos, lp, lp_length * 2);
 462             bw_expand(weightdenum + pos, lp, kLpcChirpSyntDenum, lp_length);
 463             pos += lp_length;
 464         }
 465     } else {
 466         pos = 0;
 467         for (i = 0; i < s->nsub; i++) {
 468             lsp_interpolate2polydec(lp, s->lsfdeqold, lsfdeq,
 469                                     lsf_weight_20ms[i], length);
 470             memcpy(syntdenum + pos, lp, lp_length * 2);
 471             bw_expand(weightdenum + pos, lp, kLpcChirpSyntDenum, lp_length);
 472             pos += lp_length;
 473         }
 474     }
 475
 476     if (s->mode == 30) {
 477         memcpy(s->lsfdeqold, lsfdeq2, length * 2);
 478     } else {
 479         memcpy(s->lsfdeqold, lsfdeq, length * 2);
 480     }
 481 }
 482
 483 static void filter_mafq12(const int16_t *in_ptr, int16_t *out_ptr,
 484                           const int16_t *B, int16_t B_length,
 485                           int16_t length)
 486 {
 487     int o, i, j;
 488
 489     for (i = 0; i < length; i++) {
 490         const int16_t *b_ptr = &B[0];
 491         const int16_t *x_ptr = &in_ptr[i];
 492
 493         o = 0;
 494         for (j = 0; j < B_length; j++)
 495             o += b_ptr[j] * *x_ptr--;
 496
 497         o = av_clip(o, -134217728, 134215679);
 498
 499         out_ptr[i] = ((o + 2048) >> 12);
 500     }
 501 }
 502
 503 static void filter_arfq12(const int16_t *data_in,
 504                           int16_t *data_out,
 505                           const int16_t *coefficients,
 506                           int coefficients_length,
 507                           int data_length)
 508 {
 509     int i, j;
 510
 511     for (i = 0; i < data_length; i++) {
 512         int output = 0, sum = 0;
 513
 514         for (j = coefficients_length - 1; j > 0; j--) {
 515             sum += (unsigned)(coefficients[j] * data_out[i - j]);
 516         }
 517
 518         output = coefficients[0] * data_in[i] - (unsigned)sum;
 519         output = av_clip(output, -134217728, 134215679);
 520
 521         data_out[i] = (output + 2048) >> 12;
 522     }
 523 }
 524
 525 static void state_construct(int16_t ifm, const int16_t *idx,
 526                             const int16_t *synt_denum, int16_t *Out_fix,
 527                            int16_t len)
 528 {
 529     int k;
 530     int16_t maxVal;
 531     int16_t *tmp1, *tmp3;
 532     const int16_t *tmp2;
 533     /* Stack based */
 534     int16_t numerator[1 + LPC_FILTERORDER];
 535     int16_t sampleValVec[2 * STATE_SHORT_LEN_30MS + LPC_FILTERORDER];
 536     int16_t sampleMaVec[2 * STATE_SHORT_LEN_30MS + LPC_FILTERORDER];
 537     int16_t *sampleVal = &sampleValVec[LPC_FILTERORDER];
 538     int16_t *sampleMa = &sampleMaVec[LPC_FILTERORDER];
 539     int16_t *sampleAr = &sampleValVec[LPC_FILTERORDER];
 540
 541     /* initialization of coefficients */
 542
 543     for (k = 0; k < LPC_FILTERORDER + 1; k++) {
 544         numerator[k] = synt_denum[LPC_FILTERORDER - k];
 545     }
 546
 547     /* decoding of the maximum value */
 548
 549     maxVal = frg_quant_mod[ifm];
 550
 551     /* decoding of the sample values */
 552     tmp1 = sampleVal;
 553     tmp2 = &idx[len - 1];
 554
 555     if (ifm < 37) {
 556         for (k = 0; k < len; k++) {
 557             /*the shifting is due to the Q13 in sq4_fixQ13[i], also the adding of 2097152 (= 0.5 << 22)
 558                maxVal is in Q8 and result is in Q(-1) */
 559             (*tmp1) = (int16_t) ((SPL_MUL_16_16(maxVal, ilbc_state[(*tmp2)]) + 2097152) >> 22);
 560             tmp1++;
 561             tmp2--;
 562         }
 563     } else if (ifm < 59) {
 564         for (k = 0; k < len; k++) {
 565             /*the shifting is due to the Q13 in sq4_fixQ13[i], also the adding of 262144 (= 0.5 << 19)
 566                maxVal is in Q5 and result is in Q(-1) */
 567             (*tmp1) = (int16_t) ((SPL_MUL_16_16(maxVal, ilbc_state[(*tmp2)]) + 262144) >> 19);
 568             tmp1++;
 569             tmp2--;
 570         }
 571     } else {
 572         for (k = 0; k < len; k++) {
 573             /*the shifting is due to the Q13 in sq4_fixQ13[i], also the adding of 65536 (= 0.5 << 17)
 574                maxVal is in Q3 and result is in Q(-1) */
 575             (*tmp1) = (int16_t) ((SPL_MUL_16_16(maxVal, ilbc_state[(*tmp2)]) + 65536) >> 17);
 576             tmp1++;
 577             tmp2--;
 578         }
 579     }
 580
 581     /* Set the rest of the data to zero */
 582     memset(&sampleVal[len], 0, len * 2);
 583
 584     /* circular convolution with all-pass filter */
 585
 586     /* Set the state to zero */
 587     memset(sampleValVec, 0, LPC_FILTERORDER * 2);
 588
 589     /* Run MA filter + AR filter */
 590     filter_mafq12(sampleVal, sampleMa, numerator, LPC_FILTERORDER + 1, len + LPC_FILTERORDER);
 591     memset(&sampleMa[len + LPC_FILTERORDER], 0, (len - LPC_FILTERORDER) * 2);
 592     filter_arfq12(sampleMa, sampleAr, synt_denum, LPC_FILTERORDER + 1, 2 * len);
 593
 594     tmp1 = &sampleAr[len - 1];
 595     tmp2 = &sampleAr[2 * len - 1];
 596     tmp3 = Out_fix;
 597     for (k = 0; k < len; k++) {
 598         (*tmp3) = (*tmp1) + (*tmp2);
 599         tmp1--;
 600         tmp2--;
 601         tmp3++;
 602     }
 603 }
 604
 605 static int16_t gain_dequantization(int index, int max_in, int stage)
 606 {
 607     int16_t scale = FFMAX(1638, FFABS(max_in));
 608
 609     return ((scale * ilbc_gain[stage][index]) + 8192) >> 14;
 610 }
 611
 612 static void vector_rmultiplication(int16_t *out, const int16_t *in,
 613                                    const int16_t *win,
 614                                    int length, int shift)
 615 {
 616     for (int i = 0; i < length; i++)
 617         out[i] = (in[i] * win[-i]) >> shift;
 618 }
 619
 620 static void vector_multiplication(int16_t *out, const int16_t *in,
 621                                   const int16_t *win, int length,
 622                                   int shift)
 623 {
 624     for (int i = 0; i < length; i++)
 625         out[i] = (in[i] * win[i]) >> shift;
 626 }
 627
 628 static void add_vector_and_shift(int16_t *out, const int16_t *in1,
 629                                  const int16_t *in2, int length,
 630                                  int shift)
 631 {
 632     for (int i = 0; i < length; i++)
 633         out[i] = (in1[i] + in2[i]) >> shift;
 634 }
 635
 636 static void create_augmented_vector(int index, const int16_t *buffer, int16_t *cbVec)
 637 {
 638     int16_t cbVecTmp[4];
 639     int interpolation_length = FFMIN(4, index);
 640     int16_t ilow = index - interpolation_length;
 641
 642     memcpy(cbVec, buffer - index, index * 2);
 643
 644     vector_multiplication(&cbVec[ilow], buffer - index - interpolation_length, alpha, interpolation_length, 15);
 645     vector_rmultiplication(cbVecTmp, buffer - interpolation_length, &alpha[interpolation_length - 1], interpolation_length, 15);
 646     add_vector_and_shift(&cbVec[ilow], &cbVec[ilow], cbVecTmp, interpolation_length, 0);
 647
 648     memcpy(cbVec + index, buffer - index, FFMIN(SUBL - index, index) * sizeof(*cbVec));
 649 }
 650
 651 static void get_codebook(int16_t * cbvec,   /* (o) Constructed codebook vector */
 652                      int16_t * mem,     /* (i) Codebook buffer */
 653                      int16_t index,     /* (i) Codebook index */
 654                      int16_t lMem,      /* (i) Length of codebook buffer */
 655                      int16_t cbveclen   /* (i) Codebook vector length */
 656 )
 657 {
 658     int16_t k, base_size;
 659     int16_t lag;
 660     /* Stack based */
 661     int16_t tempbuff2[SUBL + 5];
 662
 663     /* Determine size of codebook sections */
 664     base_size = lMem - cbveclen + 1;
 665
 666     if (cbveclen == SUBL) {
 667         base_size += cbveclen / 2;
 668     }
 669
 670     /* No filter -> First codebook section */
 671     if (index < lMem - cbveclen + 1) {
 672         /* first non-interpolated vectors */
 673
 674         k = index + cbveclen;
 675         /* get vector */
 676         memcpy(cbvec, mem + lMem - k, cbveclen * 2);
 677     } else if (index < base_size) {
 678
 679         /* Calculate lag */
 680
 681         k = (int16_t) SPL_MUL_16_16(2, (index - (lMem - cbveclen + 1))) + cbveclen;
 682
 683         lag = k / 2;
 684
 685         create_augmented_vector(lag, mem + lMem, cbvec);
 686     } else {
 687         int16_t memIndTest;
 688
 689         /* first non-interpolated vectors */
 690
 691         if (index - base_size < lMem - cbveclen + 1) {
 692
 693             /* Set up filter memory, stuff zeros outside memory buffer */
 694
 695             memIndTest = lMem - (index - base_size + cbveclen);
 696
 697             memset(mem - CB_HALFFILTERLEN, 0, CB_HALFFILTERLEN * 2);
 698             memset(mem + lMem, 0, CB_HALFFILTERLEN * 2);
 699
 700             /* do filtering to get the codebook vector */
 701
 702             filter_mafq12(&mem[memIndTest + 4], cbvec, kCbFiltersRev, CB_FILTERLEN, cbveclen);
 703         } else {
 704             /* interpolated vectors */
 705             /* Stuff zeros outside memory buffer  */
 706             memIndTest = lMem - cbveclen - CB_FILTERLEN;
 707             memset(mem + lMem, 0, CB_HALFFILTERLEN * 2);
 708
 709             /* do filtering */
 710             filter_mafq12(&mem[memIndTest + 7], tempbuff2, kCbFiltersRev, CB_FILTERLEN, (int16_t) (cbveclen + 5));
 711
 712             /* Calculate lag index */
 713             lag = (cbveclen << 1) - 20 + index - base_size - lMem - 1;
 714
 715             create_augmented_vector(lag, tempbuff2 + SUBL + 5, cbvec);
 716         }
 717     }
 718 }
 719
 720 static void construct_vector (
 721     int16_t *decvector,   /* (o) Decoded vector */
 722     const int16_t *index,       /* (i) Codebook indices */
 723     const int16_t *gain_index,  /* (i) Gain quantization indices */
 724     int16_t *mem,         /* (i) Buffer for codevector construction */
 725     int16_t lMem,         /* (i) Length of buffer */
 726     int16_t veclen)
 727 {
 728     int16_t gain[CB_NSTAGES];
 729     int16_t cbvec0[SUBL];
 730     int16_t cbvec1[SUBL];
 731     int16_t cbvec2[SUBL];
 732     unsigned a32;
 733     int16_t *gainPtr;
 734     int j;
 735
 736     /* gain de-quantization */
 737
 738     gain[0] = gain_dequantization(gain_index[0], 16384, 0);
 739     gain[1] = gain_dequantization(gain_index[1], gain[0], 1);
 740     gain[2] = gain_dequantization(gain_index[2], gain[1], 2);
 741
 742     /* codebook vector construction and construction of total vector */
 743
 744     /* Stack based */
 745     get_codebook(cbvec0, mem, index[0], lMem, veclen);
 746     get_codebook(cbvec1, mem, index[1], lMem, veclen);
 747     get_codebook(cbvec2, mem, index[2], lMem, veclen);
 748
 749     gainPtr = &gain[0];
 750     for (j = 0; j < veclen; j++) {
 751         a32 = SPL_MUL_16_16(*gainPtr++, cbvec0[j]);
 752         a32 += SPL_MUL_16_16(*gainPtr++, cbvec1[j]);
 753         a32 += SPL_MUL_16_16(*gainPtr, cbvec2[j]);
 754         gainPtr -= 2;
 755         decvector[j] = (int)(a32 + 8192) >> 14;
 756     }
 757 }
 758
 759 static void reverse_memcpy(int16_t *dest, const int16_t *source, int length)
 760 {
 761     int16_t* destPtr = dest;
 762     const int16_t *sourcePtr = source;
 763     int j;
 764
 765     for (j = 0; j < length; j++)
 766         *destPtr-- = *sourcePtr++;
 767 }
 768
 769 static void decode_residual(ILBCContext *s,
 770                             ILBCFrame *encbits,
 771                             int16_t *decresidual,
 772                             const int16_t *syntdenum)
 773 {
 774     int16_t meml_gotten, Nfor, Nback, diff, start_pos;
 775     int16_t subcount, subframe;
 776     int16_t *reverseDecresidual = s->enh_buf;        /* Reversed decoded data, used for decoding backwards in time (reuse memory in state) */
 777     int16_t *memVec = s->prevResidual;
 778     int16_t *mem = &memVec[CB_HALFFILTERLEN];   /* Memory for codebook */
 779
 780     diff = STATE_LEN - s->state_short_len;
 781
 782     if (encbits->state_first == 1) {
 783         start_pos = (encbits->start - 1) * SUBL;
 784     } else {
 785         start_pos = (encbits->start - 1) * SUBL + diff;
 786     }
 787
 788     /* decode scalar part of start state */
 789
 790     state_construct(encbits->ifm, encbits->idx, &syntdenum[(encbits->start - 1) * (LPC_FILTERORDER + 1)], &decresidual[start_pos], s->state_short_len);
 791
 792     if (encbits->state_first) { /* put adaptive part in the end */
 793         /* setup memory */
 794         memset(mem, 0, (int16_t) (CB_MEML - s->state_short_len) * 2);
 795         memcpy(mem + CB_MEML - s->state_short_len, decresidual + start_pos, s->state_short_len * 2);
 796
 797         /* construct decoded vector */
 798
 799         construct_vector(&decresidual[start_pos + s->state_short_len], encbits->cb_index, encbits->gain_index, mem + CB_MEML - ST_MEM_L_TBL, ST_MEM_L_TBL, (int16_t) diff);
 800
 801     } else { /* put adaptive part in the beginning */
 802         /* setup memory */
 803         meml_gotten = s->state_short_len;
 804         reverse_memcpy(mem + CB_MEML - 1, decresidual + start_pos, meml_gotten);
 805         memset(mem, 0, (int16_t) (CB_MEML - meml_gotten) * 2);
 806
 807         /* construct decoded vector */
 808         construct_vector(reverseDecresidual, encbits->cb_index, encbits->gain_index, mem + CB_MEML - ST_MEM_L_TBL, ST_MEM_L_TBL, diff);
 809
 810         /* get decoded residual from reversed vector */
 811         reverse_memcpy(&decresidual[start_pos - 1], reverseDecresidual, diff);
 812     }
 813
 814     /* counter for predicted subframes */
 815     subcount = 1;
 816
 817     /* forward prediction of subframes */
 818     Nfor = s->nsub - encbits->start - 1;
 819
 820     if (Nfor > 0) {
 821         /* setup memory */
 822         memset(mem, 0, (CB_MEML - STATE_LEN) * 2);
 823         memcpy(mem + CB_MEML - STATE_LEN, decresidual + (encbits->start - 1) * SUBL, STATE_LEN * 2);
 824
 825         /* loop over subframes to encode */
 826         for (subframe = 0; subframe < Nfor; subframe++) {
 827             /* construct decoded vector */
 828             construct_vector(&decresidual[(encbits->start + 1 + subframe) * SUBL], encbits->cb_index + subcount * CB_NSTAGES, encbits->gain_index + subcount * CB_NSTAGES, mem, MEM_LF_TBL, SUBL);
 829
 830             /* update memory */
 831             memmove(mem, mem + SUBL, (CB_MEML - SUBL) * sizeof(*mem));
 832             memcpy(mem + CB_MEML - SUBL, &decresidual[(encbits->start + 1 + subframe) * SUBL], SUBL * 2);
 833
 834             subcount++;
 835         }
 836
 837     }
 838
 839     /* backward prediction of subframes */
 840     Nback = encbits->start - 1;
 841
 842     if (Nback > 0) {
 843         /* setup memory */
 844         meml_gotten = SUBL * (s->nsub + 1 - encbits->start);
 845         if (meml_gotten > CB_MEML) {
 846             meml_gotten = CB_MEML;
 847         }
 848
 849         reverse_memcpy(mem + CB_MEML - 1, decresidual + (encbits->start - 1) * SUBL, meml_gotten);
 850         memset(mem, 0, (int16_t) (CB_MEML - meml_gotten) * 2);
 851
 852         /* loop over subframes to decode */
 853         for (subframe = 0; subframe < Nback; subframe++) {
 854             /* construct decoded vector */
 855             construct_vector(&reverseDecresidual[subframe * SUBL], encbits->cb_index + subcount * CB_NSTAGES,
 856                         encbits->gain_index + subcount * CB_NSTAGES, mem, MEM_LF_TBL, SUBL);
 857
 858             /* update memory */
 859             memmove(mem, mem + SUBL, (CB_MEML - SUBL) * sizeof(*mem));
 860             memcpy(mem + CB_MEML - SUBL, &reverseDecresidual[subframe * SUBL], SUBL * 2);
 861
 862             subcount++;
 863         }
 864
 865         /* get decoded residual from reversed vector */
 866         reverse_memcpy(decresidual + SUBL * Nback - 1, reverseDecresidual, SUBL * Nback);
 867     }
 868 }
 869
 870 static int16_t max_abs_value_w16(const int16_t* vector, int length)
 871 {
 872     int i = 0, absolute = 0, maximum = 0;
 873
 874     if (vector == NULL || length <= 0) {
 875         return -1;
 876     }
 877
 878     for (i = 0; i < length; i++) {
 879         absolute = FFABS(vector[i]);
 880         if (absolute > maximum)
 881             maximum = absolute;
 882     }
 883
 884     // Guard the case for abs(-32768).
 885     return FFMIN(maximum, INT16_MAX);
 886 }
 887
 888 static int16_t get_size_in_bits(uint32_t n)
 889 {
 890     int16_t bits;
 891
 892     if (0xFFFF0000 & n) {
 893         bits = 16;
 894     } else {
 895         bits = 0;
 896     }
 897
 898     if (0x0000FF00 & (n >> bits)) bits += 8;
 899     if (0x000000F0 & (n >> bits)) bits += 4;
 900     if (0x0000000C & (n >> bits)) bits += 2;
 901     if (0x00000002 & (n >> bits)) bits += 1;
 902     if (0x00000001 & (n >> bits)) bits += 1;
 903
 904     return bits;
 905 }
 906
 907 static int32_t scale_dot_product(const int16_t *v1, const int16_t *v2, int length, int scaling)
 908 {
 909     int64_t sum = 0;
 910
 911     for (int i = 0; i < length; i++)
 912         sum += (v1[i] * v2[i]) >> scaling;
 913
 914     return av_clipl_int32(sum);
 915 }
 916
 917 static void correlation(int32_t *corr, int32_t *ener, const int16_t *buffer,
 918                         int16_t lag, int16_t blen, int16_t srange, int16_t scale)
 919 {
 920     const int16_t *w16ptr = &buffer[blen - srange - lag];
 921
 922     *corr = scale_dot_product(&buffer[blen - srange], w16ptr, srange, scale);
 923     *ener = scale_dot_product(w16ptr, w16ptr, srange, scale);
 924
 925     if (*ener == 0) {
 926         *corr = 0;
 927         *ener = 1;
 928     }
 929 }
 930
/* Shift x left by c bits when c is non-negative, otherwise shift it right by
 * -c bits. The arguments appear more than once in the expansion, so only
 * pass expressions without side effects. */
#define SPL_SHIFT_W32(x, c) (((c) >= 0) ? ((x) << (c)) : ((x) >> (-(c))))
 932
 933 static int16_t norm_w32(int32_t a)
 934 {
 935     if (a == 0) {
 936         return 0;
 937     } else if (a < 0) {
 938         a = ~a;
 939     }
 940
 941     return ff_clz(a);
 942 }
 943
 944 static int32_t div_w32_w16(int32_t num, int16_t den)
 945 {
 946     if (den != 0)
 947         return num / den;
 948     else
 949         return 0x7FFFFFFF;
 950 }
 951
 952 static void do_plc(int16_t *plc_residual,      /* (o) concealed residual */
 953                    int16_t *plc_lpc,           /* (o) concealed LP parameters */
 954                    int16_t PLI,                /* (i) packet loss indicator
 955                                                       0 - no PL, 1 = PL */
 956                    const int16_t *decresidual, /* (i) decoded residual */
 957                    const int16_t *lpc,         /* (i) decoded LPC (only used for no PL) */
 958                    int16_t inlag,              /* (i) pitch lag */
 959                    ILBCContext *s)             /* (i/o) decoder instance */
 960 {
 961     int16_t i, pick;
 962     int32_t cross, ener, cross_comp, ener_comp = 0;
 963     int32_t measure, max_measure, energy;
 964     int16_t max, cross_square_max, cross_square;
 965     int16_t j, lag, tmp1, tmp2, randlag;
 966     int16_t shift1, shift2, shift3, shift_max;
 967     int16_t scale3;
 968     int16_t corrLen;
 969     int32_t tmpW32, tmp2W32;
 970     int16_t use_gain;
 971     int16_t tot_gain;
 972     int16_t max_perSquare;
 973     int16_t scale1, scale2;
 974     int16_t totscale;
 975     int32_t nom;
 976     int16_t denom;
 977     int16_t pitchfact;
 978     int16_t use_lag;
 979     int ind;
 980     int16_t randvec[BLOCKL_MAX];
 981
 982     /* Packet Loss */
 983     if (PLI == 1) {
 984
 985         s->consPLICount += 1;
 986
 987         /* if previous frame not lost,
 988            determine pitch pred. gain */
 989
 990         if (s->prevPLI != 1) {
 991
 992             /* Maximum 60 samples are correlated, preserve as high accuracy
 993                as possible without getting overflow */
 994             max = max_abs_value_w16(s->prevResidual, s->block_samples);
 995             scale3 = (get_size_in_bits(max) << 1) - 25;
 996             if (scale3 < 0) {
 997                 scale3 = 0;
 998             }
 999
1000             /* Store scale for use when interpolating between the
1001              * concealment and the received packet */
1002             s->prevScale = scale3;
1003
1004             /* Search around the previous lag +/-3 to find the
1005                best pitch period */
1006             lag = inlag - 3;
1007
1008             /* Guard against getting outside the frame */
1009             corrLen = FFMIN(60, s->block_samples - (inlag + 3));
1010
1011             correlation(&cross, &ener, s->prevResidual, lag, s->block_samples, corrLen, scale3);
1012
1013             /* Normalize and store cross^2 and the number of shifts */
1014             shift_max = get_size_in_bits(FFABS(cross)) - 15;
1015             cross_square_max = (int16_t) SPL_MUL_16_16_RSFT(SPL_SHIFT_W32(cross, -shift_max), SPL_SHIFT_W32(cross, -shift_max), 15);
1016
1017             for (j = inlag - 2; j <= inlag + 3; j++) {
1018                 correlation(&cross_comp, &ener_comp, s->prevResidual, j, s->block_samples, corrLen, scale3);
1019
1020                 /* Use the criteria (corr*corr)/energy to compare if
1021                    this lag is better or not. To avoid the division,
1022                    do a cross multiplication */
1023                 shift1 = get_size_in_bits(FFABS(cross_comp)) - 15;
1024                 cross_square = (int16_t) SPL_MUL_16_16_RSFT(SPL_SHIFT_W32(cross_comp, -shift1), SPL_SHIFT_W32(cross_comp, -shift1), 15);
1025
1026                 shift2 = get_size_in_bits(ener) - 15;
1027                 measure = SPL_MUL_16_16(SPL_SHIFT_W32(ener, -shift2), cross_square);
1028
1029                 shift3 = get_size_in_bits(ener_comp) - 15;
1030                 max_measure = SPL_MUL_16_16(SPL_SHIFT_W32(ener_comp, -shift3), cross_square_max);
1031
1032                 /* Calculate shift value, so that the two measures can
1033                    be put in the same Q domain */
1034                 if (((shift_max << 1) + shift3) > ((shift1 << 1) + shift2)) {
1035                     tmp1 = FFMIN(31, (shift_max << 1) + shift3 - (shift1 << 1) - shift2);
1036                     tmp2 = 0;
1037                 } else {
1038                     tmp1 = 0;
1039                     tmp2 = FFMIN(31, (shift1 << 1) + shift2 - (shift_max << 1) - shift3);
1040                 }
1041
1042                 if ((measure >> tmp1) > (max_measure >> tmp2)) {
1043                     /* New lag is better => record lag, measure and domain */
1044                     lag = j;
1045                     cross_square_max = cross_square;
1046                     cross = cross_comp;
1047                     shift_max = shift1;
1048                     ener = ener_comp;
1049                 }
1050             }
1051
1052             /* Calculate the periodicity for the lag with the maximum correlation.
1053
1054                Definition of the periodicity:
1055                abs(corr(vec1, vec2))/(sqrt(energy(vec1))*sqrt(energy(vec2)))
1056
1057                Work in the Square domain to simplify the calculations
1058                max_perSquare is less than 1 (in Q15)
1059              */
1060             tmp2W32 = scale_dot_product(&s->prevResidual[s->block_samples - corrLen], &s->prevResidual[s->block_samples - corrLen], corrLen, scale3);
1061
1062             if ((tmp2W32 > 0) && (ener_comp > 0)) {
1063                 /* norm energies to int16_t, compute the product of the energies and
1064                    use the upper int16_t as the denominator */
1065
1066                 scale1 = norm_w32(tmp2W32) - 16;
1067                 tmp1 = SPL_SHIFT_W32(tmp2W32, scale1);
1068
1069                 scale2 = norm_w32(ener) - 16;
1070                 tmp2 =  SPL_SHIFT_W32(ener, scale2);
1071                 denom = SPL_MUL_16_16_RSFT(tmp1, tmp2, 16);    /* denom in Q(scale1+scale2-16) */
1072
1073                 /* Square the cross correlation and norm it such that max_perSquare
1074                    will be in Q15 after the division */
1075
1076                 totscale = scale1 + scale2 - 1;
1077                 tmp1 = SPL_SHIFT_W32(cross, (totscale >> 1));
1078                 tmp2 = SPL_SHIFT_W32(cross, totscale - (totscale >> 1));
1079
1080                 nom = SPL_MUL_16_16(tmp1, tmp2);
1081                 max_perSquare = div_w32_w16(nom, denom);
1082             } else {
1083                 max_perSquare = 0;
1084             }
1085         } else {
1086             /* previous frame lost, use recorded lag and gain */
1087             lag = s->prevLag;
1088             max_perSquare = s->per_square;
1089         }
1090
1091         /* Attenuate signal and scale down pitch pred gain if
1092            several frames lost consecutively */
1093
1094         use_gain = 32767;       /* 1.0 in Q15 */
1095
1096         if (s->consPLICount * s->block_samples > 320) {
1097             use_gain = 29491;   /* 0.9 in Q15 */
1098         } else if (s->consPLICount * s->block_samples > 640) {
1099             use_gain = 22938;   /* 0.7 in Q15 */
1100         } else if (s->consPLICount * s->block_samples > 960) {
1101             use_gain = 16384;   /* 0.5 in Q15 */
1102         } else if (s->consPLICount * s->block_samples > 1280) {
1103             use_gain = 0;       /* 0.0 in Q15 */
1104         }
1105
1106         /* Compute mixing factor of picth repeatition and noise:
1107            for max_per>0.7 set periodicity to 1.0
1108            0.4<max_per<0.7 set periodicity to (maxper-0.4)/0.7-0.4)
1109            max_per<0.4 set periodicity to 0.0
1110          */
1111
1112         if (max_perSquare > 7868) {     /* periodicity > 0.7  (0.7^4=0.2401 in Q15) */
1113             pitchfact = 32767;
1114         } else if (max_perSquare > 839) {       /* 0.4 < periodicity < 0.7 (0.4^4=0.0256 in Q15) */
1115             /* find best index and interpolate from that */
1116             ind = 5;
1117             while ((max_perSquare < kPlcPerSqr[ind]) && (ind > 0)) {
1118                 ind--;
1119             }
1120             /* pitch fact is approximated by first order */
1121             tmpW32 = kPlcPitchFact[ind] + SPL_MUL_16_16_RSFT(kPlcPfSlope[ind], (max_perSquare - kPlcPerSqr[ind]), 11);
1122
1123             pitchfact = FFMIN(tmpW32, 32767); /* guard against overflow */
1124
1125         } else {                /* periodicity < 0.4 */
1126             pitchfact = 0;
1127         }
1128
1129         /* avoid repetition of same pitch cycle (buzzyness) */
1130         use_lag = lag;
1131         if (lag < 80) {
1132             use_lag = 2 * lag;
1133         }
1134
1135         /* compute concealed residual */
1136         energy = 0;
1137
1138         for (i = 0; i < s->block_samples; i++) {
1139             /* noise component -  52 < randlagFIX < 117 */
1140             s->seed = SPL_MUL_16_16(s->seed, 31821) + 13849;
1141             randlag = 53 + (s->seed & 63);
1142
1143             pick = i - randlag;
1144
1145             if (pick < 0) {
1146                 randvec[i] = s->prevResidual[s->block_samples + pick];
1147             } else {
1148                 randvec[i] = s->prevResidual[pick];
1149             }
1150
1151             /* pitch repeatition component */
1152             pick = i - use_lag;
1153
1154             if (pick < 0) {
1155                 plc_residual[i] = s->prevResidual[s->block_samples + pick];
1156             } else {
1157                 plc_residual[i] = plc_residual[pick];
1158             }
1159
1160             /* Attinuate total gain for each 10 ms */
1161             if (i < 80) {
1162                 tot_gain = use_gain;
1163             } else if (i < 160) {
1164                 tot_gain = SPL_MUL_16_16_RSFT(31130, use_gain, 15);    /* 0.95*use_gain */
1165             } else {
1166                 tot_gain = SPL_MUL_16_16_RSFT(29491, use_gain, 15);    /* 0.9*use_gain */
1167             }
1168
1169             /* mix noise and pitch repeatition */
1170             plc_residual[i] = SPL_MUL_16_16_RSFT(tot_gain, (pitchfact * plc_residual[i] + (32767 - pitchfact) * randvec[i] + 16384) >> 15, 15);
1171
1172             /* Shifting down the result one step extra to ensure that no overflow
1173                will occur */
1174             energy += SPL_MUL_16_16_RSFT(plc_residual[i], plc_residual[i], (s->prevScale + 1));
1175
1176         }
1177
1178         /* less than 30 dB, use only noise */
1179         if (energy < SPL_SHIFT_W32(s->block_samples * 900, -s->prevScale - 1)) {
1180             energy = 0;
1181             for (i = 0; i < s->block_samples; i++) {
1182                 plc_residual[i] = randvec[i];
1183             }
1184         }
1185
1186         /* use the old LPC */
1187         memcpy(plc_lpc, (*s).prev_lpc, (LPC_FILTERORDER + 1) * 2);
1188
1189         /* Update state in case there are multiple frame losses */
1190         s->prevLag = lag;
1191         s->per_square = max_perSquare;
1192     } else { /* no packet loss, copy input */
1193         memcpy(plc_residual, decresidual, s->block_samples * 2);
1194         memcpy(plc_lpc, lpc, (LPC_FILTERORDER + 1) * 2);
1195         s->consPLICount = 0;
1196     }
1197
1198     /* update state */
1199     s->prevPLI = PLI;
1200     memcpy(s->prev_lpc, plc_lpc, (LPC_FILTERORDER + 1) * 2);
1201     memcpy(s->prevResidual, plc_residual, s->block_samples * 2);
1202
1203     return;
1204 }
1205
/**
 * Search for the lag that maximises (cross_corr^2 / energy) between `target`
 * and a window sliding over `regressor`.
 *
 * @param target    reference vector of `subl` samples
 * @param regressor start of the first candidate window
 * @param subl      length of the compared vectors
 * @param searchLen number of candidate positions examined
 * @param offset    value added to the winning position index
 * @param step      +1 to slide the window forwards, -1 to slide it backwards
 * @return index of the best candidate position plus `offset`
 *
 * Candidates are compared by cross multiplication instead of division; only
 * positions with positive energy and positive cross-correlation take part.
 */
static int xcorr_coeff(const int16_t *target, const int16_t *regressor,
                       int16_t subl, int16_t searchLen,
                       int16_t offset, int16_t step)
{
    /* Best candidate so far, seeded so that the first valid one is selected */
    int16_t best_lag      = 0;
    int16_t best_corr_sq  = 0;
    int16_t best_energy   = INT16_MAX;
    int16_t best_totscale = -500;

    int16_t pos = 0;
    int16_t peak;
    const int16_t *win_beg, *win_end;
    int32_t energy;
    int shifts;

    /* Find the peak magnitude of the searched area and the samples that
       enter/leave the window when it moves by one step */
    if (step == 1) {
        peak    = max_abs_value_w16(regressor, (int16_t) (subl + searchLen - 1));
        win_beg = regressor;
        win_end = &regressor[subl];
    } else {                    /* step == -1 */
        peak    = max_abs_value_w16(&regressor[-searchLen], (int16_t) (subl + searchLen - 1));
        win_beg = &regressor[-1];
        win_end = &regressor[subl - 1];
    }

    /* Scale the products down for loud signals so that the int32_t energy
       does not overflow */
    shifts = peak > 5000 ? 2 : 0;

    /* Energy of the first window; later windows are derived incrementally */
    energy = scale_dot_product(regressor, regressor, subl, shifts);

    for (int k = 0; k < searchLen; k++) {
        const int16_t *candidate = &regressor[pos];
        const int32_t cross_corr = scale_dot_product(target, candidate, subl, shifts);

        if (energy > 0 && cross_corr > 0) {
            /* Bring cross correlation and energy down to 16-bit words */
            const int16_t corr_scale   = norm_w32(cross_corr) - 16;
            const int16_t corr16       = (int16_t) SPL_SHIFT_W32(cross_corr, corr_scale);
            const int16_t energy_scale = norm_w32(energy) - 16;
            const int16_t energy16     = (int16_t) SPL_SHIFT_W32(energy, energy_scale);

            /* Upper word of the squared cross correlation */
            const int16_t corr_sq = (int16_t) SPL_MUL_16_16_RSFT(corr16, corr16, 16);

            /* Total number of (dynamic) right shifts applied to
               (cross_corr*cross_corr)/energy */
            const int16_t totscale = energy_scale - (corr_scale * 2);

            /* Shift difference towards the current best, limited to +/-31,
               so both criteria can be compared in the same domain */
            int16_t scalediff = totscale - best_totscale;
            int32_t new_crit, max_crit;

            scalediff = FFMIN(scalediff, 31);
            scalediff = FFMAX(scalediff, -31);

            /* Cross multiply old and new criteria instead of dividing */
            if (scalediff >= 0) {
                new_crit = ((int32_t) corr_sq * best_energy);
                max_crit = ((int32_t) best_corr_sq * energy16) >> scalediff;
            } else {
                new_crit = ((int32_t) corr_sq * best_energy) >> (-scalediff);
                max_crit = ((int32_t) best_corr_sq * energy16);
            }

            /* Keep the candidate when it beats the best one so far */
            if (new_crit > max_crit) {
                best_corr_sq  = corr_sq;
                best_energy   = energy16;
                best_totscale = totscale;
                best_lag      = k;
            }
        }
        pos += step;

        /* Slide the energy window by one sample */
        energy += (unsigned)step * ((*win_end * *win_end - *win_beg * *win_beg) >> shifts);

        win_beg += step;
        win_end += step;
    }

    return best_lag + offset;
}
1316
/**
 * Run a second-order recursive filter in place over `signal`.
 *
 * @param signal (i/o) samples, filtered in place
 * @param ba     (i)   coefficients: ba[0..2] feed-forward, ba[3..4] feedback
 * @param y      (i/o) output history, two values each stored as a
 *                     high/low 16-bit pair: y[0]/y[1] and y[2]/y[3]
 * @param x      (i/o) input history, x[0] newest and x[1] older
 * @param len    (i)   number of samples to process
 */
static void hp_output(int16_t *signal, const int16_t *ba, int16_t *y,
                      int16_t *x, int16_t len)
{
    for (int n = 0; n < len; n++) {
        int32_t acc;

        /* feedback, low words of the output history */
        acc  = SPL_MUL_16_16(y[1], ba[3]);    /* (-a[1])*y[i-1] (low part) */
        acc += SPL_MUL_16_16(y[3], ba[4]);    /* (-a[2])*y[i-2] (low part) */
        acc >>= 15;

        /* feedback, high words of the output history */
        acc += SPL_MUL_16_16(y[0], ba[3]);    /* (-a[1])*y[i-1] (high part) */
        acc += SPL_MUL_16_16(y[2], ba[4]);    /* (-a[2])*y[i-2] (high part) */
        acc *= 2;

        /* feed-forward part */
        acc += SPL_MUL_16_16(signal[n], ba[0]);       /* b[0]*x[0] */
        acc += SPL_MUL_16_16(x[0], ba[1]);    /* b[1]*x[i-1] */
        acc += SPL_MUL_16_16(x[1], ba[2]);    /* b[2]*x[i-2] */

        /* shift the input history */
        x[1] = x[0];
        x[0] = signal[n];

        /* round, saturate and scale back to a 16-bit output sample */
        signal[n] = av_clip_intp2(acc + 1024, 26) >> 11;

        /* shift the output history */
        y[2] = y[0];
        y[3] = y[1];

        /* multiply the accumulator by 8 with saturation */
        if (acc > 268435455)
            acc = INT32_MAX;
        else if (acc < -268435456)
            acc = INT32_MIN;
        else
            acc *= 8;

        /* store the new output as a high word and a halved low word */
        y[0] = acc >> 16;
        y[1] = (acc - (y[0] * (1 << 16))) >> 1;
    }
}
1358
1359 static int ilbc_decode_frame(AVCodecContext *avctx, AVFrame *frame,
1360                              int *got_frame_ptr, AVPacket *avpkt)
1361 {
1362     ILBCContext *s     = avctx->priv_data;
1363     int mode = s->mode, ret;
1364     int16_t *plc_data = &s->plc_residual[LPC_FILTERORDER];
1365
1366     memset(&s->frame, 0, sizeof(ILBCFrame));
1367     ret = unpack_frame(s, avpkt->data, avpkt->size);
1368     if (ret < 0)
1369         return ret;
1370     if (ret)
1371         mode = 0;
1372
1373     frame->nb_samples = s->block_samples;
1374     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
1375         return ret;
1376
1377     if (s->frame.start < 1 || s->frame.start > 5)
1378         mode = 0;
1379
1380     if (mode) {
1381         index_conv(s->frame.cb_index);
1382
1383         lsf_dequantization(s->lsfdeq, s->frame.lsf, s->lpc_n);
1384         lsf_check_stability(s->lsfdeq, LPC_FILTERORDER, s->lpc_n);
1385         lsp_interpolate(s->syntdenum, s->weightdenum,
1386                         s->lsfdeq, LPC_FILTERORDER, s);
1387         decode_residual(s, &s->frame, s->decresidual, s->syntdenum);
1388
1389         do_plc(s->plc_residual, s->plc_lpc, 0,
1390                                s->decresidual, s->syntdenum + (LPC_FILTERORDER + 1) * (s->nsub - 1),
1391                                s->last_lag, s);
1392
1393         memcpy(s->decresidual, s->plc_residual, s->block_samples * 2);
1394     }
1395
1396     if (s->enhancer) {
1397         /* TODO */
1398     } else {
1399         int16_t lag, i;
1400
1401         /* Find last lag (since the enhancer is not called to give this info) */
1402         if (s->mode == 20) {
1403             lag = xcorr_coeff(&s->decresidual[s->block_samples-60], &s->decresidual[s->block_samples-80],
1404                               60, 80, 20, -1);
1405         } else {
1406             lag = xcorr_coeff(&s->decresidual[s->block_samples-ENH_BLOCKL],
1407                               &s->decresidual[s->block_samples-ENH_BLOCKL-20],
1408                               ENH_BLOCKL, 100, 20, -1);
1409         }
1410
1411         /* Store lag (it is needed if next packet is lost) */
1412         s->last_lag = lag;
1413
1414         /* copy data and run synthesis filter */
1415         memcpy(plc_data, s->decresidual, s->block_samples * 2);
1416
1417         /* Set up the filter state */
1418         memcpy(&plc_data[-LPC_FILTERORDER], s->syntMem, LPC_FILTERORDER * 2);
1419
1420         for (i = 0; i < s->nsub; i++) {
1421             filter_arfq12(plc_data+i*SUBL, plc_data+i*SUBL,
1422                                       s->syntdenum + i*(LPC_FILTERORDER + 1),
1423                                       LPC_FILTERORDER + 1, SUBL);
1424         }
1425
1426         /* Save the filter state */
1427         memcpy(s->syntMem, &plc_data[s->block_samples-LPC_FILTERORDER], LPC_FILTERORDER * 2);
1428     }
1429
1430     memcpy(frame->data[0], plc_data, s->block_samples * 2);
1431
1432     hp_output((int16_t *)frame->data[0], hp_out_coeffs,
1433               s->hpimemy, s->hpimemx, s->block_samples);
1434
1435     memcpy(s->old_syntdenum, s->syntdenum, s->nsub*(LPC_FILTERORDER + 1) * 2);
1436
1437     s->prev_enh_pl = 0;
1438     if (mode == 0)
1439         s->prev_enh_pl = 1;
1440
1441     *got_frame_ptr = 1;
1442
1443     return avpkt->size;
1444 }
1445
1446 static av_cold int ilbc_decode_init(AVCodecContext *avctx)
1447 {
1448     ILBCContext *s  = avctx->priv_data;
1449
1450     if (avctx->block_align == 38)
1451         s->mode = 20;
1452     else if (avctx->block_align == 50)
1453         s->mode = 30;
1454     else if (avctx->bit_rate > 0)
1455         s->mode = avctx->bit_rate <= 14000 ? 30 : 20;
1456     else
1457         return AVERROR_INVALIDDATA;
1458
1459     av_channel_layout_uninit(&avctx->ch_layout);
1460     avctx->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO;
1461     avctx->sample_rate    = 8000;
1462     avctx->sample_fmt     = AV_SAMPLE_FMT_S16;
1463
1464     if (s->mode == 30) {
1465         s->block_samples = 240;
1466         s->nsub = NSUB_30MS;
1467         s->nasub = NASUB_30MS;
1468         s->lpc_n = LPC_N_30MS;
1469         s->state_short_len = STATE_SHORT_LEN_30MS;
1470     } else {
1471         s->block_samples = 160;
1472         s->nsub = NSUB_20MS;
1473         s->nasub = NASUB_20MS;
1474         s->lpc_n = LPC_N_20MS;
1475         s->state_short_len = STATE_SHORT_LEN_20MS;
1476     }
1477
1478     return 0;
1479 }
1480
/* Codec table entry for the iLBC decoder: ties AV_CODEC_ID_ILBC to
 * ilbc_decode_init() and ilbc_decode_frame() and reserves an ILBCContext as
 * the private data of each decoder instance. */
const FFCodec ff_ilbc_decoder = {
    .p.name         = "ilbc",
    CODEC_LONG_NAME("iLBC (Internet Low Bitrate Codec)"),
    .p.type         = AVMEDIA_TYPE_AUDIO,
    .p.id           = AV_CODEC_ID_ILBC,
    .init           = ilbc_decode_init,
    FF_CODEC_DECODE_CB(ilbc_decode_frame),
    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
    .priv_data_size = sizeof(ILBCContext),
};