opencore/codecs_v2/audio/gsm_amr/amr_nb/common/src/r_fft.cpp

   1 /* ------------------------------------------------------------------
   2  * Copyright (C) 1998-2009 PacketVideo
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
  13  * express or implied.
  14  * See the License for the specific language governing permissions
  15  * and limitations under the License.
  16  * -------------------------------------------------------------------
  17  */
  18 /****************************************************************************************
  19 Portions of this file are derived from the following 3GPP standard:
  20
  21     3GPP TS 26.073
  22     ANSI-C code for the Adaptive Multi-Rate (AMR) speech codec
  23     Available from http://www.3gpp.org
  24
  25 (C) 2004, 3GPP Organizational Partners (ARIB, ATIS, CCSA, ETSI, TTA, TTC)
  26 Permission to distribute, modify and use this file under the standard license
  27 terms listed above has been obtained from the copyright holder.
  28 ****************************************************************************************/
  29 /*
  30
  31  Filename: r_fft.cpp
  32
  33 ------------------------------------------------------------------------------
  34 */
  35
  36 /*----------------------------------------------------------------------------
  37 ; INCLUDES
  38 ----------------------------------------------------------------------------*/
  39 #include "typedef.h"
  40 #include "cnst.h"
  41 #include "oper_32b.h"
  42 #include "vad2.h"
  43 #include "sub.h"
  44 #include "add.h"
  45 #include "shr.h"
  46 #include "shl.h"
  47 #include "l_mult.h"
  48 #include "l_mac.h"
  49 #include "l_msu.h"
  50 #include "round.h"
  51 #include "l_negate.h"
  52 #include "l_deposit_h.h"
  53 #include "l_shr.h"
  54
  55 /*----------------------------------------------------------------------------
  56 ; MACROS
  57 ; Define module specific macros here
  58 ----------------------------------------------------------------------------*/
  59
  60
  61 /*----------------------------------------------------------------------------
  62 ; DEFINES
  63 ; Include all pre-processor statements here. Include conditional
  64 ; compile variables also.
  65 ----------------------------------------------------------------------------*/
  66 #define         SIZE            128
  67 #define         SIZE_BY_TWO     64
  68 #define         NUM_STAGE       6
  69 #define         TRUE            1
  70 #define         FALSE           0
  71
  72 /*----------------------------------------------------------------------------
  73 ; LOCAL FUNCTION DEFINITIONS
  74 ; Function Prototype declaration
  75 ----------------------------------------------------------------------------*/
  76
  77 /*----------------------------------------------------------------------------
  78 ; LOCAL STORE/BUFFER/POINTER DEFINITIONS
  79 ; Variable declaration - defined here and used outside this module
  80 ----------------------------------------------------------------------------*/
  81
  82 const Word16 phs_tbl[] =
  83 {
  84
  85     32767,       0,  32729,  -1608,  32610,  -3212,  32413,  -4808,
  86     32138,   -6393,  31786,  -7962,  31357,  -9512,  30853, -11039,
  87     30274,  -12540,  29622, -14010,  28899, -15447,  28106, -16846,
  88     27246,  -18205,  26320, -19520,  25330, -20788,  24279, -22006,
  89     23170,  -23170,  22006, -24279,  20788, -25330,  19520, -26320,
  90     18205,  -27246,  16846, -28106,  15447, -28899,  14010, -29622,
  91     12540,  -30274,  11039, -30853,   9512, -31357,   7962, -31786,
  92     6393,  -32138,   4808, -32413,   3212, -32610,   1608, -32729,
  93     0,  -32768,  -1608, -32729,  -3212, -32610,  -4808, -32413,
  94     -6393, -32138,  -7962, -31786,  -9512, -31357, -11039, -30853,
  95     -12540, -30274, -14010, -29622, -15447, -28899, -16846, -28106,
  96     -18205, -27246, -19520, -26320, -20788, -25330, -22006, -24279,
  97     -23170, -23170, -24279, -22006, -25330, -20788, -26320, -19520,
  98     -27246, -18205, -28106, -16846, -28899, -15447, -29622, -14010,
  99     -30274, -12540, -30853, -11039, -31357,  -9512, -31786,  -7962,
 100     -32138,  -6393, -32413,  -4808, -32610,  -3212, -32729,  -1608
 101
 102 };
 103
 104 const Word16 ii_table[] =
 105     {SIZE / 2, SIZE / 4, SIZE / 8, SIZE / 16, SIZE / 32, SIZE / 64};
 106
 107 /*
 108 ------------------------------------------------------------------------------
 109  FUNCTION NAME: c_fft
 110 ------------------------------------------------------------------------------
 111  INPUT AND OUTPUT DEFINITIONS
 112
 113  Inputs:
 114     farray_ptr = pointer to complex array that the FFT operates on of type
 115                  Word16.
 116     pOverflow = pointer to overflow (Flag)
 117
 118  Outputs:
 119     pOverflow = 1 if the math functions called by cor_h_x2 result in overflow
 120     else zero.
 121
 122  Returns:
 123     None
 124
 125  Global Variables Used:
 126     None
 127
 128  Local Variables Needed:
 129     None
 130
 131 ------------------------------------------------------------------------------
 132  FUNCTION DESCRIPTION
 133
 134  This is an implementation of decimation-in-time FFT algorithm for
 135  real sequences.  The techniques used here can be found in several
 136  books, e.g., i) Proakis and Manolakis, "Digital Signal Processing",
 137  2nd Edition, Chapter 9, and ii) W.H. Press et. al., "Numerical
 138  Recipes in C", 2nd Ediiton, Chapter 12.
 139
 140  Input -  There is one input to this function:
 141
 142    1) An integer pointer to the input data array
 143
 144  Output - There is no return value.
 145    The input data are replaced with transformed data.  If the
 146    input is a real time domain sequence, it is replaced with
 147    the complex FFT for positive frequencies.  The FFT value
 148    for DC and the foldover frequency are combined to form the
 149    first complex number in the array.  The remaining complex
 150    numbers correspond to increasing frequencies.  If the input
 151    is a complex frequency domain sequence arranged as above,
 152    it is replaced with the corresponding time domain sequence.
 153
 154  Notes:
 155
 156    1) This function is designed to be a part of a VAD
 157       algorithm that requires 128-point FFT of real
 158       sequences.  This is achieved here through a 64-point
 159       complex FFT.  Consequently, the FFT size information is
 160       not transmitted explicitly.  However, some flexibility
 161       is provided in the function to change the size of the
 162       FFT by specifying the size information through "define"
 163       statements.
 164
 165    2) The values of the complex sinusoids used in the FFT
 166       algorithm are stored in a ROM table.
 167
 168    3) In the c_fft function, the FFT values are divided by
 169       2 after each stage of computation thus dividing the
 170       final FFT values by 64.  This is somewhat different
 171           from the usual definition of FFT where the factor 1/N,
 172           i.e., 1/64, used for the IFFT and not the FFT.  No factor
 173           is used in the r_fft function.
 174
 175 ------------------------------------------------------------------------------
 176  REQUIREMENTS
 177
 178  None
 179
 180 ------------------------------------------------------------------------------
 181  REFERENCES
 182
 183  r_fft.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001
 184
 185 ------------------------------------------------------------------------------
 186  PSEUDO-CODE
 187
 188 The original etsi reference code uses a global flag Overflow. However, in the
 189 actual implementation a pointer to a the overflow flag is passed in.
 190
 191 void c_fft(Word16 * farray_ptr)
 192 {
 193     Word16 i, j, k, ii, jj, kk, ji, kj, ii2;
 194     Word32 ftmp, ftmp_real, ftmp_imag;
 195     Word16 tmp, tmp1, tmp2;
 196
 197     // Rearrange the input array in bit reversed order
 198     for (i = 0, j = 0; i < SIZE - 2; i = i + 2)
 199     {
 200         if (sub(j, i) > 0)
 201         {
 202             ftmp = *(farray_ptr + i);
 203             *(farray_ptr + i) = *(farray_ptr + j);
 204             *(farray_ptr + j) = ftmp;
 205
 206             ftmp = *(farray_ptr + i + 1);
 207             *(farray_ptr + i + 1) = *(farray_ptr + j + 1);
 208             *(farray_ptr + j + 1) = ftmp;
 209         }
 210
 211         k = SIZE_BY_TWO;
 212         while (sub(j, k) >= 0)
 213         {
 214             j = sub(j, k);
 215             k = shr(k, 1);
 216         }
 217         j = add(j, k);
 218     }
 219
 220     // The FFT part
 221     for (i = 0; i < NUM_STAGE; i++)
 222     {               // i is stage counter
 223         jj = shl(2, i);     // FFT size
 224         kk = shl(jj, 1);    // 2 * FFT size
 225         ii = ii_table[i];   // 2 * number of FFT's
 226         ii2 = shl(ii, 1);
 227         ji = 0;         // ji is phase table index
 228
 229         for (j = 0; j < jj; j = j + 2)
 230         {                   // j is sample counter
 231
 232             for (k = j; k < SIZE; k = k + kk)
 233             {               // k is butterfly top
 234                 kj = add(k, jj);    // kj is butterfly bottom
 235
 236                 // Butterfly computations
 237                 ftmp_real = L_mult(*(farray_ptr + kj), phs_tbl[ji]);
 238                 ftmp_real = L_msu(ftmp_real, *(farray_ptr + kj + 1), phs_tbl[ji + 1]);
 239
 240                 ftmp_imag = L_mult(*(farray_ptr + kj + 1), phs_tbl[ji]);
 241                 ftmp_imag = L_mac(ftmp_imag, *(farray_ptr + kj), phs_tbl[ji + 1]);
 242
 243                 tmp1 = pv_round(ftmp_real);
 244                 tmp2 = pv_round(ftmp_imag);
 245
 246                 tmp = sub(*(farray_ptr + k), tmp1);
 247                 *(farray_ptr + kj) = shr(tmp, 1);
 248
 249                 tmp = sub(*(farray_ptr + k + 1), tmp2);
 250                 *(farray_ptr + kj + 1) = shr(tmp, 1);
 251
 252                 tmp = add(*(farray_ptr + k), tmp1);
 253                 *(farray_ptr + k) = shr(tmp, 1);
 254
 255                 tmp = add(*(farray_ptr + k + 1), tmp2);
 256                 *(farray_ptr + k + 1) = shr(tmp, 1);
 257             }
 258
 259             ji =  add(ji, ii2);
 260         }
 261     }
 262 }                               // end of c_fft ()
 263
 264
 265 ------------------------------------------------------------------------------
 266  CAUTION [optional]
 267  [State any special notes, constraints or cautions for users of this function]
 268
 269 ------------------------------------------------------------------------------
 270 */
 271
 272 /* FFT function for complex sequences */
 273 /*
 274  * The decimation-in-time complex FFT is implemented below.
 275  * The input complex numbers are presented as real part followed by
 276  * imaginary part for each sample.  The counters are therefore
 277  * incremented by two to access the complex valued samples.
 278  */
 279
 280 void c_fft(Word16 * farray_ptr, Flag *pOverflow)
 281 {
 282
 283     Word16 i;
 284     Word16 j;
 285     Word16 k;
 286     Word16 ii;
 287     Word16 jj;
 288     Word16 kk;
 289     Word16 ji;
 290     Word16 kj;
 291     Word16 ii2;
 292     Word32 ftmp;
 293     Word32 ftmp_real;
 294     Word32 ftmp_imag;
 295     Word16 tmp;
 296     Word16 tmp1;
 297     Word16 tmp2;
 298
 299     /* Rearrange the input array in bit reversed order */
 300     for (i = 0, j = 0; i < SIZE - 2; i = i + 2)
 301     {
 302         if (j > i)
 303         {
 304             ftmp = *(farray_ptr + i);
 305             *(farray_ptr + i) = *(farray_ptr + j);
 306             *(farray_ptr + j) = (Word16)ftmp;
 307
 308             ftmp = *(farray_ptr + i + 1);
 309             *(farray_ptr + i + 1) = *(farray_ptr + j + 1);
 310             *(farray_ptr + j + 1) = (Word16)ftmp;
 311         }
 312
 313         k = SIZE_BY_TWO;
 314         while (j >= k)
 315         {
 316             j = sub(j, k, pOverflow);
 317             k = shr(k, 1, pOverflow);
 318         }
 319         j = add(j, k, pOverflow);
 320     }
 321
 322     /* The FFT part */
 323     for (i = 0; i < NUM_STAGE; i++)
 324     {               /* i is stage counter */
 325         jj = shl(2, i, pOverflow);     /* FFT size */
 326         kk = shl(jj, 1, pOverflow);    /* 2 * FFT size */
 327         ii = ii_table[i];   /* 2 * number of FFT's */
 328         ii2 = shl(ii, 1, pOverflow);
 329         ji = 0;         /* ji is phase table index */
 330
 331         for (j = 0; j < jj; j = j + 2)
 332         {                   /* j is sample counter */
 333
 334             for (k = j; k < SIZE; k = k + kk)
 335             {               /* k is butterfly top */
 336                 kj = add(k, jj, pOverflow);    /* kj is butterfly bottom */
 337
 338                 /* Butterfly computations */
 339                 ftmp_real = L_mult(*(farray_ptr + kj), phs_tbl[ji], pOverflow);
 340                 ftmp_real = L_msu(ftmp_real, *(farray_ptr + kj + 1),
 341                                   phs_tbl[ji + 1], pOverflow);
 342
 343                 ftmp_imag = L_mult(*(farray_ptr + kj + 1),
 344                                    phs_tbl[ji], pOverflow);
 345                 ftmp_imag = L_mac(ftmp_imag, *(farray_ptr + kj),
 346                                   phs_tbl[ji + 1], pOverflow);
 347
 348                 tmp1 = pv_round(ftmp_real, pOverflow);
 349                 tmp2 = pv_round(ftmp_imag, pOverflow);
 350
 351                 tmp = sub(*(farray_ptr + k), tmp1, pOverflow);
 352                 *(farray_ptr + kj) = shr(tmp, 1, pOverflow);
 353
 354                 tmp = sub(*(farray_ptr + k + 1), tmp2, pOverflow);
 355                 *(farray_ptr + kj + 1) = shr(tmp, 1, pOverflow);
 356
 357                 tmp = add(*(farray_ptr + k), tmp1, pOverflow);
 358                 *(farray_ptr + k) = shr(tmp, 1, pOverflow);
 359
 360                 tmp = add(*(farray_ptr + k + 1), tmp2, pOverflow);
 361                 *(farray_ptr + k + 1) = shr(tmp, 1, pOverflow);
 362             }
 363
 364             ji =  add(ji, ii2, pOverflow);
 365         }
 366     }
 367 }                               /* end of c_fft () */
 368
 369
 370 /*
 371 ------------------------------------------------------------------------------
 372  FUNCTION NAME: r_fft
 373 ------------------------------------------------------------------------------
 374  INPUT AND OUTPUT DEFINITIONS
 375
 376  Inputs:
 377     farray_ptr = pointer to complex array that the FFT operates on of type
 378                  Word16.
 379     pOverflow = pointer to overflow (Flag)
 380
 381  Outputs:
 382     pOverflow = 1 if the math functions called by cor_h_x2 result in overflow
 383     else zero.
 384
 385  Returns:
 386     None
 387
 388  Global Variables Used:
 389     None
 390
 391  Local Variables Needed:
 392     None
 393
 394 ------------------------------------------------------------------------------
 395  FUNCTION DESCRIPTION
 396
 397  This is an implementation of decimation-in-time FFT algorithm for
 398  real sequences.  The techniques used here can be found in several
 399  books, e.g., i) Proakis and Manolakis, "Digital Signal Processing",
 400  2nd Edition, Chapter 9, and ii) W.H. Press et. al., "Numerical
 401  Recipes in C", 2nd Ediiton, Chapter 12.
 402
 403  Input -  There is one input to this function:
 404
 405    1) An integer pointer to the input data array
 406
 407  Output - There is no return value.
 408    The input data are replaced with transformed data.  If the
 409    input is a real time domain sequence, it is replaced with
 410    the complex FFT for positive frequencies.  The FFT value
 411    for DC and the foldover frequency are combined to form the
 412    first complex number in the array.  The remaining complex
 413    numbers correspond to increasing frequencies.  If the input
 414    is a complex frequency domain sequence arranged as above,
 415    it is replaced with the corresponding time domain sequence.
 416
 417  Notes:
 418
 419    1) This function is designed to be a part of a VAD
 420       algorithm that requires 128-point FFT of real
 421       sequences.  This is achieved here through a 64-point
 422       complex FFT.  Consequently, the FFT size information is
 423       not transmitted explicitly.  However, some flexibility
 424       is provided in the function to change the size of the
 425       FFT by specifying the size information through "define"
 426       statements.
 427
 428    2) The values of the complex sinusoids used in the FFT
 429       algorithm are stored in a ROM table.
 430
 431    3) In the c_fft function, the FFT values are divided by
 432       2 after each stage of computation thus dividing the
 433       final FFT values by 64.  This is somewhat different
 434           from the usual definition of FFT where the factor 1/N,
 435           i.e., 1/64, used for the IFFT and not the FFT.  No factor
 436           is used in the r_fft function.
 437
 438 ------------------------------------------------------------------------------
 439  REQUIREMENTS
 440
 441  None
 442
 443 ------------------------------------------------------------------------------
 444  REFERENCES
 445
 446  r_fft.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001
 447
 448 ------------------------------------------------------------------------------
 449  PSEUDO-CODE
 450
 451 The original etsi reference code uses a global flag Overflow. However, in the
 452 actual implementation a pointer to a the overflow flag is passed in.
 453
 454 void r_fft(Word16 * farray_ptr)
 455 {
 456
 457     Word16 ftmp1_real, ftmp1_imag, ftmp2_real, ftmp2_imag;
 458     Word32 Lftmp1_real, Lftmp1_imag;
 459     Word16 i, j;
 460     Word32 Ltmp1;
 461
 462     // Perform the complex FFT
 463     c_fft(farray_ptr);
 464
 465     // First, handle the DC and foldover frequencies
 466     ftmp1_real = *farray_ptr;
 467     ftmp2_real = *(farray_ptr + 1);
 468     *farray_ptr = add(ftmp1_real, ftmp2_real);
 469     *(farray_ptr + 1) = sub(ftmp1_real, ftmp2_real);
 470
 471     // Now, handle the remaining positive frequencies
 472     for (i = 2, j = SIZE - i; i <= SIZE_BY_TWO; i = i + 2, j = SIZE - i)
 473     {
 474         ftmp1_real = add(*(farray_ptr + i), *(farray_ptr + j));
 475         ftmp1_imag = sub(*(farray_ptr + i + 1), *(farray_ptr + j + 1));
 476         ftmp2_real = add(*(farray_ptr + i + 1), *(farray_ptr + j + 1));
 477         ftmp2_imag = sub(*(farray_ptr + j), *(farray_ptr + i));
 478
 479         Lftmp1_real = L_deposit_h(ftmp1_real);
 480         Lftmp1_imag = L_deposit_h(ftmp1_imag);
 481
 482         Ltmp1 = L_mac(Lftmp1_real, ftmp2_real, phs_tbl[i]);
 483         Ltmp1 = L_msu(Ltmp1, ftmp2_imag, phs_tbl[i + 1]);
 484         *(farray_ptr + i) = pv_round(L_shr(Ltmp1, 1));
 485
 486         Ltmp1 = L_mac(Lftmp1_imag, ftmp2_imag, phs_tbl[i]);
 487         Ltmp1 = L_mac(Ltmp1, ftmp2_real, phs_tbl[i + 1]);
 488         *(farray_ptr + i + 1) = pv_round(L_shr(Ltmp1, 1));
 489
 490         Ltmp1 = L_mac(Lftmp1_real, ftmp2_real, phs_tbl[j]);
 491         Ltmp1 = L_mac(Ltmp1, ftmp2_imag, phs_tbl[j + 1]);
 492         *(farray_ptr + j) = pv_round(L_shr(Ltmp1, 1));
 493
 494         Ltmp1 = L_negate(Lftmp1_imag);
 495         Ltmp1 = L_msu(Ltmp1, ftmp2_imag, phs_tbl[j]);
 496         Ltmp1 = L_mac(Ltmp1, ftmp2_real, phs_tbl[j + 1]);
 497         *(farray_ptr + j + 1) = pv_round(L_shr(Ltmp1, 1));
 498
 499     }
 500 }                               // end r_fft ()
 501
 502 ------------------------------------------------------------------------------
 503  CAUTION [optional]
 504  [State any special notes, constraints or cautions for users of this function]
 505
 506 ------------------------------------------------------------------------------
 507 */
 508
 509 /* FFT function for complex sequences */
 510 /*
 511  * The decimation-in-time complex FFT is implemented below.
 512  * The input complex numbers are presented as real part followed by
 513  * imaginary part for each sample.  The counters are therefore
 514  * incremented by two to access the complex valued samples.
 515  */
 516 void r_fft(Word16 * farray_ptr, Flag *pOverflow)
 517 {
 518
 519     Word16 ftmp1_real;
 520     Word16 ftmp1_imag;
 521     Word16 ftmp2_real;
 522     Word16 ftmp2_imag;
 523     Word32 Lftmp1_real;
 524     Word32 Lftmp1_imag;
 525     Word16 i;
 526     Word16 j;
 527     Word32 Ltmp1;
 528
 529     /* Perform the complex FFT */
 530     c_fft(farray_ptr, pOverflow);
 531
 532     /* First, handle the DC and foldover frequencies */
 533     ftmp1_real = *farray_ptr;
 534     ftmp2_real = *(farray_ptr + 1);
 535     *farray_ptr = add(ftmp1_real, ftmp2_real, pOverflow);
 536     *(farray_ptr + 1) = sub(ftmp1_real, ftmp2_real, pOverflow);
 537
 538     /* Now, handle the remaining positive frequencies */
 539     for (i = 2, j = SIZE - i; i <= SIZE_BY_TWO; i = i + 2, j = SIZE - i)
 540     {
 541         ftmp1_real = add(*(farray_ptr + i), *(farray_ptr + j), pOverflow);
 542         ftmp1_imag = sub(*(farray_ptr + i + 1),
 543                          *(farray_ptr + j + 1), pOverflow);
 544         ftmp2_real = add(*(farray_ptr + i + 1),
 545                          *(farray_ptr + j + 1), pOverflow);
 546         ftmp2_imag = sub(*(farray_ptr + j),
 547                          *(farray_ptr + i), pOverflow);
 548
 549         Lftmp1_real = L_deposit_h(ftmp1_real);
 550         Lftmp1_imag = L_deposit_h(ftmp1_imag);
 551
 552         Ltmp1 = L_mac(Lftmp1_real, ftmp2_real, phs_tbl[i], pOverflow);
 553         Ltmp1 = L_msu(Ltmp1, ftmp2_imag, phs_tbl[i + 1], pOverflow);
 554         *(farray_ptr + i) = pv_round(L_shr(Ltmp1, 1, pOverflow), pOverflow);
 555
 556         Ltmp1 = L_mac(Lftmp1_imag, ftmp2_imag, phs_tbl[i], pOverflow);
 557         Ltmp1 = L_mac(Ltmp1, ftmp2_real, phs_tbl[i + 1], pOverflow);
 558         *(farray_ptr + i + 1) = pv_round(L_shr(Ltmp1, 1, pOverflow), pOverflow);
 559
 560         Ltmp1 = L_mac(Lftmp1_real, ftmp2_real, phs_tbl[j], pOverflow);
 561         Ltmp1 = L_mac(Ltmp1, ftmp2_imag, phs_tbl[j + 1], pOverflow);
 562         *(farray_ptr + j) = pv_round(L_shr(Ltmp1, 1, pOverflow), pOverflow);
 563
 564         Ltmp1 = L_negate(Lftmp1_imag);
 565         Ltmp1 = L_msu(Ltmp1, ftmp2_imag, phs_tbl[j], pOverflow);
 566         Ltmp1 = L_mac(Ltmp1, ftmp2_real, phs_tbl[j + 1], pOverflow);
 567         *(farray_ptr + j + 1) = pv_round(L_shr(Ltmp1, 1, pOverflow), pOverflow);
 568
 569     }
 570 }                               /* end r_fft () */