src/third_party/webrtc/modules/audio_coding/codecs/opus/opus_interface.c

   1 /*
   2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h"
  12
  13 #include <stdlib.h>
  14 #include <string.h>
  15
  16 #include "opus.h"
  17
  18 #include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
  19 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
  20
  21 enum {
  22   /* Maximum supported frame size in WebRTC is 60 ms. */
  23   kWebRtcOpusMaxEncodeFrameSizeMs = 60,
  24
  25   /* The format allows up to 120 ms frames. Since we don't control the other
  26    * side, we must allow for packets of that size. NetEq is currently limited
  27    * to 60 ms on the receive side. */
  28   kWebRtcOpusMaxDecodeFrameSizeMs = 120,
  29
  30   /* Maximum sample count per channel is 48 kHz * maximum frame size in
  31    * milliseconds. */
  32   kWebRtcOpusMaxFrameSizePerChannel = 48 * kWebRtcOpusMaxDecodeFrameSizeMs,
  33
  34   /* Maximum sample count per frame is 48 kHz * maximum frame size in
  35    * milliseconds * maximum number of channels. */
  36   kWebRtcOpusMaxFrameSize = kWebRtcOpusMaxFrameSizePerChannel * 2,
  37
  38   /* Maximum sample count per channel for output resampled to 32 kHz,
  39    * 32 kHz * maximum frame size in milliseconds. */
  40   kWebRtcOpusMaxFrameSizePerChannel32kHz = 32 * kWebRtcOpusMaxDecodeFrameSizeMs,
  41
  42   /* Number of samples in resampler state. */
  43   kWebRtcOpusStateSize = 7,
  44
  45   /* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). */
  46   kWebRtcOpusDefaultFrameSize = 960,
  47 };
  48
  49 struct WebRtcOpusEncInst {
  50   OpusEncoder* encoder;
  51 };
  52
  53 int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, int32_t channels) {
  54   OpusEncInst* state;
  55   if (inst != NULL) {
  56     state = (OpusEncInst*) calloc(1, sizeof(OpusEncInst));
  57     if (state) {
  58       int error;
  59       /* Default to VoIP application for mono, and AUDIO for stereo. */
  60       int application = (channels == 1) ? OPUS_APPLICATION_VOIP :
  61           OPUS_APPLICATION_AUDIO;
  62
  63       state->encoder = opus_encoder_create(48000, channels, application,
  64                                            &error);
  65       if (error == OPUS_OK && state->encoder != NULL) {
  66         *inst = state;
  67         return 0;
  68       }
  69       free(state);
  70     }
  71   }
  72   return -1;
  73 }
  74
  75 int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
  76   if (inst) {
  77     opus_encoder_destroy(inst->encoder);
  78     free(inst);
  79     return 0;
  80   } else {
  81     return -1;
  82   }
  83 }
  84
  85 int16_t WebRtcOpus_Encode(OpusEncInst* inst, int16_t* audio_in, int16_t samples,
  86                           int16_t length_encoded_buffer, uint8_t* encoded) {
  87   opus_int16* audio = (opus_int16*) audio_in;
  88   unsigned char* coded = encoded;
  89   int res;
  90
  91   if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
  92     return -1;
  93   }
  94
  95   res = opus_encode(inst->encoder, audio, samples, coded,
  96                     length_encoded_buffer);
  97
  98   if (res > 0) {
  99     return res;
 100   }
 101   return -1;
 102 }
 103
 104 int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
 105   if (inst) {
 106   return opus_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate));
 107   } else {
 108     return -1;
 109   }
 110 }
 111
 112 struct WebRtcOpusDecInst {
 113   int16_t state_48_32_left[8];
 114   int16_t state_48_32_right[8];
 115   OpusDecoder* decoder_left;
 116   OpusDecoder* decoder_right;
 117   int prev_decoded_samples;
 118   int channels;
 119 };
 120
 121 int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, int channels) {
 122   int error_l;
 123   int error_r;
 124   OpusDecInst* state;
 125
 126   if (inst != NULL) {
 127     /* Create Opus decoder state. */
 128     state = (OpusDecInst*) calloc(1, sizeof(OpusDecInst));
 129     if (state == NULL) {
 130       return -1;
 131     }
 132
 133     /* Create new memory for left and right channel, always at 48000 Hz. */
 134     state->decoder_left = opus_decoder_create(48000, channels, &error_l);
 135     state->decoder_right = opus_decoder_create(48000, channels, &error_r);
 136     if (error_l == OPUS_OK && error_r == OPUS_OK && state->decoder_left != NULL
 137         && state->decoder_right != NULL) {
 138       /* Creation of memory all ok. */
 139       state->channels = channels;
 140       state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize;
 141       *inst = state;
 142       return 0;
 143     }
 144
 145     /* If memory allocation was unsuccessful, free the entire state. */
 146     if (state->decoder_left) {
 147       opus_decoder_destroy(state->decoder_left);
 148     }
 149     if (state->decoder_right) {
 150       opus_decoder_destroy(state->decoder_right);
 151     }
 152     free(state);
 153   }
 154   return -1;
 155 }
 156
 157 int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
 158   if (inst) {
 159     opus_decoder_destroy(inst->decoder_left);
 160     opus_decoder_destroy(inst->decoder_right);
 161     free(inst);
 162     return 0;
 163   } else {
 164     return -1;
 165   }
 166 }
 167
 168 int WebRtcOpus_DecoderChannels(OpusDecInst* inst) {
 169   return inst->channels;
 170 }
 171
 172 int16_t WebRtcOpus_DecoderInitNew(OpusDecInst* inst) {
 173   int error = opus_decoder_ctl(inst->decoder_left, OPUS_RESET_STATE);
 174   if (error == OPUS_OK) {
 175     memset(inst->state_48_32_left, 0, sizeof(inst->state_48_32_left));
 176     memset(inst->state_48_32_right, 0, sizeof(inst->state_48_32_right));
 177     return 0;
 178   }
 179   return -1;
 180 }
 181
 182 int16_t WebRtcOpus_DecoderInit(OpusDecInst* inst) {
 183   int error = opus_decoder_ctl(inst->decoder_left, OPUS_RESET_STATE);
 184   if (error == OPUS_OK) {
 185     memset(inst->state_48_32_left, 0, sizeof(inst->state_48_32_left));
 186     return 0;
 187   }
 188   return -1;
 189 }
 190
 191 int16_t WebRtcOpus_DecoderInitSlave(OpusDecInst* inst) {
 192   int error = opus_decoder_ctl(inst->decoder_right, OPUS_RESET_STATE);
 193   if (error == OPUS_OK) {
 194     memset(inst->state_48_32_right, 0, sizeof(inst->state_48_32_right));
 195     return 0;
 196   }
 197   return -1;
 198 }
 199
 200 /* |frame_size| is set to maximum Opus frame size in the normal case, and
 201  * is set to the number of samples needed for PLC in case of losses.
 202  * It is up to the caller to make sure the value is correct. */
 203 static int DecodeNative(OpusDecoder* inst, const int16_t* encoded,
 204                         int16_t encoded_bytes, int frame_size,
 205                         int16_t* decoded, int16_t* audio_type) {
 206   unsigned char* coded = (unsigned char*) encoded;
 207   opus_int16* audio = (opus_int16*) decoded;
 208
 209   int res = opus_decode(inst, coded, encoded_bytes, audio, frame_size, 0);
 210
 211   /* TODO(tlegrand): set to DTX for zero-length packets? */
 212   *audio_type = 0;
 213
 214   if (res > 0) {
 215     return res;
 216   }
 217   return -1;
 218 }
 219
 220 /* Resample from 48 to 32 kHz. Length of state is assumed to be
 221  * kWebRtcOpusStateSize (7).
 222  */
 223 static int WebRtcOpus_Resample48to32(const int16_t* samples_in, int length,
 224                                      int16_t* state, int16_t* samples_out) {
 225   int i;
 226   int blocks;
 227   int16_t output_samples;
 228   int32_t buffer32[kWebRtcOpusMaxFrameSizePerChannel + kWebRtcOpusStateSize];
 229
 230   /* Resample from 48 kHz to 32 kHz. */
 231   for (i = 0; i < kWebRtcOpusStateSize; i++) {
 232     buffer32[i] = state[i];
 233     state[i] = samples_in[length - kWebRtcOpusStateSize + i];
 234   }
 235   for (i = 0; i < length; i++) {
 236     buffer32[kWebRtcOpusStateSize + i] = samples_in[i];
 237   }
 238   /* Resampling 3 samples to 2. Function divides the input in |blocks| number
 239    * of 3-sample groups, and output is |blocks| number of 2-sample groups.
 240    * When this is removed, the compensation in WebRtcOpus_DurationEst should be
 241    * removed too. */
 242   blocks = length / 3;
 243   WebRtcSpl_Resample48khzTo32khz(buffer32, buffer32, blocks);
 244   output_samples = (int16_t) (blocks * 2);
 245   WebRtcSpl_VectorBitShiftW32ToW16(samples_out, output_samples, buffer32, 15);
 246
 247   return output_samples;
 248 }
 249
 250 static int WebRtcOpus_DeInterleaveResample(OpusDecInst* inst, int16_t* input,
 251                                            int sample_pairs, int16_t* output) {
 252   int i;
 253   int16_t buffer_left[kWebRtcOpusMaxFrameSizePerChannel];
 254   int16_t buffer_right[kWebRtcOpusMaxFrameSizePerChannel];
 255   int16_t buffer_out[kWebRtcOpusMaxFrameSizePerChannel32kHz];
 256   int resampled_samples;
 257
 258   /* De-interleave the signal in left and right channel. */
 259   for (i = 0; i < sample_pairs; i++) {
 260     /* Take every second sample, starting at the first sample. */
 261     buffer_left[i] = input[i * 2];
 262     buffer_right[i] = input[i * 2 + 1];
 263   }
 264
 265   /* Resample from 48 kHz to 32 kHz for left channel. */
 266   resampled_samples = WebRtcOpus_Resample48to32(
 267       buffer_left, sample_pairs, inst->state_48_32_left, buffer_out);
 268
 269   /* Add samples interleaved to output vector. */
 270   for (i = 0; i < resampled_samples; i++) {
 271     output[i * 2] = buffer_out[i];
 272   }
 273
 274   /* Resample from 48 kHz to 32 kHz for right channel. */
 275   resampled_samples = WebRtcOpus_Resample48to32(
 276       buffer_right, sample_pairs, inst->state_48_32_right, buffer_out);
 277
 278   /* Add samples interleaved to output vector. */
 279   for (i = 0; i < resampled_samples; i++) {
 280     output[i * 2 + 1] = buffer_out[i];
 281   }
 282
 283   return resampled_samples;
 284 }
 285
 286 int16_t WebRtcOpus_DecodeNew(OpusDecInst* inst, const uint8_t* encoded,
 287                              int16_t encoded_bytes, int16_t* decoded,
 288                              int16_t* audio_type) {
 289   /* |buffer| is big enough for 120 ms (the largest Opus packet size) of stereo
 290    * audio at 48 kHz. */
 291   int16_t buffer[kWebRtcOpusMaxFrameSize];
 292   int16_t* coded = (int16_t*)encoded;
 293   int decoded_samples;
 294   int resampled_samples;
 295
 296   /* If mono case, just do a regular call to the decoder.
 297    * If stereo, we need to de-interleave the stereo output into blocks with
 298    * left and right channel. Each block is resampled to 32 kHz, and then
 299    * interleaved again. */
 300
 301   /* Decode to a temporary buffer. */
 302   decoded_samples = DecodeNative(inst->decoder_left, coded, encoded_bytes,
 303                                  kWebRtcOpusMaxFrameSizePerChannel,
 304                                  buffer, audio_type);
 305   if (decoded_samples < 0) {
 306     return -1;
 307   }
 308
 309   if (inst->channels == 2) {
 310     /* De-interleave and resample. */
 311     resampled_samples = WebRtcOpus_DeInterleaveResample(inst,
 312                                                         buffer,
 313                                                         decoded_samples,
 314                                                         decoded);
 315   } else {
 316     /* Resample from 48 kHz to 32 kHz. Filter state memory for left channel is
 317      * used for mono signals. */
 318     resampled_samples = WebRtcOpus_Resample48to32(buffer,
 319                                                   decoded_samples,
 320                                                   inst->state_48_32_left,
 321                                                   decoded);
 322   }
 323
 324   /* Update decoded sample memory, to be used by the PLC in case of losses. */
 325   inst->prev_decoded_samples = decoded_samples;
 326
 327   return resampled_samples;
 328 }
 329
 330 int16_t WebRtcOpus_Decode(OpusDecInst* inst, const int16_t* encoded,
 331                           int16_t encoded_bytes, int16_t* decoded,
 332                           int16_t* audio_type) {
 333   /* |buffer16| is big enough for 120 ms (the largestOpus packet size) of
 334    * stereo audio at 48 kHz. */
 335   int16_t buffer16[kWebRtcOpusMaxFrameSize];
 336   int decoded_samples;
 337   int16_t output_samples;
 338   int i;
 339
 340   /* If mono case, just do a regular call to the decoder.
 341    * If stereo, call to WebRtcOpus_Decode() gives left channel as output, and
 342    * calls to WebRtcOpus_Decode_slave() give right channel as output.
 343    * This is to make stereo work with the current setup of NetEQ, which
 344    * requires two calls to the decoder to produce stereo. */
 345
 346   /* Decode to a temporary buffer. */
 347   decoded_samples = DecodeNative(inst->decoder_left, encoded, encoded_bytes,
 348                                  kWebRtcOpusMaxFrameSizePerChannel, buffer16,
 349                                  audio_type);
 350   if (decoded_samples < 0) {
 351     return -1;
 352   }
 353   if (inst->channels == 2) {
 354     /* The parameter |decoded_samples| holds the number of samples pairs, in
 355      * case of stereo. Number of samples in |buffer16| equals |decoded_samples|
 356      * times 2. */
 357     for (i = 0; i < decoded_samples; i++) {
 358       /* Take every second sample, starting at the first sample. This gives
 359        * the left channel. */
 360       buffer16[i] = buffer16[i * 2];
 361     }
 362   }
 363
 364   /* Resample from 48 kHz to 32 kHz. */
 365   output_samples = WebRtcOpus_Resample48to32(buffer16, decoded_samples,
 366                                              inst->state_48_32_left, decoded);
 367
 368   /* Update decoded sample memory, to be used by the PLC in case of losses. */
 369   inst->prev_decoded_samples = decoded_samples;
 370
 371   return output_samples;
 372 }
 373
 374 int16_t WebRtcOpus_DecodeSlave(OpusDecInst* inst, const int16_t* encoded,
 375                                int16_t encoded_bytes, int16_t* decoded,
 376                                int16_t* audio_type) {
 377   /* |buffer16| is big enough for 120 ms (the largestOpus packet size) of
 378    * stereo audio at 48 kHz. */
 379   int16_t buffer16[kWebRtcOpusMaxFrameSize];
 380   int decoded_samples;
 381   int16_t output_samples;
 382   int i;
 383
 384   /* Decode to a temporary buffer. */
 385   decoded_samples = DecodeNative(inst->decoder_right, encoded, encoded_bytes,
 386                                  kWebRtcOpusMaxFrameSizePerChannel, buffer16,
 387                                  audio_type);
 388   if (decoded_samples < 0) {
 389     return -1;
 390   }
 391   if (inst->channels == 2) {
 392     /* The parameter |decoded_samples| holds the number of samples pairs, in
 393      * case of stereo. Number of samples in |buffer16| equals |decoded_samples|
 394      * times 2. */
 395     for (i = 0; i < decoded_samples; i++) {
 396       /* Take every second sample, starting at the second sample. This gives
 397        * the right channel. */
 398       buffer16[i] = buffer16[i * 2 + 1];
 399     }
 400   } else {
 401     /* Decode slave should never be called for mono packets. */
 402     return -1;
 403   }
 404   /* Resample from 48 kHz to 32 kHz. */
 405   output_samples = WebRtcOpus_Resample48to32(buffer16, decoded_samples,
 406                                              inst->state_48_32_right, decoded);
 407
 408   return output_samples;
 409 }
 410
 411 int16_t WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded,
 412                              int16_t number_of_lost_frames) {
 413   int16_t buffer[kWebRtcOpusMaxFrameSize];
 414   int16_t audio_type = 0;
 415   int decoded_samples;
 416   int resampled_samples;
 417   int plc_samples;
 418
 419   /* If mono case, just do a regular call to the plc function, before
 420    * resampling.
 421    * If stereo, we need to de-interleave the stereo output into blocks with
 422    * left and right channel. Each block is resampled to 32 kHz, and then
 423    * interleaved again. */
 424
 425   /* Decode to a temporary buffer. The number of samples we ask for is
 426    * |number_of_lost_frames| times |prev_decoded_samples_|. Limit the number
 427    * of samples to maximum |kWebRtcOpusMaxFrameSizePerChannel|. */
 428   plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
 429   plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
 430       plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
 431   decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples,
 432                                  buffer, &audio_type);
 433   if (decoded_samples < 0) {
 434     return -1;
 435   }
 436
 437   if (inst->channels == 2) {
 438      /* De-interleave and resample. */
 439      resampled_samples = WebRtcOpus_DeInterleaveResample(inst,
 440                                                          buffer,
 441                                                          decoded_samples,
 442                                                          decoded);
 443    } else {
 444      /* Resample from 48 kHz to 32 kHz. Filter state memory for left channel is
 445       * used for mono signals. */
 446      resampled_samples = WebRtcOpus_Resample48to32(buffer,
 447                                                    decoded_samples,
 448                                                    inst->state_48_32_left,
 449                                                    decoded);
 450    }
 451
 452   return resampled_samples;
 453 }
 454
 455 int16_t WebRtcOpus_DecodePlcMaster(OpusDecInst* inst, int16_t* decoded,
 456                                    int16_t number_of_lost_frames) {
 457   int16_t buffer[kWebRtcOpusMaxFrameSize];
 458   int decoded_samples;
 459   int resampled_samples;
 460   int16_t audio_type = 0;
 461   int plc_samples;
 462   int i;
 463
 464   /* If mono case, just do a regular call to the decoder.
 465    * If stereo, call to WebRtcOpus_DecodePlcMaster() gives left channel as
 466    * output, and calls to WebRtcOpus_DecodePlcSlave() give right channel as
 467    * output. This is to make stereo work with the current setup of NetEQ, which
 468    * requires two calls to the decoder to produce stereo. */
 469
 470   /* Decode to a temporary buffer. The number of samples we ask for is
 471    * |number_of_lost_frames| times |prev_decoded_samples_|. Limit the number
 472    * of samples to maximum |kWebRtcOpusMaxFrameSizePerChannel|. */
 473   plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
 474   plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
 475       plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
 476   decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples,
 477                                  buffer, &audio_type);
 478   if (decoded_samples < 0) {
 479     return -1;
 480   }
 481
 482   if (inst->channels == 2) {
 483     /* The parameter |decoded_samples| holds the number of sample pairs, in
 484      * case of stereo. The original number of samples in |buffer| equals
 485      * |decoded_samples| times 2. */
 486     for (i = 0; i < decoded_samples; i++) {
 487       /* Take every second sample, starting at the first sample. This gives
 488        * the left channel. */
 489       buffer[i] = buffer[i * 2];
 490     }
 491   }
 492
 493   /* Resample from 48 kHz to 32 kHz for left channel. */
 494   resampled_samples = WebRtcOpus_Resample48to32(buffer,
 495                                                 decoded_samples,
 496                                                 inst->state_48_32_left,
 497                                                 decoded);
 498   return resampled_samples;
 499 }
 500
 501 int16_t WebRtcOpus_DecodePlcSlave(OpusDecInst* inst, int16_t* decoded,
 502                                   int16_t number_of_lost_frames) {
 503   int16_t buffer[kWebRtcOpusMaxFrameSize];
 504   int decoded_samples;
 505   int resampled_samples;
 506   int16_t audio_type = 0;
 507   int plc_samples;
 508   int i;
 509
 510   /* Calls to WebRtcOpus_DecodePlcSlave() give right channel as output.
 511    * The function should never be called in the mono case. */
 512   if (inst->channels != 2) {
 513     return -1;
 514   }
 515
 516   /* Decode to a temporary buffer. The number of samples we ask for is
 517    * |number_of_lost_frames| times |prev_decoded_samples_|. Limit the number
 518    * of samples to maximum |kWebRtcOpusMaxFrameSizePerChannel|. */
 519   plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
 520   plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel)
 521       ? plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
 522   decoded_samples = DecodeNative(inst->decoder_right, NULL, 0, plc_samples,
 523                                  buffer, &audio_type);
 524   if (decoded_samples < 0) {
 525     return -1;
 526   }
 527
 528   /* The parameter |decoded_samples| holds the number of sample pairs,
 529    * The original number of samples in |buffer| equals |decoded_samples|
 530    * times 2. */
 531   for (i = 0; i < decoded_samples; i++) {
 532     /* Take every second sample, starting at the second sample. This gives
 533      * the right channel. */
 534     buffer[i] = buffer[i * 2 + 1];
 535   }
 536
 537   /* Resample from 48 kHz to 32 kHz for left channel. */
 538   resampled_samples = WebRtcOpus_Resample48to32(buffer,
 539                                                 decoded_samples,
 540                                                 inst->state_48_32_right,
 541                                                 decoded);
 542   return resampled_samples;
 543 }
 544
 545 int WebRtcOpus_DurationEst(OpusDecInst* inst,
 546                            const uint8_t* payload,
 547                            int payload_length_bytes) {
 548   int frames, samples;
 549   frames = opus_packet_get_nb_frames(payload, payload_length_bytes);
 550   if (frames < 0) {
 551     /* Invalid payload data. */
 552     return 0;
 553   }
 554   samples = frames * opus_packet_get_samples_per_frame(payload, 48000);
 555   if (samples < 120 || samples > 5760) {
 556     /* Invalid payload duration. */
 557     return 0;
 558   }
 559   /* Compensate for the down-sampling from 48 kHz to 32 kHz.
 560    * This should be removed when the resampling in WebRtcOpus_Decode is
 561    * removed. */
 562   samples = samples * 2 / 3;
 563   return samples;
 564 }