src/third_party/webrtc/modules/audio_coding/codecs/opus/opus_interface.c

   1 /*
   2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h"
  12 #include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h"
  13
  14 #include <stdlib.h>
  15 #include <string.h>
  16
  17 enum {
  18   /* Maximum supported frame size in WebRTC is 60 ms. */
  19   kWebRtcOpusMaxEncodeFrameSizeMs = 60,
  20
  21   /* The format allows up to 120 ms frames. Since we don't control the other
  22    * side, we must allow for packets of that size. NetEq is currently limited
  23    * to 60 ms on the receive side. */
  24   kWebRtcOpusMaxDecodeFrameSizeMs = 120,
  25
  26   /* Maximum sample count per channel is 48 kHz * maximum frame size in
  27    * milliseconds. */
  28   kWebRtcOpusMaxFrameSizePerChannel = 48 * kWebRtcOpusMaxDecodeFrameSizeMs,
  29
  30   /* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). */
  31   kWebRtcOpusDefaultFrameSize = 960,
  32 };
  33
  34 int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, int32_t channels) {
  35   OpusEncInst* state;
  36   if (inst != NULL) {
  37     state = (OpusEncInst*) calloc(1, sizeof(OpusEncInst));
  38     if (state) {
  39       int error;
  40       /* Default to VoIP application for mono, and AUDIO for stereo. */
  41       int application = (channels == 1) ? OPUS_APPLICATION_VOIP :
  42           OPUS_APPLICATION_AUDIO;
  43
  44       state->encoder = opus_encoder_create(48000, channels, application,
  45                                            &error);
  46       if (error == OPUS_OK && state->encoder != NULL) {
  47         *inst = state;
  48         return 0;
  49       }
  50       free(state);
  51     }
  52   }
  53   return -1;
  54 }
  55
  56 int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
  57   if (inst) {
  58     opus_encoder_destroy(inst->encoder);
  59     free(inst);
  60     return 0;
  61   } else {
  62     return -1;
  63   }
  64 }
  65
  66 int16_t WebRtcOpus_Encode(OpusEncInst* inst, int16_t* audio_in, int16_t samples,
  67                           int16_t length_encoded_buffer, uint8_t* encoded) {
  68   opus_int16* audio = (opus_int16*) audio_in;
  69   unsigned char* coded = encoded;
  70   int res;
  71
  72   if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
  73     return -1;
  74   }
  75
  76   res = opus_encode(inst->encoder, audio, samples, coded,
  77                     length_encoded_buffer);
  78
  79   if (res > 0) {
  80     return res;
  81   }
  82   return -1;
  83 }
  84
  85 int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
  86   if (inst) {
  87     return opus_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate));
  88   } else {
  89     return -1;
  90   }
  91 }
  92
  93 int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) {
  94   if (inst) {
  95     return opus_encoder_ctl(inst->encoder,
  96                             OPUS_SET_PACKET_LOSS_PERC(loss_rate));
  97   } else {
  98     return -1;
  99   }
 100 }
 101
 102 int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) {
 103   opus_int32 set_bandwidth;
 104
 105   if (!inst)
 106     return -1;
 107
 108   if (frequency_hz <= 8000) {
 109     set_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
 110   } else if (frequency_hz <= 12000) {
 111     set_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
 112   } else if (frequency_hz <= 16000) {
 113     set_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
 114   } else if (frequency_hz <= 24000) {
 115     set_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
 116   } else {
 117     set_bandwidth = OPUS_BANDWIDTH_FULLBAND;
 118   }
 119   return opus_encoder_ctl(inst->encoder,
 120                           OPUS_SET_MAX_BANDWIDTH(set_bandwidth));
 121 }
 122
 123 int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) {
 124   if (inst) {
 125     return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(1));
 126   } else {
 127     return -1;
 128   }
 129 }
 130
 131 int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
 132   if (inst) {
 133     return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(0));
 134   } else {
 135     return -1;
 136   }
 137 }
 138
 139 int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
 140   if (inst) {
 141     return opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(complexity));
 142   } else {
 143     return -1;
 144   }
 145 }
 146
 147 int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, int channels) {
 148   int error_l;
 149   int error_r;
 150   OpusDecInst* state;
 151
 152   if (inst != NULL) {
 153     /* Create Opus decoder state. */
 154     state = (OpusDecInst*) calloc(1, sizeof(OpusDecInst));
 155     if (state == NULL) {
 156       return -1;
 157     }
 158
 159     /* Create new memory for left and right channel, always at 48000 Hz. */
 160     state->decoder_left = opus_decoder_create(48000, channels, &error_l);
 161     state->decoder_right = opus_decoder_create(48000, channels, &error_r);
 162     if (error_l == OPUS_OK && error_r == OPUS_OK && state->decoder_left != NULL
 163         && state->decoder_right != NULL) {
 164       /* Creation of memory all ok. */
 165       state->channels = channels;
 166       state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize;
 167       *inst = state;
 168       return 0;
 169     }
 170
 171     /* If memory allocation was unsuccessful, free the entire state. */
 172     if (state->decoder_left) {
 173       opus_decoder_destroy(state->decoder_left);
 174     }
 175     if (state->decoder_right) {
 176       opus_decoder_destroy(state->decoder_right);
 177     }
 178     free(state);
 179   }
 180   return -1;
 181 }
 182
 183 int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
 184   if (inst) {
 185     opus_decoder_destroy(inst->decoder_left);
 186     opus_decoder_destroy(inst->decoder_right);
 187     free(inst);
 188     return 0;
 189   } else {
 190     return -1;
 191   }
 192 }
 193
 194 int WebRtcOpus_DecoderChannels(OpusDecInst* inst) {
 195   return inst->channels;
 196 }
 197
 198 int16_t WebRtcOpus_DecoderInitNew(OpusDecInst* inst) {
 199   int error = opus_decoder_ctl(inst->decoder_left, OPUS_RESET_STATE);
 200   if (error == OPUS_OK) {
 201     return 0;
 202   }
 203   return -1;
 204 }
 205
 206 int16_t WebRtcOpus_DecoderInit(OpusDecInst* inst) {
 207   int error = opus_decoder_ctl(inst->decoder_left, OPUS_RESET_STATE);
 208   if (error == OPUS_OK) {
 209     return 0;
 210   }
 211   return -1;
 212 }
 213
 214 int16_t WebRtcOpus_DecoderInitSlave(OpusDecInst* inst) {
 215   int error = opus_decoder_ctl(inst->decoder_right, OPUS_RESET_STATE);
 216   if (error == OPUS_OK) {
 217     return 0;
 218   }
 219   return -1;
 220 }
 221
 222 /* |frame_size| is set to maximum Opus frame size in the normal case, and
 223  * is set to the number of samples needed for PLC in case of losses.
 224  * It is up to the caller to make sure the value is correct. */
 225 static int DecodeNative(OpusDecoder* inst, const int16_t* encoded,
 226                         int16_t encoded_bytes, int frame_size,
 227                         int16_t* decoded, int16_t* audio_type) {
 228   unsigned char* coded = (unsigned char*) encoded;
 229   opus_int16* audio = (opus_int16*) decoded;
 230
 231   int res = opus_decode(inst, coded, encoded_bytes, audio, frame_size, 0);
 232
 233   /* TODO(tlegrand): set to DTX for zero-length packets? */
 234   *audio_type = 0;
 235
 236   if (res > 0) {
 237     return res;
 238   }
 239   return -1;
 240 }
 241
 242 static int DecodeFec(OpusDecoder* inst, const int16_t* encoded,
 243                      int16_t encoded_bytes, int frame_size,
 244                      int16_t* decoded, int16_t* audio_type) {
 245   unsigned char* coded = (unsigned char*) encoded;
 246   opus_int16* audio = (opus_int16*) decoded;
 247
 248   int res = opus_decode(inst, coded, encoded_bytes, audio, frame_size, 1);
 249
 250   /* TODO(tlegrand): set to DTX for zero-length packets? */
 251   *audio_type = 0;
 252
 253   if (res > 0) {
 254     return res;
 255   }
 256   return -1;
 257 }
 258
 259 int16_t WebRtcOpus_DecodeNew(OpusDecInst* inst, const uint8_t* encoded,
 260                              int16_t encoded_bytes, int16_t* decoded,
 261                              int16_t* audio_type) {
 262   int16_t* coded = (int16_t*)encoded;
 263   int decoded_samples;
 264
 265   decoded_samples = DecodeNative(inst->decoder_left, coded, encoded_bytes,
 266                                  kWebRtcOpusMaxFrameSizePerChannel,
 267                                  decoded, audio_type);
 268   if (decoded_samples < 0) {
 269     return -1;
 270   }
 271
 272   /* Update decoded sample memory, to be used by the PLC in case of losses. */
 273   inst->prev_decoded_samples = decoded_samples;
 274
 275   return decoded_samples;
 276 }
 277
 278 int16_t WebRtcOpus_Decode(OpusDecInst* inst, const int16_t* encoded,
 279                           int16_t encoded_bytes, int16_t* decoded,
 280                           int16_t* audio_type) {
 281   int decoded_samples;
 282   int i;
 283
 284   /* If mono case, just do a regular call to the decoder.
 285    * If stereo, call to WebRtcOpus_Decode() gives left channel as output, and
 286    * calls to WebRtcOpus_Decode_slave() give right channel as output.
 287    * This is to make stereo work with the current setup of NetEQ, which
 288    * requires two calls to the decoder to produce stereo. */
 289
 290   decoded_samples = DecodeNative(inst->decoder_left, encoded, encoded_bytes,
 291                                  kWebRtcOpusMaxFrameSizePerChannel, decoded,
 292                                  audio_type);
 293   if (decoded_samples < 0) {
 294     return -1;
 295   }
 296   if (inst->channels == 2) {
 297     /* The parameter |decoded_samples| holds the number of samples pairs, in
 298      * case of stereo. Number of samples in |decoded| equals |decoded_samples|
 299      * times 2. */
 300     for (i = 0; i < decoded_samples; i++) {
 301       /* Take every second sample, starting at the first sample. This gives
 302        * the left channel. */
 303       decoded[i] = decoded[i * 2];
 304     }
 305   }
 306
 307   /* Update decoded sample memory, to be used by the PLC in case of losses. */
 308   inst->prev_decoded_samples = decoded_samples;
 309
 310   return decoded_samples;
 311 }
 312
 313 int16_t WebRtcOpus_DecodeSlave(OpusDecInst* inst, const int16_t* encoded,
 314                                int16_t encoded_bytes, int16_t* decoded,
 315                                int16_t* audio_type) {
 316   int decoded_samples;
 317   int i;
 318
 319   decoded_samples = DecodeNative(inst->decoder_right, encoded, encoded_bytes,
 320                                  kWebRtcOpusMaxFrameSizePerChannel, decoded,
 321                                  audio_type);
 322   if (decoded_samples < 0) {
 323     return -1;
 324   }
 325   if (inst->channels == 2) {
 326     /* The parameter |decoded_samples| holds the number of samples pairs, in
 327      * case of stereo. Number of samples in |decoded| equals |decoded_samples|
 328      * times 2. */
 329     for (i = 0; i < decoded_samples; i++) {
 330       /* Take every second sample, starting at the second sample. This gives
 331        * the right channel. */
 332       decoded[i] = decoded[i * 2 + 1];
 333     }
 334   } else {
 335     /* Decode slave should never be called for mono packets. */
 336     return -1;
 337   }
 338
 339   return decoded_samples;
 340 }
 341
 342 int16_t WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded,
 343                              int16_t number_of_lost_frames) {
 344   int16_t audio_type = 0;
 345   int decoded_samples;
 346   int plc_samples;
 347
 348   /* The number of samples we ask for is |number_of_lost_frames| times
 349    * |prev_decoded_samples_|. Limit the number of samples to maximum
 350    * |kWebRtcOpusMaxFrameSizePerChannel|. */
 351   plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
 352   plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
 353       plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
 354   decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples,
 355                                  decoded, &audio_type);
 356   if (decoded_samples < 0) {
 357     return -1;
 358   }
 359
 360   return decoded_samples;
 361 }
 362
 363 int16_t WebRtcOpus_DecodePlcMaster(OpusDecInst* inst, int16_t* decoded,
 364                                    int16_t number_of_lost_frames) {
 365   int decoded_samples;
 366   int16_t audio_type = 0;
 367   int plc_samples;
 368   int i;
 369
 370   /* If mono case, just do a regular call to the decoder.
 371    * If stereo, call to WebRtcOpus_DecodePlcMaster() gives left channel as
 372    * output, and calls to WebRtcOpus_DecodePlcSlave() give right channel as
 373    * output. This is to make stereo work with the current setup of NetEQ, which
 374    * requires two calls to the decoder to produce stereo. */
 375
 376   /* The number of samples we ask for is |number_of_lost_frames| times
 377    * |prev_decoded_samples_|. Limit the number of samples to maximum
 378    * |kWebRtcOpusMaxFrameSizePerChannel|. */
 379   plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
 380   plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
 381       plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
 382   decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples,
 383                                  decoded, &audio_type);
 384   if (decoded_samples < 0) {
 385     return -1;
 386   }
 387
 388   if (inst->channels == 2) {
 389     /* The parameter |decoded_samples| holds the number of sample pairs, in
 390      * case of stereo. The original number of samples in |decoded| equals
 391      * |decoded_samples| times 2. */
 392     for (i = 0; i < decoded_samples; i++) {
 393       /* Take every second sample, starting at the first sample. This gives
 394        * the left channel. */
 395       decoded[i] = decoded[i * 2];
 396     }
 397   }
 398
 399   return decoded_samples;
 400 }
 401
 402 int16_t WebRtcOpus_DecodePlcSlave(OpusDecInst* inst, int16_t* decoded,
 403                                   int16_t number_of_lost_frames) {
 404   int decoded_samples;
 405   int16_t audio_type = 0;
 406   int plc_samples;
 407   int i;
 408
 409   /* Calls to WebRtcOpus_DecodePlcSlave() give right channel as output.
 410    * The function should never be called in the mono case. */
 411   if (inst->channels != 2) {
 412     return -1;
 413   }
 414
 415   /* The number of samples we ask for is |number_of_lost_frames| times
 416    *  |prev_decoded_samples_|. Limit the number of samples to maximum
 417    *  |kWebRtcOpusMaxFrameSizePerChannel|. */
 418   plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
 419   plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel)
 420       ? plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
 421   decoded_samples = DecodeNative(inst->decoder_right, NULL, 0, plc_samples,
 422                                  decoded, &audio_type);
 423   if (decoded_samples < 0) {
 424     return -1;
 425   }
 426
 427   /* The parameter |decoded_samples| holds the number of sample pairs,
 428    * The original number of samples in |decoded| equals |decoded_samples|
 429    * times 2. */
 430   for (i = 0; i < decoded_samples; i++) {
 431     /* Take every second sample, starting at the second sample. This gives
 432      * the right channel. */
 433     decoded[i] = decoded[i * 2 + 1];
 434   }
 435
 436   return decoded_samples;
 437 }
 438
 439 int16_t WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded,
 440                              int16_t encoded_bytes, int16_t* decoded,
 441                              int16_t* audio_type) {
 442   int16_t* coded = (int16_t*)encoded;
 443   int decoded_samples;
 444   int fec_samples;
 445
 446   if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) {
 447     return 0;
 448   }
 449
 450   fec_samples = opus_packet_get_samples_per_frame(encoded, 48000);
 451
 452   decoded_samples = DecodeFec(inst->decoder_left, coded, encoded_bytes,
 453                               fec_samples, decoded, audio_type);
 454   if (decoded_samples < 0) {
 455     return -1;
 456   }
 457
 458   return decoded_samples;
 459 }
 460
 461 int WebRtcOpus_DurationEst(OpusDecInst* inst,
 462                            const uint8_t* payload,
 463                            int payload_length_bytes) {
 464   int frames, samples;
 465   frames = opus_packet_get_nb_frames(payload, payload_length_bytes);
 466   if (frames < 0) {
 467     /* Invalid payload data. */
 468     return 0;
 469   }
 470   samples = frames * opus_packet_get_samples_per_frame(payload, 48000);
 471   if (samples < 120 || samples > 5760) {
 472     /* Invalid payload duration. */
 473     return 0;
 474   }
 475   return samples;
 476 }
 477
 478 int WebRtcOpus_FecDurationEst(const uint8_t* payload,
 479                               int payload_length_bytes) {
 480   int samples;
 481   if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) {
 482     return 0;
 483   }
 484
 485   samples = opus_packet_get_samples_per_frame(payload, 48000);
 486   if (samples < 480 || samples > 5760) {
 487     /* Invalid payload duration. */
 488     return 0;
 489   }
 490   return samples;
 491 }
 492
 493 int WebRtcOpus_PacketHasFec(const uint8_t* payload,
 494                             int payload_length_bytes) {
 495   int frames, channels, payload_length_ms;
 496   int n;
 497   opus_int16 frame_sizes[48];
 498   const unsigned char *frame_data[48];
 499
 500   if (payload == NULL || payload_length_bytes <= 0)
 501     return 0;
 502
 503   /* In CELT_ONLY mode, packets should not have FEC. */
 504   if (payload[0] & 0x80)
 505     return 0;
 506
 507   payload_length_ms = opus_packet_get_samples_per_frame(payload, 48000) / 48;
 508   if (10 > payload_length_ms)
 509     payload_length_ms = 10;
 510
 511   channels = opus_packet_get_nb_channels(payload);
 512
 513   switch (payload_length_ms) {
 514     case 10:
 515     case 20: {
 516       frames = 1;
 517       break;
 518     }
 519     case 40: {
 520       frames = 2;
 521       break;
 522     }
 523     case 60: {
 524       frames = 3;
 525       break;
 526     }
 527     default: {
 528       return 0; // It is actually even an invalid packet.
 529     }
 530   }
 531
 532   /* The following is to parse the LBRR flags. */
 533   if (opus_packet_parse(payload, payload_length_bytes, NULL, frame_data,
 534                         frame_sizes, NULL) < 0) {
 535     return 0;
 536   }
 537
 538   if (frame_sizes[0] <= 1) {
 539     return 0;
 540   }
 541
 542   for (n = 0; n < channels; n++) {
 543     if (frame_data[0][0] & (0x80 >> ((n + 1) * (frames + 1) - 1)))
 544       return 1;
 545   }
 546
 547   return 0;
 548 }