/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
#include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h"
#include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h"

#include <stdlib.h>
/* Constants shared by the encoder and decoder wrappers in this file. */
enum {
  /* Maximum supported frame size in WebRTC is 60 ms. */
  kWebRtcOpusMaxEncodeFrameSizeMs = 60,

  /* The format allows up to 120 ms frames. Since we don't control the other
   * side, we must allow for packets of that size. NetEq is currently limited
   * to 60 ms on the receive side. */
  kWebRtcOpusMaxDecodeFrameSizeMs = 120,

  /* Maximum sample count per channel is 48 kHz * maximum frame size in
   * milliseconds. */
  kWebRtcOpusMaxFrameSizePerChannel = 48 * kWebRtcOpusMaxDecodeFrameSizeMs,

  /* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). */
  kWebRtcOpusDefaultFrameSize = 960,
};
34 int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, int32_t channels) {
37 state = (OpusEncInst*) calloc(1, sizeof(OpusEncInst));
40 /* Default to VoIP application for mono, and AUDIO for stereo. */
41 int application = (channels == 1) ? OPUS_APPLICATION_VOIP :
42 OPUS_APPLICATION_AUDIO;
44 state->encoder = opus_encoder_create(48000, channels, application,
46 if (error == OPUS_OK && state->encoder != NULL) {
56 int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
58 opus_encoder_destroy(inst->encoder);
66 int16_t WebRtcOpus_Encode(OpusEncInst* inst, int16_t* audio_in, int16_t samples,
67 int16_t length_encoded_buffer, uint8_t* encoded) {
68 opus_int16* audio = (opus_int16*) audio_in;
69 unsigned char* coded = encoded;
72 if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
76 res = opus_encode(inst->encoder, audio, samples, coded,
77 length_encoded_buffer);
85 int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
87 return opus_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate));
93 int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) {
95 return opus_encoder_ctl(inst->encoder,
96 OPUS_SET_PACKET_LOSS_PERC(loss_rate));
102 int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) {
103 opus_int32 set_bandwidth;
108 if (frequency_hz <= 8000) {
109 set_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
110 } else if (frequency_hz <= 12000) {
111 set_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
112 } else if (frequency_hz <= 16000) {
113 set_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
114 } else if (frequency_hz <= 24000) {
115 set_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
117 set_bandwidth = OPUS_BANDWIDTH_FULLBAND;
119 return opus_encoder_ctl(inst->encoder,
120 OPUS_SET_MAX_BANDWIDTH(set_bandwidth));
123 int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) {
125 return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(1));
131 int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
133 return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(0));
139 int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
141 return opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(complexity));
147 int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, int channels) {
153 /* Create Opus decoder state. */
154 state = (OpusDecInst*) calloc(1, sizeof(OpusDecInst));
159 /* Create new memory for left and right channel, always at 48000 Hz. */
160 state->decoder_left = opus_decoder_create(48000, channels, &error_l);
161 state->decoder_right = opus_decoder_create(48000, channels, &error_r);
162 if (error_l == OPUS_OK && error_r == OPUS_OK && state->decoder_left != NULL
163 && state->decoder_right != NULL) {
164 /* Creation of memory all ok. */
165 state->channels = channels;
166 state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize;
171 /* If memory allocation was unsuccessful, free the entire state. */
172 if (state->decoder_left) {
173 opus_decoder_destroy(state->decoder_left);
175 if (state->decoder_right) {
176 opus_decoder_destroy(state->decoder_right);
183 int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
185 opus_decoder_destroy(inst->decoder_left);
186 opus_decoder_destroy(inst->decoder_right);
194 int WebRtcOpus_DecoderChannels(OpusDecInst* inst) {
195 return inst->channels;
198 int16_t WebRtcOpus_DecoderInitNew(OpusDecInst* inst) {
199 int error = opus_decoder_ctl(inst->decoder_left, OPUS_RESET_STATE);
200 if (error == OPUS_OK) {
206 int16_t WebRtcOpus_DecoderInit(OpusDecInst* inst) {
207 int error = opus_decoder_ctl(inst->decoder_left, OPUS_RESET_STATE);
208 if (error == OPUS_OK) {
214 int16_t WebRtcOpus_DecoderInitSlave(OpusDecInst* inst) {
215 int error = opus_decoder_ctl(inst->decoder_right, OPUS_RESET_STATE);
216 if (error == OPUS_OK) {
222 /* |frame_size| is set to maximum Opus frame size in the normal case, and
223 * is set to the number of samples needed for PLC in case of losses.
224 * It is up to the caller to make sure the value is correct. */
225 static int DecodeNative(OpusDecoder* inst, const int16_t* encoded,
226 int16_t encoded_bytes, int frame_size,
227 int16_t* decoded, int16_t* audio_type) {
228 unsigned char* coded = (unsigned char*) encoded;
229 opus_int16* audio = (opus_int16*) decoded;
231 int res = opus_decode(inst, coded, encoded_bytes, audio, frame_size, 0);
233 /* TODO(tlegrand): set to DTX for zero-length packets? */
242 static int DecodeFec(OpusDecoder* inst, const int16_t* encoded,
243 int16_t encoded_bytes, int frame_size,
244 int16_t* decoded, int16_t* audio_type) {
245 unsigned char* coded = (unsigned char*) encoded;
246 opus_int16* audio = (opus_int16*) decoded;
248 int res = opus_decode(inst, coded, encoded_bytes, audio, frame_size, 1);
250 /* TODO(tlegrand): set to DTX for zero-length packets? */
259 int16_t WebRtcOpus_DecodeNew(OpusDecInst* inst, const uint8_t* encoded,
260 int16_t encoded_bytes, int16_t* decoded,
261 int16_t* audio_type) {
262 int16_t* coded = (int16_t*)encoded;
265 decoded_samples = DecodeNative(inst->decoder_left, coded, encoded_bytes,
266 kWebRtcOpusMaxFrameSizePerChannel,
267 decoded, audio_type);
268 if (decoded_samples < 0) {
272 /* Update decoded sample memory, to be used by the PLC in case of losses. */
273 inst->prev_decoded_samples = decoded_samples;
275 return decoded_samples;
278 int16_t WebRtcOpus_Decode(OpusDecInst* inst, const int16_t* encoded,
279 int16_t encoded_bytes, int16_t* decoded,
280 int16_t* audio_type) {
284 /* If mono case, just do a regular call to the decoder.
285 * If stereo, call to WebRtcOpus_Decode() gives left channel as output, and
286 * calls to WebRtcOpus_Decode_slave() give right channel as output.
287 * This is to make stereo work with the current setup of NetEQ, which
288 * requires two calls to the decoder to produce stereo. */
290 decoded_samples = DecodeNative(inst->decoder_left, encoded, encoded_bytes,
291 kWebRtcOpusMaxFrameSizePerChannel, decoded,
293 if (decoded_samples < 0) {
296 if (inst->channels == 2) {
297 /* The parameter |decoded_samples| holds the number of samples pairs, in
298 * case of stereo. Number of samples in |decoded| equals |decoded_samples|
300 for (i = 0; i < decoded_samples; i++) {
301 /* Take every second sample, starting at the first sample. This gives
302 * the left channel. */
303 decoded[i] = decoded[i * 2];
307 /* Update decoded sample memory, to be used by the PLC in case of losses. */
308 inst->prev_decoded_samples = decoded_samples;
310 return decoded_samples;
313 int16_t WebRtcOpus_DecodeSlave(OpusDecInst* inst, const int16_t* encoded,
314 int16_t encoded_bytes, int16_t* decoded,
315 int16_t* audio_type) {
319 decoded_samples = DecodeNative(inst->decoder_right, encoded, encoded_bytes,
320 kWebRtcOpusMaxFrameSizePerChannel, decoded,
322 if (decoded_samples < 0) {
325 if (inst->channels == 2) {
326 /* The parameter |decoded_samples| holds the number of samples pairs, in
327 * case of stereo. Number of samples in |decoded| equals |decoded_samples|
329 for (i = 0; i < decoded_samples; i++) {
330 /* Take every second sample, starting at the second sample. This gives
331 * the right channel. */
332 decoded[i] = decoded[i * 2 + 1];
335 /* Decode slave should never be called for mono packets. */
339 return decoded_samples;
342 int16_t WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded,
343 int16_t number_of_lost_frames) {
344 int16_t audio_type = 0;
348 /* The number of samples we ask for is |number_of_lost_frames| times
349 * |prev_decoded_samples_|. Limit the number of samples to maximum
350 * |kWebRtcOpusMaxFrameSizePerChannel|. */
351 plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
352 plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
353 plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
354 decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples,
355 decoded, &audio_type);
356 if (decoded_samples < 0) {
360 return decoded_samples;
363 int16_t WebRtcOpus_DecodePlcMaster(OpusDecInst* inst, int16_t* decoded,
364 int16_t number_of_lost_frames) {
366 int16_t audio_type = 0;
370 /* If mono case, just do a regular call to the decoder.
371 * If stereo, call to WebRtcOpus_DecodePlcMaster() gives left channel as
372 * output, and calls to WebRtcOpus_DecodePlcSlave() give right channel as
373 * output. This is to make stereo work with the current setup of NetEQ, which
374 * requires two calls to the decoder to produce stereo. */
376 /* The number of samples we ask for is |number_of_lost_frames| times
377 * |prev_decoded_samples_|. Limit the number of samples to maximum
378 * |kWebRtcOpusMaxFrameSizePerChannel|. */
379 plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
380 plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
381 plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
382 decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples,
383 decoded, &audio_type);
384 if (decoded_samples < 0) {
388 if (inst->channels == 2) {
389 /* The parameter |decoded_samples| holds the number of sample pairs, in
390 * case of stereo. The original number of samples in |decoded| equals
391 * |decoded_samples| times 2. */
392 for (i = 0; i < decoded_samples; i++) {
393 /* Take every second sample, starting at the first sample. This gives
394 * the left channel. */
395 decoded[i] = decoded[i * 2];
399 return decoded_samples;
402 int16_t WebRtcOpus_DecodePlcSlave(OpusDecInst* inst, int16_t* decoded,
403 int16_t number_of_lost_frames) {
405 int16_t audio_type = 0;
409 /* Calls to WebRtcOpus_DecodePlcSlave() give right channel as output.
410 * The function should never be called in the mono case. */
411 if (inst->channels != 2) {
415 /* The number of samples we ask for is |number_of_lost_frames| times
416 * |prev_decoded_samples_|. Limit the number of samples to maximum
417 * |kWebRtcOpusMaxFrameSizePerChannel|. */
418 plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
419 plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel)
420 ? plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
421 decoded_samples = DecodeNative(inst->decoder_right, NULL, 0, plc_samples,
422 decoded, &audio_type);
423 if (decoded_samples < 0) {
427 /* The parameter |decoded_samples| holds the number of sample pairs,
428 * The original number of samples in |decoded| equals |decoded_samples|
430 for (i = 0; i < decoded_samples; i++) {
431 /* Take every second sample, starting at the second sample. This gives
432 * the right channel. */
433 decoded[i] = decoded[i * 2 + 1];
436 return decoded_samples;
439 int16_t WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded,
440 int16_t encoded_bytes, int16_t* decoded,
441 int16_t* audio_type) {
442 int16_t* coded = (int16_t*)encoded;
446 if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) {
450 fec_samples = opus_packet_get_samples_per_frame(encoded, 48000);
452 decoded_samples = DecodeFec(inst->decoder_left, coded, encoded_bytes,
453 fec_samples, decoded, audio_type);
454 if (decoded_samples < 0) {
458 return decoded_samples;
461 int WebRtcOpus_DurationEst(OpusDecInst* inst,
462 const uint8_t* payload,
463 int payload_length_bytes) {
465 frames = opus_packet_get_nb_frames(payload, payload_length_bytes);
467 /* Invalid payload data. */
470 samples = frames * opus_packet_get_samples_per_frame(payload, 48000);
471 if (samples < 120 || samples > 5760) {
472 /* Invalid payload duration. */
/* Estimates the duration of the FEC data inside |payload| in samples at
 * 48 kHz. Returns 0 if the packet carries no FEC or the duration is out of
 * range (under 10 ms / over 120 ms). */
int WebRtcOpus_FecDurationEst(const uint8_t* payload,
                              int payload_length_bytes) {
  int samples;
  if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) {
    return 0;
  }

  samples = opus_packet_get_samples_per_frame(payload, 48000);
  if (samples < 480 || samples > 5760) {
    /* Invalid payload duration. */
    return 0;
  }
  return samples;
}
493 int WebRtcOpus_PacketHasFec(const uint8_t* payload,
494 int payload_length_bytes) {
495 int frames, channels, payload_length_ms;
497 opus_int16 frame_sizes[48];
498 const unsigned char *frame_data[48];
500 if (payload == NULL || payload_length_bytes <= 0)
503 /* In CELT_ONLY mode, packets should not have FEC. */
504 if (payload[0] & 0x80)
507 payload_length_ms = opus_packet_get_samples_per_frame(payload, 48000) / 48;
508 if (10 > payload_length_ms)
509 payload_length_ms = 10;
511 channels = opus_packet_get_nb_channels(payload);
513 switch (payload_length_ms) {
528 return 0; // It is actually even an invalid packet.
532 /* The following is to parse the LBRR flags. */
533 if (opus_packet_parse(payload, payload_length_bytes, NULL, frame_data,
534 frame_sizes, NULL) < 0) {
538 if (frame_sizes[0] <= 1) {
542 for (n = 0; n < channels; n++) {
543 if (frame_data[0][0] & (0x80 >> ((n + 1) * (frames + 1) - 1)))