/*
 * Opus encoder using libopus
 * Copyright (c) 2012 Nathan Caldwell
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <string.h>

#include <opus.h>
#include <opus_multistream.h>

#include "libavutil/channel_layout.h"
#include "libavutil/opt.h"
#include "avcodec.h"
#include "bytestream.h"
#include "codec_internal.h"
#include "encode.h"
#include "libopus.h"
#include "audio_frame_queue.h"
#include "vorbis_data.h"
/**
 * User-configurable encoder settings.
 *
 * Most fields are exposed through the AVOption table of this file
 * (addressed via OFFSET(), i.e. relative to LibopusEncContext.opts);
 * complexity, packet_size and max_bandwidth are derived during init.
 */
typedef struct LibopusEncOpts {
    int vbr;              /* 0 = CBR, 1 = VBR, 2 = constrained VBR */
    int application;      /* one of the OPUS_APPLICATION_* values */
    int packet_loss;      /* expected packet loss percentage */
    int fec;              /* non-zero enables inband FEC */
    int complexity;       /* taken from avctx->compression_level at init */
    float frame_duration; /* frame duration in milliseconds */
    int packet_size;      /* samples per channel handed to libopus per packet */
    int max_bandwidth;    /* OPUS_BANDWIDTH_* chosen from avctx->cutoff */
    int mapping_family;   /* -1 selects the default behaviour of this wrapper */
#ifdef OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST
    int apply_phase_inv;  /* non-zero keeps intensity stereo phase inversion */
#endif
} LibopusEncOpts;
50 typedef struct LibopusEncContext {
57 const uint8_t *encoder_channel_map;
/* Number of coupled (stereo) streams used for 1..8 channels; indexed by
 * channel count minus one. */
static const uint8_t opus_coupled_streams[8] = {
    0, 1, 1, 2, 2, 2, 2, 3
};
/* Opus internal to Vorbis channel order mapping written in the header.
 * Row index is the channel count minus one; only the first "channel count"
 * entries of a row are meaningful. */
static const uint8_t opus_vorbis_channel_map[8][8] = {
    { 0 },
    { 0, 1 },
    { 0, 2, 1 },
    { 0, 1, 2, 3 },
    { 0, 4, 1, 2, 3 },
    { 0, 4, 1, 2, 3, 5 },
    { 0, 4, 1, 2, 3, 5, 6 },
    { 0, 6, 1, 2, 3, 4, 5, 7 },
};
/* libavcodec to libopus channel order mapping, passed to libopus.
 * Row index is the channel count minus one; only the first "channel count"
 * entries of a row are meaningful. */
static const uint8_t libavcodec_libopus_channel_map[8][8] = {
    { 0 },
    { 0, 1 },
    { 0, 1, 2 },
    { 0, 1, 2, 3 },
    { 0, 1, 3, 4, 2 },
    { 0, 1, 4, 5, 2, 3 },
    { 0, 1, 5, 6, 2, 4, 3 },
    { 0, 1, 6, 7, 4, 5, 2, 3 },
};
88 static void libopus_write_header(AVCodecContext *avctx, int stream_count,
89 int coupled_stream_count,
91 const uint8_t *channel_mapping)
93 uint8_t *p = avctx->extradata;
94 int channels = avctx->ch_layout.nb_channels;
96 bytestream_put_buffer(&p, "OpusHead", 8);
97 bytestream_put_byte(&p, 1); /* Version */
98 bytestream_put_byte(&p, channels);
99 bytestream_put_le16(&p, avctx->initial_padding * 48000 / avctx->sample_rate); /* Lookahead samples at 48kHz */
100 bytestream_put_le32(&p, avctx->sample_rate); /* Original sample rate */
101 bytestream_put_le16(&p, 0); /* Gain of 0dB is recommended. */
103 /* Channel mapping */
104 bytestream_put_byte(&p, mapping_family);
105 if (mapping_family != 0) {
106 bytestream_put_byte(&p, stream_count);
107 bytestream_put_byte(&p, coupled_stream_count);
108 bytestream_put_buffer(&p, channel_mapping, channels);
112 static int libopus_configure_encoder(AVCodecContext *avctx, OpusMSEncoder *enc,
113 LibopusEncOpts *opts)
117 if (avctx->global_quality) {
118 av_log(avctx, AV_LOG_ERROR,
119 "Quality-based encoding not supported, "
120 "please specify a bitrate and VBR setting.\n");
121 return AVERROR(EINVAL);
124 ret = opus_multistream_encoder_ctl(enc, OPUS_SET_BITRATE(avctx->bit_rate));
125 if (ret != OPUS_OK) {
126 av_log(avctx, AV_LOG_ERROR,
127 "Failed to set bitrate: %s\n", opus_strerror(ret));
131 ret = opus_multistream_encoder_ctl(enc,
132 OPUS_SET_COMPLEXITY(opts->complexity));
134 av_log(avctx, AV_LOG_WARNING,
135 "Unable to set complexity: %s\n", opus_strerror(ret));
137 ret = opus_multistream_encoder_ctl(enc, OPUS_SET_VBR(!!opts->vbr));
139 av_log(avctx, AV_LOG_WARNING,
140 "Unable to set VBR: %s\n", opus_strerror(ret));
142 ret = opus_multistream_encoder_ctl(enc,
143 OPUS_SET_VBR_CONSTRAINT(opts->vbr == 2));
145 av_log(avctx, AV_LOG_WARNING,
146 "Unable to set constrained VBR: %s\n", opus_strerror(ret));
148 ret = opus_multistream_encoder_ctl(enc,
149 OPUS_SET_PACKET_LOSS_PERC(opts->packet_loss));
151 av_log(avctx, AV_LOG_WARNING,
152 "Unable to set expected packet loss percentage: %s\n",
155 ret = opus_multistream_encoder_ctl(enc,
156 OPUS_SET_INBAND_FEC(opts->fec));
158 av_log(avctx, AV_LOG_WARNING,
159 "Unable to set inband FEC: %s\n",
163 ret = opus_multistream_encoder_ctl(enc,
164 OPUS_SET_MAX_BANDWIDTH(opts->max_bandwidth));
166 av_log(avctx, AV_LOG_WARNING,
167 "Unable to set maximum bandwidth: %s\n", opus_strerror(ret));
170 #ifdef OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST
171 ret = opus_multistream_encoder_ctl(enc,
172 OPUS_SET_PHASE_INVERSION_DISABLED(!opts->apply_phase_inv));
174 av_log(avctx, AV_LOG_WARNING,
175 "Unable to set phase inversion: %s\n",
181 static int libopus_check_max_channels(AVCodecContext *avctx,
183 if (avctx->ch_layout.nb_channels > max_channels) {
184 av_log(avctx, AV_LOG_ERROR, "Opus mapping family undefined for %d channels.\n",
185 avctx->ch_layout.nb_channels);
186 return AVERROR(EINVAL);
192 static int libopus_check_vorbis_layout(AVCodecContext *avctx, int mapping_family) {
193 av_assert2(avctx->ch_layout.nb_channels < FF_ARRAY_ELEMS(ff_vorbis_ch_layouts));
195 if (avctx->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) {
196 av_log(avctx, AV_LOG_WARNING,
197 "No channel layout specified. Opus encoder will use Vorbis "
198 "channel layout for %d channels.\n", avctx->ch_layout.nb_channels);
199 } else if (av_channel_layout_compare(&avctx->ch_layout, &ff_vorbis_ch_layouts[avctx->ch_layout.nb_channels - 1])) {
202 av_channel_layout_describe(&avctx->ch_layout, name, sizeof(name));
203 av_log(avctx, AV_LOG_ERROR,
204 "Invalid channel layout %s for specified mapping family %d.\n",
205 name, mapping_family);
207 return AVERROR(EINVAL);
213 static int libopus_validate_layout_and_get_channel_map(
214 AVCodecContext *avctx,
216 const uint8_t ** channel_map_result)
218 const uint8_t * channel_map = NULL;
221 switch (mapping_family) {
223 ret = libopus_check_max_channels(avctx, 8);
225 ret = libopus_check_vorbis_layout(avctx, mapping_family);
226 /* Channels do not need to be reordered. */
231 ret = libopus_check_max_channels(avctx, 2);
233 ret = libopus_check_vorbis_layout(avctx, mapping_family);
237 /* Opus expects channels to be in Vorbis order. */
238 ret = libopus_check_max_channels(avctx, 8);
240 ret = libopus_check_vorbis_layout(avctx, mapping_family);
241 channel_map = ff_vorbis_channel_layout_offsets[avctx->ch_layout.nb_channels - 1];
245 ret = libopus_check_max_channels(avctx, 254);
248 av_log(avctx, AV_LOG_WARNING,
249 "Unknown channel mapping family %d. Output channel layout may be invalid.\n",
254 *channel_map_result = channel_map;
258 static av_cold int libopus_encode_init(AVCodecContext *avctx)
260 LibopusEncContext *opus = avctx->priv_data;
262 uint8_t libopus_channel_mapping[255];
264 int channels = avctx->ch_layout.nb_channels;
266 int coupled_stream_count, header_size, frame_size;
269 frame_size = opus->opts.frame_duration * 48000 / 1000;
270 switch (frame_size) {
273 if (opus->opts.application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)
274 av_log(avctx, AV_LOG_WARNING,
275 "LPC mode cannot be used with a frame duration of less "
276 "than 10ms. Enabling restricted low-delay mode.\n"
277 "Use a longer frame duration if this is not what you want.\n");
278 /* Frame sizes less than 10 ms can only use MDCT mode, so switching to
279 * RESTRICTED_LOWDELAY avoids an unnecessary extra 2.5ms lookahead. */
280 opus->opts.application = OPUS_APPLICATION_RESTRICTED_LOWDELAY;
285 #ifdef OPUS_FRAMESIZE_120_MS
290 opus->opts.packet_size =
291 avctx->frame_size = frame_size * avctx->sample_rate / 48000;
294 av_log(avctx, AV_LOG_ERROR, "Invalid frame duration: %g.\n"
295 "Frame duration must be exactly one of: 2.5, 5, 10, 20, 40"
296 #ifdef OPUS_FRAMESIZE_120_MS
297 ", 60, 80, 100 or 120.\n",
301 opus->opts.frame_duration);
302 return AVERROR(EINVAL);
305 if (avctx->compression_level < 0 || avctx->compression_level > 10) {
306 av_log(avctx, AV_LOG_WARNING,
307 "Compression level must be in the range 0 to 10. "
308 "Defaulting to 10.\n");
309 opus->opts.complexity = 10;
311 opus->opts.complexity = avctx->compression_level;
315 switch (avctx->cutoff) {
317 opus->opts.max_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
320 opus->opts.max_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
323 opus->opts.max_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
326 opus->opts.max_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
329 opus->opts.max_bandwidth = OPUS_BANDWIDTH_FULLBAND;
332 av_log(avctx, AV_LOG_WARNING,
333 "Invalid frequency cutoff: %d. Using default maximum bandwidth.\n"
334 "Cutoff frequency must be exactly one of: 4000, 6000, 8000, 12000 or 20000.\n",
340 /* Channels may need to be reordered to match opus mapping. */
341 av_ret = libopus_validate_layout_and_get_channel_map(avctx, opus->opts.mapping_family,
342 &opus->encoder_channel_map);
347 if (opus->opts.mapping_family == -1) {
348 /* By default, use mapping family 1 for the header but use the older
349 * libopus multistream API to avoid surround masking. */
351 /* Set the mapping family so that the value is correct in the header */
352 mapping_family = channels > 2 ? 1 : 0;
353 coupled_stream_count = opus_coupled_streams[channels - 1];
354 opus->stream_count = channels - coupled_stream_count;
355 memcpy(libopus_channel_mapping,
356 opus_vorbis_channel_map[channels - 1],
357 channels * sizeof(*libopus_channel_mapping));
359 enc = opus_multistream_encoder_create(
360 avctx->sample_rate, channels, opus->stream_count,
361 coupled_stream_count,
362 libavcodec_libopus_channel_map[channels - 1],
363 opus->opts.application, &ret);
365 /* Use the newer multistream API. The encoder will set the channel
366 * mapping and coupled stream counts to its internal defaults and will
367 * use surround masking analysis to save bits. */
368 mapping_family = opus->opts.mapping_family;
369 enc = opus_multistream_surround_encoder_create(
370 avctx->sample_rate, channels, mapping_family,
371 &opus->stream_count, &coupled_stream_count, libopus_channel_mapping,
372 opus->opts.application, &ret);
375 if (ret != OPUS_OK) {
376 av_log(avctx, AV_LOG_ERROR,
377 "Failed to create encoder: %s\n", opus_strerror(ret));
378 return ff_opus_error_to_averror(ret);
381 if (!avctx->bit_rate) {
382 /* Sane default copied from opusenc */
383 avctx->bit_rate = 64000 * opus->stream_count +
384 32000 * coupled_stream_count;
385 av_log(avctx, AV_LOG_WARNING,
386 "No bit rate set. Defaulting to %"PRId64" bps.\n", avctx->bit_rate);
389 if (avctx->bit_rate < 500 || avctx->bit_rate > 256000 * channels) {
390 av_log(avctx, AV_LOG_ERROR, "The bit rate %"PRId64" bps is unsupported. "
391 "Please choose a value between 500 and %d.\n", avctx->bit_rate,
393 ret = AVERROR(EINVAL);
397 ret = libopus_configure_encoder(avctx, enc, &opus->opts);
398 if (ret != OPUS_OK) {
399 ret = ff_opus_error_to_averror(ret);
403 /* Header includes channel mapping table if and only if mapping family is NOT 0 */
404 header_size = 19 + (mapping_family == 0 ? 0 : 2 + channels);
405 avctx->extradata = av_malloc(header_size + AV_INPUT_BUFFER_PADDING_SIZE);
406 if (!avctx->extradata) {
407 av_log(avctx, AV_LOG_ERROR, "Failed to allocate extradata.\n");
408 ret = AVERROR(ENOMEM);
411 avctx->extradata_size = header_size;
413 opus->samples = av_calloc(frame_size, channels *
414 av_get_bytes_per_sample(avctx->sample_fmt));
415 if (!opus->samples) {
416 av_log(avctx, AV_LOG_ERROR, "Failed to allocate samples buffer.\n");
417 ret = AVERROR(ENOMEM);
421 ret = opus_multistream_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&avctx->initial_padding));
423 av_log(avctx, AV_LOG_WARNING,
424 "Unable to get number of lookahead samples: %s\n",
427 libopus_write_header(avctx, opus->stream_count, coupled_stream_count,
428 mapping_family, libopus_channel_mapping);
430 ff_af_queue_init(avctx, &opus->afq);
437 opus_multistream_encoder_destroy(enc);
/**
 * Copy interleaved samples from src to dst while permuting the channels.
 *
 * For every sample frame, input channel c is stored at output channel
 * channel_map[c]. src and dst must not overlap.
 *
 * @param dst              destination, nb_samples * nb_channels samples
 * @param src              source, same size and interleaving as dst
 * @param channel_map      nb_channels entries, each < nb_channels
 * @param nb_channels      number of interleaved channels
 * @param nb_samples       number of sample frames to copy
 * @param bytes_per_sample size of one sample of one channel in bytes
 */
static void libopus_copy_samples_with_channel_map(
    uint8_t *dst, const uint8_t *src, const uint8_t *channel_map,
    int nb_channels, int nb_samples, int bytes_per_sample) {
    int sample, channel;

    for (sample = 0; sample < nb_samples; ++sample) {
        /* Index of the first sample of this frame; computed in size_t so
         * the byte offsets below cannot overflow int. */
        const size_t frame_base = (size_t)nb_channels * sample;

        for (channel = 0; channel < nb_channels; ++channel) {
            const size_t src_pos = bytes_per_sample * (frame_base + channel);
            const size_t dst_pos = bytes_per_sample * (frame_base + channel_map[channel]);

            memcpy(&dst[dst_pos], &src[src_pos], bytes_per_sample);
        }
    }
}
455 static int libopus_encode(AVCodecContext *avctx, AVPacket *avpkt,
456 const AVFrame *frame, int *got_packet_ptr)
458 LibopusEncContext *opus = avctx->priv_data;
459 const int bytes_per_sample = av_get_bytes_per_sample(avctx->sample_fmt);
460 const int channels = avctx->ch_layout.nb_channels;
461 const int sample_size = channels * bytes_per_sample;
462 const uint8_t *audio;
467 ret = ff_af_queue_add(&opus->afq, frame);
470 if (opus->encoder_channel_map != NULL) {
471 audio = opus->samples;
472 libopus_copy_samples_with_channel_map(
473 opus->samples, frame->data[0], opus->encoder_channel_map,
474 channels, frame->nb_samples, bytes_per_sample);
475 } else if (frame->nb_samples < opus->opts.packet_size) {
476 audio = opus->samples;
477 memcpy(opus->samples, frame->data[0], frame->nb_samples * sample_size);
479 audio = frame->data[0];
481 if (!opus->afq.remaining_samples || (!opus->afq.frame_alloc && !opus->afq.frame_count))
483 audio = opus->samples;
484 memset(opus->samples, 0, opus->opts.packet_size * sample_size);
487 /* Maximum packet size taken from opusenc in opus-tools. 120ms packets
488 * consist of 6 frames in one packet. The maximum frame size is 1275
489 * bytes along with the largest possible packet header of 7 bytes. */
490 if ((ret = ff_alloc_packet(avctx, avpkt, (1275 * 6 + 7) * opus->stream_count)) < 0)
493 if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT)
494 ret = opus_multistream_encode_float(opus->enc, (const float *)audio,
495 opus->opts.packet_size,
496 avpkt->data, avpkt->size);
498 ret = opus_multistream_encode(opus->enc, (const opus_int16 *)audio,
499 opus->opts.packet_size,
500 avpkt->data, avpkt->size);
503 av_log(avctx, AV_LOG_ERROR,
504 "Error encoding frame: %s\n", opus_strerror(ret));
505 return ff_opus_error_to_averror(ret);
508 av_shrink_packet(avpkt, ret);
510 ff_af_queue_remove(&opus->afq, opus->opts.packet_size,
511 &avpkt->pts, &avpkt->duration);
513 discard_padding = opus->opts.packet_size - avpkt->duration;
514 // Check if subtraction resulted in an overflow
515 if ((discard_padding < opus->opts.packet_size) != (avpkt->duration > 0))
516 return AVERROR(EINVAL);
517 if (discard_padding > 0) {
518 uint8_t* side_data = av_packet_new_side_data(avpkt,
519 AV_PKT_DATA_SKIP_SAMPLES,
522 return AVERROR(ENOMEM);
523 AV_WL32(side_data + 4, discard_padding);
531 static av_cold int libopus_encode_close(AVCodecContext *avctx)
533 LibopusEncContext *opus = avctx->priv_data;
535 opus_multistream_encoder_destroy(opus->enc);
537 ff_af_queue_close(&opus->afq);
539 av_freep(&opus->samples);
544 #define OFFSET(x) offsetof(LibopusEncContext, opts.x)
545 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
546 static const AVOption libopus_options[] = {
547 { "application", "Intended application type", OFFSET(application), AV_OPT_TYPE_INT, { .i64 = OPUS_APPLICATION_AUDIO }, OPUS_APPLICATION_VOIP, OPUS_APPLICATION_RESTRICTED_LOWDELAY, FLAGS, "application" },
548 { "voip", "Favor improved speech intelligibility", 0, AV_OPT_TYPE_CONST, { .i64 = OPUS_APPLICATION_VOIP }, 0, 0, FLAGS, "application" },
549 { "audio", "Favor faithfulness to the input", 0, AV_OPT_TYPE_CONST, { .i64 = OPUS_APPLICATION_AUDIO }, 0, 0, FLAGS, "application" },
550 { "lowdelay", "Restrict to only the lowest delay modes, disable voice-optimized modes", 0, AV_OPT_TYPE_CONST, { .i64 = OPUS_APPLICATION_RESTRICTED_LOWDELAY }, 0, 0, FLAGS, "application" },
551 { "frame_duration", "Duration of a frame in milliseconds", OFFSET(frame_duration), AV_OPT_TYPE_FLOAT, { .dbl = 20.0 }, 2.5, 120.0, FLAGS },
552 { "packet_loss", "Expected packet loss percentage", OFFSET(packet_loss), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, FLAGS },
553 { "fec", "Enable inband FEC. Expected packet loss must be non-zero", OFFSET(fec), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
554 { "vbr", "Variable bit rate mode", OFFSET(vbr), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 2, FLAGS, "vbr" },
555 { "off", "Use constant bit rate", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "vbr" },
556 { "on", "Use variable bit rate", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "vbr" },
557 { "constrained", "Use constrained VBR", 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, FLAGS, "vbr" },
558 { "mapping_family", "Channel Mapping Family", OFFSET(mapping_family), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 255, FLAGS, "mapping_family" },
559 #ifdef OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST
560 { "apply_phase_inv", "Apply intensity stereo phase inversion", OFFSET(apply_phase_inv), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
565 static const AVClass libopus_class = {
566 .class_name = "libopus",
567 .item_name = av_default_item_name,
568 .option = libopus_options,
569 .version = LIBAVUTIL_VERSION_INT,
572 static const FFCodecDefault libopus_defaults[] = {
574 { "compression_level", "10" },
/* Input sample rates accepted by the encoder; the list is 0-terminated. */
static const int libopus_sample_rates[] = {
    48000, 24000, 16000, 12000, 8000, 0,
};
582 const FFCodec ff_libopus_encoder = {
584 CODEC_LONG_NAME("libopus Opus"),
585 .p.type = AVMEDIA_TYPE_AUDIO,
586 .p.id = AV_CODEC_ID_OPUS,
587 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
588 AV_CODEC_CAP_SMALL_LAST_FRAME,
589 .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE,
590 .priv_data_size = sizeof(LibopusEncContext),
591 .init = libopus_encode_init,
592 FF_CODEC_ENCODE_CB(libopus_encode),
593 .close = libopus_encode_close,
594 .p.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
596 AV_SAMPLE_FMT_NONE },
597 .p.supported_samplerates = libopus_sample_rates,
598 .p.priv_class = &libopus_class,
599 .defaults = libopus_defaults,
600 .p.wrapper_name = "libopus",