2 * Copyright (C) 2016 Sebastian Dröge <sebastian@centricular.com>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
24 #include "gstfdkaac.h"
25 #include "gstfdkaacenc.h"
27 #include <gst/pbutils/pbutils.h>
32 * - Add support for other AOT / profiles
33 * - Expose more properties, e.g. vbr
34 * - Signal encoder delay
35 * - LOAS / LATM support
/* Default for the bitrate property. The property description installed in
 * class_init states that 0 means a fixed value based on sample rate and
 * channel count.
 *
 * SAMPLE_RATES is a string fragment listing the accepted sample rates; it is
 * spliced into the rate field of both pad templates that follow. Only the
 * first entry of the list is visible in this view. */
45 #define DEFAULT_BITRATE (0)
47 #define SAMPLE_RATES " 8000, " \
60 static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
/* Sink caps: interleaved raw audio in native-endian S16, restricted to the
 * sample rates above and to 1-6 or 8 channels (7 is not listed). */
63 GST_STATIC_CAPS ("audio/x-raw, "
64 "format = (string) " GST_AUDIO_NE (S16) ", "
65 "layout = (string) interleaved, "
66 "rate = (int) { " SAMPLE_RATES " }, "
67 "channels = (int) {1, 2, 3, 4, 5, 6, 8}")
/* Source caps: framed MPEG-4 AAC with the same rate and channel sets as the
 * sink side, offered as ADTS, ADIF or raw, in the lc, he-aac-v1, he-aac-v2
 * and ld profiles. */
70 static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
73 GST_STATIC_CAPS ("audio/mpeg, "
74 "mpegversion = (int) 4, "
75 "rate = (int) { " SAMPLE_RATES " }, "
76 "channels = (int) {1, 2, 3, 4, 5, 6, 8}, "
77 "stream-format = (string) { adts, adif, raw }, "
78 "profile = (string) { lc, he-aac-v1, he-aac-v2, ld }, "
79 "framed = (boolean) true")
/* File-local debug category; GST_CAT_DEFAULT routes every GST_*_OBJECT
 * logging macro in this file to it. */
82 GST_DEBUG_CATEGORY_STATIC (gst_fdkaacenc_debug);
83 #define GST_CAT_DEFAULT gst_fdkaacenc_debug
/* Forward declarations of the GObject property accessors and of the
 * GstAudioEncoder virtual methods implemented further down; class_init
 * installs them on the class. */
85 static void gst_fdkaacenc_set_property (GObject * object, guint prop_id,
86 const GValue * value, GParamSpec * pspec);
87 static void gst_fdkaacenc_get_property (GObject * object, guint prop_id,
88 GValue * value, GParamSpec * pspec);
89 static gboolean gst_fdkaacenc_start (GstAudioEncoder * enc);
90 static gboolean gst_fdkaacenc_stop (GstAudioEncoder * enc);
91 static gboolean gst_fdkaacenc_set_format (GstAudioEncoder * enc,
93 static GstFlowReturn gst_fdkaacenc_handle_frame (GstAudioEncoder * enc,
95 static GstCaps *gst_fdkaacenc_get_caps (GstAudioEncoder * enc,
97 static void gst_fdkaacenc_flush (GstAudioEncoder * enc);
/* Type boilerplate: GstFdkAacEnc derives from GstAudioEncoder and is
 * registered as the element named fdkaacenc with primary rank. */
99 G_DEFINE_TYPE (GstFdkAacEnc, gst_fdkaacenc, GST_TYPE_AUDIO_ENCODER);
100 GST_ELEMENT_REGISTER_DEFINE (fdkaacenc, "fdkaacenc", GST_RANK_PRIMARY,
/* GObject property setter for the encoder element.
 *
 * @object:  the GstFdkAacEnc instance being configured
 * @prop_id: id of the property being written
 * @value:   new value supplied by the caller
 * @pspec:   param spec, only used for the invalid-id warning
 *
 * The values are merely stored on the instance here; set_format reads
 * self->bitrate and self->afterburner when it configures the encoder. */
104 gst_fdkaacenc_set_property (GObject * object, guint prop_id,
105 const GValue * value, GParamSpec * pspec)
107 GstFdkAacEnc *self = GST_FDKAACENC (object);
/* Integer property: requested target bitrate. */
111 self->bitrate = g_value_get_int (value);
/* Boolean property: afterburner quality switch. */
113 case PROP_AFTERBURNER:
114 self->afterburner = g_value_get_boolean (value);
/* Any other id is reported through the standard GObject warning. */
117 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
/* GObject property getter for the encoder element.
 *
 * @object:  the GstFdkAacEnc instance being queried
 * @prop_id: id of the property being read
 * @value:   GValue that receives the current setting
 * @pspec:   param spec, only used for the invalid-id warning
 *
 * Mirrors the setter: it copies the stored instance fields back out and
 * has no other effect in the lines visible here. */
124 gst_fdkaacenc_get_property (GObject * object, guint prop_id,
125 GValue * value, GParamSpec * pspec)
127 GstFdkAacEnc *self = GST_FDKAACENC (object);
/* Integer property: currently stored target bitrate. */
131 g_value_set_int (value, self->bitrate);
/* Boolean property: currently stored afterburner switch. */
133 case PROP_AFTERBURNER:
134 g_value_set_boolean (value, self->afterburner);
/* Any other id is reported through the standard GObject warning. */
137 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
/* GstAudioEncoder start hook (installed as base_class->start).
 *
 * The lines visible here only emit a debug message; the encoder handle
 * itself is opened later, in set_format.
 * NOTE(review): the return statement is not visible in this view. */
144 gst_fdkaacenc_start (GstAudioEncoder * enc)
146 GstFdkAacEnc *self = GST_FDKAACENC (enc);
148 GST_DEBUG_OBJECT (self, "start");
/* GstAudioEncoder stop hook (installed as base_class->stop).
 *
 * Releases the FDK encoder handle and marks the element as drained, so a
 * later set_format will not try to drain an encoder that no longer exists.
 * NOTE(review): whether the close is guarded by a NULL check and what is
 * returned cannot be seen in this view. */
154 gst_fdkaacenc_stop (GstAudioEncoder * enc)
156 GstFdkAacEnc *self = GST_FDKAACENC (enc);
158 GST_DEBUG_OBJECT (self, "stop");
/* aacEncClose takes the address of the handle. */
161 aacEncClose (&self->enc);
165 self->is_drained = TRUE;
/* GstAudioEncoder getcaps hook: computes the caps acceptable on the sink pad.
 *
 * @enc:    the encoder
 * @filter: optional filter caps, handed on to the base-class proxy helper
 *
 * Builds one caps entry per entry of the channel_layouts table, each derived
 * from the sink pad template, and lets gst_audio_encoder_proxy_getcaps
 * combine that set with what downstream accepts. The result of that call is
 * stored in res; the return statement itself is not visible in this view. */
170 gst_fdkaacenc_get_caps (GstAudioEncoder * enc, GstCaps * filter)
172 const GstFdkAacChannelLayout *layout;
173 GstCaps *res, *caps, *allowed_caps;
174 gboolean allow_mono = TRUE;
/* Ask what downstream of the source pad would accept right now. */
176 allowed_caps = gst_pad_get_allowed_caps (GST_AUDIO_ENCODER_SRC_PAD (enc));
177 GST_DEBUG_OBJECT (enc, "allowed caps %" GST_PTR_FORMAT, allowed_caps);
179 /* We need at least 2 channels if Parametric Stereo is in use. */
180 if (allowed_caps && gst_caps_get_size (allowed_caps) > 0) {
/* Only the first structure of the allowed caps is inspected. */
181 GstStructure *s = gst_caps_get_structure (allowed_caps, 0);
182 const gchar *profile = NULL;
/* he-aac-v2 is the Parametric Stereo profile; presumably allow_mono is
 * cleared in this branch (the assignment is not visible here). */
184 if ((profile = gst_structure_get_string (s, "profile"))
185 && strcmp (profile, "he-aac-v2") == 0) {
/* Drops the reference and resets the pointer; safe on NULL as well. */
189 gst_clear_caps (&allowed_caps);
191 caps = gst_caps_new_empty ();
/* The table is terminated by an entry whose channel count is 0. */
193 for (layout = channel_layouts; layout->channels; layout++) {
195 gint channels = layout->channels;
/* Mono layouts are left out when mono is not allowed. */
197 if (channels == 1 && !allow_mono)
/* Start each entry from a writable copy of the sink template caps. */
200 tmp = gst_caps_make_writable (gst_pad_get_pad_template_caps
201 (GST_AUDIO_ENCODER_SINK_PAD (enc)));
/* Variant that pins only the channel count. */
204 gst_caps_set_simple (tmp, "channels", G_TYPE_INT, channels, NULL);
/* Variant that also pins the channel-mask derived from the positions of
 * this layout. */
206 guint64 channel_mask;
207 gst_audio_channel_positions_to_mask (layout->positions, channels, FALSE,
209 gst_caps_set_simple (tmp, "channels", G_TYPE_INT, channels,
210 "channel-mask", GST_TYPE_BITMASK, channel_mask, NULL);
/* gst_caps_append takes ownership of tmp. */
213 gst_caps_append (caps, tmp);
/* Let the base class intersect our candidates with downstream and filter. */
216 res = gst_audio_encoder_proxy_getcaps (enc, caps, filter);
217 gst_caps_unref (caps);
/* GstAudioEncoder set_format hook: (re)creates and configures the FDK AAC
 * encoder for the negotiated input format.
 *
 * @enc:  the encoder
 * @info: negotiated raw audio format (rate, channels, channel positions)
 *
 * Visible sequence:
 *   1. drain and close a previous encoder instance that still holds data;
 *   2. read stream-format, profile and mpegversion from the first structure
 *      of the caps allowed downstream;
 *   3. open the encoder and set AOT, signaling mode, sample rate, channel
 *      mode, channel order, transmux, bitrate and (optionally) afterburner;
 *   4. initialise the encoder and query its info struct;
 *   5. configure frame sizes on the base class and publish the source caps.
 *
 * ret starts out FALSE and, on the path shown, ends up holding the result of
 * gst_audio_encoder_set_output_format. Each failing FDK call logs an error;
 * NOTE(review): the statements that leave the function on those error paths
 * are not visible in this view. */
223 gst_fdkaacenc_set_format (GstAudioEncoder * enc, GstAudioInfo * info)
225 GstFdkAacEnc *self = GST_FDKAACENC (enc);
226 gboolean ret = FALSE;
227 GstCaps *allowed_caps;
/* Defaults used when downstream expresses no preference: MPEG-4, AAC-LC. */
231 gint mpegversion = 4;
232 gint aot = AOT_AAC_LC;
233 const gchar *profile_str = "lc";
234 CHANNEL_MODE channel_mode;
235 AACENC_InfoStruct enc_info = { 0 };
236 gint bitrate, signaling_mode;
/* An encoder from a previous format may still hold buffered audio: push a
 * NULL buffer through handle_frame to drain it, then dispose of it. */
238 if (self->enc && !self->is_drained) {
240 gst_fdkaacenc_handle_frame (enc, NULL);
241 aacEncClose (&self->enc);
242 self->is_drained = TRUE;
245 allowed_caps = gst_pad_get_allowed_caps (GST_AUDIO_ENCODER_SRC_PAD (self));
247 GST_DEBUG_OBJECT (self, "allowed caps: %" GST_PTR_FORMAT, allowed_caps);
/* Only the first structure of the allowed caps is consulted. */
249 if (allowed_caps && gst_caps_get_size (allowed_caps) > 0) {
250 GstStructure *s = gst_caps_get_structure (allowed_caps, 0);
251 const gchar *str = NULL;
/* Output container selection. Presumably each branch assigns the transmux
 * value used further down (those assignments are not visible here). */
253 if ((str = gst_structure_get_string (s, "stream-format"))) {
254 if (strcmp (str, "adts") == 0) {
255 GST_DEBUG_OBJECT (self, "use ADTS format for output");
257 } else if (strcmp (str, "adif") == 0) {
258 GST_DEBUG_OBJECT (self, "use ADIF format for output");
260 } else if (strcmp (str, "raw") == 0) {
261 GST_DEBUG_OBJECT (self, "use RAW format for output");
/* Profile selection. profile_str is what ends up in the source caps;
 * presumably aot is updated alongside it (not visible here). */
266 if ((str = gst_structure_get_string (s, "profile"))) {
267 if (strcmp (str, "lc") == 0) {
268 GST_DEBUG_OBJECT (self, "using AAC-LC profile for output");
271 } else if (strcmp (str, "he-aac-v1") == 0) {
272 GST_DEBUG_OBJECT (self, "using SBR (HE-AACv1) profile for output");
274 profile_str = "he-aac-v1";
275 } else if (strcmp (str, "he-aac-v2") == 0) {
276 GST_DEBUG_OBJECT (self, "using PS (HE-AACv2) profile for output");
278 profile_str = "he-aac-v2";
279 } else if (strcmp (str, "ld") == 0) {
280 GST_DEBUG_OBJECT (self, "using AAC-LD profile for output");
/* Leaves mpegversion at its default of 4 when the field is absent. */
286 gst_structure_get_int (s, "mpegversion", &mpegversion);
289 gst_caps_unref (allowed_caps);
/* Open a fresh encoder handle sized for the input channel count. */
291 err = aacEncOpen (&self->enc, 0, GST_AUDIO_INFO_CHANNELS (info));
292 if (err != AACENC_OK) {
293 GST_ERROR_OBJECT (self, "Unable to open encoder: %d", err);
297 if ((err = aacEncoder_SetParam (self->enc, AACENC_AOT, aot)) != AACENC_OK) {
298 GST_ERROR_OBJECT (self, "Unable to set AOT %d: %d", aot, err);
302 /* Use explicit hierarchical signaling (2) with raw output stream-format
303 * and implicit signaling (0) with ADTS/ADIF */
309 if ((err = aacEncoder_SetParam (self->enc, AACENC_SIGNALING_MODE,
310 signaling_mode)) != AACENC_OK) {
311 GST_ERROR_OBJECT (self, "Unable to set signaling mode %d: %d",
312 signaling_mode, err);
316 if ((err = aacEncoder_SetParam (self->enc, AACENC_SAMPLERATE,
317 GST_AUDIO_INFO_RATE (info))) != AACENC_OK) {
318 GST_ERROR_OBJECT (self, "Unable to set sample rate %d: %d",
319 GST_AUDIO_INFO_RATE (info), err);
/* Mono needs no layout lookup and no channel reordering. */
323 if (GST_AUDIO_INFO_CHANNELS (info) == 1) {
324 channel_mode = MODE_1;
325 self->need_reorder = FALSE;
326 self->aac_positions = NULL;
/* Multi-channel: find the channel_layouts entry with the same channel
 * count and the same channel mask as the input. */
328 gint in_channels = GST_AUDIO_INFO_CHANNELS (info);
329 const GstAudioChannelPosition *in_positions =
330 &GST_AUDIO_INFO_POSITION (info, 0);
331 guint64 in_channel_mask;
332 const GstFdkAacChannelLayout *layout;
334 gst_audio_channel_positions_to_mask (in_positions, in_channels, FALSE,
337 for (layout = channel_layouts; layout->channels; layout++) {
338 gint channels = layout->channels;
339 const GstAudioChannelPosition *positions = layout->positions;
340 guint64 channel_mask;
342 if (channels != in_channels)
345 gst_audio_channel_positions_to_mask (positions, channels, FALSE,
347 if (channel_mask != in_channel_mask)
/* Same set of positions: reordering is needed only when the input order
 * differs from the order stored in the layout table. */
350 channel_mode = layout->mode;
351 self->need_reorder = memcmp (positions, in_positions,
352 channels * sizeof *positions) != 0;
353 self->aac_positions = positions;
/* Reaching the zero-channel terminator means nothing matched. */
357 if (!layout->channels) {
358 GST_ERROR_OBJECT (self, "Couldn't find a valid channel layout");
363 if ((err = aacEncoder_SetParam (self->enc, AACENC_CHANNELMODE,
364 channel_mode)) != AACENC_OK) {
365 GST_ERROR_OBJECT (self, "Unable to set channel mode %d: %d", channel_mode,
370 /* MPEG channel order */
371 if ((err = aacEncoder_SetParam (self->enc, AACENC_CHANNELORDER,
/* NOTE(review): this message is about the channel order but prints
 * channel_mode; looks like a copy/paste slip - confirm against the value
 * actually passed to the call above. */
373 GST_ERROR_OBJECT (self, "Unable to set channel order %d: %d", channel_mode,
/* Start from the user-set bitrate. The rate and channel-count ladder below
 * appears to choose a default when none was requested; the condition that
 * guards it and the values it assigns are not visible in this view. */
378 bitrate = self->bitrate;
380 * http://wiki.hydrogenaud.io/index.php?title=Fraunhofer_FDK_AAC#Recommended_Sampling_Rate_and_Bitrate_Combinations
383 if (GST_AUDIO_INFO_CHANNELS (info) == 1) {
384 if (GST_AUDIO_INFO_RATE (info) < 16000) {
386 } else if (GST_AUDIO_INFO_RATE (info) == 16000) {
388 } else if (GST_AUDIO_INFO_RATE (info) < 32000) {
390 } else if (GST_AUDIO_INFO_RATE (info) == 32000) {
392 } else if (GST_AUDIO_INFO_RATE (info) <= 44100) {
397 } else if (GST_AUDIO_INFO_CHANNELS (info) == 2) {
398 if (GST_AUDIO_INFO_RATE (info) < 16000) {
400 } else if (GST_AUDIO_INFO_RATE (info) == 16000) {
402 } else if (GST_AUDIO_INFO_RATE (info) < 22050) {
404 } else if (GST_AUDIO_INFO_RATE (info) < 32000) {
406 } else if (GST_AUDIO_INFO_RATE (info) == 32000) {
408 } else if (GST_AUDIO_INFO_RATE (info) <= 44100) {
/* Presumably the ladder for more than two channels - TODO confirm. */
415 if (GST_AUDIO_INFO_RATE (info) < 32000) {
417 } else if (GST_AUDIO_INFO_RATE (info) <= 44100) {
425 if ((err = aacEncoder_SetParam (self->enc, AACENC_TRANSMUX,
426 transmux)) != AACENC_OK) {
427 GST_ERROR_OBJECT (self, "Unable to set transmux %d: %d", transmux, err);
431 if ((err = aacEncoder_SetParam (self->enc, AACENC_BITRATE,
432 bitrate)) != AACENC_OK) {
433 GST_ERROR_OBJECT (self, "Unable to set bitrate %d: %d", bitrate, err);
/* Afterburner is only touched when the property was switched on. */
437 if (self->afterburner) {
439 aacEncoder_SetParam (self->enc, AACENC_AFTERBURNER,
441 GST_ERROR_OBJECT (self, "Could not enable afterburner: %d", err);
445 GST_INFO_OBJECT (self, "Afterburner enabled");
/* Calling aacEncEncode with all-NULL arguments applies the parameters set
 * above and initialises the encoder without encoding anything. */
447 if ((err = aacEncEncode (self->enc, NULL, NULL, NULL, NULL)) != AACENC_OK) {
448 GST_ERROR_OBJECT (self, "Unable to initialize encoder: %d", err);
452 if ((err = aacEncInfo (self->enc, &enc_info)) != AACENC_OK) {
453 GST_ERROR_OBJECT (self, "Unable to get encoder info: %d", err);
/* Have the base class hand over exactly one encoder frame worth of samples
 * per handle_frame call (min == max == frameLength, one frame at most). */
457 gst_audio_encoder_set_frame_max (enc, 1);
458 gst_audio_encoder_set_frame_samples_min (enc, enc_info.frameLength);
459 gst_audio_encoder_set_frame_samples_max (enc, enc_info.frameLength);
460 gst_audio_encoder_set_hard_min (enc, FALSE);
/* Remembered for handle_frame: output buffer size and samples per frame. */
461 self->outbuf_size = enc_info.maxOutBufBytes;
462 self->samples_per_frame = enc_info.frameLength;
464 src_caps = gst_caps_new_simple ("audio/mpeg",
465 "mpegversion", G_TYPE_INT, mpegversion,
466 "channels", G_TYPE_INT, GST_AUDIO_INFO_CHANNELS (info),
467 "framed", G_TYPE_BOOLEAN, TRUE,
468 "rate", G_TYPE_INT, GST_AUDIO_INFO_RATE (info), NULL);
/* Raw output carries the encoder configuration blob as codec_data; the
 * caps keep their own reference, so the local one is dropped afterwards. */
472 GstBuffer *codec_data =
473 gst_buffer_new_memdup (enc_info.confBuf, enc_info.confSize);
474 gst_caps_set_simple (src_caps, "codec_data", GST_TYPE_BUFFER, codec_data,
475 "stream-format", G_TYPE_STRING, "raw", NULL);
476 gst_buffer_unref (codec_data);
/* transmux 1 is advertised as adif, transmux 2 as adts. */
477 } else if (transmux == 1) {
478 gst_caps_set_simple (src_caps, "stream-format", G_TYPE_STRING, "adif",
480 } else if (transmux == 2) {
481 gst_caps_set_simple (src_caps, "stream-format", G_TYPE_STRING, "adts",
/* Any other transmux value is a programming error. */
484 g_assert_not_reached ();
487 gst_codec_utils_aac_caps_set_level_and_profile (src_caps, enc_info.confBuf,
490 /* The above only parses the "base" profile, which is always going to be LC.
491 * Set actual profile. */
492 gst_caps_set_simple (src_caps, "profile", G_TYPE_STRING, profile_str, NULL);
494 /* An AAC-LC-only decoder will not decode a stream that uses explicit
495 * hierarchical signaling */
496 if (signaling_mode == 2 && aot != AOT_AAC_LC) {
497 gst_structure_remove_field (gst_caps_get_structure (src_caps, 0),
/* Publish the final caps on the source pad; the result becomes ret. */
501 ret = gst_audio_encoder_set_output_format (enc, src_caps);
502 gst_caps_unref (src_caps);
/* GstAudioEncoder handle_frame hook: encodes one chunk of raw audio.
 *
 * @enc:   the encoder
 * @inbuf: raw audio to encode, or NULL to drain (set_format calls this
 *         function with NULL before closing an encoder)
 *
 * Maps the input (reordering channels on a private copy when needed),
 * allocates and maps an output buffer of self->outbuf_size bytes, runs
 * aacEncEncode, and hands the produced bytes to the base class through
 * gst_audio_encoder_finish_frame. ret starts as GST_FLOW_OK and is set to
 * GST_FLOW_ERROR on the allocation and encode failure paths shown.
 * NOTE(review): several guards, jump targets and the return statement are
 * not visible in this view; comments below describe only what is shown. */
508 gst_fdkaacenc_handle_frame (GstAudioEncoder * enc, GstBuffer * inbuf)
510 GstFdkAacEnc *self = GST_FDKAACENC (enc);
511 GstFlowReturn ret = GST_FLOW_OK;
513 GstMapInfo imap, omap;
/* FDK buffer descriptors and argument structs, all zero-initialised. */
515 AACENC_BufDesc in_desc = { 0 };
516 AACENC_BufDesc out_desc = { 0 };
517 AACENC_InArgs in_args = { 0 };
518 AACENC_OutArgs out_args = { 0 };
519 gint in_id = IN_AUDIO_DATA, out_id = OUT_BITSTREAM_DATA;
520 gint in_sizes, out_sizes;
521 gint in_el_sizes, out_el_sizes;
524 info = gst_audio_encoder_get_audio_info (enc);
/* Reordering happens in place, so it is done on a copy of the buffer,
 * mapped read-write; from here on inbuf names that copy. */
527 if (self->need_reorder) {
528 inbuf = gst_buffer_copy (inbuf);
529 gst_buffer_map (inbuf, &imap, GST_MAP_READWRITE);
530 gst_audio_reorder_channels (imap.data, imap.size,
531 GST_AUDIO_INFO_FORMAT (info), GST_AUDIO_INFO_CHANNELS (info),
532 &GST_AUDIO_INFO_POSITION (info, 0), self->aac_positions);
/* No reordering needed: a read-only mapping of the original is enough. */
534 gst_buffer_map (inbuf, &imap, GST_MAP_READ);
/* Sample count = mapped bytes divided by the BPS value of the audio info
 * (counted across all channels). */
537 in_args.numInSamples = imap.size / GST_AUDIO_INFO_BPS (info);
539 in_sizes = imap.size;
540 in_el_sizes = GST_AUDIO_INFO_BPS (info);
/* Presumably the drain case: -1 is the FDK convention for flushing
 * (see the aacEncEncode documentation). */
543 in_args.numInSamples = -1;
549 /* We unset is_drained even if there's no inbuf. Basically this is a
550 * workaround for aacEncEncode always producing 1024 bytes even without any
551 * input, thus messing up with the base class counting */
552 self->is_drained = FALSE;
/* The descriptor stores pointers to the locals above, so those locals must
 * stay alive until aacEncEncode has returned. */
554 in_desc.bufferIdentifiers = &in_id;
555 in_desc.bufs = (void *) &imap.data;
556 in_desc.bufSizes = &in_sizes;
557 in_desc.bufElSizes = &in_el_sizes;
/* Output buffer sized to the maximum reported by the encoder at set_format
 * time. */
559 outbuf = gst_audio_encoder_allocate_output_buffer (enc, self->outbuf_size);
561 ret = GST_FLOW_ERROR;
565 gst_buffer_map (outbuf, &omap, GST_MAP_WRITE);
566 out_sizes = omap.size;
568 out_desc.bufferIdentifiers = &out_id;
569 out_desc.numBufs = 1;
570 out_desc.bufs = (void *) &omap.data;
571 out_desc.bufSizes = &out_sizes;
572 out_desc.bufElSizes = &out_el_sizes;
574 err = aacEncEncode (self->enc, &in_desc, &out_desc, &in_args, &out_args);
/* End-of-stream reported while draining (no input buffer) is not treated
 * as a failure; any other non-OK code is. */
575 if (err == AACENC_ENCODE_EOF && !inbuf)
577 else if (err != AACENC_OK) {
578 GST_ERROR_OBJECT (self, "Failed to encode data: %d", err);
579 ret = GST_FLOW_ERROR;
/* Success path: release the input mapping, and the reordered copy if one
 * was made. */
584 gst_buffer_unmap (inbuf, &imap);
585 if (self->need_reorder)
586 gst_buffer_unref (inbuf);
/* The encoder may consume input without emitting any bytes yet. */
590 if (!out_args.numOutBytes)
/* Shrink the buffer to the bytes actually written and pass it on;
 * finish_frame takes ownership of outbuf. */
593 gst_buffer_unmap (outbuf, &omap);
594 gst_buffer_set_size (outbuf, out_args.numOutBytes);
596 ret = gst_audio_encoder_finish_frame (enc, outbuf, self->samples_per_frame);
/* Cleanup for the paths that do not hand outbuf to the base class. */
601 gst_buffer_unmap (outbuf, &omap);
602 gst_buffer_unref (outbuf);
/* Same input cleanup as on the success path. */
605 gst_buffer_unmap (inbuf, &imap);
606 if (self->need_reorder)
607 gst_buffer_unref (inbuf);
/* GstAudioEncoder flush hook: discards all encoder state.
 *
 * The encoder handle is closed without draining, the element is marked as
 * drained, and, when a valid audio format is already known, set_format is
 * run again so that a fresh encoder is ready for the next buffer. The
 * result of that set_format call is not checked. */
614 gst_fdkaacenc_flush (GstAudioEncoder * enc)
616 GstFdkAacEnc *self = GST_FDKAACENC (enc);
617 GstAudioInfo *info = gst_audio_encoder_get_audio_info (enc);
619 aacEncClose (&self->enc);
/* Marking as drained keeps set_format from draining the closed handle. */
621 self->is_drained = TRUE;
/* Before the first negotiation there is no format to reconfigure with. */
623 if (GST_AUDIO_INFO_IS_VALID (info))
624 gst_fdkaacenc_set_format (enc, info);
/* Instance initialiser: sets the property defaults and the initial state.
 *
 * A new element starts with the default bitrate, afterburner off, and the
 * drained flag set (nothing has been fed to an encoder yet). The base class
 * is told that this encoder can be drained. */
628 gst_fdkaacenc_init (GstFdkAacEnc * self)
630 self->bitrate = DEFAULT_BITRATE;
632 self->is_drained = TRUE;
633 self->afterburner = FALSE;
635 gst_audio_encoder_set_drainable (GST_AUDIO_ENCODER (self), TRUE);
639 gst_fdkaacenc_class_init (GstFdkAacEncClass * klass)
641 GObjectClass *object_class = G_OBJECT_CLASS (klass);
642 GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
643 GstAudioEncoderClass *base_class = GST_AUDIO_ENCODER_CLASS (klass);
645 object_class->set_property = GST_DEBUG_FUNCPTR (gst_fdkaacenc_set_property);
646 object_class->get_property = GST_DEBUG_FUNCPTR (gst_fdkaacenc_get_property);
648 base_class->start = GST_DEBUG_FUNCPTR (gst_fdkaacenc_start);
649 base_class->stop = GST_DEBUG_FUNCPTR (gst_fdkaacenc_stop);
650 base_class->set_format = GST_DEBUG_FUNCPTR (gst_fdkaacenc_set_format);
651 base_class->getcaps = GST_DEBUG_FUNCPTR (gst_fdkaacenc_get_caps);
652 base_class->handle_frame = GST_DEBUG_FUNCPTR (gst_fdkaacenc_handle_frame);
653 base_class->flush = GST_DEBUG_FUNCPTR (gst_fdkaacenc_flush);
655 g_object_class_install_property (object_class, PROP_BITRATE,
656 g_param_spec_int ("bitrate",
658 "Target Audio Bitrate (0 = fixed value based on "
659 " sample rate and channel count)",
660 0, G_MAXINT, DEFAULT_BITRATE,
661 G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
664 * GstFdkAacEnc:afterburner:
666 * Afterburner - Quality Parameter.
670 g_object_class_install_property (object_class, PROP_AFTERBURNER,
671 g_param_spec_boolean ("afterburner", "Afterburner - Quality Parameter",
672 "Additional quality control parameter. Can cause workload increase.",
673 FALSE, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
674 gst_element_class_add_static_pad_template (element_class, &sink_template);
675 gst_element_class_add_static_pad_template (element_class, &src_template);
677 gst_element_class_set_static_metadata (element_class, "FDK AAC audio encoder",
678 "Codec/Encoder/Audio/Converter", "FDK AAC audio encoder",
679 "Sebastian Dröge <sebastian@centricular.com>");
681 GST_DEBUG_CATEGORY_INIT (gst_fdkaacenc_debug, "fdkaacenc", 0,