2 * Copyright (C) 2016 Sebastian Dröge <sebastian@centricular.com>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Library General Public License for more details.
14 * You should have received a copy of the GNU Library General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
24 #include "gstfdkaac.h"
25 #include "gstfdkaacenc.h"
27 #include <gst/pbutils/pbutils.h>
32 * - Add support for other AOT / profiles
33 * - Expose more properties, e.g. afterburner and vbr
34 * - Signal encoder delay
35 * - LOAS / LATM support
/* Default of the "bitrate" property. The property description states that 0
 * means a fixed value based on sample rate and channel count. */
44 #define DEFAULT_BITRATE (0)
/* Sample-rate list that is spliced into the "rate" field of both the sink and
 * the source pad template caps strings. */
46 #define SAMPLE_RATES " 8000, " \
/* Sink pad template: raw audio ("audio/x-raw") in the S16 format selected by
 * GST_AUDIO_NE, interleaved layout, one of the SAMPLE_RATES, and 1, 2, 3, 4,
 * 5, 6 or 8 channels. */
59 static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
62 GST_STATIC_CAPS ("audio/x-raw, "
63 "format = (string) " GST_AUDIO_NE (S16) ", "
64 "layout = (string) interleaved, "
65 "rate = (int) { " SAMPLE_RATES " }, "
66 "channels = (int) {1, 2, 3, 4, 5, 6, 8}")
/* Source pad template: framed "audio/mpeg" with mpegversion 4, the same rate
 * and channel sets as the sink template, stream-format adts, adif or raw, and
 * profile lc, sbr, ps or ld. */
69 static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
72 GST_STATIC_CAPS ("audio/mpeg, "
73 "mpegversion = (int) 4, "
74 "rate = (int) { " SAMPLE_RATES " }, "
75 "channels = (int) {1, 2, 3, 4, 5, 6, 8}, "
76 "stream-format = (string) { adts, adif, raw }, "
77 "profile = (string) { lc, sbr, ps, ld }, " "framed = (boolean) true")
/* File-local debug category; GST_CAT_DEFAULT routes the GST_*_OBJECT logging
 * macros used in this file to it. */
80 GST_DEBUG_CATEGORY_STATIC (gst_fdkaacenc_debug);
81 #define GST_CAT_DEFAULT gst_fdkaacenc_debug
/* Forward declarations of the GObject property accessors and of the
 * GstAudioEncoder virtual method implementations defined further down. */
83 static void gst_fdkaacenc_set_property (GObject * object, guint prop_id,
84 const GValue * value, GParamSpec * pspec);
85 static void gst_fdkaacenc_get_property (GObject * object, guint prop_id,
86 GValue * value, GParamSpec * pspec);
87 static gboolean gst_fdkaacenc_start (GstAudioEncoder * enc);
88 static gboolean gst_fdkaacenc_stop (GstAudioEncoder * enc);
89 static gboolean gst_fdkaacenc_set_format (GstAudioEncoder * enc,
91 static GstFlowReturn gst_fdkaacenc_handle_frame (GstAudioEncoder * enc,
93 static GstCaps *gst_fdkaacenc_get_caps (GstAudioEncoder * enc,
95 static void gst_fdkaacenc_flush (GstAudioEncoder * enc);
/* GstFdkAacEnc is defined as a GstAudioEncoder subclass, and the element is
 * registered under the name "fdkaacenc" with GST_RANK_PRIMARY. */
97 G_DEFINE_TYPE (GstFdkAacEnc, gst_fdkaacenc, GST_TYPE_AUDIO_ENCODER);
98 GST_ELEMENT_REGISTER_DEFINE (fdkaacenc, "fdkaacenc", GST_RANK_PRIMARY,
/* GObject set_property implementation for GstFdkAacEnc.
 *
 * object:  the element instance whose property is written
 * prop_id: id of the property being set
 * value:   new value; read as an int for the bitrate
 * pspec:   param spec, passed on to the invalid-id warning
 *
 * NOTE(review): the dispatch on prop_id is not part of these lines; presumably
 * the bitrate store is the PROP_BITRATE case and the warning is the default
 * case - confirm against the full file. */
102 gst_fdkaacenc_set_property (GObject * object, guint prop_id,
103 const GValue * value, GParamSpec * pspec)
105 GstFdkAacEnc *self = GST_FDKAACENC (object);
/* Store the requested bitrate on the instance. */
109 self->bitrate = g_value_get_int (value);
/* Report a property id this element does not handle. */
112 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
/* GObject get_property implementation for GstFdkAacEnc.
 *
 * object:  the element instance whose property is read
 * prop_id: id of the property being queried
 * value:   output GValue; receives the bitrate as an int
 * pspec:   param spec, passed on to the invalid-id warning
 *
 * NOTE(review): the dispatch on prop_id is not part of these lines; presumably
 * the bitrate read is the PROP_BITRATE case and the warning is the default
 * case - confirm against the full file. */
119 gst_fdkaacenc_get_property (GObject * object, guint prop_id,
120 GValue * value, GParamSpec * pspec)
122 GstFdkAacEnc *self = GST_FDKAACENC (object);
/* Hand back the bitrate currently stored on the instance. */
126 g_value_set_int (value, self->bitrate);
/* Report a property id this element does not handle. */
129 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
/* GstAudioEncoder start implementation (installed as base_class->start).
 * The lines shown here only obtain the instance and log "start". */
136 gst_fdkaacenc_start (GstAudioEncoder * enc)
138 GstFdkAacEnc *self = GST_FDKAACENC (enc);
140 GST_DEBUG_OBJECT (self, "start");
/* GstAudioEncoder stop implementation (installed as base_class->stop).
 * Logs "stop", closes the FDK AAC encoder handle and marks the element as
 * drained. */
146 gst_fdkaacenc_stop (GstAudioEncoder * enc)
148 GstFdkAacEnc *self = GST_FDKAACENC (enc);
150 GST_DEBUG_OBJECT (self, "stop");
/* Release the encoder instance held in self->enc. */
153 aacEncClose (&self->enc);
/* With the encoder closed there is nothing left to drain. */
157 self->is_drained = TRUE;
/* GstAudioEncoder getcaps implementation (installed as base_class->getcaps).
 *
 * enc:    the encoder element
 * filter: caps filter handed through to gst_audio_encoder_proxy_getcaps()
 *
 * Builds a caps set by walking the channel_layouts table: every accepted
 * layout contributes a writable copy of the sink pad template caps with its
 * channel count (and, in one of the two variants below, a channel-mask) set.
 * The accumulated caps are then proxied through the base class together with
 * filter. Mono handling depends on allow_mono, which is tied to the "profile"
 * field of the first structure of the downstream allowed caps. */
162 gst_fdkaacenc_get_caps (GstAudioEncoder * enc, GstCaps * filter)
164 const GstFdkAacChannelLayout *layout;
165 GstCaps *res, *caps, *allowed_caps;
/* Mono starts out as permitted. */
166 gboolean allow_mono = TRUE;
/* Query what the source pad is currently allowed to produce. */
168 allowed_caps = gst_pad_get_allowed_caps (GST_AUDIO_ENCODER_SRC_PAD (enc));
169 GST_DEBUG_OBJECT (enc, "allowed caps %" GST_PTR_FORMAT, allowed_caps);
171 /* We need at least 2 channels if Parametric Stereo is in use. */
172 if (allowed_caps && gst_caps_get_size (allowed_caps) > 0) {
/* Only the first structure of the allowed caps is inspected. */
173 GstStructure *s = gst_caps_get_structure (allowed_caps, 0);
174 const gchar *profile = NULL;
/* Detect the "ps" profile.
 * NOTE(review): presumably allow_mono is cleared in this branch - confirm. */
176 if ((profile = gst_structure_get_string (s, "profile"))
177 && strcmp (profile, "ps") == 0) {
/* Drop our reference to the allowed caps. */
181 gst_clear_caps (&allowed_caps);
/* Accumulator for the per-layout caps. */
183 caps = gst_caps_new_empty ();
/* channel_layouts is walked until an entry with channels == 0. */
185 for (layout = channel_layouts; layout->channels; layout++) {
187 gint channels = layout->channels;
/* Mono layout while mono is not allowed. */
189 if (channels == 1 && !allow_mono)
/* Start from a writable copy of the sink pad template caps. */
192 tmp = gst_caps_make_writable (gst_pad_get_pad_template_caps
193 (GST_AUDIO_ENCODER_SINK_PAD (enc)));
/* Variant that only fixes the channel count. */
196 gst_caps_set_simple (tmp, "channels", G_TYPE_INT, channels, NULL);
/* Variant that additionally sets a channel-mask derived from the layout's
 * channel positions. */
198 guint64 channel_mask;
199 gst_audio_channel_positions_to_mask (layout->positions, channels, FALSE,
201 gst_caps_set_simple (tmp, "channels", G_TYPE_INT, channels,
202 "channel-mask", GST_TYPE_BITMASK, channel_mask, NULL);
/* Add this layout's caps to the accumulated set. */
205 gst_caps_append (caps, tmp);
/* Let the base class combine our caps with filter. */
208 res = gst_audio_encoder_proxy_getcaps (enc, caps, filter);
/* The intermediate caps are no longer needed once proxied. */
209 gst_caps_unref (caps);
/* GstAudioEncoder set_format implementation (installed as
 * base_class->set_format).
 *
 * enc:  the encoder element
 * info: audio info of the raw input to configure the encoder for
 *
 * Visible steps, in order:
 *  - an existing encoder that is not drained is drained by calling
 *    handle_frame with a NULL buffer, then closed;
 *  - stream-format, profile and mpegversion are read from the first structure
 *    of the source pad's allowed caps;
 *  - a new encoder is opened and configured (AOT, signaling mode, sample
 *    rate, channel mode, channel order, transmux, bitrate);
 *  - the encoder is initialized and queried with aacEncInfo();
 *  - the base class frame settings and the instance's outbuf_size /
 *    samples_per_frame are updated from that info;
 *  - the source caps are built and set as output format, whose result is
 *    stored in ret. */
215 gst_fdkaacenc_set_format (GstAudioEncoder * enc, GstAudioInfo * info)
217 GstFdkAacEnc *self = GST_FDKAACENC (enc);
218 gboolean ret = FALSE;
219 GstCaps *allowed_caps;
/* Defaults used unless the allowed caps lead to other values below. */
222 gint transmux = 0, aot = AOT_AAC_LC;
223 gint mpegversion = 4;
224 CHANNEL_MODE channel_mode;
225 AACENC_InfoStruct enc_info = { 0 };
226 gint bitrate, signaling_mode;
227 const gchar *ext_profile;
/* Reconfiguration: drain what the old encoder still holds (NULL input
 * buffer), then close it. */
229 if (self->enc && !self->is_drained) {
231 gst_fdkaacenc_handle_frame (enc, NULL);
232 aacEncClose (&self->enc);
233 self->is_drained = TRUE;
/* Query what the source pad is currently allowed to produce. */
236 allowed_caps = gst_pad_get_allowed_caps (GST_AUDIO_ENCODER_SRC_PAD (self));
238 GST_DEBUG_OBJECT (self, "allowed caps: %" GST_PTR_FORMAT, allowed_caps);
240 if (allowed_caps && gst_caps_get_size (allowed_caps) > 0) {
/* Only the first structure of the allowed caps is inspected. */
241 GstStructure *s = gst_caps_get_structure (allowed_caps, 0);
242 const gchar *str = NULL;
/* Output container format requested downstream.
 * NOTE(review): presumably each branch assigns transmux - confirm. */
244 if ((str = gst_structure_get_string (s, "stream-format"))) {
245 if (strcmp (str, "adts") == 0) {
246 GST_DEBUG_OBJECT (self, "use ADTS format for output");
248 } else if (strcmp (str, "adif") == 0) {
249 GST_DEBUG_OBJECT (self, "use ADIF format for output");
251 } else if (strcmp (str, "raw") == 0) {
252 GST_DEBUG_OBJECT (self, "use RAW format for output");
/* AAC profile requested downstream.
 * NOTE(review): presumably each branch assigns aot - confirm. */
257 if ((str = gst_structure_get_string (s, "profile"))) {
258 if (strcmp (str, "lc") == 0) {
259 GST_DEBUG_OBJECT (self, "using AAC-LC profile for output");
261 } else if (strcmp (str, "sbr") == 0) {
262 GST_DEBUG_OBJECT (self, "using SBR (HE-AAC) profile for output");
264 } else if (strcmp (str, "ps") == 0) {
265 GST_DEBUG_OBJECT (self, "using PS (HE-AACv2) profile for output");
267 } else if (strcmp (str, "ld") == 0) {
268 GST_DEBUG_OBJECT (self, "using AAC-LD profile for output");
/* mpegversion keeps its default of 4 when the field is absent. */
273 gst_structure_get_int (s, "mpegversion", &mpegversion);
276 gst_caps_unref (allowed_caps);
/* Open a fresh encoder for the input channel count. */
278 err = aacEncOpen (&self->enc, 0, GST_AUDIO_INFO_CHANNELS (info));
279 if (err != AACENC_OK) {
280 GST_ERROR_OBJECT (self, "Unable to open encoder: %d", err);
/* Audio object type selected above. */
284 if ((err = aacEncoder_SetParam (self->enc, AACENC_AOT, aot)) != AACENC_OK) {
285 GST_ERROR_OBJECT (self, "Unable to set AOT %d: %d", aot, err);
289 /* Use explicit hierarchical signaling (2) with raw output stream-format
290 * and implicit signaling (0) with ADTS/ADIF */
296 if ((err = aacEncoder_SetParam (self->enc, AACENC_SIGNALING_MODE,
297 signaling_mode)) != AACENC_OK) {
298 GST_ERROR_OBJECT (self, "Unable to set signaling mode %d: %d",
299 signaling_mode, err);
/* Sample rate comes straight from the negotiated input info. */
303 if ((err = aacEncoder_SetParam (self->enc, AACENC_SAMPLERATE,
304 GST_AUDIO_INFO_RATE (info))) != AACENC_OK) {
305 GST_ERROR_OBJECT (self, "Unable to set sample rate %d: %d",
306 GST_AUDIO_INFO_RATE (info), err);
/* Mono input: fixed channel mode, no reordering, no position table. */
310 if (GST_AUDIO_INFO_CHANNELS (info) == 1) {
311 channel_mode = MODE_1;
312 self->need_reorder = FALSE;
313 self->aac_positions = NULL;
/* Multi-channel input: look up the channel_layouts entry with the same
 * channel count and the same channel mask as the input. */
315 gint in_channels = GST_AUDIO_INFO_CHANNELS (info);
316 const GstAudioChannelPosition *in_positions =
317 &GST_AUDIO_INFO_POSITION (info, 0);
318 guint64 in_channel_mask;
319 const GstFdkAacChannelLayout *layout;
321 gst_audio_channel_positions_to_mask (in_positions, in_channels, FALSE,
324 for (layout = channel_layouts; layout->channels; layout++) {
325 gint channels = layout->channels;
326 const GstAudioChannelPosition *positions = layout->positions;
327 guint64 channel_mask;
/* Different channel count. */
329 if (channels != in_channels)
332 gst_audio_channel_positions_to_mask (positions, channels, FALSE,
/* Same count but a different set of positions. */
334 if (channel_mask != in_channel_mask)
/* Matching layout: take its channel mode. Reordering is needed whenever the
 * input position order differs from the layout's order. */
337 channel_mode = layout->mode;
338 self->need_reorder = memcmp (positions, in_positions,
339 channels * sizeof *positions) != 0;
340 self->aac_positions = positions;
/* The loop reached the table terminator, so no layout matched. */
344 if (!layout->channels) {
345 GST_ERROR_OBJECT (self, "Couldn't find a valid channel layout");
350 if ((err = aacEncoder_SetParam (self->enc, AACENC_CHANNELMODE,
351 channel_mode)) != AACENC_OK) {
352 GST_ERROR_OBJECT (self, "Unable to set channel mode %d: %d", channel_mode,
357 /* MPEG channel order */
/* NOTE(review): the error message below prints channel_mode although the
 * parameter being set is AACENC_CHANNELORDER - looks like a copy/paste slip
 * in the log text; confirm and log the channel order value instead. */
358 if ((err = aacEncoder_SetParam (self->enc, AACENC_CHANNELORDER,
360 GST_ERROR_OBJECT (self, "Unable to set channel order %d: %d", channel_mode,
/* Start from the bitrate property.
 * NOTE(review): the chains below select by channel count and sample rate
 * bracket; presumably they only apply when the property is 0 (see the
 * property description) and each branch assigns a bitrate - confirm. */
365 bitrate = self->bitrate;
367 * http://wiki.hydrogenaud.io/index.php?title=Fraunhofer_FDK_AAC#Recommended_Sampling_Rate_and_Bitrate_Combinations
/* One channel. */
370 if (GST_AUDIO_INFO_CHANNELS (info) == 1) {
371 if (GST_AUDIO_INFO_RATE (info) < 16000) {
373 } else if (GST_AUDIO_INFO_RATE (info) == 16000) {
375 } else if (GST_AUDIO_INFO_RATE (info) < 32000) {
377 } else if (GST_AUDIO_INFO_RATE (info) == 32000) {
379 } else if (GST_AUDIO_INFO_RATE (info) <= 44100) {
/* Two channels. */
384 } else if (GST_AUDIO_INFO_CHANNELS (info) == 2) {
385 if (GST_AUDIO_INFO_RATE (info) < 16000) {
387 } else if (GST_AUDIO_INFO_RATE (info) == 16000) {
389 } else if (GST_AUDIO_INFO_RATE (info) < 22050) {
391 } else if (GST_AUDIO_INFO_RATE (info) < 32000) {
393 } else if (GST_AUDIO_INFO_RATE (info) == 32000) {
395 } else if (GST_AUDIO_INFO_RATE (info) <= 44100) {
/* Remaining channel counts. */
402 if (GST_AUDIO_INFO_RATE (info) < 32000) {
404 } else if (GST_AUDIO_INFO_RATE (info) <= 44100) {
/* Output transport format. */
412 if ((err = aacEncoder_SetParam (self->enc, AACENC_TRANSMUX,
413 transmux)) != AACENC_OK) {
414 GST_ERROR_OBJECT (self, "Unable to set transmux %d: %d", transmux, err);
418 if ((err = aacEncoder_SetParam (self->enc, AACENC_BITRATE,
419 bitrate)) != AACENC_OK) {
420 GST_ERROR_OBJECT (self, "Unable to set bitrate %d: %d", bitrate, err);
/* An encode call with all-NULL arguments initializes the encoder with the
 * parameters set above (see the error text). */
424 if ((err = aacEncEncode (self->enc, NULL, NULL, NULL, NULL)) != AACENC_OK) {
425 GST_ERROR_OBJECT (self, "Unable to initialize encoder: %d", err);
/* Read back the resulting configuration (frame length, buffer sizes,
 * configuration blob). */
429 if ((err = aacEncInfo (self->enc, &enc_info)) != AACENC_OK) {
430 GST_ERROR_OBJECT (self, "Unable to get encoder info: %d", err);
/* Have the base class deliver exactly one encoder frame of frameLength
 * samples per handle_frame call. */
434 gst_audio_encoder_set_frame_max (enc, 1);
435 gst_audio_encoder_set_frame_samples_min (enc, enc_info.frameLength);
436 gst_audio_encoder_set_frame_samples_max (enc, enc_info.frameLength);
437 gst_audio_encoder_set_hard_min (enc, FALSE);
/* Remembered for handle_frame: output buffer size and samples per frame. */
438 self->outbuf_size = enc_info.maxOutBufBytes;
439 self->samples_per_frame = enc_info.frameLength;
/* Base source caps; stream-format, codec_data, level and profile are added
 * below. */
441 src_caps = gst_caps_new_simple ("audio/mpeg",
442 "mpegversion", G_TYPE_INT, mpegversion,
443 "channels", G_TYPE_INT, GST_AUDIO_INFO_CHANNELS (info),
444 "framed", G_TYPE_BOOLEAN, TRUE,
445 "rate", G_TYPE_INT, GST_AUDIO_INFO_RATE (info), NULL);
/* Raw output: the encoder's configuration blob is published as codec_data. */
449 GstBuffer *codec_data =
450 gst_buffer_new_memdup (enc_info.confBuf, enc_info.confSize);
451 gst_caps_set_simple (src_caps, "codec_data", GST_TYPE_BUFFER, codec_data,
452 "stream-format", G_TYPE_STRING, "raw", NULL);
/* The caps hold their own reference to the buffer. */
453 gst_buffer_unref (codec_data);
/* transmux 1 is advertised as adif, 2 as adts; anything else is a
 * programming error. */
454 } else if (transmux == 1) {
455 gst_caps_set_simple (src_caps, "stream-format", G_TYPE_STRING, "adif",
457 } else if (transmux == 2) {
458 gst_caps_set_simple (src_caps, "stream-format", G_TYPE_STRING, "adts",
461 g_assert_not_reached ();
/* Derive level and base profile from the encoder configuration. */
464 gst_codec_utils_aac_caps_set_level_and_profile (src_caps, enc_info.confBuf,
467 /* The above only parses the "base" profile, which is always going to be LC.
468 * Let's retrieve the extension AOT and set it as our profile in the caps. */
469 ext_profile = gst_codec_utils_aac_get_extension_profile (enc_info.confBuf,
473 gst_caps_set_simple (src_caps, "profile", G_TYPE_STRING, ext_profile, NULL);
/* Publish the output format; ret carries the base class result. */
475 ret = gst_audio_encoder_set_output_format (enc, src_caps);
476 gst_caps_unref (src_caps);
/* GstAudioEncoder handle_frame implementation (installed as
 * base_class->handle_frame).
 *
 * enc:   the encoder element
 * inbuf: raw audio to encode; set_format calls this function with NULL to
 *        drain, and the EOF check after aacEncEncode() tests for !inbuf
 *
 * Maps the input (a reordered copy when channel reordering is needed),
 * allocates an output buffer of self->outbuf_size bytes, runs one
 * aacEncEncode() call and hands the produced bytes to the base class through
 * gst_audio_encoder_finish_frame(). ret starts as GST_FLOW_OK and is set to
 * GST_FLOW_ERROR on the failure paths shown below. */
482 gst_fdkaacenc_handle_frame (GstAudioEncoder * enc, GstBuffer * inbuf)
484 GstFdkAacEnc *self = GST_FDKAACENC (enc);
485 GstFlowReturn ret = GST_FLOW_OK;
487 GstMapInfo imap, omap;
/* Buffer descriptors and in/out argument structs for aacEncEncode(). */
489 AACENC_BufDesc in_desc = { 0 };
490 AACENC_BufDesc out_desc = { 0 };
491 AACENC_InArgs in_args = { 0 };
492 AACENC_OutArgs out_args = { 0 };
493 gint in_id = IN_AUDIO_DATA, out_id = OUT_BITSTREAM_DATA;
494 gint in_sizes, out_sizes;
495 gint in_el_sizes, out_el_sizes;
498 info = gst_audio_encoder_get_audio_info (enc);
/* When the input channel order differs from the encoder's, reorder on a
 * copy mapped read-write so the caller's buffer stays untouched; otherwise
 * a read-only mapping of the input is enough. */
501 if (self->need_reorder) {
502 inbuf = gst_buffer_copy (inbuf);
503 gst_buffer_map (inbuf, &imap, GST_MAP_READWRITE);
504 gst_audio_reorder_channels (imap.data, imap.size,
505 GST_AUDIO_INFO_FORMAT (info), GST_AUDIO_INFO_CHANNELS (info),
506 &GST_AUDIO_INFO_POSITION (info, 0), self->aac_positions);
508 gst_buffer_map (inbuf, &imap, GST_MAP_READ);
/* Sample count is the mapped size divided by GST_AUDIO_INFO_BPS. */
511 in_args.numInSamples = imap.size / GST_AUDIO_INFO_BPS (info);
513 in_sizes = imap.size;
514 in_el_sizes = GST_AUDIO_INFO_BPS (info);
/* NOTE(review): presumably the no-input (drain) case, where -1 is passed as
 * the sample count - confirm against the full file. */
517 in_args.numInSamples = -1;
523 /* We unset is_drained even if there's no inbuf. Basically this is a
524 * workaround for aacEncEncode always producing 1024 bytes even without any
525 * input, thus messing up with the base class counting */
526 self->is_drained = FALSE;
/* Describe the single input buffer to the encoder. */
528 in_desc.bufferIdentifiers = &in_id;
529 in_desc.bufs = (void *) &imap.data;
530 in_desc.bufSizes = &in_sizes;
531 in_desc.bufElSizes = &in_el_sizes;
/* Output buffer sized from the value stored by set_format
 * (enc_info.maxOutBufBytes). */
533 outbuf = gst_audio_encoder_allocate_output_buffer (enc, self->outbuf_size);
535 ret = GST_FLOW_ERROR;
539 gst_buffer_map (outbuf, &omap, GST_MAP_WRITE);
540 out_sizes = omap.size;
/* Describe the single output buffer to the encoder. */
542 out_desc.bufferIdentifiers = &out_id;
543 out_desc.numBufs = 1;
544 out_desc.bufs = (void *) &omap.data;
545 out_desc.bufSizes = &out_sizes;
546 out_desc.bufElSizes = &out_el_sizes;
548 err = aacEncEncode (self->enc, &in_desc, &out_desc, &in_args, &out_args);
/* End-of-stream from the encoder with no input buffer is handled
 * separately from any other non-OK result, which is a real error. */
549 if (err == AACENC_ENCODE_EOF && !inbuf)
551 else if (err != AACENC_OK) {
552 GST_ERROR_OBJECT (self, "Failed to encode data: %d", err);
553 ret = GST_FLOW_ERROR;
/* Input is no longer needed: unmap it and drop the reorder copy. */
558 gst_buffer_unmap (inbuf, &imap);
559 if (self->need_reorder)
560 gst_buffer_unref (inbuf);
/* The encoder produced no output bytes for this call. */
564 if (!out_args.numOutBytes)
/* Trim the output buffer to the bytes actually written and hand it to the
 * base class as one frame of samples_per_frame samples. */
567 gst_buffer_unmap (outbuf, &omap);
568 gst_buffer_set_size (outbuf, out_args.numOutBytes);
570 ret = gst_audio_encoder_finish_frame (enc, outbuf, self->samples_per_frame);
/* NOTE(review): the releases below mirror the ones above and presumably
 * belong to the shared exit path for the early-out cases - confirm. */
575 gst_buffer_unmap (outbuf, &omap);
576 gst_buffer_unref (outbuf);
579 gst_buffer_unmap (inbuf, &imap);
580 if (self->need_reorder)
581 gst_buffer_unref (inbuf);
/* GstAudioEncoder flush implementation (installed as base_class->flush).
 * Closes the current encoder, marks the element as drained and, when the
 * base class already holds valid audio info, sets the encoder up again by
 * re-running set_format with that info. */
588 gst_fdkaacenc_flush (GstAudioEncoder * enc)
590 GstFdkAacEnc *self = GST_FDKAACENC (enc);
591 GstAudioInfo *info = gst_audio_encoder_get_audio_info (enc);
/* Discard the encoder together with whatever it still buffers. */
593 aacEncClose (&self->enc);
595 self->is_drained = TRUE;
/* Only reconfigure when a format has been negotiated. */
597 if (GST_AUDIO_INFO_IS_VALID (info))
598 gst_fdkaacenc_set_format (enc, info);
/* Instance initializer: sets the bitrate to DEFAULT_BITRATE, starts in the
 * drained state and marks the encoder as drainable for the base class. */
602 gst_fdkaacenc_init (GstFdkAacEnc * self)
604 self->bitrate = DEFAULT_BITRATE;
606 self->is_drained = TRUE;
608 gst_audio_encoder_set_drainable (GST_AUDIO_ENCODER (self), TRUE);
612 gst_fdkaacenc_class_init (GstFdkAacEncClass * klass)
614 GObjectClass *object_class = G_OBJECT_CLASS (klass);
615 GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
616 GstAudioEncoderClass *base_class = GST_AUDIO_ENCODER_CLASS (klass);
618 object_class->set_property = GST_DEBUG_FUNCPTR (gst_fdkaacenc_set_property);
619 object_class->get_property = GST_DEBUG_FUNCPTR (gst_fdkaacenc_get_property);
621 base_class->start = GST_DEBUG_FUNCPTR (gst_fdkaacenc_start);
622 base_class->stop = GST_DEBUG_FUNCPTR (gst_fdkaacenc_stop);
623 base_class->set_format = GST_DEBUG_FUNCPTR (gst_fdkaacenc_set_format);
624 base_class->getcaps = GST_DEBUG_FUNCPTR (gst_fdkaacenc_get_caps);
625 base_class->handle_frame = GST_DEBUG_FUNCPTR (gst_fdkaacenc_handle_frame);
626 base_class->flush = GST_DEBUG_FUNCPTR (gst_fdkaacenc_flush);
628 g_object_class_install_property (object_class, PROP_BITRATE,
629 g_param_spec_int ("bitrate",
631 "Target Audio Bitrate (0 = fixed value based on "
632 " sample rate and channel count)",
633 0, G_MAXINT, DEFAULT_BITRATE,
634 G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
636 gst_element_class_add_static_pad_template (element_class, &sink_template);
637 gst_element_class_add_static_pad_template (element_class, &src_template);
639 gst_element_class_set_static_metadata (element_class, "FDK AAC audio encoder",
640 "Codec/Encoder/Audio/Converter", "FDK AAC audio encoder",
641 "Sebastian Dröge <sebastian@centricular.com>");
643 GST_DEBUG_CATEGORY_INIT (gst_fdkaacenc_debug, "fdkaacenc", 0,