1 /* GStreamer ReplayGain analysis
3 * Copyright (C) 2006 Rene Stadler <mail@renestadler.de>
5 * gstrganalysis.c: Element that performs the ReplayGain analysis
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public License
9 * as published by the Free Software Foundation; either version 2.1 of
10 * the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
24 * SECTION:element-rganalysis
25 * @see_also: #GstRgVolume
27 * This element analyzes raw audio sample data in accordance with the proposed
28 * <ulink url="http://replaygain.org">ReplayGain standard</ulink> for
29 * calculating the ideal replay gain for music tracks and albums. The element
30 * is designed as a pass-through filter that never modifies any data. As it
31 * receives an EOS event, it finalizes the ongoing analysis and generates a tag
32 * list containing the results. It is sent downstream with a tag event and
33 * posted on the message bus with a tag message. The EOS event is forwarded as
34 * normal afterwards. Result tag lists at least contain the tags
35 * #GST_TAG_TRACK_GAIN, #GST_TAG_TRACK_PEAK and #GST_TAG_REFERENCE_LEVEL.
37 * Because the generated metadata tags become available at the end of streams,
38 * downstream muxer and encoder elements are normally unable to save them in
39 * their output since they generally save metadata in the file header.
40 * Therefore, it is often necessary that applications read the results in a bus
41 * event handler for the tag message. Obtaining the values this way is always
42 * needed for <link linkend="GstRgAnalysis--num-tracks">album processing</link>
43 * since the album gain and peak values need to be associated with all tracks of
44 * an album, not just the last one.
47 * <title>Example launch lines</title>
49 * gst-launch -t audiotestsrc wave=sine num-buffers=512 ! rganalysis ! fakesink
50 * ]| Analyze a simple test waveform
52 * gst-launch -t filesrc location=filename.ext ! decodebin \
53 * ! audioconvert ! audioresample ! rganalysis ! fakesink
54 * ]| Analyze a given file
56 * gst-launch -t gnomevfssrc location=http://replaygain.hydrogenaudio.org/ref_pink.wav \
57 * ! wavparse ! rganalysis ! fakesink
58 * ]| Analyze the pink noise reference file
60 * The above launch line yields a result gain of +6 dB (instead of the expected
61 * +0 dB). This is not an error; refer to the #GstRgAnalysis:reference-level
62 * property documentation for more information.
66 * <title>Acknowledgements</title>
68 * This element is based on code used in the <ulink
69 * url="http://sjeng.org/vorbisgain.html">vorbisgain</ulink> program and many
70 * others. The relevant parts are copyrighted by David Robinson, Glen Sawyer
81 #include <gst/base/gstbasetransform.h>
82 #include <gst/audio/audio.h>
84 #include "gstrganalysis.h"
85 #include "replaygain.h"
/* Debug category for this element; GST_CAT_DEFAULT makes it the category used
 * by the logging macros in this file. */
87 GST_DEBUG_CATEGORY_STATIC (gst_rg_analysis_debug);
88 #define GST_CAT_DEFAULT gst_rg_analysis_debug
90 /* Default values for the "forced" and "message" properties. */
91 #define FORCED_DEFAULT TRUE
92 #define DEFAULT_MESSAGE FALSE
103 /* The ReplayGain algorithm is intended for use with mono and stereo
104 * audio. The used implementation has filter coefficients for the
105 * "usual" sample rates in the 8000 to 48000 Hz range. */
106 #define REPLAY_GAIN_CAPS "audio/x-raw," \
107 "format = (string) { "GST_AUDIO_NE(F32)","GST_AUDIO_NE(S16)" }, " \
108 "layout = (string) interleaved, " \
109 "channels = (int) 1, " \
110 "rate = (int) { 8000, 11025, 12000, 16000, 22050, 24000, 32000, " \
113 "format = (string) { "GST_AUDIO_NE(F32)","GST_AUDIO_NE(S16)" }, " \
114 "layout = (string) interleaved, " \
115 "channels = (int) 2, " \
116 "channel-mask = (bitmask) 0x3, " \
117 "rate = (int) { 8000, 11025, 12000, 16000, 22050, 24000, 32000, " \
120 static GstStaticPadTemplate sink_factory = GST_STATIC_PAD_TEMPLATE ("sink",
123 GST_STATIC_CAPS (REPLAY_GAIN_CAPS));
/* The source pad template advertises the same caps as the sink pad. */
125 static GstStaticPadTemplate src_factory = GST_STATIC_PAD_TEMPLATE ("src",
128 GST_STATIC_CAPS (REPLAY_GAIN_CAPS));
/* Define the GstRgAnalysis type as a subclass of GstBaseTransform;
 * parent_class is aliased to the generated gst_rg_analysis_parent_class. */
130 #define gst_rg_analysis_parent_class parent_class
131 G_DEFINE_TYPE (GstRgAnalysis, gst_rg_analysis, GST_TYPE_BASE_TRANSFORM);
/* Forward declarations. */

/* GObject property accessors. */
133 static void gst_rg_analysis_set_property (GObject * object, guint prop_id,
134 const GValue * value, GParamSpec * pspec);
135 static void gst_rg_analysis_get_property (GObject * object, guint prop_id,
136 GValue * value, GParamSpec * pspec);
/* GstBaseTransform virtual method implementations (installed in class_init). */
138 static gboolean gst_rg_analysis_start (GstBaseTransform * base);
139 static gboolean gst_rg_analysis_set_caps (GstBaseTransform * base,
140 GstCaps * incaps, GstCaps * outcaps);
141 static GstFlowReturn gst_rg_analysis_transform_ip (GstBaseTransform * base,
143 static gboolean gst_rg_analysis_sink_event (GstBaseTransform * base,
145 static gboolean gst_rg_analysis_stop (GstBaseTransform * base);
/* Internal helpers for tag inspection, EOS handling and result tag lists. */
147 static void gst_rg_analysis_handle_tags (GstRgAnalysis * filter,
148 const GstTagList * tag_list);
149 static void gst_rg_analysis_handle_eos (GstRgAnalysis * filter);
150 static gboolean gst_rg_analysis_track_result (GstRgAnalysis * filter,
151 GstTagList ** tag_list);
152 static gboolean gst_rg_analysis_album_result (GstRgAnalysis * filter,
153 GstTagList ** tag_list);
/* Class initializer.
 *
 * Installs the GObject property accessors and the "num-tracks", "forced",
 * "reference-level" and "message" properties, hooks up the GstBaseTransform
 * virtual methods, registers the src/sink pad templates and the element
 * details, and initializes the debug category. */
156 gst_rg_analysis_class_init (GstRgAnalysisClass * klass)
158 GObjectClass *gobject_class;
159 GstElementClass *element_class;
160 GstBaseTransformClass *trans_class;
162 gobject_class = (GObjectClass *) klass;
163 element_class = (GstElementClass *) klass;
165 gobject_class->set_property = gst_rg_analysis_set_property;
166 gobject_class->get_property = gst_rg_analysis_get_property;
169 * GstRgAnalysis:num-tracks:
171 * Number of remaining album tracks.
173 * Analyzing several streams sequentially and assigning them a common result
174 * gain is known as "album processing". If this gain is used during playback
175 * (by switching to "album mode"), all tracks of an album receive the same
176 * amplification. This keeps the relative volume levels between the tracks
177 * intact. To enable this, set this property to the number of streams that
178 * will be processed as album tracks.
180 * Every time an EOS event is received, the value of this property is
181 * decremented by one. As it reaches zero, it is assumed that the last track
182 * of the album finished. The tag list for the final stream will contain the
183 * additional tags #GST_TAG_ALBUM_GAIN and #GST_TAG_ALBUM_PEAK. All other
184 * streams just get the two track tags posted because the values for the album
185 * tags are not known before all tracks are analyzed. Applications need to
186 * ensure that the album gain and peak values are also associated with the
187 * other tracks when storing the results.
189 * If the total number of album tracks is unknown beforehand, just ensure that
190 * the value is greater than 1 before each track starts. Then before the end
191 * of the last track, set it to the value 1.
193 * To perform album processing, the element has to preserve data between
194 * streams. This cannot survive a state change to the NULL or READY state.
195 * If you change your pipeline's state to NULL or READY between tracks, lock
196 * the element's state using gst_element_set_locked_state() when it is in
199 g_object_class_install_property (gobject_class, PROP_NUM_TRACKS,
200 g_param_spec_int ("num-tracks", "Number of album tracks",
201 "Number of remaining album tracks", 0, G_MAXINT, 0,
202 G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
204 * GstRgAnalysis:forced:
206 * Whether to analyze streams even when ReplayGain tags exist.
208 * For assisting transcoder/converter applications, the element can silently
209 * skip the processing of streams that already contain the necessary tags.
210 * Data will flow as usual but the element will not consume CPU time and will
211 * not generate result tags. To enable possible skipping, set this property
214 * If used in conjunction with <link linkend="GstRgAnalysis--num-tracks">album
215 * processing</link>, the element will skip the number of remaining album
216 * tracks if a full set of tags is found for the first track. If a subsequent
217 * track of the album is missing tags, processing cannot start again. If this
218 * is undesired, the application has to scan all files beforehand and enable
219 * forcing of processing if needed.
221 g_object_class_install_property (gobject_class, PROP_FORCED,
222 g_param_spec_boolean ("forced", "Forced",
223 "Analyze even if ReplayGain tags exist",
224 FORCED_DEFAULT, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
226 * GstRgAnalysis:reference-level:
228 * Reference level [dB].
230 * Analyzing the ReplayGain pink noise reference waveform computes a result of
231 * +6 dB instead of the expected 0 dB. This is because the default reference
232 * level is 89 dB. To obtain values as lined out in the original proposal of
233 * ReplayGain, set this property to 83.
235 * Almost all software uses 89 dB as a reference however, and this value has
236 * become the new official value. That is to say, while the change has been
237 * acclaimed by the author of the ReplayGain proposal, the <ulink
238 * url="http://replaygain.org">webpage</ulink> is still outdated at the time
241 * The value was changed because the original proposal recommends a default
242 * pre-amp value of +6 dB for playback. This seemed a bit odd, as it means
243 * that the algorithm has the general tendency to produce adjustment values
244 * that are 6 dB too low. Bumping the reference level by 6 dB compensated for
247 * The problem of the reference level being ambiguous for lack of concise
248 * standardization is to be solved by adopting the #GST_TAG_REFERENCE_LEVEL
249 * tag, which allows to store the used value alongside the gain values.
251 g_object_class_install_property (gobject_class, PROP_REFERENCE_LEVEL,
252 g_param_spec_double ("reference-level", "Reference level",
253 "Reference level [dB]", 0.0, 150., RG_REFERENCE_LEVEL,
254 G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
/* "message": boolean consulted by gst_rg_analysis_post_message() before it
 * posts an element message.  Unlike the other properties it is installed with
 * G_PARAM_CONSTRUCT. */
256 g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_MESSAGE,
257 g_param_spec_boolean ("message", "Message",
258 "Post statics messages",
260 G_PARAM_READWRITE | G_PARAM_CONSTRUCT | G_PARAM_STATIC_STRINGS));
/* Hook up the GstBaseTransform virtual methods implemented in this file. */
262 trans_class = (GstBaseTransformClass *) klass;
263 trans_class->start = GST_DEBUG_FUNCPTR (gst_rg_analysis_start);
264 trans_class->set_caps = GST_DEBUG_FUNCPTR (gst_rg_analysis_set_caps);
265 trans_class->transform_ip = GST_DEBUG_FUNCPTR (gst_rg_analysis_transform_ip);
266 trans_class->sink_event = GST_DEBUG_FUNCPTR (gst_rg_analysis_sink_event);
267 trans_class->stop = GST_DEBUG_FUNCPTR (gst_rg_analysis_stop);
268 trans_class->passthrough_on_same_caps = TRUE;
/* Pad templates and element metadata. */
270 gst_element_class_add_pad_template (element_class,
271 gst_static_pad_template_get (&src_factory));
272 gst_element_class_add_pad_template (element_class,
273 gst_static_pad_template_get (&sink_factory));
274 gst_element_class_set_details_simple (element_class, "ReplayGain analysis",
275 "Filter/Analyzer/Audio",
276 "Perform the ReplayGain analysis",
277 "Ren\xc3\xa9 Stadler <mail@renestadler.de>");
279 GST_DEBUG_CATEGORY_INIT (gst_rg_analysis_debug, "rganalysis", 0,
280 "ReplayGain analysis element");
/* Instance initializer.
 *
 * Marks the base transform as gap-aware, sets the property-backed fields to
 * their defaults and leaves the analysis function unset; it is chosen later
 * in gst_rg_analysis_set_caps(). */
284 gst_rg_analysis_init (GstRgAnalysis * filter)
286 GstBaseTransform *base = GST_BASE_TRANSFORM (filter);
288 gst_base_transform_set_gap_aware (base, TRUE);
290 filter->num_tracks = 0;
291 filter->forced = FORCED_DEFAULT;
292 filter->message = DEFAULT_MESSAGE;
293 filter->reference_level = RG_REFERENCE_LEVEL;
296 filter->analyze = NULL;
/* GObject set_property implementation.
 *
 * Copies the new value of the property identified by @prop_id from @value
 * into the corresponding field of the element (num_tracks, forced,
 * reference_level or message).  The update is done with the object lock held.
 * Unknown property IDs are reported through
 * G_OBJECT_WARN_INVALID_PROPERTY_ID(). */
300 gst_rg_analysis_set_property (GObject * object, guint prop_id,
301 const GValue * value, GParamSpec * pspec)
303 GstRgAnalysis *filter = GST_RG_ANALYSIS (object);
305 GST_OBJECT_LOCK (filter);
307 case PROP_NUM_TRACKS:
308 filter->num_tracks = g_value_get_int (value);
311 filter->forced = g_value_get_boolean (value);
313 case PROP_REFERENCE_LEVEL:
314 filter->reference_level = g_value_get_double (value);
317 filter->message = g_value_get_boolean (value);
320 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
323 GST_OBJECT_UNLOCK (filter);
/* GObject get_property implementation.
 *
 * Stores the current value of the property identified by @prop_id
 * (num_tracks, forced, reference_level or message) into @value.  The read is
 * done with the object lock held.  Unknown property IDs are reported through
 * G_OBJECT_WARN_INVALID_PROPERTY_ID(). */
327 gst_rg_analysis_get_property (GObject * object, guint prop_id,
328 GValue * value, GParamSpec * pspec)
330 GstRgAnalysis *filter = GST_RG_ANALYSIS (object);
332 GST_OBJECT_LOCK (filter);
334 case PROP_NUM_TRACKS:
335 g_value_set_int (value, filter->num_tracks);
338 g_value_set_boolean (value, filter->forced);
340 case PROP_REFERENCE_LEVEL:
341 g_value_set_double (value, filter->reference_level);
344 g_value_set_boolean (value, filter->message);
347 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
350 GST_OBJECT_UNLOCK (filter);
/* Callback registered with rg_analysis_init_silence_detection() in
 * gst_rg_analysis_start().
 *
 * @rganalysis is the element itself, passed as an untyped pointer.  When the
 * "message" property is enabled, posts an element message whose structure is
 * named "rganalysis" and carries the fields "timestamp" and "duration"
 * (G_TYPE_UINT64) and "rglevel" (G_TYPE_DOUBLE).  Does nothing otherwise. */
354 gst_rg_analysis_post_message (gpointer rganalysis, GstClockTime timestamp,
355 GstClockTime duration, gdouble rglevel)
357 GstRgAnalysis *filter = GST_RG_ANALYSIS (rganalysis);
358 if (filter->message) {
361 m = gst_message_new_element (GST_OBJECT_CAST (rganalysis),
362 gst_structure_new ("rganalysis",
363 "timestamp", G_TYPE_UINT64, timestamp,
364 "duration", G_TYPE_UINT64, duration,
365 "rglevel", G_TYPE_DOUBLE, rglevel, NULL));
367 gst_element_post_message (GST_ELEMENT_CAST (rganalysis), m);
/* GstBaseTransform::start implementation.
 *
 * Clears the skip/ignore state and the "tag already seen" flags, creates a
 * fresh analysis context, registers gst_rg_analysis_post_message() as its
 * silence-detection callback (under the object lock) and unsets the analysis
 * function until caps are negotiated. */
373 gst_rg_analysis_start (GstBaseTransform * base)
375 GstRgAnalysis *filter = GST_RG_ANALYSIS (base);
377 filter->ignore_tags = FALSE;
378 filter->skip = FALSE;
379 filter->has_track_gain = FALSE;
380 filter->has_track_peak = FALSE;
381 filter->has_album_gain = FALSE;
382 filter->has_album_peak = FALSE;
384 filter->ctx = rg_analysis_new ();
385 GST_OBJECT_LOCK (filter);
386 rg_analysis_init_silence_detection (filter->ctx, gst_rg_analysis_post_message,
388 GST_OBJECT_UNLOCK (filter);
389 filter->analyze = NULL;
391 GST_LOG_OBJECT (filter, "started");
/* GstBaseTransform::set_caps implementation.
 *
 * Requires an analysis context (fails with FALSE otherwise).  Parses @in_caps
 * into audio info, configures the sample rate on the context, rejects channel
 * counts other than 1 or 2, and picks the sample depth and the matching
 * rg_analysis_analyze_{mono,stereo}_{float,int16} function for the F32 or S16
 * format.  For caps that cannot be handled, the analysis function is unset
 * and a CORE/NEGOTIATION element error is raised. */
397 gst_rg_analysis_set_caps (GstBaseTransform * base, GstCaps * in_caps,
400 GstRgAnalysis *filter = GST_RG_ANALYSIS (base);
404 g_return_val_if_fail (filter->ctx != NULL, FALSE);
406 GST_DEBUG_OBJECT (filter,
407 "set_caps in %" GST_PTR_FORMAT " out %" GST_PTR_FORMAT,
410 if (!gst_audio_info_from_caps (&info, in_caps))
413 rate = GST_AUDIO_INFO_RATE (&info);
415 if (!rg_analysis_set_sample_rate (filter->ctx, rate))
418 channels = GST_AUDIO_INFO_CHANNELS (&info);
420 if (channels < 1 || channels > 2)
423 switch (GST_AUDIO_INFO_FORMAT (&info)) {
424 case GST_AUDIO_FORMAT_F32:
425 /* The depth is not variable for float formats of course. It just
426 * makes the transform function nice and simple if the
427 * rg_analysis_analyze_* functions have a common signature. */
428 filter->depth = sizeof (gfloat) * 8;
431 filter->analyze = rg_analysis_analyze_mono_float;
433 filter->analyze = rg_analysis_analyze_stereo_float;
436 case GST_AUDIO_FORMAT_S16:
437 filter->depth = sizeof (gint16) * 8;
440 filter->analyze = rg_analysis_analyze_mono_int16;
442 filter->analyze = rg_analysis_analyze_stereo_int16;
453 filter->analyze = NULL;
454 GST_ELEMENT_ERROR (filter, CORE, NEGOTIATION,
455 ("Invalid incoming caps: %" GST_PTR_FORMAT, in_caps), (NULL));
/* GstBaseTransform::transform_ip implementation.
 *
 * Requires an analysis context (else GST_FLOW_FLUSHING) and a negotiated
 * analysis function (else GST_FLOW_NOT_NEGOTIATED).  Maps @buf read-only,
 * announces the buffer timestamp to the analysis context, feeds the mapped
 * bytes to the selected analysis function and unmaps the buffer again.
 *
 * NOTE(review): the return value of gst_buffer_map() is not checked before
 * map.data / map.size are used -- confirm a failed map cannot occur here. */
461 gst_rg_analysis_transform_ip (GstBaseTransform * base, GstBuffer * buf)
463 GstRgAnalysis *filter = GST_RG_ANALYSIS (base);
466 g_return_val_if_fail (filter->ctx != NULL, GST_FLOW_FLUSHING);
467 g_return_val_if_fail (filter->analyze != NULL, GST_FLOW_NOT_NEGOTIATED);
472 gst_buffer_map (buf, &map, GST_MAP_READ);
473 GST_LOG_OBJECT (filter, "processing buffer of size %" G_GSIZE_FORMAT,
476 rg_analysis_start_buffer (filter->ctx, GST_BUFFER_TIMESTAMP (buf));
477 filter->analyze (filter->ctx, map.data, map.size, filter->depth);
479 gst_buffer_unmap (buf, &map);
/* GstBaseTransform::sink_event implementation.
 *
 * Inspects sink pad events before handing them to the parent class: on EOS
 * the running analysis is finalized via gst_rg_analysis_handle_eos(), and the
 * tag list of a tag event is passed to gst_rg_analysis_handle_tags().  The
 * event itself is then forwarded to the parent class' sink_event handler,
 * whose result is returned.  Returns TRUE without further handling when no
 * analysis context exists. */
485 gst_rg_analysis_sink_event (GstBaseTransform * base, GstEvent * event)
487 GstRgAnalysis *filter = GST_RG_ANALYSIS (base);
489 g_return_val_if_fail (filter->ctx != NULL, TRUE);
491 switch (GST_EVENT_TYPE (event)) {
495 GST_LOG_OBJECT (filter, "received EOS event");
497 gst_rg_analysis_handle_eos (filter);
499 GST_LOG_OBJECT (filter, "passing on EOS event");
505 GstTagList *tag_list;
507 /* The reference to the tag list is borrowed. */
508 gst_event_parse_tag (event, &tag_list);
509 gst_rg_analysis_handle_tags (filter, tag_list);
517 return GST_BASE_TRANSFORM_CLASS (parent_class)->sink_event (base, event);
/* GstBaseTransform::stop implementation.
 *
 * Destroys the analysis context created in gst_rg_analysis_start().  Fails
 * with FALSE if no context exists. */
521 gst_rg_analysis_stop (GstBaseTransform * base)
523 GstRgAnalysis *filter = GST_RG_ANALYSIS (base);
525 g_return_val_if_fail (filter->ctx != NULL, FALSE);
527 rg_analysis_destroy (filter->ctx);
530 GST_LOG_OBJECT (filter, "stopped");
/* Examines an incoming tag list for existing ReplayGain tags in order to
 * decide whether the analysis of the stream (or album) can be skipped.
 *
 * Album processing is considered active while num_tracks > 0; outside of it
 * ignore_tags is cleared first.  Tag events are ignored (only logged) while
 * the element is already skipping or while ignore_tags is set.  Otherwise the
 * has_track_* and has_album_* flags accumulate over successive tag events
 * until the track tags and, for album processing, the album tags are
 * complete.  With complete tags and "forced" set this is only logged
 * ("processing anyway"); on the non-forced path the analysis context is
 * reset and it is logged that the track/album will not be processed. */
536 gst_rg_analysis_handle_tags (GstRgAnalysis * filter,
537 const GstTagList * tag_list)
539 gboolean album_processing = (filter->num_tracks > 0);
542 if (!album_processing)
543 filter->ignore_tags = FALSE;
545 if (filter->skip && album_processing) {
546 GST_DEBUG_OBJECT (filter, "ignoring tag event: skipping album");
548 } else if (filter->skip) {
549 GST_DEBUG_OBJECT (filter, "ignoring tag event: skipping track");
551 } else if (filter->ignore_tags) {
552 GST_DEBUG_OBJECT (filter, "ignoring tag event: cannot skip anyways");
556 filter->has_track_gain |= gst_tag_list_get_double (tag_list,
557 GST_TAG_TRACK_GAIN, &dummy);
558 filter->has_track_peak |= gst_tag_list_get_double (tag_list,
559 GST_TAG_TRACK_PEAK, &dummy);
560 filter->has_album_gain |= gst_tag_list_get_double (tag_list,
561 GST_TAG_ALBUM_GAIN, &dummy);
562 filter->has_album_peak |= gst_tag_list_get_double (tag_list,
563 GST_TAG_ALBUM_PEAK, &dummy);
565 if (!(filter->has_track_gain && filter->has_track_peak)) {
566 GST_DEBUG_OBJECT (filter, "track tags not complete yet");
570 if (album_processing && !(filter->has_album_gain && filter->has_album_peak)) {
571 GST_DEBUG_OBJECT (filter, "album tags not complete yet");
575 if (filter->forced) {
576 GST_DEBUG_OBJECT (filter,
577 "existing tags are sufficient, but processing anyway (forced)");
582 rg_analysis_reset (filter->ctx);
584 if (!album_processing) {
585 GST_DEBUG_OBJECT (filter,
586 "existing tags are sufficient, will not process this track");
588 GST_DEBUG_OBJECT (filter,
589 "existing tags are sufficient, will not process this album");
/* Finalizes the analysis of the current stream when EOS arrives.
 *
 * State handling: the per-track tag flags are always cleared.  When the last
 * album track ends (num_tracks == 1) the skip/ignore state and the album tag
 * flags are cleared too; otherwise skip is cleared unless an album is being
 * skipped.
 *
 * Results: the track result (and the album result where applicable) is
 * collected into a tag list.  If either succeeded, the reference level is
 * added and the list is pushed downstream as a tag event.  Outside of album
 * processing the album state of the analysis context is reset.
 *
 * Album bookkeeping: during album processing num_tracks is decremented and a
 * notification for the "num-tracks" property is emitted.
 *
 * NOTE(review): the debug message below formats num-tracks with %u while the
 * property is installed with g_param_spec_int() -- confirm the argument type
 * matches the conversion specifier. */
594 gst_rg_analysis_handle_eos (GstRgAnalysis * filter)
596 gboolean album_processing = (filter->num_tracks > 0);
597 gboolean album_finished = (filter->num_tracks == 1);
598 gboolean album_skipping = album_processing && filter->skip;
600 filter->has_track_gain = FALSE;
601 filter->has_track_peak = FALSE;
603 if (album_finished) {
604 filter->ignore_tags = FALSE;
605 filter->skip = FALSE;
606 filter->has_album_gain = FALSE;
607 filter->has_album_peak = FALSE;
608 } else if (!album_skipping) {
609 filter->skip = FALSE;
612 /* We might have just fully processed a track because it has
613 * incomplete tags. If we do album processing and allow skipping
614 * (not forced), prevent switching to skipping if a later track with
615 * full tags comes along: */
616 if (!filter->forced && album_processing && !album_finished)
617 filter->ignore_tags = TRUE;
620 GstTagList *tag_list = NULL;
621 gboolean track_success;
622 gboolean album_success = FALSE;
624 track_success = gst_rg_analysis_track_result (filter, &tag_list);
627 album_success = gst_rg_analysis_album_result (filter, &tag_list);
628 else if (!album_processing)
629 rg_analysis_reset_album (filter->ctx);
631 if (track_success || album_success) {
632 GST_LOG_OBJECT (filter, "posting tag list with results");
633 gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
634 GST_TAG_REFERENCE_LEVEL, filter->reference_level, NULL);
635 /* This steals our reference to the list: */
636 gst_pad_push_event (GST_BASE_TRANSFORM_SRC_PAD (GST_BASE_TRANSFORM
637 (filter)), gst_event_new_tag (tag_list));
641 if (album_processing) {
642 filter->num_tracks--;
644 if (!album_finished) {
645 GST_DEBUG_OBJECT (filter, "album not finished yet (num-tracks is now %u)",
648 GST_DEBUG_OBJECT (filter, "album finished (num-tracks is now 0)");
652 if (album_processing)
653 g_object_notify (G_OBJECT (filter), "num-tracks");
/* Retrieves the track result from the analysis context and stores it in
 * *@tag_list.
 *
 * The gain reported by rg_analysis_track_result() is shifted by the
 * difference between the configured reference level and RG_REFERENCE_LEVEL.
 * *@tag_list is created on demand if it is still NULL, and receives
 * GST_TAG_TRACK_PEAK and GST_TAG_TRACK_GAIN.  A track that was too short to
 * analyze is only logged.
 *
 * Returns the success flag reported by rg_analysis_track_result(). */
657 gst_rg_analysis_track_result (GstRgAnalysis * filter, GstTagList ** tag_list)
659 gboolean track_success;
660 gdouble track_gain, track_peak;
662 track_success = rg_analysis_track_result (filter->ctx, &track_gain,
666 track_gain += filter->reference_level - RG_REFERENCE_LEVEL;
667 GST_INFO_OBJECT (filter, "track gain is %+.2f dB, peak %.6f", track_gain,
670 GST_INFO_OBJECT (filter, "track was too short to analyze");
674 if (*tag_list == NULL)
675 *tag_list = gst_tag_list_new_empty ();
676 gst_tag_list_add (*tag_list, GST_TAG_MERGE_APPEND,
677 GST_TAG_TRACK_PEAK, track_peak, GST_TAG_TRACK_GAIN, track_gain, NULL);
680 return track_success;
/* Retrieves the album result from the analysis context and stores it in
 * *@tag_list.
 *
 * The gain reported by rg_analysis_album_result() is shifted by the
 * difference between the configured reference level and RG_REFERENCE_LEVEL.
 * *@tag_list is created on demand if it is still NULL, and receives
 * GST_TAG_ALBUM_PEAK and GST_TAG_ALBUM_GAIN.  An album that was too short to
 * analyze is only logged.
 *
 * Returns the success flag reported by rg_analysis_album_result(). */
684 gst_rg_analysis_album_result (GstRgAnalysis * filter, GstTagList ** tag_list)
686 gboolean album_success;
687 gdouble album_gain, album_peak;
689 album_success = rg_analysis_album_result (filter->ctx, &album_gain,
693 album_gain += filter->reference_level - RG_REFERENCE_LEVEL;
694 GST_INFO_OBJECT (filter, "album gain is %+.2f dB, peak %.6f", album_gain,
697 GST_INFO_OBJECT (filter, "album was too short to analyze");
701 if (*tag_list == NULL)
702 *tag_list = gst_tag_list_new_empty ();
703 gst_tag_list_add (*tag_list, GST_TAG_MERGE_APPEND,
704 GST_TAG_ALBUM_PEAK, album_peak, GST_TAG_ALBUM_GAIN, album_gain, NULL);
707 return album_success;