/* GStreamer ReplayGain analysis
 *
 * Copyright (C) 2006 Rene Stadler <mail@renestadler.de>
 *
 * gstrganalysis.c: Element that performs the ReplayGain analysis
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */

/**
 * SECTION:element-rganalysis
 * @see_also: #GstRgVolume
 *
 * This element analyzes raw audio sample data in accordance with the proposed
 * <ulink url="http://replaygain.org">ReplayGain standard</ulink> for
 * calculating the ideal replay gain for music tracks and albums.  The element
 * is designed as a pass-through filter that never modifies any data.  As it
 * receives an EOS event, it finalizes the ongoing analysis and generates a tag
 * list containing the results.  It is sent downstream with a tag event and
 * posted on the message bus with a tag message.  The EOS event is forwarded as
 * normal afterwards.  Result tag lists at least contain the tags
 * #GST_TAG_TRACK_GAIN, #GST_TAG_TRACK_PEAK and #GST_TAG_REFERENCE_LEVEL.
 *
 * Because the generated metadata tags become available at the end of streams,
 * downstream muxer and encoder elements are normally unable to save them in
 * their output since they generally save metadata in the file header.
 * Therefore, it is often necessary that applications read the results in a bus
 * event handler for the tag message.  Obtaining the values this way is always
 * needed for <link linkend="GstRgAnalysis--num-tracks">album processing</link>
 * since the album gain and peak values need to be associated with all tracks of
 * an album, not just the last one.
 *
 * <refsect2>
 * <title>Example launch lines</title>
 * |[
 * gst-launch -t audiotestsrc wave=sine num-buffers=512 ! rganalysis ! fakesink
 * ]| Analyze a simple test waveform
 * |[
 * gst-launch -t filesrc location=filename.ext ! decodebin \
 *     ! audioconvert ! audioresample ! rganalysis ! fakesink
 * ]| Analyze a given file
 * |[
 * gst-launch -t gnomevfssrc location=http://replaygain.hydrogenaudio.org/ref_pink.wav \
 *     ! wavparse ! rganalysis ! fakesink
 * ]| Analyze the pink noise reference file
 * <para>
 * The above launch line yields a result gain of +6 dB (instead of the expected
 * +0 dB).  This is not in error, refer to the #GstRgAnalysis:reference-level
 * property documentation for more information.
 * </para>
 * </refsect2>
 * <refsect2>
 * <title>Acknowledgements</title>
 * <para>
 * This element is based on code used in the <ulink
 * url="http://sjeng.org/vorbisgain.html">vorbisgain</ulink> program and many
 * others.  The relevant parts are copyrighted by David Robinson, Glen Sawyer
 * and Frank Klemm.
 * </para>
 * </refsect2>
 */
#include <gst/base/gstbasetransform.h>
#include <gst/audio/audio.h>

#include "gstrganalysis.h"
#include "replaygain.h"
GST_DEBUG_CATEGORY_STATIC (gst_rg_analysis_debug);
#define GST_CAT_DEFAULT gst_rg_analysis_debug

/* Default property values. */
#define FORCED_DEFAULT TRUE
#define DEFAULT_MESSAGE FALSE

/* Identifiers of the properties installed in the class initializer and
 * dispatched on in the property setter/getter.  GObject requires property
 * ids to be greater than zero, hence the PROP_0 placeholder. */
enum
{
  PROP_0,
  PROP_NUM_TRACKS,
  PROP_FORCED,
  PROP_REFERENCE_LEVEL,
  PROP_MESSAGE
};
/* The ReplayGain algorithm is intended for use with mono and stereo
 * audio.  The used implementation has filter coefficients for the
 * "usual" sample rates in the 8000 to 48000 Hz range.
 *
 * The description starts with the media type: a caps string without a
 * structure name cannot be parsed, which would leave both pad templates
 * without usable caps. */
#define REPLAY_GAIN_CAPS \
  "audio/x-raw, " \
  "format = (string) { " GST_AUDIO_NE (F32) "," GST_AUDIO_NE (S16) " }, " \
  "channels = (int) { 1, 2 }, " \
  "rate = (int) { 8000, 11025, 12000, 16000, 22050, 24000, 32000, " \
  "44100, 48000 }"
112 static GstStaticPadTemplate sink_factory = GST_STATIC_PAD_TEMPLATE ("sink",
115 GST_STATIC_CAPS (REPLAY_GAIN_CAPS));
117 static GstStaticPadTemplate src_factory = GST_STATIC_PAD_TEMPLATE ("src",
120 GST_STATIC_CAPS (REPLAY_GAIN_CAPS));
/* G_DEFINE_TYPE names the parent class pointer after the type prefix; the
 * alias lets the rest of the file refer to it simply as parent_class. */
#define gst_rg_analysis_parent_class parent_class
G_DEFINE_TYPE (GstRgAnalysis, gst_rg_analysis, GST_TYPE_BASE_TRANSFORM);
125 static void gst_rg_analysis_set_property (GObject * object, guint prop_id,
126 const GValue * value, GParamSpec * pspec);
127 static void gst_rg_analysis_get_property (GObject * object, guint prop_id,
128 GValue * value, GParamSpec * pspec);
130 static gboolean gst_rg_analysis_start (GstBaseTransform * base);
131 static gboolean gst_rg_analysis_set_caps (GstBaseTransform * base,
132 GstCaps * incaps, GstCaps * outcaps);
133 static GstFlowReturn gst_rg_analysis_transform_ip (GstBaseTransform * base,
135 static gboolean gst_rg_analysis_sink_event (GstBaseTransform * base,
137 static gboolean gst_rg_analysis_stop (GstBaseTransform * base);
139 static void gst_rg_analysis_handle_tags (GstRgAnalysis * filter,
140 const GstTagList * tag_list);
141 static void gst_rg_analysis_handle_eos (GstRgAnalysis * filter);
142 static gboolean gst_rg_analysis_track_result (GstRgAnalysis * filter,
143 GstTagList ** tag_list);
144 static gboolean gst_rg_analysis_album_result (GstRgAnalysis * filter,
145 GstTagList ** tag_list);
148 gst_rg_analysis_class_init (GstRgAnalysisClass * klass)
150 GObjectClass *gobject_class;
151 GstElementClass *element_class;
152 GstBaseTransformClass *trans_class;
154 gobject_class = (GObjectClass *) klass;
155 element_class = (GstElementClass *) klass;
157 gobject_class->set_property = gst_rg_analysis_set_property;
158 gobject_class->get_property = gst_rg_analysis_get_property;
161 * GstRgAnalysis:num-tracks:
163 * Number of remaining album tracks.
165 * Analyzing several streams sequentially and assigning them a common result
166 * gain is known as "album processing". If this gain is used during playback
167 * (by switching to "album mode"), all tracks of an album receive the same
168 * amplification. This keeps the relative volume levels between the tracks
169 * intact. To enable this, set this property to the number of streams that
170 * will be processed as album tracks.
172 * Every time an EOS event is received, the value of this property is
173 * decremented by one. As it reaches zero, it is assumed that the last track
174 * of the album finished. The tag list for the final stream will contain the
175 * additional tags #GST_TAG_ALBUM_GAIN and #GST_TAG_ALBUM_PEAK. All other
176 * streams just get the two track tags posted because the values for the album
177 * tags are not known before all tracks are analyzed. Applications need to
178 * ensure that the album gain and peak values are also associated with the
179 * other tracks when storing the results.
181 * If the total number of album tracks is unknown beforehand, just ensure that
182 * the value is greater than 1 before each track starts. Then before the end
183 * of the last track, set it to the value 1.
185 * To perform album processing, the element has to preserve data between
186 * streams. This cannot survive a state change to the NULL or READY state.
187 * If you change your pipeline's state to NULL or READY between tracks, lock
188 * the element's state using gst_element_set_locked_state() when it is in
191 g_object_class_install_property (gobject_class, PROP_NUM_TRACKS,
192 g_param_spec_int ("num-tracks", "Number of album tracks",
193 "Number of remaining album tracks", 0, G_MAXINT, 0,
194 G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
196 * GstRgAnalysis:forced:
198 * Whether to analyze streams even when ReplayGain tags exist.
200 * For assisting transcoder/converter applications, the element can silently
201 * skip the processing of streams that already contain the necessary tags.
202 * Data will flow as usual but the element will not consume CPU time and will
203 * not generate result tags. To enable possible skipping, set this property
206 * If used in conjunction with <link linkend="GstRgAnalysis--num-tracks">album
207 * processing</link>, the element will skip the number of remaining album
208 * tracks if a full set of tags is found for the first track. If a subsequent
209 * track of the album is missing tags, processing cannot start again. If this
210 * is undesired, the application has to scan all files beforehand and enable
211 * forcing of processing if needed.
213 g_object_class_install_property (gobject_class, PROP_FORCED,
214 g_param_spec_boolean ("forced", "Forced",
215 "Analyze even if ReplayGain tags exist",
216 FORCED_DEFAULT, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
218 * GstRgAnalysis:reference-level:
220 * Reference level [dB].
222 * Analyzing the ReplayGain pink noise reference waveform computes a result of
223 * +6 dB instead of the expected 0 dB. This is because the default reference
224 * level is 89 dB. To obtain values as lined out in the original proposal of
225 * ReplayGain, set this property to 83.
227 * Almost all software uses 89 dB as a reference however, and this value has
228 * become the new official value. That is to say, while the change has been
229 * acclaimed by the author of the ReplayGain proposal, the <ulink
230 * url="http://replaygain.org">webpage</ulink> is still outdated at the time
233 * The value was changed because the original proposal recommends a default
234 * pre-amp value of +6 dB for playback. This seemed a bit odd, as it means
235 * that the algorithm has the general tendency to produce adjustment values
236 * that are 6 dB too low. Bumping the reference level by 6 dB compensated for
239 * The problem of the reference level being ambiguous for lack of concise
240 * standardization is to be solved by adopting the #GST_TAG_REFERENCE_LEVEL
241 * tag, which allows to store the used value alongside the gain values.
243 g_object_class_install_property (gobject_class, PROP_REFERENCE_LEVEL,
244 g_param_spec_double ("reference-level", "Reference level",
245 "Reference level [dB]", 0.0, 150., RG_REFERENCE_LEVEL,
246 G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
248 g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_MESSAGE,
249 g_param_spec_boolean ("message", "Message",
250 "Post statics messages",
252 G_PARAM_READWRITE | G_PARAM_CONSTRUCT | G_PARAM_STATIC_STRINGS));
254 trans_class = (GstBaseTransformClass *) klass;
255 trans_class->start = GST_DEBUG_FUNCPTR (gst_rg_analysis_start);
256 trans_class->set_caps = GST_DEBUG_FUNCPTR (gst_rg_analysis_set_caps);
257 trans_class->transform_ip = GST_DEBUG_FUNCPTR (gst_rg_analysis_transform_ip);
258 trans_class->sink_event = GST_DEBUG_FUNCPTR (gst_rg_analysis_sink_event);
259 trans_class->stop = GST_DEBUG_FUNCPTR (gst_rg_analysis_stop);
260 trans_class->passthrough_on_same_caps = TRUE;
262 gst_element_class_add_pad_template (element_class,
263 gst_static_pad_template_get (&src_factory));
264 gst_element_class_add_pad_template (element_class,
265 gst_static_pad_template_get (&sink_factory));
266 gst_element_class_set_details_simple (element_class, "ReplayGain analysis",
267 "Filter/Analyzer/Audio",
268 "Perform the ReplayGain analysis",
269 "Ren\xc3\xa9 Stadler <mail@renestadler.de>");
271 GST_DEBUG_CATEGORY_INIT (gst_rg_analysis_debug, "rganalysis", 0,
272 "ReplayGain analysis element");
276 gst_rg_analysis_init (GstRgAnalysis * filter)
278 GstBaseTransform *base = GST_BASE_TRANSFORM (filter);
280 gst_base_transform_set_gap_aware (base, TRUE);
282 filter->num_tracks = 0;
283 filter->forced = FORCED_DEFAULT;
284 filter->message = DEFAULT_MESSAGE;
285 filter->reference_level = RG_REFERENCE_LEVEL;
288 filter->analyze = NULL;
292 gst_rg_analysis_set_property (GObject * object, guint prop_id,
293 const GValue * value, GParamSpec * pspec)
295 GstRgAnalysis *filter = GST_RG_ANALYSIS (object);
297 GST_OBJECT_LOCK (filter);
299 case PROP_NUM_TRACKS:
300 filter->num_tracks = g_value_get_int (value);
303 filter->forced = g_value_get_boolean (value);
305 case PROP_REFERENCE_LEVEL:
306 filter->reference_level = g_value_get_double (value);
309 filter->message = g_value_get_boolean (value);
312 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
315 GST_OBJECT_UNLOCK (filter);
319 gst_rg_analysis_get_property (GObject * object, guint prop_id,
320 GValue * value, GParamSpec * pspec)
322 GstRgAnalysis *filter = GST_RG_ANALYSIS (object);
324 GST_OBJECT_LOCK (filter);
326 case PROP_NUM_TRACKS:
327 g_value_set_int (value, filter->num_tracks);
330 g_value_set_boolean (value, filter->forced);
332 case PROP_REFERENCE_LEVEL:
333 g_value_set_double (value, filter->reference_level);
336 g_value_set_boolean (value, filter->message);
339 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
342 GST_OBJECT_UNLOCK (filter);
346 gst_rg_analysis_post_message (gpointer rganalysis, GstClockTime timestamp,
347 GstClockTime duration, gdouble rglevel)
349 GstRgAnalysis *filter = GST_RG_ANALYSIS (rganalysis);
350 if (filter->message) {
353 m = gst_message_new_element (GST_OBJECT_CAST (rganalysis),
354 gst_structure_new ("rganalysis",
355 "timestamp", G_TYPE_UINT64, timestamp,
356 "duration", G_TYPE_UINT64, duration,
357 "rglevel", G_TYPE_DOUBLE, rglevel, NULL));
359 gst_element_post_message (GST_ELEMENT_CAST (rganalysis), m);
365 gst_rg_analysis_start (GstBaseTransform * base)
367 GstRgAnalysis *filter = GST_RG_ANALYSIS (base);
369 filter->ignore_tags = FALSE;
370 filter->skip = FALSE;
371 filter->has_track_gain = FALSE;
372 filter->has_track_peak = FALSE;
373 filter->has_album_gain = FALSE;
374 filter->has_album_peak = FALSE;
376 filter->ctx = rg_analysis_new ();
377 GST_OBJECT_LOCK (filter);
378 rg_analysis_init_silence_detection (filter->ctx, gst_rg_analysis_post_message,
380 GST_OBJECT_UNLOCK (filter);
381 filter->analyze = NULL;
383 GST_LOG_OBJECT (filter, "started");
389 gst_rg_analysis_set_caps (GstBaseTransform * base, GstCaps * in_caps,
392 GstRgAnalysis *filter = GST_RG_ANALYSIS (base);
396 g_return_val_if_fail (filter->ctx != NULL, FALSE);
398 GST_DEBUG_OBJECT (filter,
399 "set_caps in %" GST_PTR_FORMAT " out %" GST_PTR_FORMAT,
402 if (!gst_audio_info_from_caps (&info, in_caps))
405 rate = GST_AUDIO_INFO_RATE (&info);
407 if (!rg_analysis_set_sample_rate (filter->ctx, rate))
410 channels = GST_AUDIO_INFO_CHANNELS (&info);
412 if (channels < 1 || channels > 2)
415 switch (GST_AUDIO_INFO_FORMAT (&info)) {
416 case GST_AUDIO_FORMAT_F32:
417 /* The depth is not variable for float formats of course. It just
418 * makes the transform function nice and simple if the
419 * rg_analysis_analyze_* functions have a common signature. */
420 filter->depth = sizeof (gfloat) * 8;
423 filter->analyze = rg_analysis_analyze_mono_float;
425 filter->analyze = rg_analysis_analyze_stereo_float;
428 case GST_AUDIO_FORMAT_S16:
429 filter->depth = sizeof (gint16) * 8;
432 filter->analyze = rg_analysis_analyze_mono_int16;
434 filter->analyze = rg_analysis_analyze_stereo_int16;
445 filter->analyze = NULL;
446 GST_ELEMENT_ERROR (filter, CORE, NEGOTIATION,
447 ("Invalid incoming caps: %" GST_PTR_FORMAT, in_caps), (NULL));
453 gst_rg_analysis_transform_ip (GstBaseTransform * base, GstBuffer * buf)
455 GstRgAnalysis *filter = GST_RG_ANALYSIS (base);
459 g_return_val_if_fail (filter->ctx != NULL, GST_FLOW_WRONG_STATE);
460 g_return_val_if_fail (filter->analyze != NULL, GST_FLOW_NOT_NEGOTIATED);
465 data = gst_buffer_map (buf, &size, NULL, GST_MAP_READ);
466 GST_LOG_OBJECT (filter, "processing buffer of size %" G_GSIZE_FORMAT, size);
468 rg_analysis_start_buffer (filter->ctx, GST_BUFFER_TIMESTAMP (buf));
469 filter->analyze (filter->ctx, data, size, filter->depth);
471 gst_buffer_unmap (buf, data, size);
477 gst_rg_analysis_sink_event (GstBaseTransform * base, GstEvent * event)
479 GstRgAnalysis *filter = GST_RG_ANALYSIS (base);
481 g_return_val_if_fail (filter->ctx != NULL, TRUE);
483 switch (GST_EVENT_TYPE (event)) {
487 GST_LOG_OBJECT (filter, "received EOS event");
489 gst_rg_analysis_handle_eos (filter);
491 GST_LOG_OBJECT (filter, "passing on EOS event");
497 GstTagList *tag_list;
499 /* The reference to the tag list is borrowed. */
500 gst_event_parse_tag (event, &tag_list);
501 gst_rg_analysis_handle_tags (filter, tag_list);
509 return GST_BASE_TRANSFORM_CLASS (parent_class)->sink_event (base, event);
513 gst_rg_analysis_stop (GstBaseTransform * base)
515 GstRgAnalysis *filter = GST_RG_ANALYSIS (base);
517 g_return_val_if_fail (filter->ctx != NULL, FALSE);
519 rg_analysis_destroy (filter->ctx);
522 GST_LOG_OBJECT (filter, "stopped");
528 gst_rg_analysis_handle_tags (GstRgAnalysis * filter,
529 const GstTagList * tag_list)
531 gboolean album_processing = (filter->num_tracks > 0);
534 if (!album_processing)
535 filter->ignore_tags = FALSE;
537 if (filter->skip && album_processing) {
538 GST_DEBUG_OBJECT (filter, "ignoring tag event: skipping album");
540 } else if (filter->skip) {
541 GST_DEBUG_OBJECT (filter, "ignoring tag event: skipping track");
543 } else if (filter->ignore_tags) {
544 GST_DEBUG_OBJECT (filter, "ignoring tag event: cannot skip anyways");
548 filter->has_track_gain |= gst_tag_list_get_double (tag_list,
549 GST_TAG_TRACK_GAIN, &dummy);
550 filter->has_track_peak |= gst_tag_list_get_double (tag_list,
551 GST_TAG_TRACK_PEAK, &dummy);
552 filter->has_album_gain |= gst_tag_list_get_double (tag_list,
553 GST_TAG_ALBUM_GAIN, &dummy);
554 filter->has_album_peak |= gst_tag_list_get_double (tag_list,
555 GST_TAG_ALBUM_PEAK, &dummy);
557 if (!(filter->has_track_gain && filter->has_track_peak)) {
558 GST_DEBUG_OBJECT (filter, "track tags not complete yet");
562 if (album_processing && !(filter->has_album_gain && filter->has_album_peak)) {
563 GST_DEBUG_OBJECT (filter, "album tags not complete yet");
567 if (filter->forced) {
568 GST_DEBUG_OBJECT (filter,
569 "existing tags are sufficient, but processing anyway (forced)");
574 rg_analysis_reset (filter->ctx);
576 if (!album_processing) {
577 GST_DEBUG_OBJECT (filter,
578 "existing tags are sufficient, will not process this track");
580 GST_DEBUG_OBJECT (filter,
581 "existing tags are sufficient, will not process this album");
586 gst_rg_analysis_handle_eos (GstRgAnalysis * filter)
588 gboolean album_processing = (filter->num_tracks > 0);
589 gboolean album_finished = (filter->num_tracks == 1);
590 gboolean album_skipping = album_processing && filter->skip;
592 filter->has_track_gain = FALSE;
593 filter->has_track_peak = FALSE;
595 if (album_finished) {
596 filter->ignore_tags = FALSE;
597 filter->skip = FALSE;
598 filter->has_album_gain = FALSE;
599 filter->has_album_peak = FALSE;
600 } else if (!album_skipping) {
601 filter->skip = FALSE;
604 /* We might have just fully processed a track because it has
605 * incomplete tags. If we do album processing and allow skipping
606 * (not forced), prevent switching to skipping if a later track with
607 * full tags comes along: */
608 if (!filter->forced && album_processing && !album_finished)
609 filter->ignore_tags = TRUE;
612 GstTagList *tag_list = NULL;
613 gboolean track_success;
614 gboolean album_success = FALSE;
616 track_success = gst_rg_analysis_track_result (filter, &tag_list);
619 album_success = gst_rg_analysis_album_result (filter, &tag_list);
620 else if (!album_processing)
621 rg_analysis_reset_album (filter->ctx);
623 if (track_success || album_success) {
624 GST_LOG_OBJECT (filter, "posting tag list with results");
625 gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
626 GST_TAG_REFERENCE_LEVEL, filter->reference_level, NULL);
627 /* This steals our reference to the list: */
628 gst_element_found_tags_for_pad (GST_ELEMENT (filter),
629 GST_BASE_TRANSFORM_SRC_PAD (GST_BASE_TRANSFORM (filter)), tag_list);
633 if (album_processing) {
634 filter->num_tracks--;
636 if (!album_finished) {
637 GST_DEBUG_OBJECT (filter, "album not finished yet (num-tracks is now %u)",
640 GST_DEBUG_OBJECT (filter, "album finished (num-tracks is now 0)");
644 if (album_processing)
645 g_object_notify (G_OBJECT (filter), "num-tracks");
649 gst_rg_analysis_track_result (GstRgAnalysis * filter, GstTagList ** tag_list)
651 gboolean track_success;
652 gdouble track_gain, track_peak;
654 track_success = rg_analysis_track_result (filter->ctx, &track_gain,
658 track_gain += filter->reference_level - RG_REFERENCE_LEVEL;
659 GST_INFO_OBJECT (filter, "track gain is %+.2f dB, peak %.6f", track_gain,
662 GST_INFO_OBJECT (filter, "track was too short to analyze");
666 if (*tag_list == NULL)
667 *tag_list = gst_tag_list_new ();
668 gst_tag_list_add (*tag_list, GST_TAG_MERGE_APPEND,
669 GST_TAG_TRACK_PEAK, track_peak, GST_TAG_TRACK_GAIN, track_gain, NULL);
672 return track_success;
676 gst_rg_analysis_album_result (GstRgAnalysis * filter, GstTagList ** tag_list)
678 gboolean album_success;
679 gdouble album_gain, album_peak;
681 album_success = rg_analysis_album_result (filter->ctx, &album_gain,
685 album_gain += filter->reference_level - RG_REFERENCE_LEVEL;
686 GST_INFO_OBJECT (filter, "album gain is %+.2f dB, peak %.6f", album_gain,
689 GST_INFO_OBJECT (filter, "album was too short to analyze");
693 if (*tag_list == NULL)
694 *tag_list = gst_tag_list_new ();
695 gst_tag_list_add (*tag_list, GST_TAG_MERGE_APPEND,
696 GST_TAG_ALBUM_PEAK, album_peak, GST_TAG_ALBUM_GAIN, album_gain, NULL);
699 return album_success;