1 /* GStreamer ReplayGain analysis
3 * Copyright (C) 2006 Rene Stadler <mail@renestadler.de>
5 * gstrganalysis.c: Element that performs the ReplayGain analysis
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public License
9 * as published by the Free Software Foundation; either version 2.1 of
10 * the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
24 * SECTION:element-rganalysis
25 * @see_also: #GstRgVolume
27 * This element analyzes raw audio sample data in accordance with the proposed
28 * <ulink url="http://replaygain.org">ReplayGain standard</ulink> for
29 * calculating the ideal replay gain for music tracks and albums. The element
30 * is designed as a pass-through filter that never modifies any data. As it
31 * receives an EOS event, it finalizes the ongoing analysis and generates a tag
32 * list containing the results. It is sent downstream with a tag event and
33 * posted on the message bus with a tag message. The EOS event is forwarded as
34 * normal afterwards. Result tag lists at least contain the tags
35 * #GST_TAG_TRACK_GAIN, #GST_TAG_TRACK_PEAK and #GST_TAG_REFERENCE_LEVEL.
37 * Because the generated metadata tags become available at the end of streams,
38 * downstream muxer and encoder elements are normally unable to save them in
39 * their output since they generally save metadata in the file header.
40 * Therefore, it is often necessary that applications read the results in a bus
41 * event handler for the tag message. Obtaining the values this way is always
42 * needed for album processing (see #GstRgAnalysis:num-tracks property) since
43 * the album gain and peak values need to be associated with all tracks of an
44 * album, not just the last one.
47 * <title>Example launch lines</title>
49 * gst-launch-1.0 -t audiotestsrc wave=sine num-buffers=512 ! rganalysis ! fakesink
50 * ]| Analyze a simple test waveform
52 * gst-launch-1.0 -t filesrc location=filename.ext ! decodebin \
53 * ! audioconvert ! audioresample ! rganalysis ! fakesink
54 * ]| Analyze a given file
56 * gst-launch-1.0 -t souphttpsrc location=http://replaygain.hydrogenaudio.org/ref_pink.wav \
57 * ! wavparse ! rganalysis ! fakesink
58 * ]| Analyze the pink noise reference file
60 * The above launch line yields a result gain of +6 dB (instead of the expected
61 * +0 dB). This is not an error; refer to the #GstRgAnalysis:reference-level
62 * property documentation for more information.
66 * <title>Acknowledgements</title>
68 * This element is based on code used in the <ulink
69 * url="http://sjeng.org/vorbisgain.html">vorbisgain</ulink> program and many
70 * others. The relevant parts are copyrighted by David Robinson, Glen Sawyer
81 #include <gst/base/gstbasetransform.h>
82 #include <gst/audio/audio.h>
84 #include "gstrganalysis.h"
85 #include "replaygain.h"
87 GST_DEBUG_CATEGORY_STATIC (gst_rg_analysis_debug);
88 #define GST_CAT_DEFAULT gst_rg_analysis_debug
90 /* Default property values. */
91 #define FORCED_DEFAULT TRUE
92 #define DEFAULT_MESSAGE FALSE
103 /* The ReplayGain algorithm is intended for use with mono and stereo
104 * audio. The used implementation has filter coefficients for the
105 * "usual" sample rates in the 8000 to 48000 Hz range. */
106 #define REPLAY_GAIN_CAPS "audio/x-raw," \
107 "format = (string) { "GST_AUDIO_NE(F32)","GST_AUDIO_NE(S16)" }, " \
108 "layout = (string) interleaved, " \
109 "channels = (int) 1, " \
110 "rate = (int) { 8000, 11025, 12000, 16000, 22050, 24000, 32000, " \
113 "format = (string) { "GST_AUDIO_NE(F32)","GST_AUDIO_NE(S16)" }, " \
114 "layout = (string) interleaved, " \
115 "channels = (int) 2, " \
116 "channel-mask = (bitmask) 0x3, " \
117 "rate = (int) { 8000, 11025, 12000, 16000, 22050, 24000, 32000, " \
120 static GstStaticPadTemplate sink_factory = GST_STATIC_PAD_TEMPLATE ("sink",
123 GST_STATIC_CAPS (REPLAY_GAIN_CAPS));
125 static GstStaticPadTemplate src_factory = GST_STATIC_PAD_TEMPLATE ("src",
128 GST_STATIC_CAPS (REPLAY_GAIN_CAPS));
130 #define gst_rg_analysis_parent_class parent_class
131 G_DEFINE_TYPE (GstRgAnalysis, gst_rg_analysis, GST_TYPE_BASE_TRANSFORM);
133 static void gst_rg_analysis_set_property (GObject * object, guint prop_id,
134 const GValue * value, GParamSpec * pspec);
135 static void gst_rg_analysis_get_property (GObject * object, guint prop_id,
136 GValue * value, GParamSpec * pspec);
138 static gboolean gst_rg_analysis_start (GstBaseTransform * base);
139 static gboolean gst_rg_analysis_set_caps (GstBaseTransform * base,
140 GstCaps * incaps, GstCaps * outcaps);
141 static GstFlowReturn gst_rg_analysis_transform_ip (GstBaseTransform * base,
143 static gboolean gst_rg_analysis_sink_event (GstBaseTransform * base,
145 static gboolean gst_rg_analysis_stop (GstBaseTransform * base);
147 static void gst_rg_analysis_handle_tags (GstRgAnalysis * filter,
148 const GstTagList * tag_list);
149 static void gst_rg_analysis_handle_eos (GstRgAnalysis * filter);
150 static gboolean gst_rg_analysis_track_result (GstRgAnalysis * filter,
151 GstTagList ** tag_list);
152 static gboolean gst_rg_analysis_album_result (GstRgAnalysis * filter,
153 GstTagList ** tag_list);
156 gst_rg_analysis_class_init (GstRgAnalysisClass * klass)
158 GObjectClass *gobject_class;
159 GstElementClass *element_class;
160 GstBaseTransformClass *trans_class;
162 gobject_class = (GObjectClass *) klass;
163 element_class = (GstElementClass *) klass;
165 gobject_class->set_property = gst_rg_analysis_set_property;
166 gobject_class->get_property = gst_rg_analysis_get_property;
169 * GstRgAnalysis:num-tracks:
171 * Number of remaining album tracks.
173 * Analyzing several streams sequentially and assigning them a common result
174 * gain is known as "album processing". If this gain is used during playback
175 * (by switching to "album mode"), all tracks of an album receive the same
176 * amplification. This keeps the relative volume levels between the tracks
177 * intact. To enable this, set this property to the number of streams that
178 * will be processed as album tracks.
180 * Every time an EOS event is received, the value of this property is
181 * decremented by one. As it reaches zero, it is assumed that the last track
182 * of the album finished. The tag list for the final stream will contain the
183 * additional tags #GST_TAG_ALBUM_GAIN and #GST_TAG_ALBUM_PEAK. All other
184 * streams just get the two track tags posted because the values for the album
185 * tags are not known before all tracks are analyzed. Applications need to
186 * ensure that the album gain and peak values are also associated with the
187 * other tracks when storing the results.
189 * If the total number of album tracks is unknown beforehand, just ensure that
190 * the value is greater than 1 before each track starts. Then before the end
191 * of the last track, set it to the value 1.
193 * To perform album processing, the element has to preserve data between
194 * streams. This cannot survive a state change to the NULL or READY state.
195 * If you change your pipeline's state to NULL or READY between tracks, lock
196 * the element's state using gst_element_set_locked_state() when it is in
199 g_object_class_install_property (gobject_class, PROP_NUM_TRACKS,
200 g_param_spec_int ("num-tracks", "Number of album tracks",
201 "Number of remaining album tracks", 0, G_MAXINT, 0,
202 G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
204 * GstRgAnalysis:forced:
206 * Whether to analyze streams even when ReplayGain tags exist.
208 * For assisting transcoder/converter applications, the element can silently
209 * skip the processing of streams that already contain the necessary tags.
210 * Data will flow as usual but the element will not consume CPU time and will
211 * not generate result tags. To enable possible skipping, set this property
214 * If used in conjunction with <link linkend="GstRgAnalysis--num-tracks">album
215 * processing</link>, the element will skip the number of remaining album
216 * tracks if a full set of tags is found for the first track. If a subsequent
217 * track of the album is missing tags, processing cannot start again. If this
218 * is undesired, the application has to scan all files beforehand and enable
219 * forcing of processing if needed.
221 g_object_class_install_property (gobject_class, PROP_FORCED,
222 g_param_spec_boolean ("forced", "Forced",
223 "Analyze even if ReplayGain tags exist",
224 FORCED_DEFAULT, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
226 * GstRgAnalysis:reference-level:
228 * Reference level [dB].
230 * Analyzing the ReplayGain pink noise reference waveform computes a result of
231 * +6 dB instead of the expected 0 dB. This is because the default reference
232 * level is 89 dB. To obtain values as outlined in the original proposal of
233 * ReplayGain, set this property to 83.
235 * Almost all software uses 89 dB as a reference however, and this value has
236 * become the new official value. That is to say, while the change has been
237 * acclaimed by the author of the ReplayGain proposal, the <ulink
238 * url="http://replaygain.org">webpage</ulink> is still outdated at the time
241 * The value was changed because the original proposal recommends a default
242 * pre-amp value of +6 dB for playback. This seemed a bit odd, as it means
243 * that the algorithm has the general tendency to produce adjustment values
244 * that are 6 dB too low. Bumping the reference level by 6 dB compensated for
247 * The problem of the reference level being ambiguous for lack of concise
248 * standardization is to be solved by adopting the #GST_TAG_REFERENCE_LEVEL
249 * tag, which allows storing the value used alongside the gain values.
251 g_object_class_install_property (gobject_class, PROP_REFERENCE_LEVEL,
252 g_param_spec_double ("reference-level", "Reference level",
253 "Reference level [dB]", 0.0, 150., RG_REFERENCE_LEVEL,
254 G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
256 g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_MESSAGE,
257 g_param_spec_boolean ("message", "Message",
258 "Post statics messages",
260 G_PARAM_READWRITE | G_PARAM_CONSTRUCT | G_PARAM_STATIC_STRINGS));
262 trans_class = (GstBaseTransformClass *) klass;
263 trans_class->start = GST_DEBUG_FUNCPTR (gst_rg_analysis_start);
264 trans_class->set_caps = GST_DEBUG_FUNCPTR (gst_rg_analysis_set_caps);
265 trans_class->transform_ip = GST_DEBUG_FUNCPTR (gst_rg_analysis_transform_ip);
266 trans_class->sink_event = GST_DEBUG_FUNCPTR (gst_rg_analysis_sink_event);
267 trans_class->stop = GST_DEBUG_FUNCPTR (gst_rg_analysis_stop);
268 trans_class->passthrough_on_same_caps = TRUE;
270 gst_element_class_add_static_pad_template (element_class, &src_factory);
271 gst_element_class_add_static_pad_template (element_class, &sink_factory);
272 gst_element_class_set_static_metadata (element_class, "ReplayGain analysis",
273 "Filter/Analyzer/Audio",
274 "Perform the ReplayGain analysis",
275 "Ren\xc3\xa9 Stadler <mail@renestadler.de>");
277 GST_DEBUG_CATEGORY_INIT (gst_rg_analysis_debug, "rganalysis", 0,
278 "ReplayGain analysis element");
/* Instance initializer.  Marks the base transform as gap-aware and loads the
 * defaults for num_tracks, forced, message and reference_level.  No analyze
 * function is selected at this point. */
282 gst_rg_analysis_init (GstRgAnalysis * filter)
284 GstBaseTransform *base = GST_BASE_TRANSFORM (filter);
286 gst_base_transform_set_gap_aware (base, TRUE);
288 filter->num_tracks = 0;
289 filter->forced = FORCED_DEFAULT;
290 filter->message = DEFAULT_MESSAGE;
291 filter->reference_level = RG_REFERENCE_LEVEL;
294 filter->analyze = NULL;
/* GObject property setter.  While holding the object lock, copies the
 * incoming GValue into the matching filter field (num_tracks, forced,
 * reference_level or message).  Unknown property ids are reported through
 * G_OBJECT_WARN_INVALID_PROPERTY_ID. */
298 gst_rg_analysis_set_property (GObject * object, guint prop_id,
299 const GValue * value, GParamSpec * pspec)
301 GstRgAnalysis *filter = GST_RG_ANALYSIS (object);
303 GST_OBJECT_LOCK (filter);
305 case PROP_NUM_TRACKS:
306 filter->num_tracks = g_value_get_int (value);
309 filter->forced = g_value_get_boolean (value);
311 case PROP_REFERENCE_LEVEL:
312 filter->reference_level = g_value_get_double (value);
315 filter->message = g_value_get_boolean (value);
318 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
321 GST_OBJECT_UNLOCK (filter);
/* GObject property getter.  While holding the object lock, writes the
 * current value of the requested filter field (num_tracks, forced,
 * reference_level or message) into the supplied GValue.  Unknown property
 * ids are reported through G_OBJECT_WARN_INVALID_PROPERTY_ID. */
325 gst_rg_analysis_get_property (GObject * object, guint prop_id,
326 GValue * value, GParamSpec * pspec)
328 GstRgAnalysis *filter = GST_RG_ANALYSIS (object);
330 GST_OBJECT_LOCK (filter);
332 case PROP_NUM_TRACKS:
333 g_value_set_int (value, filter->num_tracks);
336 g_value_set_boolean (value, filter->forced);
338 case PROP_REFERENCE_LEVEL:
339 g_value_set_double (value, filter->reference_level);
342 g_value_set_boolean (value, filter->message);
345 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
348 GST_OBJECT_UNLOCK (filter);
/* Callback handed to rg_analysis_init_silence_detection() by the start
 * handler.  Does nothing unless the "message" property is enabled; in that
 * case it builds an element message whose structure is named "rganalysis"
 * and carries three fields:
 *   "timestamp" (guint64), "duration" (guint64), "rglevel" (gdouble)
 * and posts it via gst_element_post_message().
 *
 * rganalysis: the GstRgAnalysis instance, passed as an untyped pointer. */
352 gst_rg_analysis_post_message (gpointer rganalysis, GstClockTime timestamp,
353 GstClockTime duration, gdouble rglevel)
355 GstRgAnalysis *filter = GST_RG_ANALYSIS (rganalysis);
356 if (filter->message) {
359 m = gst_message_new_element (GST_OBJECT_CAST (rganalysis),
360 gst_structure_new ("rganalysis",
361 "timestamp", G_TYPE_UINT64, timestamp,
362 "duration", G_TYPE_UINT64, duration,
363 "rglevel", G_TYPE_DOUBLE, rglevel, NULL));
365 gst_element_post_message (GST_ELEMENT_CAST (rganalysis), m);
/* Base-transform start handler.  Clears the tag-tracking state (ignore_tags,
 * skip and the four has_* flags), creates a fresh analysis context with
 * rg_analysis_new(), registers gst_rg_analysis_post_message as the silence
 * detection callback while holding the object lock, and clears the analyze
 * function pointer; set_caps assigns it once the format is known. */
371 gst_rg_analysis_start (GstBaseTransform * base)
373 GstRgAnalysis *filter = GST_RG_ANALYSIS (base);
375 filter->ignore_tags = FALSE;
376 filter->skip = FALSE;
377 filter->has_track_gain = FALSE;
378 filter->has_track_peak = FALSE;
379 filter->has_album_gain = FALSE;
380 filter->has_album_peak = FALSE;
382 filter->ctx = rg_analysis_new ();
383 GST_OBJECT_LOCK (filter);
384 rg_analysis_init_silence_detection (filter->ctx, gst_rg_analysis_post_message,
386 GST_OBJECT_UNLOCK (filter);
387 filter->analyze = NULL;
389 GST_LOG_OBJECT (filter, "started");
/* Base-transform set_caps handler.  Requires an existing analysis context
 * (returns FALSE through g_return_val_if_fail otherwise).
 *
 * Parses in_caps into a GstAudioInfo, passes the sample rate to
 * rg_analysis_set_sample_rate(), accepts only 1 or 2 channels, and selects
 * the analyze function together with the sample depth in bits:
 *   F32 -> rg_analysis_analyze_{mono,stereo}_float, depth 32
 *   S16 -> rg_analysis_analyze_{mono,stereo}_int16, depth 16
 *
 * The error path at the end clears the analyze pointer and raises a
 * CORE/NEGOTIATION element error that quotes the incoming caps; presumably
 * it is reached whenever one of the checks above fails -- TODO confirm. */
395 gst_rg_analysis_set_caps (GstBaseTransform * base, GstCaps * in_caps,
398 GstRgAnalysis *filter = GST_RG_ANALYSIS (base);
402 g_return_val_if_fail (filter->ctx != NULL, FALSE);
404 GST_DEBUG_OBJECT (filter,
405 "set_caps in %" GST_PTR_FORMAT " out %" GST_PTR_FORMAT,
408 if (!gst_audio_info_from_caps (&info, in_caps))
411 rate = GST_AUDIO_INFO_RATE (&info);
413 if (!rg_analysis_set_sample_rate (filter->ctx, rate))
416 channels = GST_AUDIO_INFO_CHANNELS (&info);
418 if (channels < 1 || channels > 2)
421 switch (GST_AUDIO_INFO_FORMAT (&info)) {
422 case GST_AUDIO_FORMAT_F32:
423 /* The depth is not variable for float formats of course. It just
424 * makes the transform function nice and simple if the
425 * rg_analysis_analyze_* functions have a common signature. */
426 filter->depth = sizeof (gfloat) * 8;
429 filter->analyze = rg_analysis_analyze_mono_float;
431 filter->analyze = rg_analysis_analyze_stereo_float;
434 case GST_AUDIO_FORMAT_S16:
435 filter->depth = sizeof (gint16) * 8;
438 filter->analyze = rg_analysis_analyze_mono_int16;
440 filter->analyze = rg_analysis_analyze_stereo_int16;
451 filter->analyze = NULL;
452 GST_ELEMENT_ERROR (filter, CORE, NEGOTIATION,
453 ("Invalid incoming caps: %" GST_PTR_FORMAT, in_caps), (NULL));
/* In-place transform handler.  The buffer is mapped for reading only.
 *
 * Preconditions, checked with g_return_val_if_fail:
 *   - no analysis context -> GST_FLOW_FLUSHING
 *   - no analyze function -> GST_FLOW_NOT_NEGOTIATED
 *
 * Maps the buffer, tells the context the buffer timestamp through
 * rg_analysis_start_buffer(), runs the selected analyze function over
 * map.data / map.size with the configured depth, and unmaps the buffer.
 *
 * NOTE(review): the result of gst_buffer_map() is not checked on the line
 * below; if mapping can fail here, map.data / map.size would be used
 * unset -- confirm and handle. */
459 gst_rg_analysis_transform_ip (GstBaseTransform * base, GstBuffer * buf)
461 GstRgAnalysis *filter = GST_RG_ANALYSIS (base);
464 g_return_val_if_fail (filter->ctx != NULL, GST_FLOW_FLUSHING);
465 g_return_val_if_fail (filter->analyze != NULL, GST_FLOW_NOT_NEGOTIATED);
470 gst_buffer_map (buf, &map, GST_MAP_READ);
471 GST_LOG_OBJECT (filter, "processing buffer of size %" G_GSIZE_FORMAT,
474 rg_analysis_start_buffer (filter->ctx, GST_BUFFER_TIMESTAMP (buf));
475 filter->analyze (filter->ctx, map.data, map.size, filter->depth);
477 gst_buffer_unmap (buf, &map);
/* Sink-pad event handler.  Without an analysis context it returns TRUE
 * straight away (g_return_val_if_fail).
 *
 * On EOS the running analysis is finalized through
 * gst_rg_analysis_handle_eos() before the event is passed on.  On tag events
 * the tag list is obtained with gst_event_parse_tag() -- the reference is
 * borrowed, so it is not released here -- and inspected by
 * gst_rg_analysis_handle_tags().  The event is finally handed to the parent
 * class' sink_event implementation, whose result is returned. */
483 gst_rg_analysis_sink_event (GstBaseTransform * base, GstEvent * event)
485 GstRgAnalysis *filter = GST_RG_ANALYSIS (base);
487 g_return_val_if_fail (filter->ctx != NULL, TRUE);
489 switch (GST_EVENT_TYPE (event)) {
493 GST_LOG_OBJECT (filter, "received EOS event");
495 gst_rg_analysis_handle_eos (filter);
497 GST_LOG_OBJECT (filter, "passing on EOS event");
503 GstTagList *tag_list;
505 /* The reference to the tag list is borrowed. */
506 gst_event_parse_tag (event, &tag_list);
507 gst_rg_analysis_handle_tags (filter, tag_list);
515 return GST_BASE_TRANSFORM_CLASS (parent_class)->sink_event (base, event);
/* Base-transform stop handler.  Requires an analysis context (returns FALSE
 * through g_return_val_if_fail otherwise) and releases it with
 * rg_analysis_destroy(). */
519 gst_rg_analysis_stop (GstBaseTransform * base)
521 GstRgAnalysis *filter = GST_RG_ANALYSIS (base);
523 g_return_val_if_fail (filter->ctx != NULL, FALSE);
525 rg_analysis_destroy (filter->ctx);
528 GST_LOG_OBJECT (filter, "stopped");
533 /* FIXME: handle global vs. stream-tags? */
/* Examines an incoming tag list for existing ReplayGain tags.
 *
 * Album processing is considered active while num_tracks > 0; outside of
 * album processing ignore_tags is cleared first.  The tag event is
 * disregarded (see the debug messages) while an album or track is already
 * being skipped, or while ignore_tags is set.
 *
 * Otherwise the has_track_gain / has_track_peak / has_album_gain /
 * has_album_peak flags are OR-ed with whether the corresponding tag can be
 * read as a double, so tags spread over several events accumulate.  The
 * debug messages mark the remaining outcomes:
 *   - track tags (or, for album processing, album tags) still incomplete;
 *   - tags complete but "forced" is set: processing continues anyway;
 *   - tags complete and not forced: the analysis context is reset and the
 *     track (or album) is reported as not being processed. */
535 gst_rg_analysis_handle_tags (GstRgAnalysis * filter,
536 const GstTagList * tag_list)
538 gboolean album_processing = (filter->num_tracks > 0);
541 if (!album_processing)
542 filter->ignore_tags = FALSE;
544 if (filter->skip && album_processing) {
545 GST_DEBUG_OBJECT (filter, "ignoring tag event: skipping album");
547 } else if (filter->skip) {
548 GST_DEBUG_OBJECT (filter, "ignoring tag event: skipping track");
550 } else if (filter->ignore_tags) {
551 GST_DEBUG_OBJECT (filter, "ignoring tag event: cannot skip anyways");
555 filter->has_track_gain |= gst_tag_list_get_double (tag_list,
556 GST_TAG_TRACK_GAIN, &dummy);
557 filter->has_track_peak |= gst_tag_list_get_double (tag_list,
558 GST_TAG_TRACK_PEAK, &dummy);
559 filter->has_album_gain |= gst_tag_list_get_double (tag_list,
560 GST_TAG_ALBUM_GAIN, &dummy);
561 filter->has_album_peak |= gst_tag_list_get_double (tag_list,
562 GST_TAG_ALBUM_PEAK, &dummy);
564 if (!(filter->has_track_gain && filter->has_track_peak)) {
565 GST_DEBUG_OBJECT (filter, "track tags not complete yet");
569 if (album_processing && !(filter->has_album_gain && filter->has_album_peak)) {
570 GST_DEBUG_OBJECT (filter, "album tags not complete yet");
574 if (filter->forced) {
575 GST_DEBUG_OBJECT (filter,
576 "existing tags are sufficient, but processing anyway (forced)");
581 rg_analysis_reset (filter->ctx);
583 if (!album_processing) {
584 GST_DEBUG_OBJECT (filter,
585 "existing tags are sufficient, will not process this track");
587 GST_DEBUG_OBJECT (filter,
588 "existing tags are sufficient, will not process this album");
/* Finalizes the analysis of the stream that just ended.
 *
 * State derived from num_tracks on entry:
 *   album_processing  num_tracks > 0
 *   album_finished    num_tracks == 1 (this was the last album track)
 *   album_skipping    album processing is active and skip is set
 *
 * The per-track tag flags are always cleared.  When the album is finished,
 * ignore_tags, skip and the album tag flags are cleared as well; otherwise
 * skip is cleared unless an album is being skipped.
 *
 * The track result is collected, and the album result where applicable
 * (presumably once the album is finished -- TODO confirm); outside of album
 * processing the album state of the context is reset instead.  If either
 * result succeeded, GST_TAG_REFERENCE_LEVEL is added to the list and the
 * list is pushed downstream on the source pad as a tag event, which takes
 * ownership of it.
 *
 * During album processing num_tracks is then decremented and a "num-tracks"
 * property notification is emitted. */
593 gst_rg_analysis_handle_eos (GstRgAnalysis * filter)
595 gboolean album_processing = (filter->num_tracks > 0);
596 gboolean album_finished = (filter->num_tracks == 1);
597 gboolean album_skipping = album_processing && filter->skip;
599 filter->has_track_gain = FALSE;
600 filter->has_track_peak = FALSE;
602 if (album_finished) {
603 filter->ignore_tags = FALSE;
604 filter->skip = FALSE;
605 filter->has_album_gain = FALSE;
606 filter->has_album_peak = FALSE;
607 } else if (!album_skipping) {
608 filter->skip = FALSE;
611 /* We might have just fully processed a track because it has
612 * incomplete tags. If we do album processing and allow skipping
613 * (not forced), prevent switching to skipping if a later track with
614 * full tags comes along: */
615 if (!filter->forced && album_processing && !album_finished)
616 filter->ignore_tags = TRUE;
619 GstTagList *tag_list = NULL;
620 gboolean track_success;
621 gboolean album_success = FALSE;
623 track_success = gst_rg_analysis_track_result (filter, &tag_list);
626 album_success = gst_rg_analysis_album_result (filter, &tag_list);
627 else if (!album_processing)
628 rg_analysis_reset_album (filter->ctx);
630 if (track_success || album_success) {
631 GST_LOG_OBJECT (filter, "posting tag list with results");
632 gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
633 GST_TAG_REFERENCE_LEVEL, filter->reference_level, NULL);
634 /* This takes ownership of our reference to the list */
635 gst_pad_push_event (GST_BASE_TRANSFORM_SRC_PAD (filter),
636 gst_event_new_tag (tag_list));
641 if (album_processing) {
642 filter->num_tracks--;
644 if (!album_finished) {
645 GST_DEBUG_OBJECT (filter, "album not finished yet (num-tracks is now %u)",
648 GST_DEBUG_OBJECT (filter, "album finished (num-tracks is now 0)");
652 if (album_processing)
653 g_object_notify (G_OBJECT (filter), "num-tracks");
656 /* FIXME: return tag list (lists?) based on input tags.. */
/* Obtains the track gain and peak from the analysis context.
 *
 * The gain is shifted by (reference_level - RG_REFERENCE_LEVEL) so that it
 * is relative to the configured reference level.  GST_TAG_TRACK_PEAK and
 * GST_TAG_TRACK_GAIN are appended to *tag_list (presumably only on success
 * -- TODO confirm); the list is created first when *tag_list is NULL.  A
 * track that is too short to analyze is only logged.
 *
 * Returns whether rg_analysis_track_result() delivered a result. */
658 gst_rg_analysis_track_result (GstRgAnalysis * filter, GstTagList ** tag_list)
660 gboolean track_success;
661 gdouble track_gain, track_peak;
663 track_success = rg_analysis_track_result (filter->ctx, &track_gain,
667 track_gain += filter->reference_level - RG_REFERENCE_LEVEL;
668 GST_INFO_OBJECT (filter, "track gain is %+.2f dB, peak %.6f", track_gain,
671 GST_INFO_OBJECT (filter, "track was too short to analyze");
675 if (*tag_list == NULL)
676 *tag_list = gst_tag_list_new_empty ();
677 gst_tag_list_add (*tag_list, GST_TAG_MERGE_APPEND,
678 GST_TAG_TRACK_PEAK, track_peak, GST_TAG_TRACK_GAIN, track_gain, NULL);
681 return track_success;
/* Obtains the album gain and peak from the analysis context.
 *
 * The gain is shifted by (reference_level - RG_REFERENCE_LEVEL) so that it
 * is relative to the configured reference level.  GST_TAG_ALBUM_PEAK and
 * GST_TAG_ALBUM_GAIN are appended to *tag_list (presumably only on success
 * -- TODO confirm); the list is created first when *tag_list is NULL.  An
 * album that is too short to analyze is only logged.
 *
 * Returns whether rg_analysis_album_result() delivered a result. */
685 gst_rg_analysis_album_result (GstRgAnalysis * filter, GstTagList ** tag_list)
687 gboolean album_success;
688 gdouble album_gain, album_peak;
690 album_success = rg_analysis_album_result (filter->ctx, &album_gain,
694 album_gain += filter->reference_level - RG_REFERENCE_LEVEL;
695 GST_INFO_OBJECT (filter, "album gain is %+.2f dB, peak %.6f", album_gain,
698 GST_INFO_OBJECT (filter, "album was too short to analyze");
702 if (*tag_list == NULL)
703 *tag_list = gst_tag_list_new_empty ();
704 gst_tag_list_add (*tag_list, GST_TAG_MERGE_APPEND,
705 GST_TAG_ALBUM_PEAK, album_peak, GST_TAG_ALBUM_GAIN, album_gain, NULL);
708 return album_success;