3 * Copyright (C) 2008 Rov Juvano <rovjuvano@users.sourceforge.net>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with this library; if not, write to the
17 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 * Boston, MA 02111-1307, USA.
22 * SECTION:element-scaletempo
24 * Scale tempo while maintaining pitch
25 * (WSOLA-like technique with cross correlation)
26 * Inspired by SoundTouch library by Olli Parviainen
28 * Use Scaletempo to apply playback rates without the chipmunk effect.
31 * <title>Example pipelines</title>
34 * filesrc location=media.ext ! decodebin name=d \
35 * d. ! queue ! audioconvert ! audioresample ! scaletempo ! audioconvert ! audioresample ! autoaudiosink \
36 * d. ! queue ! videoconvert ! autovideosink
40 * playbin uri=... audio_sink="scaletempo ! audioconvert ! audioresample ! autoaudiosink"
42 * When an application sends a seek event with rate != 1.0, Scaletempo applies
43 * the rate change by scaling the tempo without scaling the pitch.
45 * Scaletempo works by producing audio in constant sized chunks
46 * (#GstScaletempo:stride) but consuming chunks proportional to the playback
49 * Scaletempo then smooths the output by blending the end of one stride with
50 * the next (#GstScaletempo:overlap).
52 * Scaletempo smooths the overlap further by searching within the input buffer
53 * for the best overlap position. Scaletempo uses a statistical cross
54 * correlation (roughly a dot-product). Scaletempo consumes most of its CPU
55 * cycles here. One can use the #GstScaletempo:search property to tune how far
62 * Note: frame = audio key unit (i.e. one sample for each channel)
70 #include <gst/base/gstbasetransform.h>
71 #include <gst/audio/audio.h>
72 #include <string.h> /* for memset */
74 #include "gstscaletempo.h"
/* Debug category for this element; GST_CAT_DEFAULT makes it the category
 * used by the GST_DEBUG () calls in this file. */
76 GST_DEBUG_CATEGORY_STATIC (gst_scaletempo_debug);
77 #define GST_CAT_DEFAULT gst_scaletempo_debug
79 /* Filter signals and args */
/* Caps string offering two sample formats, F32 and S16, each wrapped in
 * GST_AUDIO_NE (); presumably consumed by the pad templates below. */
94 #define SUPPORTED_CAPS \
96 GST_AUDIO_CAPS_MAKE (GST_AUDIO_NE (F32)) "; " \
97 GST_AUDIO_CAPS_MAKE (GST_AUDIO_NE (S16)) \
100 static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
105 static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
/* Type-registration hook: initializes the "scaletempo" debug category.
 * The macro argument is ignored. */
110 #define DEBUG_INIT(bla) GST_DEBUG_CATEGORY_INIT (gst_scaletempo_debug, "scaletempo", 0, "scaletempo element");
/* Define the GstScaletempo type as a subclass of GstBaseTransform and run
 * DEBUG_INIT as part of type registration. */
112 #define gst_scaletempo_parent_class parent_class
113 G_DEFINE_TYPE_WITH_CODE (GstScaletempo, gst_scaletempo,
114 GST_TYPE_BASE_TRANSFORM, DEBUG_INIT (0));
/* Private per-instance state of the scaletempo element.  Sizes are kept in
 * several units (frames, samples, bytes) so the hot loops need no
 * conversions. */
116 typedef struct _GstScaletempoPrivate
/* Overlap length as a fraction of the stride ("overlap" property). */
121 gdouble percent_overlap;
/* Stream format, copied from the negotiated caps in set_caps. */
125 guint samples_per_frame; /* AKA number of channels */
126 guint bytes_per_sample;
127 guint bytes_per_frame;
/* Input consumed per output stride: bytes_stride * scale, expressed in
 * frames and in bytes.  frames_stride_error carries the fractional frame
 * left over after each slide. */
130 gdouble frames_stride_scaled;
131 gdouble frames_stride_error;
133 gdouble bytes_stride_scaled;
/* Allocated size of buf_queue. */
134 guint bytes_queue_max;
/* Bytes still to be dropped from the front of the queue/input before new
 * input is appended (consumed by fill_queue). */
136 guint bytes_to_slide;
/* Overlap (blended) and standing (copied verbatim) parts of each stride. */
139 guint samples_overlap;
140 guint samples_standing;
142 guint bytes_standing;
/* Samples saved from the previous stride, and the per-sample blend weights
 * (gint32 or gfloat depending on the sample format). */
143 gpointer buf_overlap;
144 gpointer table_blend;
/* Format-specific blend routine; NULL when there is no overlap. */
145 void (*output_overlap) (GstScaletempo * scaletempo, gpointer out_buf,
/* Scratch buffer and window table for the correlation search, plus the
 * format-specific search routine; NULL when there is no search. */
149 gpointer buf_pre_corr;
150 gpointer table_window;
151 guint (*best_overlap_offset) (GstScaletempo * scaletempo);
/* Start of the current segment, taken from the incoming segment event. */
153 gint64 segment_start;
/* Set whenever a property or the stream format changes; cleared by
 * reinit_buffers (). */
155 gboolean reinit_buffers;
156 } GstScaletempoPrivate;
/* Fetch the private struct of a GstScaletempo instance. */
157 #define GST_SCALETEMPO_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), GST_TYPE_SCALETEMPO, GstScaletempoPrivate))
/*
 * Correlation search for gfloat samples.
 *
 * Looks through frames_search candidate positions in the queue and picks
 * the one whose samples correlate best with the windowed saved overlap.
 *
 * Returns: the chosen offset in bytes (best_off * bytes_per_frame).
 */
161 best_overlap_offset_float (GstScaletempo * scaletempo)
163 GstScaletempoPrivate *p = GST_SCALETEMPO_GET_PRIVATE (scaletempo);
164 gfloat *pw, *po, *ppc, *search_start;
/* Very low starting value; any candidate with a larger correlation
 * replaces it in the comparison below. */
165 gfloat best_corr = G_MININT;
/* Pre-multiply the window table with the overlap samples, skipping the
 * first frame, so the search loop only needs one multiply per sample. */
169 pw = p->table_window;
171 po += p->samples_per_frame;
172 ppc = p->buf_pre_corr;
173 for (i = p->samples_per_frame; i < p->samples_overlap; i++) {
174 *ppc++ = *pw++ * *po++;
/* Candidates start one frame into the queue and advance one frame per
 * iteration. */
177 search_start = (gfloat *) p->buf_queue + p->samples_per_frame;
178 for (off = 0; off < p->frames_search; off++) {
180 gfloat *ps = search_start;
181 ppc = p->buf_pre_corr;
/* Dot product of the pre-weighted overlap with the candidate samples. */
182 for (i = p->samples_per_frame; i < p->samples_overlap; i++) {
183 corr += *ppc++ * *ps++;
185 if (corr > best_corr) {
189 search_start += p->samples_per_frame;
/* Convert the winning frame offset to bytes. */
192 return best_off * p->bytes_per_frame;
195 /* buffer padding for loop optimization: sizeof(gint32) * (loop_size - 1) */
196 #define UNROLL_PADDING (4*3)
/*
 * Correlation search for gint16 samples.
 *
 * Same idea as the float variant, but the windowed overlap is kept as
 * fixed-point values and the correlation is accumulated four products at a
 * time.
 *
 * Returns: the chosen offset in bytes (best_off * bytes_per_frame).
 */
198 best_overlap_offset_s16 (GstScaletempo * scaletempo)
200 GstScaletempoPrivate *p = GST_SCALETEMPO_GET_PRIVATE (scaletempo);
202 gint16 *po, *search_start;
/* Lowest possible gint64, so any candidate correlation compares greater. */
203 gint64 best_corr = G_MININT64;
/* Pre-multiply window and overlap (skipping the first frame); the product
 * is scaled back down by 15 bits. */
208 pw = p->table_window;
210 po += p->samples_per_frame;
211 ppc = p->buf_pre_corr;
212 for (i = p->samples_per_frame; i < p->samples_overlap; i++) {
213 *ppc++ = (*pw++ * *po++) >> 15;
/* Candidates start one frame into the queue and advance one frame per
 * iteration. */
216 search_start = (gint16 *) p->buf_queue + p->samples_per_frame;
217 for (off = 0; off < p->frames_search; off++) {
219 gint16 *ps = search_start;
220 ppc = p->buf_pre_corr;
/* Point both arrays at their end and walk a negative index i upward.
 * Four products are taken per group, so reads can run up to three
 * elements past the end of buf_pre_corr; that is what the zeroed
 * UNROLL_PADDING at its tail is for. */
221 ppc += p->samples_overlap - p->samples_per_frame;
222 ps += p->samples_overlap - p->samples_per_frame;
223 i = -((glong) p->samples_overlap - (glong) p->samples_per_frame);
225 corr += ppc[i + 0] * ps[i + 0];
226 corr += ppc[i + 1] * ps[i + 1];
227 corr += ppc[i + 2] * ps[i + 2];
228 corr += ppc[i + 3] * ps[i + 3];
231 if (corr > best_corr) {
235 search_start += p->samples_per_frame;
/* Convert the winning frame offset to bytes. */
238 return best_off * p->bytes_per_frame;
/*
 * Blend the saved overlap with the queue contents at bytes_off and write
 * samples_overlap gfloat samples to buf_out.
 *
 * scaletempo: element instance
 * buf_out:    destination for the blended samples
 * bytes_off:  byte offset into buf_queue where the new stride starts
 */
242 output_overlap_float (GstScaletempo * scaletempo,
243 gpointer buf_out, guint bytes_off)
245 GstScaletempoPrivate *p = GST_SCALETEMPO_GET_PRIVATE (scaletempo);
246 gfloat *pout = buf_out;
247 gfloat *pb = p->table_blend;
248 gfloat *po = p->buf_overlap;
249 gfloat *pin = (gfloat *) (p->buf_queue + bytes_off);
251 for (i = 0; i < p->samples_overlap; i++) {
/* out = o - b * (o - in): weight b = 0 yields the saved overlap sample,
 * b = 1 yields the queue sample. */
252 *pout++ = *po - *pb++ * (*po - *pin++);
/*
 * Blend the saved overlap with the queue contents at bytes_off and write
 * samples_overlap gint16 samples to buf_out.  Integer counterpart of the
 * float blend, using gint32 fixed-point weights.
 *
 * scaletempo: element instance
 * buf_out:    destination for the blended samples
 * bytes_off:  byte offset into buf_queue where the new stride starts
 */
258 output_overlap_s16 (GstScaletempo * scaletempo,
259 gpointer buf_out, guint bytes_off)
261 GstScaletempoPrivate *p = GST_SCALETEMPO_GET_PRIVATE (scaletempo);
262 gint16 *pout = buf_out;
263 gint32 *pb = p->table_blend;
264 gint16 *po = p->buf_overlap;
265 gint16 *pin = (gint16 *) (p->buf_queue + bytes_off);
267 for (i = 0; i < p->samples_overlap; i++) {
/* out = o - (b * (o - in)) >> 16: the weight is a fraction scaled by
 * 16 bits, so the product is shifted back down before subtracting. */
268 *pout++ = *po - ((*pb++ * (*po - *pin++)) >> 16);
/*
 * Apply any pending slide to the queue, then append as much of buf_in
 * (starting at byte `offset`) as fits.
 *
 * scaletempo: element instance
 * buf_in:     input buffer to read from
 * offset:     byte offset into buf_in at which to start reading
 *
 * Returns: number of input bytes consumed, i.e. skipped plus copied.
 */
274 fill_queue (GstScaletempo * scaletempo, GstBuffer * buf_in, guint offset)
276 GstScaletempoPrivate *p = GST_SCALETEMPO_GET_PRIVATE (scaletempo);
277 guint bytes_in = gst_buffer_get_size (buf_in) - offset;
278 guint offset_unchanged = offset;
/* NOTE(review): the result of gst_buffer_map () is not checked in the
 * lines visible here - confirm a failed map cannot reach the memcpy. */
281 gst_buffer_map (buf_in, &map, GST_MAP_READ);
282 if (p->bytes_to_slide > 0) {
/* Slide is smaller than what is queued: drop the front of the queue and
 * move the remainder to the start. */
283 if (p->bytes_to_slide < p->bytes_queued) {
284 guint bytes_in_move = p->bytes_queued - p->bytes_to_slide;
285 memmove (p->buf_queue, p->buf_queue + p->bytes_to_slide, bytes_in_move);
286 p->bytes_to_slide = 0;
287 p->bytes_queued = bytes_in_move;
/* Otherwise the slide covers everything queued; the rest of the slide is
 * taken out of the incoming bytes, limited to what is available. */
290 p->bytes_to_slide -= p->bytes_queued;
291 bytes_in_skip = MIN (p->bytes_to_slide, bytes_in);
293 p->bytes_to_slide -= bytes_in_skip;
294 offset += bytes_in_skip;
295 bytes_in -= bytes_in_skip;
/* Append input up to the free space left in the queue. */
300 guint bytes_in_copy = MIN (p->bytes_queue_max - p->bytes_queued, bytes_in);
301 memcpy (p->buf_queue + p->bytes_queued, map.data + offset, bytes_in_copy);
302 p->bytes_queued += bytes_in_copy;
303 offset += bytes_in_copy;
305 gst_buffer_unmap (buf_in, &map);
/* Total advance through the input buffer during this call. */
307 return offset - offset_unchanged;
/*
 * Recompute the stride, overlap and search sizes from the current
 * properties and stream format, (re)allocate the working buffers, rebuild
 * the blend and window tables, resize the queue, and finally clear the
 * reinit_buffers flag.
 */
311 reinit_buffers (GstScaletempo * scaletempo)
313 GstScaletempoPrivate *p = GST_SCALETEMPO_GET_PRIVATE (scaletempo);
315 guint frames_overlap;
/* Stride: milliseconds converted to frames at the current sample rate. */
318 guint frames_stride = p->ms_stride * p->sample_rate / 1000.0;
319 p->bytes_stride = frames_stride * p->bytes_per_frame;
/* Overlap is a fraction of the stride. */
322 frames_overlap = frames_stride * p->percent_overlap;
323 if (frames_overlap < 1) { /* if no overlap */
324 p->bytes_overlap = 0;
325 p->bytes_standing = p->bytes_stride;
326 p->samples_standing = p->bytes_standing / p->bytes_per_sample;
327 p->output_overlap = NULL;
/* Overlap enabled: remember the previous size so that only the newly
 * grown tail of buf_overlap is zeroed after the realloc. */
329 guint prev_overlap = p->bytes_overlap;
330 p->bytes_overlap = frames_overlap * p->bytes_per_frame;
331 p->samples_overlap = frames_overlap * p->samples_per_frame;
332 p->bytes_standing = p->bytes_stride - p->bytes_overlap;
333 p->samples_standing = p->bytes_standing / p->bytes_per_sample;
334 p->buf_overlap = g_realloc (p->buf_overlap, p->bytes_overlap);
335 p->table_blend = g_realloc (p->table_blend, p->samples_overlap * 4); /* sizeof (gint32|gfloat) */
336 if (p->bytes_overlap > prev_overlap) {
337 memset ((guint8 *) p->buf_overlap + prev_overlap, 0,
338 p->bytes_overlap - prev_overlap);
/* Integer blend table: the weight for frame i is
 * (i * 65535) / frames_overlap; the inner loop runs once per channel. */
341 gint32 *pb = p->table_blend;
343 for (i = 0; i < frames_overlap; i++) {
344 gint32 v = blend / frames_overlap;
345 for (j = 0; j < p->samples_per_frame; j++) {
348 blend += 65535; /* 2^16 - 1 */
350 p->output_overlap = output_overlap_s16;
/* Float blend table: same layout with gfloat weights. */
352 gfloat *pb = p->table_blend;
353 gfloat t = (gfloat) frames_overlap;
354 for (i = 0; i < frames_overlap; i++) {
356 for (j = 0; j < p->samples_per_frame; j++) {
360 p->output_overlap = output_overlap_float;
/* Search window in frames (milliseconds converted at the sample rate);
 * forced to 0 when the overlap is at most one frame. */
366 (frames_overlap <= 1) ? 0 : p->ms_search * p->sample_rate / 1000.0;
367 if (p->frames_search < 1) { /* if no search */
368 p->best_overlap_offset = NULL;
/* The correlation buffers are one frame shorter than the overlap;
 * buf_pre_corr additionally gets UNROLL_PADDING bytes at its end. */
370 guint bytes_pre_corr = (p->samples_overlap - p->samples_per_frame) * 4; /* sizeof (gint32|gfloat) */
372 g_realloc (p->buf_pre_corr, bytes_pre_corr + UNROLL_PADDING);
373 p->table_window = g_realloc (p->table_window, bytes_pre_corr);
/* Integer window: i * (t - i) scaled by n, then reduced by 15 bits. */
375 gint64 t = frames_overlap;
376 gint32 n = 8589934588LL / (t * t); /* 4 * (2^31 - 1) / t^2 */
/* Zero the padding so over-reads in the unrolled s16 search add nothing
 * to the correlation. */
379 memset ((guint8 *) p->buf_pre_corr + bytes_pre_corr, 0, UNROLL_PADDING);
380 pw = p->table_window;
381 for (i = 1; i < frames_overlap; i++) {
382 gint32 v = (i * (t - i) * n) >> 15;
383 for (j = 0; j < p->samples_per_frame; j++) {
387 p->best_overlap_offset = best_overlap_offset_s16;
/* Float window: i * (frames_overlap - i), unscaled. */
389 gfloat *pw = p->table_window;
390 for (i = 1; i < frames_overlap; i++) {
391 gfloat v = i * (frames_overlap - i);
392 for (j = 0; j < p->samples_per_frame; j++) {
396 p->best_overlap_offset = best_overlap_offset_float;
/* The queue must hold the search window plus one stride plus one overlap. */
401 (p->frames_search + frames_stride + frames_overlap) * p->bytes_per_frame;
/* If more is queued than the new size allows, either charge the whole
 * queue against the pending slide, or keep only the most recent bytes
 * that fit and move them to the front. */
402 if (p->bytes_queued > new_size) {
403 if (p->bytes_to_slide > p->bytes_queued) {
404 p->bytes_to_slide -= p->bytes_queued;
407 guint new_queued = MIN (p->bytes_queued - p->bytes_to_slide, new_size);
408 memmove (p->buf_queue,
409 p->buf_queue + p->bytes_queued - new_queued, new_queued);
410 p->bytes_to_slide = 0;
411 p->bytes_queued = new_queued;
414 p->bytes_queue_max = new_size;
415 p->buf_queue = g_realloc (p->buf_queue, p->bytes_queue_max);
/* Input consumed per output stride at the current scale. */
417 p->bytes_stride_scaled = p->bytes_stride * p->scale;
418 p->frames_stride_scaled = p->bytes_stride_scaled / p->bytes_per_frame;
421 ("%.3f scale, %.3f stride_in, %i stride_out, %i standing, %i overlap, %i search, %i queue, %s mode",
422 p->scale, p->frames_stride_scaled,
423 (gint) (p->bytes_stride / p->bytes_per_frame),
424 (gint) (p->bytes_standing / p->bytes_per_frame),
425 (gint) (p->bytes_overlap / p->bytes_per_frame), p->frames_search,
426 (gint) (p->bytes_queue_max / p->bytes_per_frame),
427 (p->use_int ? "s16" : "float"));
/* Buffers now match the configuration. */
429 p->reinit_buffers = FALSE;
433 /* GstBaseTransform vmethod implementations */
/*
 * Transform vmethod: feed inbuf into the internal queue and, each time the
 * queue is full, write one output stride to outbuf.  Afterwards outbuf is
 * resized to the bytes produced and its timestamp is rescaled.
 *
 * trans:  the base transform (a GstScaletempo)
 * inbuf:  input audio
 * outbuf: output buffer to fill
 */
435 gst_scaletempo_transform (GstBaseTransform * trans,
436 GstBuffer * inbuf, GstBuffer * outbuf)
438 GstScaletempo *scaletempo = GST_SCALETEMPO (trans);
439 GstScaletempoPrivate *p = GST_SCALETEMPO_GET_PRIVATE (scaletempo);
441 guint offset_in, bytes_out;
444 gst_buffer_map (outbuf, &omap, GST_MAP_WRITE);
445 pout = (gint8 *) omap.data;
/* offset_in tracks how much of inbuf has been consumed so far. */
446 offset_in = fill_queue (scaletempo, inbuf, 0);
/* One output stride per iteration, as long as the queue is full. */
448 while (p->bytes_queued >= p->bytes_queue_max) {
450 gdouble frames_to_slide;
451 guint frames_to_stride_whole;
/* With overlap enabled: optionally search for the best start offset,
 * then blend the saved overlap with the queue at that offset. */
454 if (p->output_overlap) {
455 if (p->best_overlap_offset) {
456 bytes_off = p->best_overlap_offset (scaletempo);
458 p->output_overlap (scaletempo, pout, bytes_off);
/* The standing part after the overlap is copied unchanged. */
460 memcpy (pout + p->bytes_overlap,
461 p->buf_queue + bytes_off + p->bytes_overlap, p->bytes_standing);
462 pout += p->bytes_stride;
463 bytes_out += p->bytes_stride;
/* Save the samples that follow this stride; they become the overlap
 * blended into the next one. */
466 memcpy (p->buf_overlap,
467 p->buf_queue + bytes_off + p->bytes_stride, p->bytes_overlap);
/* Slide by a whole number of frames and carry the fractional remainder
 * into the next iteration. */
468 frames_to_slide = p->frames_stride_scaled + p->frames_stride_error;
469 frames_to_stride_whole = (gint) frames_to_slide;
470 p->bytes_to_slide = frames_to_stride_whole * p->bytes_per_frame;
471 p->frames_stride_error = frames_to_slide - frames_to_stride_whole;
/* Apply the slide and top the queue up from the unread input. */
473 offset_in += fill_queue (scaletempo, inbuf, offset_in);
476 gst_buffer_unmap (outbuf, &omap);
478 gst_buffer_set_size (outbuf, bytes_out);
/* Rescale the timestamp relative to the segment start by the rate. */
479 GST_BUFFER_TIMESTAMP (outbuf) =
480 (GST_BUFFER_TIMESTAMP (outbuf) - p->segment_start) / p->scale +
482 //GST_BUFFER_DURATION (outbuf) = bytes_out * GST_SECOND / (p->bytes_per_frame * p->sample_rate);
/*
 * Transform-size vmethod: for the sink direction, compute in *othersize how
 * many output bytes an input of `size` bytes can produce, given what is
 * already queued and still pending to slide.
 */
487 gst_scaletempo_transform_size (GstBaseTransform * trans,
488 GstPadDirection direction,
489 GstCaps * caps, gsize size, GstCaps * othercaps, gsize * othersize)
491 if (direction == GST_PAD_SINK) {
492 GstScaletempo *scaletempo = GST_SCALETEMPO (trans);
493 GstScaletempoPrivate *priv = GST_SCALETEMPO_GET_PRIVATE (scaletempo);
/* Sizes below depend on up-to-date buffers, so apply any pending
 * reconfiguration first. */
496 if (priv->reinit_buffers)
497 reinit_buffers (scaletempo);
/* Bytes that will sit in the queue once this input has arrived and the
 * pending slide has been applied. */
499 bytes_to_out = size + priv->bytes_queued - priv->bytes_to_slide;
500 if (bytes_to_out < (gint) priv->bytes_queue_max) {
/* Otherwise: one stride, plus one more for every scaled stride of input
 * available beyond a full queue. */
503 /* while (total_buffered - stride_length * n >= queue_max) n++ */
504 *othersize = priv->bytes_stride * ((guint) (
505 (bytes_to_out - priv->bytes_queue_max +
506 /* rounding protection */ priv->bytes_per_frame)
507 / priv->bytes_stride_scaled) + 1);
/*
 * Sink-event vmethod.  On a SEGMENT event the segment rate becomes the new
 * scale, passthrough is updated, and for a non-unity scale a rewritten
 * segment is pushed downstream.  The visible tail chains up to the parent
 * class handler.
 */
516 gst_scaletempo_sink_event (GstBaseTransform * trans, GstEvent * event)
518 if (GST_EVENT_TYPE (event) == GST_EVENT_SEGMENT) {
519 GstScaletempo *scaletempo = GST_SCALETEMPO (trans);
520 GstScaletempoPrivate *priv = GST_SCALETEMPO_GET_PRIVATE (scaletempo);
523 gst_event_copy_segment (event, &segment);
/* React only when the rate actually changed. */
525 if (priv->scale != segment.rate) {
/* A rate of (almost exactly) 1.0 is special-cased when updating
 * passthrough. */
526 if (ABS (segment.rate - 1.0) < 1e-10) {
528 gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (scaletempo),
531 gst_base_transform_set_passthrough (GST_BASE_TRANSFORM (scaletempo),
/* Adopt the new rate and recompute the input consumed per stride. */
533 priv->scale = segment.rate;
534 priv->bytes_stride_scaled = priv->bytes_stride * priv->scale;
535 priv->frames_stride_scaled =
536 priv->bytes_stride_scaled / priv->bytes_per_frame;
537 GST_DEBUG ("%.3f scale, %.3f stride_in, %i stride_out", priv->scale,
538 priv->frames_stride_scaled,
539 (gint) (priv->bytes_stride / priv->bytes_per_frame));
/* Discard any slide still pending from the old rate. */
541 priv->bytes_to_slide = 0;
/* When scaling, record the segment start and mark the rate as applied
 * in the copy of the segment. */
545 if (priv->scale != 1.0) {
546 priv->segment_start = segment.start;
547 segment.applied_rate = priv->scale;
549 //gst_event_unref (event);
/* A bounded segment gets its stop position rescaled by the applied rate. */
551 if (segment.stop != -1) {
552 segment.stop = (segment.stop - segment.start) / segment.applied_rate +
/* Push the rewritten segment downstream on the source pad. */
556 event = gst_event_new_segment (&segment);
557 gst_pad_push_event (GST_BASE_TRANSFORM_SRC_PAD (trans), event);
/* Hand the event to the parent class implementation. */
561 return GST_BASE_TRANSFORM_CLASS (parent_class)->sink_event (trans, event);
/*
 * Set-caps vmethod: parse the input caps and, if the rate, channel count,
 * sample size or integer/float mode changed, store the new format and
 * request a buffer re-initialization.
 */
565 gst_scaletempo_set_caps (GstBaseTransform * trans,
566 GstCaps * incaps, GstCaps * outcaps)
568 GstScaletempo *scaletempo = GST_SCALETEMPO (trans);
569 GstScaletempoPrivate *priv = GST_SCALETEMPO_GET_PRIVATE (scaletempo);
571 gint width, bps, nch, rate;
/* Caps that cannot be parsed as audio info take the failure branch. */
575 if (!gst_audio_info_from_caps (&info, incaps))
578 nch = GST_AUDIO_INFO_CHANNELS (&info);
579 rate = GST_AUDIO_INFO_RATE (&info);
580 width = GST_AUDIO_INFO_WIDTH (&info);
581 use_int = GST_AUDIO_INFO_IS_INTEGER (&info);
585 GST_DEBUG ("caps: %" GST_PTR_FORMAT ", %d bps", incaps, bps);
/* Only a real format change triggers the buffer rebuild. */
587 if (rate != priv->sample_rate
588 || nch != priv->samples_per_frame
589 || bps != priv->bytes_per_sample || use_int != priv->use_int) {
590 priv->sample_rate = rate;
591 priv->samples_per_frame = nch;
592 priv->bytes_per_sample = bps;
593 priv->bytes_per_frame = nch * bps;
594 priv->use_int = use_int;
/* The rebuild itself is deferred until the flag is next checked. */
595 priv->reinit_buffers = TRUE;
602 /* GObject vmethod implementations */
/*
 * GObject get_property: copy the requested setting (scale, stride, overlap
 * or search) from the private struct into `value`; unknown ids produce the
 * standard invalid-property warning.
 */
604 gst_scaletempo_get_property (GObject * object,
605 guint prop_id, GValue * value, GParamSpec * pspec)
607 GstScaletempo *scaletempo = GST_SCALETEMPO (object);
608 GstScaletempoPrivate *priv = GST_SCALETEMPO_GET_PRIVATE (scaletempo);
/* Current playback scale. */
612 g_value_set_double (value, priv->scale);
/* Stride length in milliseconds. */
615 g_value_set_uint (value, priv->ms_stride);
/* Overlap as a fraction of the stride. */
618 g_value_set_double (value, priv->percent_overlap);
/* Search length in milliseconds. */
621 g_value_set_uint (value, priv->ms_search);
624 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
/*
 * GObject set_property: store a new stride, overlap or search value.  A
 * value that differs from the current one also sets the reinit_buffers
 * flag so the working buffers get rebuilt; unknown ids produce the
 * standard invalid-property warning.
 */
630 gst_scaletempo_set_property (GObject * object,
631 guint prop_id, const GValue * value, GParamSpec * pspec)
633 GstScaletempo *scaletempo = GST_SCALETEMPO (object);
634 GstScaletempoPrivate *priv = GST_SCALETEMPO_GET_PRIVATE (scaletempo);
/* Stride length (milliseconds). */
638 guint new_value = g_value_get_uint (value);
639 if (priv->ms_stride != new_value) {
640 priv->ms_stride = new_value;
641 priv->reinit_buffers = TRUE;
/* Overlap fraction. */
646 gdouble new_value = g_value_get_double (value);
647 if (priv->percent_overlap != new_value) {
648 priv->percent_overlap = new_value;
649 priv->reinit_buffers = TRUE;
/* Search length (milliseconds). */
654 guint new_value = g_value_get_uint (value);
655 if (priv->ms_search != new_value) {
656 priv->ms_search = new_value;
657 priv->reinit_buffers = TRUE;
662 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
/*
 * Class initializer: reserves the private struct, installs the property
 * accessors and the four properties, adds the pad templates and element
 * details, and hooks up the GstBaseTransform vmethods.
 */
668 gst_scaletempo_class_init (GstScaletempoClass * klass)
670 GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
671 GstElementClass *gstelement_class = GST_ELEMENT_CLASS (klass);
672 GstBaseTransformClass *basetransform_class = GST_BASE_TRANSFORM_CLASS (klass);
/* Per-instance private data, fetched via GST_SCALETEMPO_GET_PRIVATE. */
674 g_type_class_add_private (klass, sizeof (GstScaletempoPrivate));
676 gobject_class->get_property = GST_DEBUG_FUNCPTR (gst_scaletempo_get_property);
677 gobject_class->set_property = GST_DEBUG_FUNCPTR (gst_scaletempo_set_property);
/* "rate" is read-only; the value comes from incoming segment events. */
679 g_object_class_install_property (gobject_class, PROP_RATE,
680 g_param_spec_double ("rate", "Playback Rate", "Current playback rate",
681 G_MININT, G_MAXINT, 1.0, G_PARAM_READABLE | G_PARAM_STATIC_STRINGS));
/* "stride": 1..5000 ms, default 30. */
683 g_object_class_install_property (gobject_class, PROP_STRIDE,
684 g_param_spec_uint ("stride", "Stride Length",
685 "Length in milliseconds to output each stride", 1, 5000, 30,
686 G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
/* "overlap": fraction 0..1 of the stride, default 0.2. */
688 g_object_class_install_property (gobject_class, PROP_OVERLAP,
689 g_param_spec_double ("overlap", "Overlap Length",
690 "Percentage of stride to overlap", 0, 1, .2,
691 G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
/* "search": 0..500 ms, default 14. */
693 g_object_class_install_property (gobject_class, PROP_SEARCH,
694 g_param_spec_uint ("search", "Search Length",
695 "Length in milliseconds to search for best overlap position", 0, 500,
696 14, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
/* Pads and element metadata. */
698 gst_element_class_add_pad_template (gstelement_class,
699 gst_static_pad_template_get (&src_template));
700 gst_element_class_add_pad_template (gstelement_class,
701 gst_static_pad_template_get (&sink_template));
702 gst_element_class_set_details_simple (gstelement_class, "Scaletempo",
703 "Filter/Effect/Rate",
704 "Sync audio tempo with playback rate",
705 "Rov Juvano <rovjuvano@users.sourceforge.net>");
/* GstBaseTransform virtual methods implemented by this element. */
707 basetransform_class->sink_event =
708 GST_DEBUG_FUNCPTR (gst_scaletempo_sink_event);
709 basetransform_class->set_caps = GST_DEBUG_FUNCPTR (gst_scaletempo_set_caps);
710 basetransform_class->transform_size =
711 GST_DEBUG_FUNCPTR (gst_scaletempo_transform_size);
712 basetransform_class->transform = GST_DEBUG_FUNCPTR (gst_scaletempo_transform);
/*
 * Instance initializer: set the tunable settings to the same defaults the
 * properties advertise and zero the runtime state.
 */
716 gst_scaletempo_init (GstScaletempo * scaletempo)
718 GstScaletempoPrivate *priv = GST_SCALETEMPO_GET_PRIVATE (scaletempo);
/* Defaults matching the "stride", "overlap" and "search" property specs. */
720 priv->ms_stride = 30;
721 priv->percent_overlap = .2;
722 priv->ms_search = 14;
/* Runtime state starts empty; a sample rate of 0 differs from any real
 * rate, so the first set_caps comparison detects a format change. */
726 priv->sample_rate = 0;
727 priv->frames_stride_error = 0;
728 priv->bytes_stride = 0;
729 priv->bytes_queued = 0;
730 priv->bytes_to_slide = 0;
731 priv->segment_start = 0;