From 385e84596f7aebf3fb01f20a30877e5bbcc9dc89 Mon Sep 17 00:00:00 2001
From: David Schleef <ds@schleef.org>
Date: Tue, 6 Nov 2001 04:15:35 +0000
Subject: [PATCH] New audio resampling library, created from code in the
 audioscale plugin.

Original commit message from CVS:
New audio resampling library, created from code in the audioscale
plugin.
---
 libs/resample/README      |  62 +++++
 libs/resample/dtos.c      | 172 +++++++++++++
 libs/resample/functable.c | 311 +++++++++++++++++++++++
 libs/resample/resample.c  | 523 ++++++++++++++++++++++++++++++++++++++
 libs/resample/resample.h  | 145 +++++++++++
 libs/resample/test.c      | 280 ++++++++++++++++++++
 6 files changed, 1493 insertions(+)
 create mode 100644 libs/resample/README
 create mode 100644 libs/resample/dtos.c
 create mode 100644 libs/resample/functable.c
 create mode 100644 libs/resample/resample.c
 create mode 100644 libs/resample/resample.h
 create mode 100644 libs/resample/test.c

diff --git a/libs/resample/README b/libs/resample/README
new file mode 100644
index 0000000000..f7db11054d
--- /dev/null
+++ b/libs/resample/README
@@ -0,0 +1,62 @@
+
+This is a snapshot of my current work developing an audio
+resampling library.  While working on this library, I started
+writing lots of general purpose functions that should really
+be part of a larger library.  Rather than have a constantly
+changing library, and since the current code is capable, I
+decided to freeze this codebase for use with gstreamer, and
+move active development of the code elsewhere.
+
+The algorithm used is based on Shannon's theorem, which says
+that you can recreate an input signal from equidistant samples
+using a sin(x)/x filter; thus, you can create new samples from
+the regenerated input signal.  Since sin(x)/x is expensive to
+evaluate, an interpolated lookup table is used.  Also, a
+windowing function (1-x^2)^2 is used, which aids the convergence
+of sin(x)/x for lower frequencies at the expense of higher.
+
+There is one tunable parameter, which is the filter length.
+Longer filter lengths are obviously slower, but more accurate.
+There's not much reason to use a filter length longer than 64,
+since other approximations start to dominate.  Filter lengths
+as short as 8 are audially acceptable, but should not be
+considered for serious work.
+
+Performance:  A PowerPC G4 at 400 Mhz can resample 2 audio
+channels at almost 10x speed with a filter length of 64, without
+using Altivec extensions.  (My goal was 10x speed, which I almost
+reached.  Maybe later.)
+
+Limitations: Currently only supports streams in the form of
+interleaved signed 16-bit samples.
+
+The test.c program is a simple regression test.  It creates a
+test input pattern (1 sec at 48 khz) that is a frequency ramp
+from 0 to 24000 hz, and then converts it to 44100 hz using a
+filter length of 64.  It then compares the result to the same
+pattern generated at 44100 hz, and outputs the result to the
+file "out".
+
+A graph of the correct output should have field 2 and field 4
+almost equal (plus/minus 1) up to about sample 40000 (which
+corresponds to 20 khz), and then field 2 should be close to 0
+above that.  Running the test program will print to stdout
+something like the following:
+
+  time 0.112526
+  average error 10k=0.4105 22k=639.34
+
+The average error is RMS error over the range [0-10khz] and
+[0-22khz], and is expressed in sample values, for an input
+amplitude of 16000.  Note that RMS errors below 1.0 can't
+really be compared, but basically this shows that below
+10 khz, the resampler is nearly perfect.  Most of the error
+is concentrated above 20 khz.
+
+If the average error is significantly larger after modifying
+the code, it's probably not good.
+
+
+
+dave...
+
diff --git a/libs/resample/dtos.c b/libs/resample/dtos.c
new file mode 100644
index 0000000000..ce4a1c01c6
--- /dev/null
+++ b/libs/resample/dtos.c
@@ -0,0 +1,172 @@
+/* Resampling library
+ * Copyright (C) <2001> David A. Schleef <ds@schleef.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+
+#include <string.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+//#include <ml.h>
+#include <resample.h>
+
+
+
+#define short_to_double_table
+#define short_to_double_altivec
+#define short_to_double_unroll
+
+#ifdef short_to_double_table
+float ints_high[256];
+float ints_low[256];
+
+void conv_double_short_table(double *dest, short *src, int n)
+{
+	static int init = 0;
+	int i;
+	unsigned int idx;
+	if(!init){
+		for(i=0;i<256;i++){
+			ints_high[i]=256.0*((i<128)?i:i-256);
+			ints_low[i]=i;
+		}
+		init = 1;
+	}
+
+	if(n&1){
+		idx = (unsigned short)*src++;
+		*dest++ = ints_high[(idx>>8)] + ints_low[(idx&0xff)];
+		n-=1;
+	}
+	for(i=0;i<n;i+=2){
+		idx = (unsigned short)*src++;
+		*dest++ = ints_high[(idx>>8)] + ints_low[(idx&0xff)];
+		idx = (unsigned short)*src++;
+		*dest++ = ints_high[(idx>>8)] + ints_low[(idx&0xff)];
+	}
+}
+
+#endif
+
+#ifdef short_to_double_unroll
+void conv_double_short_unroll(double *dest, short *src, int n)
+{
+	if(n&1){
+		*dest++ = *src++;
+		n--;
+	}
+	if(n&2){
+		*dest++ = *src++;
+		*dest++ = *src++;
+		n-=2;
+	}
+	while(n>0){
+		*dest++ = *src++;
+		*dest++ = *src++;
+		*dest++ = *src++;
+		*dest++ = *src++;
+		n-=4;
+	}
+}
+#endif
+
+void conv_double_short_ref(double *dest, short *src, int n)
+{
+	int i;
+	for(i=0;i<n;i++){
+		dest[i]=src[i];
+	}
+}
+
+#ifdef short_to_double_altivec
+static union { int i[4]; float f[4]; } av_tmp __attribute__ ((__aligned__ (16)));
+
+void conv_double_short_altivec(double *dest, short *src, int n)
+{
+	int i;
+
+	for(i=0;i<n;i+=4){
+		av_tmp.i[0] = src[0];
+		av_tmp.i[1] = src[1];
+		av_tmp.i[2] = src[2];
+		av_tmp.i[3] = src[3];
+
+		asm(
+		"	lvx 0,0,%0\n"
+		"	vcfsx 1,0,0\n"
+		"	stvx 1,0,%0\n"
+		: : "r" (&av_tmp)
+		);
+
+		dest[0]=av_tmp.f[0];
+		dest[1]=av_tmp.f[1];
+		dest[2]=av_tmp.f[2];
+		dest[3]=av_tmp.f[3];
+		src += 4;
+		dest += 4;
+	}
+}
+#endif
+
+
+
+/* double to short */
+
+void conv_short_double_ref(short *dest, double *src, int n)
+{
+	int i;
+	double x;
+
+	for(i=0;i<n;i++){
+		x = *src++;
+		if(x<-32768.0)x=-32768.0;
+		if(x>32767.0)x=32767.0;
+		*dest++ = rint(x);
+	}
+}
+
+void conv_short_double_ppcasm(short *dest, double *src, int n)
+{
+	int tmp[2];
+	double min = -32768.0;
+	double max = 32767.0;
+	double ftmp0, ftmp1;
+
+	asm __volatile__(
+	"\taddic. %3,%3,-8\n"
+	"\taddic. %6,%6,-2\n"
+	"loop:\n"
+	"\tlfdu %0,8(%3)\n"
+	"\tfsub %1,%0,%4\n"
+	"\tfsel %0,%1,%0,%4\n"
+	"\tfsub %1,%0,%5\n"
+	"\tfsel %0,%1,%5,%0\n"
+	"\tfctiw %1,%0\n"
+	"\taddic. 5,5,-1\n"
+	"\tstfd %1,0(%2)\n"
+	"\tlhz 9,6(%2)\n"
+	"\tsthu 9,2(%6)\n"
+	"\tbne loop\n"
+	: "=&f" (ftmp0), "=&f" (ftmp1)
+	: "b" (tmp), "r" (src), "f" (min), "f" (max), "r" (dest)
+	: "r9", "r5" );
+
+}
+
+
diff --git a/libs/resample/functable.c b/libs/resample/functable.c
new file mode 100644
index 0000000000..d61efca498
--- /dev/null
+++ b/libs/resample/functable.c
@@ -0,0 +1,311 @@
+/* Resampling library
+ * Copyright (C) <2001> David A. Schleef <ds@schleef.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+
+#include <string.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <resample.h>
+
+
+
+double functable_sinc(void *p,double x)
+{
+	if(x==0)return 1;
+	return sin(x)/x;
+}
+
+double functable_dsinc(void *p,double x)
+{
+	if(x==0)return 0;
+	return cos(x)/x - sin(x)/(x*x);
+}
+
+double functable_window_boxcar(void *p,double x)
+{
+	if(x<-1 || x>1)return 0;
+	return 1;
+}
+
+double functable_window_dboxcar(void *p,double x)
+{
+	return 0;
+}
+
+double functable_window_std(void *p,double x)
+{
+	if(x<-1 || x>1)return 0;
+	return (1-x*x)*(1-x*x);
+}
+
+double functable_window_dstd(void *p,double x)
+{
+	if(x<-1 || x>1)return 0;
+	return -4*x*(1-x*x);
+}
+
+
+
+void functable_init(functable_t *t)
+{
+	int i;
+	double x;
+
+	t->fx = malloc(sizeof(double)*(t->len+1));
+	t->fdx = malloc(sizeof(double)*(t->len+1));
+
+	t->invoffset = 1.0 / t->offset;
+
+	for(i=0;i<t->len+1;i++){
+		x = t->start + t->offset * i;
+		x *= t->scale;
+
+		t->fx[i] = t->func_x(t->priv,x);
+		t->fdx[i] = t->scale * t->func_dx(t->priv,x);
+	}
+	if(t->func2_x){
+		double f1x,f1dx;
+		double f2x,f2dx;
+
+		for(i=0;i<t->len+1;i++){
+			x = t->start + t->offset * i;
+			x *= t->scale2;
+
+			f2x = t->func2_x(t->priv,x);
+			f2dx = t->scale2 * t->func2_dx(t->priv,x);
+
+			f1x = t->fx[i];
+			f1dx = t->fdx[i];
+
+			t->fx[i] = f1x * f2x;
+			t->fdx[i] = f1x * f2dx + f1dx * f2x;
+		}
+	}
+}
+
+double functable_eval(functable_t *t,double x)
+{
+	int i;
+	double f0, f1, w0, w1;
+	double x2, x3;
+	double w;
+
+	if(x<t->start || x>(t->start+(t->len+1)*t->offset)){
+		printf("x out of range %g\n",x);
+	}
+	x -= t->start;
+	x /= t->offset;
+	i = floor(x);
+	x -= i;
+
+	x2 = x * x;
+	x3 = x2 * x;
+
+	f1 = 3 * x2 - 2 * x3;
+	f0 = 1 - f1;
+	w0 = (x - 2 * x2 + x3) * t->offset;
+	w1 = (-x2 + x3) * t->offset;
+
+	//printf("i=%d x=%g f0=%g f1=%g w0=%g w1=%g\n",i,x,f0,f1,w0,w1);
+
+	w = t->fx[i] * f0 + t->fx[i + 1] * f1 +
+		t->fdx[i] * w0 + t->fdx[i + 1] * w1;
+
+	//w = t->fx[i] * (1-x) + t->fx[i+1] * x;
+
+	return w;
+}
+
+
+double functable_fir(functable_t *t, double x, int n, double *data, int len)
+{
+	int i,j;
+	double f0, f1, w0, w1;
+	double x2, x3;
+	double w;
+	double sum;
+
+	x -= t->start;
+	x /= t->offset;
+	i = floor(x);
+	x -= i;
+
+	x2 = x * x;
+	x3 = x2 * x;
+
+	f1 = 3 * x2 - 2 * x3;
+	f0 = 1 - f1;
+	w0 = (x - 2 * x2 + x3) * t->offset;
+	w1 = (-x2 + x3) * t->offset;
+
+	sum = 0;
+	for(j=0;j<len;j++){
+		w = t->fx[i] * f0 + t->fx[i + 1] * f1 +
+			t->fdx[i] * w0 + t->fdx[i + 1] * w1;
+		sum += data[j*2] * w;
+		i += n;
+	}
+
+	return sum;
+}
+
+void functable_fir2(functable_t *t, double *r0, double *r1, double x,
+	int n, double *data, int len)
+{
+	int i,j;
+	double f0, f1, w0, w1;
+	double x2, x3;
+	double w;
+	double sum0, sum1;
+	double floor_x;
+
+	x -= t->start;
+	x *= t->invoffset;
+	floor_x = floor(x);
+	i = floor_x;
+	x -= floor_x;
+
+	x2 = x * x;
+	x3 = x2 * x;
+
+	f1 = 3 * x2 - 2 * x3;
+	f0 = 1 - f1;
+	w0 = (x - 2 * x2 + x3) * t->offset;
+	w1 = (-x2 + x3) * t->offset;
+
+	sum0 = 0;
+	sum1 = 0;
+	for(j=0;j<len;j++){
+		w = t->fx[i] * f0 + t->fx[i + 1] * f1 +
+			t->fdx[i] * w0 + t->fdx[i + 1] * w1;
+		sum0 += data[j*2] * w;
+		sum1 += data[j*2+1] * w;
+		i += n;
+
+#define unroll2
+#define unroll3
+#define unroll4
+#ifdef unroll2
+		j++;
+
+		w = t->fx[i] * f0 + t->fx[i + 1] * f1 +
+			t->fdx[i] * w0 + t->fdx[i + 1] * w1;
+		sum0 += data[j*2] * w;
+		sum1 += data[j*2+1] * w;
+		i += n;
+#endif
+#ifdef unroll3
+		j++;
+
+		w = t->fx[i] * f0 + t->fx[i + 1] * f1 +
+			t->fdx[i] * w0 + t->fdx[i + 1] * w1;
+		sum0 += data[j*2] * w;
+		sum1 += data[j*2+1] * w;
+		i += n;
+#endif
+#ifdef unroll4
+		j++;
+
+		w = t->fx[i] * f0 + t->fx[i + 1] * f1 +
+			t->fdx[i] * w0 + t->fdx[i + 1] * w1;
+		sum0 += data[j*2] * w;
+		sum1 += data[j*2+1] * w;
+		i += n;
+#endif
+	}
+
+	*r0 = sum0;
+	*r1 = sum1;
+}
+
+
+
+#ifdef unused
+void functable_fir2_altivec(functable_t *t, float *r0, float *r1,
+	double x, int n, float *data, int len)
+{
+	int i,j;
+	double f0, f1, w0, w1;
+	double x2, x3;
+	double w;
+	double sum0, sum1;
+	double floor_x;
+
+	x -= t->start;
+	x *= t->invoffset;
+	floor_x = floor(x);
+	i = floor_x;
+	x -= floor_x;
+
+	x2 = x * x;
+	x3 = x2 * x;
+
+	f1 = 3 * x2 - 2 * x3;
+	f0 = 1 - f1;
+	w0 = (x - 2 * x2 + x3) * t->offset;
+	w1 = (-x2 + x3) * t->offset;
+
+	sum0 = 0;
+	sum1 = 0;
+	for(j=0;j<len;j++){
+		// t->fx, t->fdx needs to be multiplexed by n
+		// we need 5 consecutive floats, which fit into 2 vecs
+		// load v0, t->fx[i]
+		// load v1, t->fx[i+n]
+		// v2 = v0 (not correct)
+		// v3 = (v0>>32) || (v1<<3*32) (not correct)
+		//
+		// load v4, t->dfx[i]
+		// load v5, t->dfx[i+n]
+		// v6 = v4 (not correct)
+		// v7 = (v4>>32) || (v5<<3*32) (not correct)
+		// 
+		// v8 = splat(f0)
+		// v9 = splat(f1)
+		// v10 = splat(w0)
+		// v11 = splat(w1)
+		//
+		// v12 = v2 * v8
+		// v12 += v3 * v9
+		// v12 += v6 * v10
+		// v12 += v7 * v11
+		
+		w = t->fx[i] * f0 + t->fx[i + 1] * f1 +
+			t->fdx[i] * w0 + t->fdx[i + 1] * w1;
+		
+		// v13 = data[j*2]
+		// v14 = data[j*2+4]
+		// v15 = deinterlace_high(v13,v14)
+		// v16 = deinterlace_low(v13,v14)
+		// (sum0) v17 += multsum(v13,v15)
+		// (sum1) v18 += multsum(v14,v16)
+		
+		sum0 += data[j*2] * w;
+		sum1 += data[j*2+1] * w;
+		i += n;
+
+	}
+
+	*r0 = sum0;
+	*r1 = sum1;
+}
+#endif
+
diff --git a/libs/resample/resample.c b/libs/resample/resample.c
new file mode 100644
index 0000000000..019858bb21
--- /dev/null
+++ b/libs/resample/resample.c
@@ -0,0 +1,523 @@
+/* Resampling library
+ * Copyright (C) <2001> David A. Schleef <ds@schleef.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+
+#include <string.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <resample.h>
+
+inline double sinc(double x)
+{
+	if(x==0)return 1;
+	return sin(x) / x;
+}
+
+inline double window_func(double x)
+{
+	x = 1 - x*x;
+	return x*x;
+}
+
+signed short double_to_s16(double x)
+{
+	if(x<-32768){
+		printf("clipped\n");
+		return -32768;
+	}
+	if(x>32767){
+		printf("clipped\n");
+		return -32767;
+	}
+	return rint(x);
+}
+
+signed short double_to_s16_ppcasm(double x)
+{
+	if(x<-32768){
+		return -32768;
+	}
+	if(x>32767){
+		return -32767;
+	}
+	return rint(x);
+}
+
+static void resample_sinc_ft(resample_t * r);
+
+void resample_init(resample_t * r)
+{
+	r->i_start = 0;
+	if(r->filter_length&1){
+		r->o_start = 0;
+	}else{
+		r->o_start = r->o_inc * 0.5;
+	}
+
+	memset(r->acc, 0, sizeof(r->acc));
+
+	resample_reinit(r);
+}
+
+void resample_reinit(resample_t * r)
+{
+	/* i_inc is the number of samples that the output increments for
+	 * each input sample.  o_inc is the opposite. */
+	r->i_inc = (double) r->o_rate / r->i_rate;
+	r->o_inc = (double) r->i_rate / r->o_rate;
+
+	r->halftaps = (r->filter_length - 1.0) * 0.5;
+
+	switch (r->method) {
+	default:
+	case RESAMPLE_NEAREST:
+		r->scale = resample_nearest;
+		break;
+	case RESAMPLE_BILINEAR:
+		r->scale = resample_bilinear;
+		break;
+	case RESAMPLE_SINC_SLOW:
+		r->scale = resample_sinc;
+		break;
+	case RESAMPLE_SINC:
+		r->scale = resample_sinc_ft;
+		break;
+	}
+}
+
+/*
+ * Prepare to be confused.
+ *
+ * We keep a "timebase" that is based on output samples.  The zero
+ * of the timebase cooresponds to the next output sample that will
+ * be written.
+ *
+ * i_start is the "time" that corresponds to the first input sample
+ * in an incoming buffer.  Since the output depends on input samples
+ * ahead in time, i_start will tend to be around halftaps.
+ *
+ * i_start_buf is the time of the first sample in the temporary
+ * buffer.
+ */
+void resample_scale(resample_t * r, void *i_buf, unsigned int i_size)
+{
+	int o_size;
+
+	r->i_buf = i_buf;
+
+	r->i_samples = i_size / 2 / r->channels;
+
+	r->i_start_buf = r->i_start - r->filter_length * r->i_inc;
+
+	/* i_start is the offset (in a given output sample) that is the
+	 * beginning of the current input buffer */
+	r->i_end = r->i_start + r->i_inc * r->i_samples;
+
+	r->o_samples = floor(r->i_end - r->halftaps * r->i_inc);
+
+	o_size = r->o_samples * r->channels * 2;
+	r->o_buf = r->get_buffer(r->priv, o_size);
+
+	if(r->verbose){
+		printf("resample_scale: i_buf=%p i_size=%d\n",
+			i_buf,i_size);
+		printf("resample_scale: i_samples=%d o_samples=%d i_inc=%g o_buf=%p\n",
+			r->i_samples, r->o_samples, r->i_inc, r->o_buf);
+		printf("resample_scale: i_start=%g i_end=%g o_start=%g\n",
+			r->i_start, r->i_end, r->o_start);
+	}
+
+	if ((r->filter_length + r->i_samples)*2*r->channels > r->buffer_len) {
+		//int size = taps * 2 * r->channels;
+		int size = (r->filter_length + r->i_samples) * sizeof(double) * r->channels;
+
+		if(r->verbose){
+			printf("resample temp buffer size=%d\n",size);
+		}
+		if(r->buffer)free(r->buffer);
+		r->buffer_len = size;
+		r->buffer = malloc(size);
+		memset(r->buffer, 0, size);
+	}
+
+	conv_double_short(
+		r->buffer + r->filter_length * sizeof(double) * r->channels,
+		r->i_buf, r->i_samples * r->channels);
+
+	r->scale(r);
+
+	memcpy(r->buffer,
+		r->buffer + r->i_samples * sizeof(double) * r->channels,
+		r->filter_length * sizeof(double) * r->channels);
+
+	/* updating times */
+	r->i_start += r->i_samples * r->i_inc;
+	r->o_start += r->o_samples * r->o_inc - r->i_samples;
+	
+	/* adjusting timebase zero */
+	r->i_start -= r->o_samples;
+}
+
+void resample_nearest(resample_t * r)
+{
+	signed short *i_ptr, *o_ptr;
+	int i_count = 0;
+	double a;
+	int i;
+
+	i_ptr = (signed short *) r->i_buf;
+	o_ptr = (signed short *) r->o_buf;
+
+	a = r->o_start;
+	i_count = 0;
+#define SCALE_LOOP(COPY,INC) \
+	for (i = 0; i < r->o_samples; i++) {	\
+		COPY;							\
+		a += r->o_inc;						\
+		while (a >= 1) {				\
+			a -= 1;						\
+			i_ptr+=INC;					\
+			i_count++;					\
+		}								\
+		o_ptr+=INC;						\
+   	}
+
+	switch (r->channels) {
+	case 1:
+		SCALE_LOOP(o_ptr[0] = i_ptr[0], 1);
+		break;
+	case 2:
+		SCALE_LOOP(o_ptr[0] = i_ptr[0];
+			   o_ptr[1] = i_ptr[1], 2);
+		break;
+	default:
+	{
+		int n, n_chan = r->channels;
+
+		SCALE_LOOP(for (n = 0; n < n_chan; n++) o_ptr[n] =
+			   i_ptr[n], n_chan);
+	}
+	}
+	if (i_count != r->i_samples) {
+		printf("handled %d in samples (expected %d)\n", i_count,
+		       r->i_samples);
+	}
+}
+
+void resample_bilinear(resample_t * r)
+{
+	signed short *i_ptr, *o_ptr;
+	int o_count = 0;
+	double b;
+	int i;
+	double acc0, acc1;
+
+	i_ptr = (signed short *) r->i_buf;
+	o_ptr = (signed short *) r->o_buf;
+
+	acc0 = r->acc[0];
+	acc1 = r->acc[1];
+	b = r->i_start;
+	for (i = 0; i < r->i_samples; i++) {
+		b += r->i_inc;
+		//printf("in %d\n",i_ptr[0]);
+		if(b>=2){
+			printf("not expecting b>=2\n");
+		}
+		if (b >= 1) {
+			acc0 += (1.0 - (b-r->i_inc)) * i_ptr[0];
+			acc1 += (1.0 - (b-r->i_inc)) * i_ptr[1];
+
+			o_ptr[0] = rint(acc0);
+			//printf("out %d\n",o_ptr[0]);
+			o_ptr[1] = rint(acc1);
+			o_ptr += 2;
+			o_count++;
+
+			b -= 1.0;
+
+			acc0 = b * i_ptr[0];
+			acc1 = b * i_ptr[1];
+		} else {
+			acc0 += i_ptr[0] * r->i_inc;
+			acc1 += i_ptr[1] * r->i_inc;
+		}
+		i_ptr += 2;
+	}
+	r->acc[0] = acc0;
+	r->acc[1] = acc1;
+
+	if (o_count != r->o_samples) {
+		printf("handled %d out samples (expected %d)\n", o_count,
+		       r->o_samples);
+	}
+}
+
+void resample_sinc_slow(resample_t * r)
+{
+	signed short *i_ptr, *o_ptr;
+	int i, j;
+	double c0, c1;
+	double a;
+	int start;
+	double center;
+	double weight;
+
+	if (!r->buffer) {
+		int size = r->filter_length * 2 * r->channels;
+
+		printf("resample temp buffer\n");
+		r->buffer = malloc(size);
+		memset(r->buffer, 0, size);
+	}
+
+	i_ptr = (signed short *) r->i_buf;
+	o_ptr = (signed short *) r->o_buf;
+
+	a = r->i_start;
+#define GETBUF(index,chan) (((index)<0) \
+			? ((short *)(r->buffer))[((index)+r->filter_length)*2+(chan)] \
+			: i_ptr[(index)*2+(chan)])
+	{
+		double sinx, cosx, sind, cosd;
+		double x, d;
+		double t;
+
+		for (i = 0; i < r->o_samples; i++) {
+			start = floor(a) - r->filter_length;
+			center = a - r->halftaps;
+			x = M_PI * (start - center) * r->o_inc;
+			sinx = sin(M_PI * (start - center) * r->o_inc);
+			cosx = cos(M_PI * (start - center) * r->o_inc);
+			d = M_PI * r->o_inc;
+			sind = sin(M_PI * r->o_inc);
+			cosd = cos(M_PI * r->o_inc);
+			c0 = 0;
+			c1 = 0;
+			for (j = 0; j < r->filter_length; j++) {
+				weight = (x==0)?1:(sinx/x);
+//printf("j %d sin %g cos %g\n",j,sinx,cosx);
+//printf("j %d sin %g x %g sinc %g\n",j,sinx,x,weight);
+				c0 += weight * GETBUF((start + j), 0);
+				c1 += weight * GETBUF((start + j), 1);
+				t = cosx * cosd - sinx * sind;
+				sinx = cosx * sind + sinx * cosd;
+				cosx = t;
+				x += d;
+			}
+			o_ptr[0] = rint(c0);
+			o_ptr[1] = rint(c1);
+			o_ptr += 2;
+			a += r->o_inc;
+		}
+	}
+
+	memcpy(r->buffer,
+	       i_ptr + (r->i_samples - r->filter_length) * r->channels,
+	       r->filter_length * 2 * r->channels);
+}
+
+void resample_sinc(resample_t * r)
+{
+	double *ptr;
+	signed short *o_ptr;
+	int i, j;
+	double c0, c1;
+	double a;
+	int start;
+	double center;
+	double weight;
+	double x0, x, d;
+	double scale;
+
+	ptr = (double *) r->buffer;
+	o_ptr = (signed short *) r->o_buf;
+
+	/* scale provides a cutoff frequency for the low
+	 * pass filter aspects of sinc().  scale=M_PI
+	 * will cut off at the input frequency, which is
+	 * good for up-sampling, but will cause aliasing
+	 * for downsampling.  Downsampling needs to be
+	 * cut off at o_rate, thus scale=M_PI*r->i_inc. */
+	/* actually, it needs to be M_PI*r->i_inc*r->i_inc.
+	 * Need to research why. */
+	scale = M_PI*r->i_inc;
+	for (i = 0; i < r->o_samples; i++) {
+		a = r->o_start + i * r->o_inc;
+		start = floor(a - r->halftaps);
+//printf("%d: a=%g start=%d end=%d\n",i,a,start,start+r->filter_length-1);
+		center = a;
+		//x = M_PI * (start - center) * r->o_inc;
+		//d = M_PI * r->o_inc;
+		//x = (start - center) * r->o_inc;
+		x0 = (start - center) * r->o_inc;
+		d = r->o_inc;
+		c0 = 0;
+		c1 = 0;
+		for (j = 0; j < r->filter_length; j++) {
+			x = x0 + d * j;
+			weight = sinc(x*scale*r->i_inc)*scale/M_PI;
+			weight *= window_func(x/r->halftaps*r->i_inc);
+			c0 += weight * ptr[(start + j + r->filter_length)*2 + 0];
+			c1 += weight * ptr[(start + j + r->filter_length)*2 + 1];
+		}
+		o_ptr[0] = double_to_s16(c0);
+		o_ptr[1] = double_to_s16(c1);
+		o_ptr += 2;
+	}
+}
+
+
+
+
+/*
+ * Resampling audio is best done using a sinc() filter.
+ *
+ *
+ *  out[t] = Sum( in[t'] * sinc((t-t')/delta_t), all t')
+ *
+ * The immediate problem with this algorithm is that it involves a
+ * sum over an infinite number of input samples, both in the past
+ * and future.  Note that even though sinc(x) is bounded by 1/x,
+ * and thus decays to 0 for large x, since sum(x,{x=0,1..,n}) diverges
+ * as log(n), we need to be careful about convergence.  This is
+ * typically done by using a windowing function, which also makes
+ * the sum over a finite number of input samples.
+ *
+ * The next problem is computational:  sinc(), and especially
+ * sinc() multiplied by a non-trivial windowing function is expensive
+ * to calculate, and also difficult to find SIMD optimizations.  Since
+ * the time increment on input and output is different, it is not
+ * possible to use a FIR filter, because the taps would have to be
+ * recalculated for every t.
+ *
+ * To get around the expense of calculating sinc() for every point,
+ * we pre-calculate sinc() at a number of points, and then interpolate
+ * for the values we want in calculations.  The interpolation method
+ * chosen is bi-cubic, which requires both the evalated function and
+ * its derivative at every pre-sampled point.  Also, if the sampled
+ * points are spaced commensurate with the input delta_t, we notice
+ * that the interpolating weights are the same for every input point.
+ * This decreases the number of operations to 4 multiplies and 4 adds
+ * for each tap, regardless of the complexity of the filtering function.
+ * 
+ * At this point, it is possible to rearrange the problem as the sum
+ * of 4 properly weghted FIR filters.  Typical SIMD computation units
+ * are highly optimized for FIR filters, making long filter lengths
+ * reasonable.
+ */
+
+static functable_t *ft;
+
+double out_tmp[10000];
+
+static void resample_sinc_ft(resample_t * r)
+{
+	double *ptr;
+	signed short *o_ptr;
+	int i;
+	//int j;
+	double c0, c1;
+	//double a;
+	double start_f, start_x;
+	int start;
+	double center;
+	//double weight;
+	double x, d;
+	double scale;
+	int n = 4;
+
+	scale = r->i_inc;	// cutoff at 22050
+	//scale = 1.0;		// cutoff at 24000
+	//scale = r->i_inc * 0.5;	// cutoff at 11025
+
+	if(!ft){
+		ft = malloc(sizeof(*ft));
+		memset(ft,0,sizeof(*ft));
+
+		ft->len = (r->filter_length + 2) * n;
+		ft->offset = 1.0 / n;
+		ft->start = - ft->len * 0.5 * ft->offset;
+
+		ft->func_x = functable_sinc;
+		ft->func_dx = functable_dsinc;
+		ft->scale = M_PI * scale;
+
+		ft->func2_x = functable_window_std;
+		ft->func2_dx = functable_window_dstd;
+		ft->scale2 = 1.0 / r->halftaps;
+	
+		functable_init(ft);
+
+		//printf("len=%d offset=%g start=%g\n",ft->len,ft->offset,ft->start);
+	}
+
+	ptr = r->buffer;
+	o_ptr = (signed short *) r->o_buf;
+
+	center = r->o_start;
+	start_x = center - r->halftaps;
+	start_f = floor(start_x);
+	start_x -= start_f;
+	start = start_f;
+	for (i = 0; i < r->o_samples; i++) {
+		//start_f = floor(center - r->halftaps);
+//printf("%d: a=%g start=%d end=%d\n",i,a,start,start+r->filter_length-1);
+		x = start_f - center;
+		d = 1;
+		c0 = 0;
+		c1 = 0;
+//#define slow
+#ifdef slow
+		for (j = 0; j < r->filter_length; j++) {
+			weight = functable_eval(ft,x)*scale;
+			//weight = sinc(M_PI * scale * x)*scale*r->i_inc;
+			//weight *= window_func(x / r->halftaps);
+			c0 += weight * ptr[(start + j + r->filter_length)*2 + 0];
+			c1 += weight * ptr[(start + j + r->filter_length)*2 + 1];
+			x += d;
+		}
+#else
+		functable_fir2(ft,
+			&c0,&c1,
+			x, n,
+			ptr+(start + r->filter_length)*2,
+			r->filter_length);
+		c0 *= scale;
+		c1 *= scale;
+#endif
+
+		out_tmp[2 * i + 0] = c0;
+		out_tmp[2 * i + 1] = c1;
+		center += r->o_inc;
+		start_x += r->o_inc;
+		while(start_x>=1.0){
+			start_f++;
+			start_x -= 1.0;
+			start++;
+		}
+	}
+
+	conv_short_double(r->o_buf,out_tmp,2 * r->o_samples);
+	//o_ptr[0] = double_to_s16(c0);
+	//o_ptr[1] = double_to_s16(c1);
+}
+
diff --git a/libs/resample/resample.h b/libs/resample/resample.h
new file mode 100644
index 0000000000..e5347f0868
--- /dev/null
+++ b/libs/resample/resample.h
@@ -0,0 +1,145 @@
+/* Resampling library
+ * Copyright (C) <2001> David Schleef <ds@schleef.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+
+#ifndef __RESAMPLE_H__
+#define __RESAMPLE_H__
+
+typedef struct resample_s resample_t;
+
+struct resample_s {
+	/* parameters */
+
+	int method;
+	int channels;
+	int verbose;
+
+	int filter_length;
+
+	double i_rate;
+	double o_rate;
+
+	void *priv;
+
+	void *(*get_buffer)(void *priv, unsigned int size);
+
+	/* internal parameters */
+
+	double halftaps;
+
+	/* filter state */
+
+	void *buffer;
+	int buffer_len;
+
+	double i_start;
+	double o_start;
+
+	double i_start_buf;
+	double i_end_buf;
+
+	double i_inc;
+	double o_inc;
+
+	double i_end;
+	double o_end;
+
+	int i_samples;
+	int o_samples;
+
+	void *i_buf, *o_buf;
+
+	double acc[10];
+
+	/* methods */
+	void (*scale)(resample_t *r);
+
+	double ack;
+};
+
+enum{
+	RESAMPLE_NEAREST = 0,
+	RESAMPLE_BILINEAR,
+	RESAMPLE_SINC_SLOW,
+	RESAMPLE_SINC,
+};
+
+void resample_init(resample_t *r);
+void resample_reinit(resample_t *r);
+
+void resample_scale(resample_t *r, void *i_buf, unsigned int size);
+
+void resample_nearest(resample_t *r);
+void resample_bilinear(resample_t *r);
+void resample_sinc(resample_t *r);
+void resample_sinc_slow(resample_t *r);
+
+
+typedef struct functable_s functable_t;
+struct functable_s {
+	double start;
+	double offset;
+	int len;
+
+	double invoffset;
+
+	double scale;
+	double scale2;
+
+	double (*func_x)(void *,double x);
+	double (*func_dx)(void *,double x);
+
+	double (*func2_x)(void *,double x);
+	double (*func2_dx)(void *,double x);
+
+	double *fx;
+	double *fdx;
+
+	void *priv;
+};
+
+void functable_init(functable_t *t);
+double functable_eval(functable_t *t,double x);
+
+double functable_fir(functable_t *t,double x0,int n,double *data,int len);
+void functable_fir2(functable_t *t,double *r0, double *r1, double x0,
+	int n,double *data,int len);
+
+double functable_sinc(void *p, double x);
+double functable_dsinc(void *p, double x);
+double functable_window_std(void *p, double x);
+double functable_window_dstd(void *p, double x);
+double functable_window_boxcar(void *p, double x);
+double functable_window_dboxcar(void *p, double x);
+
+/* math lib stuff */
+
+void conv_double_short_table(double *dest, short *src, int n);
+void conv_double_short_unroll(double *dest, short *src, int n);
+void conv_double_short_ref(double *dest, short *src, int n);
+void conv_double_short_altivec(double *dest, short *src, int n);
+
+void conv_short_double_ref(short *dest, double *src, int n);
+void conv_short_double_ppcasm(short *dest, double *src, int n);
+
+#define conv_double_short conv_double_short_table
+#define conv_short_double conv_short_double_ppcasm
+
+#endif /* __RESAMPLE_H__ */
+
diff --git a/libs/resample/test.c b/libs/resample/test.c
new file mode 100644
index 0000000000..b6f8d7b7b2
--- /dev/null
+++ b/libs/resample/test.c
@@ -0,0 +1,280 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <sys/time.h>
+
+#include <resample.h>
+
+#define AMP 16000
+#define I_RATE 48000
+#define O_RATE 44100
+//#define O_RATE 24000
+
+//#define test_func(x) 1
+//#define test_func(x) sin(2*M_PI*(x)*10)
+//#define test_func(x) sin(2*M_PI*(x)*(x)*1000)
+#define test_func(x) sin(2*M_PI*(x)*(x)*12000)
+
+short i_buf[I_RATE*2*2];
+short o_buf[O_RATE*2*2];
+
+static int i_offset;
+static int o_offset;
+
+FILE *out;
+
+void test_res1(void);
+void test_res2(void);
+void test_res3(void);
+void test_res4(void);
+void test_res5(void);
+void test_res6(void);
+
+int main(int argc,char *argv[])
+{
+	out = fopen("out","w");
+
+	test_res1();
+
+	return 0;
+}
+
+void *get_buffer(void *priv, unsigned int size)
+{
+	void *ret;
+	ret = ((void *)o_buf) + o_offset;
+	o_offset += size;
+	return ret;
+}
+
+struct timeval start_time;
+void start_timer(void)
+{
+	gettimeofday(&start_time,NULL);
+	//printf("start %ld.%06ld\n",start_time.tv_sec,start_time.tv_usec);
+}
+
+void end_timer(void)
+{
+	struct timeval end_time;
+	double diff;
+
+	gettimeofday(&end_time,NULL);
+	//printf("end %ld.%06ld\n",end_time.tv_sec,end_time.tv_usec);
+	diff = (end_time.tv_sec - start_time.tv_sec) +
+		1e-6*(end_time.tv_usec - start_time.tv_usec);
+
+	printf("time %g\n",diff);
+
+}
+
+void test_res1(void)
+{
+	resample_t *r;
+	int i;
+	double sum10k,sum22k;
+	double f;
+	int n10k,n22k;
+	double x;
+
+	for(i=0;i<I_RATE;i++){
+		i_buf[i*2+0] = rint(AMP * test_func((double)i/I_RATE));
+		//i_buf[i*2+1] = rint(AMP * test_func((double)i/I_RATE));
+		i_buf[i*2+1] = (i<1000)?AMP:0;
+	}
+
+	r = malloc(sizeof(resample_t));
+	memset(r,0,sizeof(resample_t));
+
+	r->i_rate = I_RATE;
+	r->o_rate = O_RATE;
+	//r->method = RESAMPLE_SINC_SLOW;
+	r->method = RESAMPLE_SINC;
+	r->channels = 2;
+	//r->verbose = 1;
+	r->filter_length = 64;
+	r->get_buffer = get_buffer;
+
+	resample_init(r);
+
+	start_timer();
+#define blocked
+#ifdef blocked
+	for(i=0;i+256<I_RATE;i+=256){
+		resample_scale(r,i_buf+i*2,256*2*2);
+	}
+	if(I_RATE-i){
+		resample_scale(r,i_buf+i*2,(I_RATE-i)*2*2);
+	}
+#else
+	resample_scale(r,i_buf,I_RATE*2*2);
+#endif
+	end_timer();
+
+	for(i=0;i<O_RATE;i++){
+		f = AMP*test_func((double)i/O_RATE);
+		//f = rint(AMP*test_func((double)i/O_RATE));
+		fprintf(out,"%d %d %d %g %g\n",i,
+			o_buf[2*i+0],o_buf[2*i+1],
+			f,o_buf[2*i+0]-f);
+	}
+
+	sum10k=0;
+	sum22k=0;
+	n10k=0;
+	n22k=0;
+	for(i=0;i<O_RATE;i++){
+		f = AMP*test_func((double)i/O_RATE);
+		//f = rint(AMP*test_func((double)i/O_RATE));
+		x = o_buf[2*i+0]-f;
+		if(((0.5*i)/O_RATE*I_RATE)<10000){
+			sum10k += x*x;
+			n10k++;
+		}
+		if(((0.5*i)/O_RATE*I_RATE)<22050){
+			sum22k += x*x;
+			n22k++;
+		}
+	}
+	printf("average error 10k=%g 22k=%g\n",
+		sqrt(sum10k/n10k),
+		sqrt(sum22k/n22k));
+}
+
+
+void test_res2(void)
+{
+	functable_t *t;
+	int i;
+	double x;
+	double f1,f2;
+
+	t = malloc(sizeof(*t));
+	memset(t,0,sizeof(*t));
+
+	t->start = -50.0;
+	t->offset = 1;
+	t->len = 100;
+
+	t->func_x = functable_sinc;
+	t->func_dx = functable_dsinc;
+
+	functable_init(t);
+
+	for(i=0;i<1000;i++){
+		x = -50.0 + 0.1 * i;
+		f1 = functable_sinc(NULL,x);
+		f2 = functable_eval(t,x);
+		fprintf(out,"%d %g %g %g\n",i,f1,f2,f1-f2);
+	}
+}
+
+void test_res3(void)
+{
+	functable_t *t;
+	int i;
+	double x;
+	double f1,f2;
+	int n = 1;
+
+	t = malloc(sizeof(*t));
+	memset(t,0,sizeof(*t));
+
+	t->start = -50.0;
+	t->offset = 1.0 / n;
+	t->len = 100 * n;
+
+	t->func_x = functable_sinc;
+	t->func_dx = functable_dsinc;
+
+	t->func2_x = functable_window_std;
+	t->func2_dx = functable_window_dstd;
+
+	t->scale = 1.0;
+	t->scale2 = 1.0 / (M_PI * 16);
+
+	functable_init(t);
+
+	for(i=0;i<1000 * n;i++){
+		x = -50.0 + 0.1/n * i;
+		f1 = functable_sinc(NULL,t->scale * x) *
+			functable_window_std(NULL,t->scale2 * x);
+		f2 = functable_eval(t,x);
+		fprintf(out,"%d %g %g %g\n",i,f1,f2,f2-f1);
+	}
+}
+
+double sinc_poly(double x)
+{
+#define INV3FAC 1.66666666666666666e-1
+#define INV5FAC 8.33333333333333333e-3
+#define INV7FAC 1.984126984e-4
+#define INV9FAC 2.755731922e-6
+#define INV11FAC 2.505210839e-8
+	double x2 = x * x;
+
+	return 1
+		- x2 * INV3FAC
+		+ x2 * x2 * INV5FAC
+		- x2 * x2 * x2 * INV7FAC;
+		//+ x2 * x2 * x2 * x2 * INV9FAC
+		//- x2 * x2 * x2 * x2 * x2 * INV11FAC;
+}
+
+void test_res4(void)
+{
+	int i;
+	double x,f1,f2;
+
+	for(i=1;i<100;i++){
+		x = 0.01 * i;
+		f1 = 1 - sin(x)/x;
+		f2 = 1 - sinc_poly(x);
+	
+		fprintf(out,"%g %.20g %.20g %.20g\n",x,f1,f2,f2-f1);
+	}
+}
+
+
+void test_res5(void)
+{
+	int i;
+	double sum;
+
+	start_timer();
+	sum = 0;
+	for(i=0;i<I_RATE;i++){
+		sum += i_buf[i*2];
+	}
+	end_timer();
+	i_buf[0] = sum;
+}
+
+
+void short_to_double(double *d,short *x) { *d = *x; }
+void short_to_float(float *f,short *x) { *f = *x; }
+void float_to_double(double *f,float *x) { *f = *x; }
+void double_to_short(short *f,double *x) { *f = *x; }
+
+double res6_tmp[1000];
+
+void test_res6(void)
+{
+	int i;
+
+	for(i=0;i<I_RATE;i++){
+		i_buf[i] = rint(AMP * test_func((double)i/I_RATE));
+	}
+
+	conv_double_short_ref(res6_tmp,i_buf,1000);
+	for(i=0;i<1000;i++){
+		res6_tmp[i] *= 3.0;
+	}
+	conv_short_double_ppcasm(o_buf,res6_tmp,1000);
+
+	for(i=0;i<1000;i++){
+		fprintf(out,"%d %d %g %d\n",i,i_buf[i],res6_tmp[i],o_buf[i]);
+	}
+}
+
-- 
2.34.1