From 4cd90d9e32ca9a23e3c0f7615974ea0c55ff3e49 Mon Sep 17 00:00:00 2001 From: Arun Raghavan Date: Mon, 25 Oct 2010 17:59:08 +0100 Subject: [PATCH] volume: Add Orc-based optimised volume scaling This adds volume scaling for 1- and 2-channel software volume scaling using Orc. While testing the MMX and SSE backends on a Core2, I see an ~2x performance benefit over the hand-rolled MMX and SSE code. Since I haven't been able to test on other architectures, the Orc code is only used when MMX/SSE* is present. This can be changed in the future after testing on AMD and ARM machines. --- src/Makefile.am | 9 ++++ src/daemon/main.c | 1 + src/pulsecore/cpu-orc.c | 34 +++++++++++++ src/pulsecore/cpu-orc.h | 37 ++++++++++++++ src/pulsecore/svolume.orc | 84 +++++++++++++++++++++++++++++++ src/pulsecore/svolume_orc.c | 117 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 282 insertions(+) create mode 100644 src/pulsecore/cpu-orc.c create mode 100644 src/pulsecore/cpu-orc.h create mode 100644 src/pulsecore/svolume.orc create mode 100644 src/pulsecore/svolume_orc.c diff --git a/src/Makefile.am b/src/Makefile.am index 24e2f82..68eabd2 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -850,6 +850,7 @@ libpulsecore_@PA_MAJORMINOR@_la_SOURCES = \ pulsecore/cpu.h \ pulsecore/cpu-arm.c pulsecore/cpu-arm.h \ pulsecore/cpu-x86.c pulsecore/cpu-x86.h \ + pulsecore/cpu-orc.c pulsecore/cpu-orc.h \ pulsecore/svolume_c.c pulsecore/svolume_arm.c \ pulsecore/svolume_mmx.c pulsecore/svolume_sse.c \ pulsecore/sconv-s16be.c pulsecore/sconv-s16be.h \ @@ -874,6 +875,14 @@ libpulsecore_@PA_MAJORMINOR@_la_CFLAGS = $(AM_CFLAGS) $(LIBSAMPLERATE_CFLAGS) $( libpulsecore_@PA_MAJORMINOR@_la_LDFLAGS = $(AM_LDFLAGS) -avoid-version libpulsecore_@PA_MAJORMINOR@_la_LIBADD = $(AM_LIBADD) $(LIBLTDL) $(LIBSAMPLERATE_LIBS) $(LIBSPEEX_LIBS) $(LIBSNDFILE_LIBS) $(WINSOCK_LIBS) $(LTLIBICONV) libpulsecommon-@PA_MAJORMINOR@.la libpulse.la libpulsecore-foreign.la +if HAVE_ORC +ORC_SOURCE += pulsecore/svolume 
+libpulsecore_@PA_MAJORMINOR@_la_SOURCES += pulsecore/svolume_orc.c +nodist_libpulsecore_@PA_MAJORMINOR@_la_SOURCES = pulsecore/svolume-orc-gen.c pulsecore/svolume-orc-gen.h +libpulsecore_@PA_MAJORMINOR@_la_CFLAGS += $(ORC_CFLAGS) +libpulsecore_@PA_MAJORMINOR@_la_LIBADD += $(ORC_LIBS) +endif + if HAVE_X11 libpulsecore_@PA_MAJORMINOR@_la_SOURCES += pulsecore/x11wrap.c pulsecore/x11wrap.h libpulsecore_@PA_MAJORMINOR@_la_CFLAGS += $(X11_CFLAGS) diff --git a/src/daemon/main.c b/src/daemon/main.c index 243e7c0..533c4c3 100644 --- a/src/daemon/main.c +++ b/src/daemon/main.c @@ -966,6 +966,7 @@ int main(int argc, char *argv[]) { c->cpu_info.cpu_type = PA_CPU_X86; if (pa_cpu_init_arm(&(c->cpu_info.flags.arm))) c->cpu_info.cpu_type = PA_CPU_ARM; + pa_cpu_init_orc(c->cpu_info); } pa_assert_se(pa_signal_init(pa_mainloop_get_api(mainloop)) == 0); diff --git a/src/pulsecore/cpu-orc.c b/src/pulsecore/cpu-orc.c new file mode 100644 index 0000000..d4a1535 --- /dev/null +++ b/src/pulsecore/cpu-orc.c @@ -0,0 +1,34 @@ +/*** + This file is part of PulseAudio. + + Copyright 2010 Arun Raghavan + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, + or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. 
+***/
+
+#include "cpu-orc.h"
+/* Install the Orc volume code on CPUs it has been tested on; compiles to an empty function when DISABLE_ORC is defined. */
+void pa_cpu_init_orc(pa_cpu_info cpu_info)
+{
+#ifndef DISABLE_ORC
+    /* Update these as we test on more architectures */
+    pa_cpu_x86_flag_t x86_want_flags = PA_CPU_X86_MMX | PA_CPU_X86_SSE | PA_CPU_X86_SSE2 | PA_CPU_X86_SSE3 | PA_CPU_X86_SSSE3 | PA_CPU_X86_SSE4_1 | PA_CPU_X86_SSE4_2;
+
+    /* Enable Orc svolume optimizations: x86 only, and only if at least one of the wanted flags is set */
+    if ((cpu_info.cpu_type == PA_CPU_X86) && (cpu_info.flags.x86 & x86_want_flags))
+        pa_volume_func_init_orc();
+#endif
+}
diff --git a/src/pulsecore/cpu-orc.h b/src/pulsecore/cpu-orc.h
new file mode 100644
index 0000000..9924d27
--- /dev/null
+++ b/src/pulsecore/cpu-orc.h
@@ -0,0 +1,37 @@
+#ifndef foocpuorchfoo
+#define foocpuorchfoo
+
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2010 Arun Raghavan
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <pulsecore/cpu.h> /* pa_cpu_info; NOTE(review): include target was lost in extraction, reconstructed - confirm */
+
+/* Orc-optimised bits */
+
+void pa_cpu_init_orc(pa_cpu_info cpu_info); /* calls pa_volume_func_init_orc() when cpu_info qualifies */
+
+void pa_volume_func_init_orc(void); /* defined in svolume_orc.c, which is only built when HAVE_ORC is set */
+
+#endif /* foocpuorchfoo */
diff --git a/src/pulsecore/svolume.orc b/src/pulsecore/svolume.orc
new file mode 100644
index 0000000..3411161
--- /dev/null
+++ b/src/pulsecore/svolume.orc
@@ -0,0 +1,84 @@
+# This file is part of PulseAudio.
+#
+# Copyright 2010 Lennart Poettering
+# Copyright 2010 Wim Taymans
+# Copyright 2010 Arun Raghavan
+#
+# PulseAudio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 2.1 of the License,
+# or (at your option) any later version.
+#
+# PulseAudio is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with PulseAudio; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+# USA.
+
+# S16NE 1- and 2-channel volume scaling work as follows:
+#
+#     params: samples s (signed 16-bit), volume v (signed 32-bit < 2^31)
+#
+#                  32               16                0 (type of operation)
+#         sample =                  |      sample     | (signed)
+#              s = |       0        |      sample     | (unsigned)
+#
+#     if (sample < 0)
+#          signc = |       0        |      0xffff     | (unsigned)
+#     else
+#          signc = |       0        |        0        | (unsigned)
+#
+#     if (sample < 0)
+#             ml = |       0        | -((s*vl) >> 16) | (unsigned)
+#     else
+#             ml = |       0        |   (s*vl) >> 16  | (unsigned)
+#
+#             vh = |              v >> 16             | (signed, but sign bit is always zero
+#                                                        since PA_VOLUME_MAX is 0x0fffffff)
+#             mh = |          (s * vh) >> 16          | (signed)
+#             ml = |              ml + mh             | (signed)
+#         sample = |            (ml >> 16)            | (signed, saturated)
+
+.function pa_volume_s16ne_orc_1ch
+.dest 2 samples int16_t
+.param 4 v int32_t
+.temp 2 vh
+.temp 4 s
+.temp 4 mh
+.temp 4 ml
+.temp 4 signc
+
+convuwl s, samples
+x2 cmpgtsw signc, 0, s
+x2 andw signc, signc, v
+x2 mulhuw ml, s, v
+subl ml, ml, signc
+convhlw vh, v
+mulswl mh, samples, vh
+addl ml, ml, mh
+convssslw samples, ml
+
+.function pa_volume_s16ne_orc_2ch
+.dest 4 samples int16_t
+.longparam 8 vols
+.temp 8 v
+.temp 4 vh
+.temp 8 s
+.temp 8 mh
+.temp 8 ml
+.temp 8 signc
+
+loadpq v, vols
+x2 convuwl s, samples
+x4 cmpgtsw signc, 0, s
+x4 andw signc, signc, v
+x4 mulhuw ml, s, v
+x2 subl ml, ml, signc
+x2 convhlw vh, v
+x2 mulswl mh, samples, vh
+x2 addl ml, ml, mh
+x2 convssslw samples, ml
diff --git a/src/pulsecore/svolume_orc.c b/src/pulsecore/svolume_orc.c
new file mode 100644
index 0000000..db07ba6
--- /dev/null
+++ b/src/pulsecore/svolume_orc.c
@@ -0,0 +1,117 @@
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2004-2006 Lennart Poettering
+  Copyright 2009 Wim Taymans
+  Copyright 2010 Arun Raghavan
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "cpu-orc.h"
+#include <pulse/rtclock.h> /* NOTE(review): the <...> include targets in this file were lost in extraction and are reconstructed from the names used below - confirm */
+#include <pulsecore/sample-util.h>
+#include <pulsecore/log.h>
+#include <pulsecore/random.h>
+#include <pulsecore/svolume-orc-gen.h>
+
+static pa_do_volume_func_t fallback; /* S16NE function that was installed before ours; used for channel counts other than 1 and 2 */
+/* Scale S16NE samples in place. length is a byte count; volumes holds one 32-bit factor per channel. */
+static void
+pa_volume_s16ne_orc(int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+    if (channels == 2) {
+        int64_t v = (int64_t) (((uint64_t) (uint32_t) volumes[1] << 32) | (uint32_t) volumes[0]); /* channel 0 in the low word, channel 1 in the high word; unsigned casts avoid sign extension over channel 1 */
+        pa_volume_s16ne_orc_2ch (samples, v, ((length / (sizeof(int16_t))) / 2)); /* count is in stereo frames */
+    } else if (channels == 1)
+        pa_volume_s16ne_orc_1ch (samples, volumes[0], length / (sizeof(int16_t))); /* count is in samples */
+    else
+        fallback(samples, volumes, channels, length);
+}
+
+#undef RUN_TEST /* define RUN_TEST instead to build run_test() and call it from pa_volume_func_init_orc() */
+
+#ifdef RUN_TEST
+#define CHANNELS 2
+#define SAMPLES 1022
+#define TIMES 1000
+#define PADDING 16
+/* Self-test: compare the Orc output with the currently installed S16NE function, then time TIMES runs of each. */
+static void run_test (void) {
+    int16_t samples[SAMPLES];
+    int16_t samples_ref[SAMPLES];
+    int16_t samples_orig[SAMPLES];
+    int32_t volumes[CHANNELS + PADDING];
+    int i, j, padding;
+    pa_do_volume_func_t func;
+    pa_usec_t start, stop;
+
+    func = pa_get_volume_func (PA_SAMPLE_S16NE);
+
+    printf ("checking ORC %zu\n", sizeof (samples));
+
+    pa_random (samples, sizeof (samples));
+    memcpy (samples_ref, samples, sizeof (samples));
+    memcpy (samples_orig, samples, sizeof (samples));
+
+    for (i = 0; i < CHANNELS; i++)
+        volumes[i] = PA_CLAMP_VOLUME(rand() >> 1);
+    for (padding = 0; padding < PADDING; padding++, i++)
+        volumes[i] = volumes[padding];
+
+    func (samples_ref, volumes, CHANNELS, sizeof (samples));
+    pa_volume_s16ne_orc (samples, volumes, CHANNELS, sizeof (samples));
+    for (i = 0; i < SAMPLES; i++) {
+        if (samples[i] != samples_ref[i]) {
+            printf ("%d: %04x != %04x (%04x * %04x)\n", i, (unsigned) (uint16_t) samples[i], (unsigned) (uint16_t) samples_ref[i],
+                    (unsigned) (uint16_t) samples_orig[i], (unsigned) volumes[i % CHANNELS]);
+        }
+    }
+
+    start = pa_rtclock_now();
+    for (j = 0; j < TIMES; j++) {
+        memcpy (samples, samples_orig, sizeof (samples));
+        pa_volume_s16ne_orc (samples, volumes, CHANNELS, sizeof (samples));
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("ORC: %llu usec.", (long long unsigned int)(stop - start));
+
+    start = pa_rtclock_now();
+    for (j = 0; j < TIMES; j++) {
+        memcpy (samples_ref, samples_orig, sizeof (samples));
+        func (samples_ref, volumes, CHANNELS, sizeof (samples));
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
+
+    pa_assert_se(memcmp(samples_ref, samples, sizeof(samples)) == 0);
+}
+#endif
+/* Save the current S16NE volume function as the fallback, then install the Orc version in its place. */
+void pa_volume_func_init_orc(void) {
+    pa_log_info("Initialising ORC optimized functions.");
+
+#ifdef RUN_TEST
+    run_test();
+#endif
+
+    fallback = pa_get_volume_func(PA_SAMPLE_S16NE); /* must be read before pa_set_volume_func() replaces it */
+    pa_set_volume_func(PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_orc);
+}
-- 
2.7.4