From 3aadd10c67602f9585df6515708a61cb388d5692 Mon Sep 17 00:00:00 2001 From: Josh Coalson Date: Tue, 27 Jul 2004 01:13:16 +0000 Subject: [PATCH] check in Brady's second altivec-related patch that hooks up the asm routines and does the requisite configure-related stuff --- configure.in | 14 ++++++++++++ src/libFLAC/cpu.c | 32 ++++++++++++++++++++++++++ src/libFLAC/include/private/cpu.h | 6 +++++ src/libFLAC/stream_decoder.c | 48 ++++++++++++++++++++++++++++----------- 4 files changed, 87 insertions(+), 13 deletions(-) diff --git a/configure.in b/configure.in index 37163ab..607e382 100644 --- a/configure.in +++ b/configure.in @@ -208,6 +208,18 @@ if test x$use_3dnow = xtrue ; then AC_DEFINE(FLAC__USE_3DNOW) fi +AC_ARG_ENABLE(altivec, +[ --disable-altivec Disable Altivec optimizations], +[case "${enableval}" in + yes) use_altivec=true ;; + no) use_altivec=false ;; + *) AC_MSG_ERROR(bad value ${enableval} for --enable-altivec) ;; +esac],[use_altivec=true]) +AM_CONDITIONAL(FLaC__USE_ALTIVEC, test x$use_altivec = xtrue) +if test x$use_altivec = xtrue ; then +AC_DEFINE(FLAC__USE_ALTIVEC) +fi + AC_ARG_ENABLE(local-xmms-plugin, [ --enable-local-xmms-plugin Install XMMS plugin to ~/.xmms/Plugins instead of system location], [case "${enableval}" in @@ -330,6 +342,7 @@ AH_TEMPLATE(FLAC__HAS_OGG, [define if you have the ogg library]) AH_TEMPLATE(FLAC__NO_ASM, [define to disable use of assembly code]) AH_TEMPLATE(FLAC__SSE_OS, [define if your operating system supports SSE instructions]) AH_TEMPLATE(FLAC__USE_3DNOW, [define to enable use of 3Dnow! 
instructions]) +AH_TEMPLATE(FLAC__USE_ALTIVEC, [define to enable use of Altivec instructions]) AH_TEMPLATE(ID3LIB_MAJOR, [define to major version number of id3lib]) AH_TEMPLATE(ID3LIB_MINOR, [define to minor version number of id3lib]) AH_TEMPLATE(ID3LIB_PATCH, [define to patch level of id3lib]) @@ -339,6 +352,7 @@ AC_OUTPUT( \ src/Makefile \ src/libFLAC/Makefile \ src/libFLAC/ia32/Makefile \ + src/libFLAC/ppc/Makefile \ src/libFLAC/include/Makefile \ src/libFLAC/include/private/Makefile \ src/libFLAC/include/protected/Makefile \ diff --git a/src/libFLAC/cpu.c b/src/libFLAC/cpu.c index 61f52c7..63ce05e 100644 --- a/src/libFLAC/cpu.c +++ b/src/libFLAC/cpu.c @@ -37,6 +37,14 @@ #include #endif +#if defined FLAC__CPU_PPC +#if !defined FLAC__NO_ASM +#if defined __APPLE__ && defined __MACH__ +#include <sys/sysctl.h> +#endif /* __APPLE__ && __MACH__ */ +#endif /* FLAC__NO_ASM */ +#endif /* FLAC__CPU_PPC */ + const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 0x00008000; const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 0x00800000; const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000; @@ -78,6 +86,30 @@ void FLAC__cpu_info(FLAC__CPUInfo *info) #else info->use_asm = false; #endif +#elif defined FLAC__CPU_PPC + info->type = FLAC__CPUINFO_TYPE_PPC; +#if !defined FLAC__NO_ASM + info->use_asm = true; +#ifdef FLAC__USE_ALTIVEC +#if defined __APPLE__ && defined __MACH__ + { + int selectors[2] = { CTL_HW, HW_VECTORUNIT }; + int result = 0; + size_t length = sizeof(result); + int error = sysctl(selectors, 2, &result, &length, 0, 0); + + info->data.ppc.altivec = error==0 ? 
result!=0 : 0; + } +#else /* __APPLE__ && __MACH__ */ + /* don't know of any other thread-safe way to check */ + info->data.ppc.altivec = 0; +#endif /* __APPLE__ && __MACH__ */ +#else /* FLAC__USE_ALTIVEC */ + info->data.ppc.altivec = 0; +#endif /* FLAC__USE_ALTIVEC */ +#else /* FLAC__NO_ASM */ + info->use_asm = false; +#endif /* FLAC__NO_ASM */ #else info->type = FLAC__CPUINFO_TYPE_UNKNOWN; info->use_asm = false; diff --git a/src/libFLAC/include/private/cpu.h b/src/libFLAC/include/private/cpu.h index 0c4ab35..b8c001a 100644 --- a/src/libFLAC/include/private/cpu.h +++ b/src/libFLAC/include/private/cpu.h @@ -40,6 +40,7 @@ typedef enum { FLAC__CPUINFO_TYPE_IA32, + FLAC__CPUINFO_TYPE_PPC, FLAC__CPUINFO_TYPE_UNKNOWN } FLAC__CPUInfo_Type; @@ -54,6 +55,10 @@ typedef struct { FLAC__bool extmmx; } FLAC__CPUInfo_IA32; +typedef struct { + FLAC__bool altivec; +} FLAC__CPUInfo_PPC; + extern const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV; extern const unsigned FLAC__CPUINFO_IA32_CPUID_MMX; extern const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR; @@ -69,6 +74,7 @@ typedef struct { FLAC__CPUInfo_Type type; union { FLAC__CPUInfo_IA32 ia32; + FLAC__CPUInfo_PPC ppc; } data; } FLAC__CPUInfo; diff --git a/src/libFLAC/stream_decoder.c b/src/libFLAC/stream_decoder.c index 4369d6d..fb6cdbc 100644 --- a/src/libFLAC/stream_decoder.c +++ b/src/libFLAC/stream_decoder.c @@ -41,6 +41,7 @@ #include "private/fixed.h" #include "private/format.h" #include "private/lpc.h" +#include "private/memory.h" #ifdef HAVE_CONFIG_H #include <config.h> @@ -98,13 +99,18 @@ typedef struct FLAC__StreamDecoderPrivate { FLAC__StreamDecoderWriteCallback write_callback; FLAC__StreamDecoderMetadataCallback metadata_callback; FLAC__StreamDecoderErrorCallback error_callback; + /* generic 32-bit datapath: */ void (*local_lpc_restore_signal)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); + /* generic 64-bit datapath: */ void 
(*local_lpc_restore_signal_64bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); + /* for use when the signal is <= 16 bits-per-sample, or <= 15 bits-per-sample on a side channel (which requires 1 extra bit): */ void (*local_lpc_restore_signal_16bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); + /* for use when the signal is <= 16 bits-per-sample, or <= 15 bits-per-sample on a side channel (which requires 1 extra bit), AND order <= 8: */ + void (*local_lpc_restore_signal_16bit_order8)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); void *client_data; FLAC__BitBuffer *input; FLAC__int32 *output[FLAC__MAX_CHANNELS]; - FLAC__int32 *residual[FLAC__MAX_CHANNELS]; + FLAC__int32 *residual[FLAC__MAX_CHANNELS]; /* WATCHOUT: these are the aligned pointers; the real pointers that should be free()'d are residual_unaligned[] below */ FLAC__EntropyCodingMethod_PartitionedRiceContents partitioned_rice_contents[FLAC__MAX_CHANNELS]; unsigned output_capacity, output_channels; FLAC__uint32 last_frame_number; @@ -120,6 +126,8 @@ typedef struct FLAC__StreamDecoderPrivate { FLAC__CPUInfo cpuinfo; FLAC__byte header_warmup[2]; /* contains the sync code and reserved bits */ FLAC__byte lookahead; /* temp storage when we need to look ahead one byte in the stream */ + /* unaligned (original) pointers to allocated data */ + FLAC__int32 *residual_unaligned[FLAC__MAX_CHANNELS]; } FLAC__StreamDecoderPrivate; /*********************************************************************** @@ -208,7 +216,7 @@ FLAC_API FLAC__StreamDecoder *FLAC__stream_decoder_new() for(i = 0; i < FLAC__MAX_CHANNELS; i++) { decoder->private_->output[i] = 0; - decoder->private_->residual[i] = 0; + decoder->private_->residual_unaligned[i] = 
decoder->private_->residual[i] = 0; } decoder->private_->output_capacity = 0; @@ -281,6 +289,7 @@ FLAC_API FLAC__StreamDecoderState FLAC__stream_decoder_init(FLAC__StreamDecoder decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal; decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide; decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal; + decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal; /* now override with asm where appropriate */ #ifndef FLAC__NO_ASM if(decoder->private_->cpuinfo.use_asm) { @@ -290,12 +299,20 @@ FLAC_API FLAC__StreamDecoderState FLAC__stream_decoder_init(FLAC__StreamDecoder if(decoder->private_->cpuinfo.data.ia32.mmx) { decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32; decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32_mmx; + decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ia32_mmx; } else { decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32; decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32; + decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ia32; } #endif +#elif defined FLAC__CPU_PPC + FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_PPC); + if(decoder->private_->cpuinfo.data.ppc.altivec) { + decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ppc_altivec_16; + decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8; + } #endif } #endif @@ -329,9 +346,9 @@ FLAC_API void FLAC__stream_decoder_finish(FLAC__StreamDecoder *decoder) free(decoder->private_->output[i]-4); decoder->private_->output[i] = 0; } - if(0 != decoder->private_->residual[i]) { - free(decoder->private_->residual[i]); - decoder->private_->residual[i] = 0; + if(0 != 
decoder->private_->residual_unaligned[i]) { + free(decoder->private_->residual_unaligned[i]); + decoder->private_->residual_unaligned[i] = decoder->private_->residual[i] = 0; } } decoder->private_->output_capacity = 0; @@ -763,9 +780,9 @@ FLAC__bool allocate_output_(FLAC__StreamDecoder *decoder, unsigned size, unsigne free(decoder->private_->output[i]-4); decoder->private_->output[i] = 0; } - if(0 != decoder->private_->residual[i]) { - free(decoder->private_->residual[i]); - decoder->private_->residual[i] = 0; + if(0 != decoder->private_->residual_unaligned[i]) { + free(decoder->private_->residual_unaligned[i]); + decoder->private_->residual_unaligned[i] = decoder->private_->residual[i] = 0; } } @@ -784,12 +801,13 @@ FLAC__bool allocate_output_(FLAC__StreamDecoder *decoder, unsigned size, unsigne memset(tmp, 0, sizeof(FLAC__int32)*4); decoder->private_->output[i] = tmp + 4; - tmp = (FLAC__int32*)malloc(sizeof(FLAC__int32)*size); - if(tmp == 0) { + /* WATCHOUT: + * minimum of quadword alignment for PPC vector optimizations is REQUIRED: + */ + if(!FLAC__memory_alloc_aligned_int32_array(size, &decoder->private_->residual_unaligned[i], &decoder->private_->residual[i])) { decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR; return false; } - decoder->private_->residual[i] = tmp; } decoder->private_->output_capacity = size; @@ -1974,8 +1992,12 @@ FLAC__bool read_subframe_lpc_(FLAC__StreamDecoder *decoder, unsigned channel, un if(do_full_decode) { memcpy(decoder->private_->output[channel], subframe->warmup, sizeof(FLAC__int32) * order); if(bps + subframe->qlp_coeff_precision + FLAC__bitmath_ilog2(order) <= 32) - if(bps <= 16 && subframe->qlp_coeff_precision <= 16) - decoder->private_->local_lpc_restore_signal_16bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order); + if(bps <= 16 && subframe->qlp_coeff_precision <= 
16) { + if(order <= 8) + decoder->private_->local_lpc_restore_signal_16bit_order8(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order); + else + decoder->private_->local_lpc_restore_signal_16bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order); + } else decoder->private_->local_lpc_restore_signal(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order); else -- 2.7.4