From: Josh Coalson Date: Mon, 16 Jul 2001 21:13:19 +0000 (+0000) Subject: add 3DNOW stuff from Miroslav X-Git-Tag: 1.2.0~2156 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f5925df599218658f417fec40bfa3fe56e4cde77;p=platform%2Fupstream%2Fflac.git add 3DNOW stuff from Miroslav --- diff --git a/src/libFLAC/cpu.c b/src/libFLAC/cpu.c index f14ef178..19b473a9 100644 --- a/src/libFLAC/cpu.c +++ b/src/libFLAC/cpu.c @@ -27,6 +27,10 @@ const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000; const unsigned FLAC__CPUINFO_IA32_CPUID_SSE = 0x02000000; const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 0x04000000; +const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW = 0x80000000; +const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW = 0x40000000; +const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX = 0x00400000; + void FLAC__cpu_info(FLAC__CPUInfo *info) { @@ -41,6 +45,11 @@ void FLAC__cpu_info(FLAC__CPUInfo *info) info->data.ia32.fxsr = (cpuid & FLAC__CPUINFO_IA32_CPUID_FXSR)? true : false; info->data.ia32.sse = (cpuid & FLAC__CPUINFO_IA32_CPUID_SSE)? true : false; /* @@@ also need to check for operating system support */ info->data.ia32.sse2 = (cpuid & FLAC__CPUINFO_IA32_CPUID_SSE2)? true : false; /* @@@ also need to check for operating system support */ + + cpuid = FLAC__cpu_info_extended_amd_asm_ia32(); + info->data.ia32._3dnow = (cpuid & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW)? true : false; + info->data.ia32.ext3dnow = (cpuid & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW)? true : false; + info->data.ia32.extmmx = (cpuid & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX)? true : false; } #else info->use_asm = false; diff --git a/src/libFLAC/ia32/cpu_asm.nasm b/src/libFLAC/ia32/cpu_asm.nasm index edd10b49..39e0418a 100644 --- a/src/libFLAC/ia32/cpu_asm.nasm +++ b/src/libFLAC/ia32/cpu_asm.nasm @@ -21,20 +21,18 @@ data_section cglobal FLAC__cpu_info_asm_ia32 +cglobal FLAC__cpu_info_extended_amd_asm_ia32 code_section ; ********************************************************************** ; - ALIGN 16 -cident FLAC__cpu_info_asm_ia32 - - push ebx +have_cpuid: pushfd pop eax mov edx, eax - xor eax, 00200000h + xor eax, 0x00200000 push eax popfd pushfd @@ -42,12 +40,43 @@ cident FLAC__cpu_info_asm_ia32 cmp eax, edx jz .no_cpuid mov eax, 1 + jmp .end +.no_cpuid: + xor eax, eax +.end: + ret + +cident FLAC__cpu_info_asm_ia32 + push ebx + call have_cpuid + test eax, eax + jz .no_cpuid + mov eax, 1 cpuid mov eax, edx - jmp short .end + jmp .end .no_cpuid: - xor eax, eax ; return 0 -.end: + xor eax, eax +.end + pop ebx + ret + +cident FLAC__cpu_info_extended_amd_asm_ia32 + push ebx + call have_cpuid + test eax, eax + jz .no_cpuid + mov eax, 0x80000000 + cpuid + cmp eax, 0x80000001 + jb .no_cpuid + mov eax, 0x80000001 + cpuid + mov eax, edx + jmp .end +.no_cpuid + xor eax, eax +.end pop ebx ret diff --git a/src/libFLAC/ia32/lpc_asm.nasm b/src/libFLAC/ia32/lpc_asm.nasm index a7dec5fa..6886d629 100644 --- a/src/libFLAC/ia32/lpc_asm.nasm +++ b/src/libFLAC/ia32/lpc_asm.nasm @@ -24,6 +24,7 @@ cglobal FLAC__lpc_compute_autocorrelation_asm_ia32 cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4 cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8 cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12 +cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32 cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx cglobal FLAC__lpc_restore_signal_asm_ia32 @@ -592,6 +593,124 @@ cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12 .end: ret + align 16 +cident FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow + ;[ebp + 32] autoc + ;[ebp + 28] lag + ;[ebp + 24] data_len + ;[ebp + 20] data + + push ebp + push ebx + push esi + push edi + mov ebp, esp + + mov esi, [ebp + 20] + mov edi, [ebp + 24] + mov edx, [ebp + 28] + mov eax, edx + neg eax + and esp, byte -8 + lea esp, [esp + 4 * eax] + mov ecx, edx + xor eax, eax +.loop0: + dec ecx + mov [esp + 4 * ecx], eax + jnz short .loop0 + + mov eax, edi + sub eax, edx + mov ebx, edx + and ebx, byte 1 + sub eax, ebx + lea ecx, [esi + 4 * eax - 12] + cmp esi, ecx + mov eax, esi + ja short .loop2_pre + align 16 ;8 nops +.loop1_i: + movd mm0, [eax] + movd mm2, [eax + 4] + movd mm4, [eax + 8] + movd mm6, [eax + 12] + mov ebx, edx + punpckldq mm0, mm0 + punpckldq mm2, mm2 + punpckldq mm4, mm4 + punpckldq mm6, mm6 + align 16 ;3 nops +.loop1_j: + sub ebx, byte 2 + movd mm1, [eax + 4 * ebx] + movd mm3, [eax + 4 * ebx + 4] + movd mm5, [eax + 4 * ebx + 8] + movd mm7, [eax + 4 * ebx + 12] + punpckldq mm1, mm3 + punpckldq mm3, mm5 + pfmul mm1, mm0 + punpckldq mm5, mm7 + pfmul mm3, mm2 + punpckldq mm7, [eax + 4 * ebx + 16] + pfmul mm5, mm4 + pfmul mm7, mm6 + pfadd mm1, mm3 + movq mm3, [esp + 4 * ebx] + pfadd mm5, mm7 + pfadd mm1, mm5 + pfadd mm3, mm1 + movq [esp + 4 * ebx], mm3 + jg short .loop1_j + + add eax, byte 16 + cmp eax, ecx + jb short .loop1_i + +.loop2_pre: + mov ebx, eax + sub eax, esi + shr eax, 2 + lea ecx, [esi + 4 * edi] + mov esi, ebx +.loop2_i: + movd mm0, [esi] + mov ebx, edi + sub ebx, eax + cmp ebx, edx + jbe short .loop2_j + mov ebx, edx +.loop2_j: + dec ebx + movd mm1, [esi + 4 * ebx] + pfmul mm1, mm0 + movd mm2, [esp + 4 * ebx] + pfadd mm1, mm2 + movd [esp + 4 * ebx], mm1 + + jnz short .loop2_j + + add esi, byte 4 + inc eax + cmp esi, ecx + jnz short .loop2_i + + mov edi, [ebp + 32] +.loop3: + dec edx + mov eax, [esp + 4 * edx] + mov [edi + 4 * edx], eax + jnz short .loop3 + + femms + + mov esp, ebp + pop edi + pop esi + pop ebx + pop ebp + ret + ;void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 data[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]) ; ; for(i = 0; i < data_len; i++) { diff --git a/src/libFLAC/include/private/cpu.h b/src/libFLAC/include/private/cpu.h index 4e8e69c5..c858b3ac 100644 --- a/src/libFLAC/include/private/cpu.h +++ b/src/libFLAC/include/private/cpu.h @@ -33,13 +33,21 @@ typedef struct { FLAC__bool fxsr; FLAC__bool sse; FLAC__bool sse2; + FLAC__bool _3dnow; + FLAC__bool ext3dnow; + FLAC__bool extmmx; } FLAC__CPUInfo_IA32; + extern const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV; extern const unsigned FLAC__CPUINFO_IA32_CPUID_MMX; extern const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR; extern const unsigned FLAC__CPUINFO_IA32_CPUID_SSE; extern const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2; +extern const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW; +extern const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW; +extern const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX; + typedef struct { FLAC__bool use_asm; FLAC__CPUInfo_Type type; @@ -54,6 +62,7 @@ void FLAC__cpu_info(FLAC__CPUInfo *info); #ifdef FLAC__CPU_IA32 #ifdef FLAC__HAS_NASM unsigned FLAC__cpu_info_asm_ia32(); +unsigned FLAC__cpu_info_extended_amd_asm_ia32(); #endif #endif #endif diff --git a/src/libFLAC/include/private/lpc.h b/src/libFLAC/include/private/lpc.h index 8bb034e1..f6015015 100644 --- a/src/libFLAC/include/private/lpc.h +++ b/src/libFLAC/include/private/lpc.h @@ -44,6 +44,7 @@ void FLAC__lpc_compute_autocorrelation_asm_ia32(const FLAC__real data[], unsigne void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]); void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]); void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]); +void FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]); #endif #endif #endif diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c index acf97cd4..c6178a17 100644 --- a/src/libFLAC/stream_encoder.c +++ b/src/libFLAC/stream_encoder.c @@ -375,7 +375,7 @@ FLAC__StreamEncoderState FLAC__stream_encoder_init(FLAC__StreamEncoder *encoder) #ifdef FLAC__CPU_IA32 FLAC__ASSERT(encoder->private->cpuinfo.type == FLAC__CPUINFO_TYPE_IA32); #ifdef FLAC__HAS_NASM - if(0 && encoder->private->cpuinfo.data.ia32.sse) { /* SSE version lacks necessary resolution, plus SSE flag doesn't check for OS support */ + if(0 && encoder->private->cpuinfo.data.ia32.sse) { /*@@@ SSE version lacks necessary resolution, plus SSE flag doesn't check for OS support */ if(encoder->protected->max_lpc_order < 4) encoder->private->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4; else if(encoder->protected->max_lpc_order < 8) @@ -385,6 +385,8 @@ FLAC__StreamEncoderState FLAC__stream_encoder_init(FLAC__StreamEncoder *encoder) else encoder->private->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32; } + else if(0 && encoder->private->cpuinfo.data.ia32._3dnow) /*@@@ turn back on in first beta after 1.0 */ + encoder->private->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow; else encoder->private->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32; if(encoder->private->cpuinfo.data.ia32.mmx && encoder->private->cpuinfo.data.ia32.cmov)