From f2f328bdbf624f3e462d633be00ae9ad895a889f Mon Sep 17 00:00:00 2001 From: Josh Coalson Date: Tue, 13 Mar 2007 06:35:55 +0000 Subject: [PATCH] improve ia-32 checks to include cpuid, bswap, sse3 and ssse3 support --- src/libFLAC/cpu.c | 100 ++++++++++++++++++++++++++------------ src/libFLAC/ia32/cpu_asm.nasm | 30 ++++++++++-- src/libFLAC/include/private/cpu.h | 19 +++----- 3 files changed, 102 insertions(+), 47 deletions(-) diff --git a/src/libFLAC/cpu.c b/src/libFLAC/cpu.c index d8bc0fb..2b34721 100644 --- a/src/libFLAC/cpu.c +++ b/src/libFLAC/cpu.c @@ -86,15 +86,19 @@ static void sigill_handler (int sig) /* how to get sysctlbyname()? */ #endif -const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 0x00008000; -const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 0x00800000; -const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000; -const unsigned FLAC__CPUINFO_IA32_CPUID_SSE = 0x02000000; -const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 0x04000000; - -const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW = 0x80000000; -const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW = 0x40000000; -const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX = 0x00400000; +/* these are flags in EDX of CPUID AX=00000001 */ +static const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 0x00008000; +static const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 0x00800000; +static const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000; +static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE = 0x02000000; +static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 0x04000000; +/* these are flags in ECX of CPUID AX=00000001 */ +static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE3 = 0x00000001; +static const unsigned FLAC__CPUINFO_IA32_CPUID_SSSE3 = 0x00000200; +/* these are flags in EDX of CPUID AX=80000001 */ +static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW = 0x80000000; +static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW = 0x40000000; +static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX = 0x00400000; /* @@ -156,31 +160,63 @@ void FLAC__cpu_info(FLAC__CPUInfo *info) #ifdef FLAC__CPU_IA32 info->type = FLAC__CPUINFO_TYPE_IA32; #if !defined FLAC__NO_ASM && defined FLAC__HAS_NASM - info->use_asm = true; - { - unsigned cpuid = FLAC__cpu_info_asm_ia32(); - info->data.ia32.cmov = (cpuid & FLAC__CPUINFO_IA32_CPUID_CMOV)? true : false; - info->data.ia32.mmx = (cpuid & FLAC__CPUINFO_IA32_CPUID_MMX )? true : false; - info->data.ia32.fxsr = (cpuid & FLAC__CPUINFO_IA32_CPUID_FXSR)? true : false; - info->data.ia32.sse = (cpuid & FLAC__CPUINFO_IA32_CPUID_SSE )? true : false; - info->data.ia32.sse2 = (cpuid & FLAC__CPUINFO_IA32_CPUID_SSE2)? true : false; + info->use_asm = true; /* we assume a minimum of 80386 with FLAC__CPU_IA32 */ + info->data.ia32.cpuid = FLAC__cpu_have_cpuid_asm_ia32()? true : false; + info->data.ia32.bswap = info->data.ia32.cpuid; /* CPUID => BSWAP since it came after */ + info->data.ia32.cmov = false; + info->data.ia32.mmx = false; + info->data.ia32.fxsr = false; + info->data.ia32.sse = false; + info->data.ia32.sse2 = false; + info->data.ia32.sse3 = false; + info->data.ia32.ssse3 = false; + info->data.ia32._3dnow = false; + info->data.ia32.ext3dnow = false; + info->data.ia32.extmmx = false; + if(info->data.ia32.cpuid) { + /* http://www.sandpile.org/ia32/cpuid.htm */ + FLAC__uint32 flags_edx, flags_ecx; + FLAC__cpu_info_asm_ia32(&flags_edx, &flags_ecx); + info->data.ia32.cmov = (flags_edx & FLAC__CPUINFO_IA32_CPUID_CMOV )? true : false; + info->data.ia32.mmx = (flags_edx & FLAC__CPUINFO_IA32_CPUID_MMX )? true : false; + info->data.ia32.fxsr = (flags_edx & FLAC__CPUINFO_IA32_CPUID_FXSR )? true : false; + info->data.ia32.sse = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE )? true : false; + info->data.ia32.sse2 = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE2 )? true : false; + info->data.ia32.sse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false; + info->data.ia32.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false; #ifdef FLAC__USE_3DNOW - cpuid = FLAC__cpu_info_extended_amd_asm_ia32(); - info->data.ia32._3dnow = (cpuid & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW)? true : false; - info->data.ia32.ext3dnow = (cpuid & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW)? true : false; - info->data.ia32.extmmx = (cpuid & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX)? true : false; + flags_edx = FLAC__cpu_info_extended_amd_asm_ia32(); + info->data.ia32._3dnow = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW )? true : false; + info->data.ia32.ext3dnow = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW)? true : false; + info->data.ia32.extmmx = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX )? true : false; #else info->data.ia32._3dnow = info->data.ia32.ext3dnow = info->data.ia32.extmmx = false; #endif +#ifdef DEBUG + fprintf(stderr, "CPU info (IA-32):\n"); + fprintf(stderr, " CPUID ...... %c\n", info->data.ia32.cpuid ? 'Y' : 'n'); + fprintf(stderr, " BSWAP ...... %c\n", info->data.ia32.bswap ? 'Y' : 'n'); + fprintf(stderr, " CMOV ....... %c\n", info->data.ia32.cmov ? 'Y' : 'n'); + fprintf(stderr, " MMX ........ %c\n", info->data.ia32.mmx ? 'Y' : 'n'); + fprintf(stderr, " FXSR ....... %c\n", info->data.ia32.fxsr ? 'Y' : 'n'); + fprintf(stderr, " SSE ........ %c\n", info->data.ia32.sse ? 'Y' : 'n'); + fprintf(stderr, " SSE2 ....... %c\n", info->data.ia32.sse2 ? 'Y' : 'n'); + fprintf(stderr, " SSE3 ....... %c\n", info->data.ia32.sse3 ? 'Y' : 'n'); + fprintf(stderr, " SSSE3 ...... %c\n", info->data.ia32.ssse3 ? 'Y' : 'n'); + fprintf(stderr, " 3DNow! ..... %c\n", info->data.ia32._3dnow ? 'Y' : 'n'); + fprintf(stderr, " 3DNow!-ext . %c\n", info->data.ia32.ext3dnow? 'Y' : 'n'); + fprintf(stderr, " 3DNow!-MMX . %c\n", info->data.ia32.extmmx ? 'Y' : 'n'); +#endif + /* * now have to check for OS support of SSE/SSE2 */ if(info->data.ia32.fxsr || info->data.ia32.sse || info->data.ia32.sse2) { #if defined FLAC__NO_SSE_OS /* assume user knows better than us; turn it off */ - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false; + info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; #elif defined FLAC__SSE_OS /* assume user knows better than us; leave as detected above */ #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__) @@ -190,21 +226,21 @@ void FLAC__cpu_info(FLAC__CPUInfo *info) len = sizeof(sse); sse = sse || (sysctlbyname("hw.instruction_sse", &sse, &len, NULL, 0) == 0 && sse); len = sizeof(sse); sse = sse || (sysctlbyname("hw.optional.sse" , &sse, &len, NULL, 0) == 0 && sse); /* __APPLE__ ? */ if(!sse) - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false; + info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; #elif defined(__NetBSD__) || defined (__OpenBSD__) # if __NetBSD_Version__ >= 105250000 || (defined __OpenBSD__) int val = 0, mib[2] = { CTL_MACHDEP, CPU_SSE }; size_t len = sizeof(val); if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val) - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false; + info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; else { /* double-check SSE2 */ mib[1] = CPU_SSE2; len = sizeof(val); if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val) - info->data.ia32.sse2 = false; + info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; } # else - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false; + info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; # endif #elif defined(__linux__) int sse = 0; @@ -243,7 +279,7 @@ void FLAC__cpu_info(FLAC__CPUInfo *info) } if(!sse) - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false; + info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; #elif defined(_MSC_VER) # ifdef USE_TRY_CATCH_FLAVOR _try { @@ -260,7 +296,7 @@ void FLAC__cpu_info(FLAC__CPUInfo *info) } _except(EXCEPTION_EXECUTE_HANDLER) { if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION) - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false; + info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; } # else int sse = 0; @@ -291,12 +327,16 @@ void FLAC__cpu_info(FLAC__CPUInfo *info) } SetUnhandledExceptionFilter(save); if(!sse) - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false; + info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; # endif #else /* no way to test, disable to be safe */ - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false; + info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; +#endif +#ifdef DEBUG + fprintf(stderr, " SSE OS sup . %c\n", info->data.ia32.sse ? 'Y' : 'n'); #endif + } } #else diff --git a/src/libFLAC/ia32/cpu_asm.nasm b/src/libFLAC/ia32/cpu_asm.nasm index 2f669a2..ff92090 100644 --- a/src/libFLAC/ia32/cpu_asm.nasm +++ b/src/libFLAC/ia32/cpu_asm.nasm @@ -32,6 +32,7 @@ data_section +cglobal FLAC__cpu_have_cpuid_asm_ia32 cglobal FLAC__cpu_info_asm_ia32 cglobal FLAC__cpu_info_extended_amd_asm_ia32 @@ -39,8 +40,11 @@ cglobal FLAC__cpu_info_extended_amd_asm_ia32 ; ********************************************************************** ; +; FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32() +; -have_cpuid: +cident FLAC__cpu_have_cpuid_asm_ia32 + push ebx pushfd pop eax mov edx, eax @@ -56,26 +60,42 @@ have_cpuid: .no_cpuid: xor eax, eax .end: + pop ebx ret +; ********************************************************************** +; +; void FLAC__cpu_info_asm_ia32(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx) +; + cident FLAC__cpu_info_asm_ia32 + ;[esp + 8] == flags_edx + ;[esp + 12] == flags_ecx + push ebx - call have_cpuid + call FLAC__cpu_have_cpuid_asm_ia32 test eax, eax jz .no_cpuid mov eax, 1 cpuid - mov eax, edx + mov ebx, [esp + 8] + mov [ebx], edx + mov ebx, [esp + 12] + mov [ebx], ecx jmp .end -.no_cpuid: +.no_cpuid xor eax, eax + mov ebx, [esp + 8] + mov [ebx], eax + mov ebx, [esp + 12] + mov [ebx], eax .end pop ebx ret cident FLAC__cpu_info_extended_amd_asm_ia32 push ebx - call have_cpuid + call FLAC__cpu_have_cpuid_asm_ia32 test eax, eax jz .no_cpuid mov eax, 0x80000000 diff --git a/src/libFLAC/include/private/cpu.h b/src/libFLAC/include/private/cpu.h index 092f20b..651bb22 100644 --- a/src/libFLAC/include/private/cpu.h +++ b/src/libFLAC/include/private/cpu.h @@ -45,11 +45,15 @@ typedef enum { } FLAC__CPUInfo_Type; typedef struct { + FLAC__bool cpuid; + FLAC__bool bswap; FLAC__bool cmov; FLAC__bool mmx; FLAC__bool fxsr; FLAC__bool sse; FLAC__bool sse2; + FLAC__bool sse3; + FLAC__bool ssse3; FLAC__bool _3dnow; FLAC__bool ext3dnow; FLAC__bool extmmx; @@ -60,16 +64,6 @@ typedef struct { FLAC__bool ppc64; } FLAC__CPUInfo_PPC; -extern const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV; -extern const unsigned FLAC__CPUINFO_IA32_CPUID_MMX; -extern const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR; -extern const unsigned FLAC__CPUINFO_IA32_CPUID_SSE; -extern const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2; - -extern const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW; -extern const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW; -extern const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX; - typedef struct { FLAC__bool use_asm; FLAC__CPUInfo_Type type; @@ -84,8 +78,9 @@ void FLAC__cpu_info(FLAC__CPUInfo *info); #ifndef FLAC__NO_ASM #ifdef FLAC__CPU_IA32 #ifdef FLAC__HAS_NASM -unsigned FLAC__cpu_info_asm_ia32(void); -unsigned FLAC__cpu_info_extended_amd_asm_ia32(void); +FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32(void); +void FLAC__cpu_info_asm_ia32(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx); +FLAC__uint32 FLAC__cpu_info_extended_amd_asm_ia32(void); #endif #endif #endif -- 2.7.4