improve ia-32 checks to include cpuid, bswap, sse3 and ssse3 support
authorJosh Coalson <jcoalson@users.sourceforce.net>
Tue, 13 Mar 2007 06:35:55 +0000 (06:35 +0000)
committerJosh Coalson <jcoalson@users.sourceforce.net>
Tue, 13 Mar 2007 06:35:55 +0000 (06:35 +0000)
src/libFLAC/cpu.c
src/libFLAC/ia32/cpu_asm.nasm
src/libFLAC/include/private/cpu.h

index d8bc0fb..2b34721 100644 (file)
@@ -86,15 +86,19 @@ static void sigill_handler (int sig)
 /* how to get sysctlbyname()? */
 #endif
 
-const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 0x00008000;
-const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 0x00800000;
-const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000;
-const unsigned FLAC__CPUINFO_IA32_CPUID_SSE = 0x02000000;
-const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 0x04000000;
-
-const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW = 0x80000000;
-const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW = 0x40000000;
-const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX = 0x00400000;
+/* these are flags in EDX of CPUID AX=00000001 */
+static const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 0x00008000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 0x00800000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE = 0x02000000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 0x04000000;
+/* these are flags in ECX of CPUID AX=00000001 */
+static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE3 = 0x00000001;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_SSSE3 = 0x00000200;
+/* these are flags in EDX of CPUID AX=80000001 */
+static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW = 0x80000000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW = 0x40000000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX = 0x00400000;
 
 
 /*
@@ -156,31 +160,63 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
 #ifdef FLAC__CPU_IA32
        info->type = FLAC__CPUINFO_TYPE_IA32;
 #if !defined FLAC__NO_ASM && defined FLAC__HAS_NASM
-       info->use_asm = true;
-       {
-               unsigned cpuid = FLAC__cpu_info_asm_ia32();
-               info->data.ia32.cmov = (cpuid & FLAC__CPUINFO_IA32_CPUID_CMOV)? true : false;
-               info->data.ia32.mmx  = (cpuid & FLAC__CPUINFO_IA32_CPUID_MMX )? true : false;
-               info->data.ia32.fxsr = (cpuid & FLAC__CPUINFO_IA32_CPUID_FXSR)? true : false;
-               info->data.ia32.sse  = (cpuid & FLAC__CPUINFO_IA32_CPUID_SSE )? true : false;
-               info->data.ia32.sse2 = (cpuid & FLAC__CPUINFO_IA32_CPUID_SSE2)? true : false;
+       info->use_asm = true; /* we assume a minimum of 80386 with FLAC__CPU_IA32 */
+       info->data.ia32.cpuid = FLAC__cpu_have_cpuid_asm_ia32()? true : false;
+       info->data.ia32.bswap = info->data.ia32.cpuid; /* CPUID => BSWAP since it came after */
+       info->data.ia32.cmov = false;
+       info->data.ia32.mmx = false;
+       info->data.ia32.fxsr = false;
+       info->data.ia32.sse = false;
+       info->data.ia32.sse2 = false;
+       info->data.ia32.sse3 = false;
+       info->data.ia32.ssse3 = false;
+       info->data.ia32._3dnow = false;
+       info->data.ia32.ext3dnow = false;
+       info->data.ia32.extmmx = false;
+       if(info->data.ia32.cpuid) {
+               /* http://www.sandpile.org/ia32/cpuid.htm */
+               FLAC__uint32 flags_edx, flags_ecx;
+               FLAC__cpu_info_asm_ia32(&flags_edx, &flags_ecx);
+               info->data.ia32.cmov  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_CMOV )? true : false;
+               info->data.ia32.mmx   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_MMX  )? true : false;
+               info->data.ia32.fxsr  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_FXSR )? true : false;
+               info->data.ia32.sse   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE  )? true : false;
+               info->data.ia32.sse2  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE2 )? true : false;
+               info->data.ia32.sse3  = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false;
+               info->data.ia32.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false;
 
 #ifdef FLAC__USE_3DNOW
-               cpuid = FLAC__cpu_info_extended_amd_asm_ia32();
-               info->data.ia32._3dnow = (cpuid & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW)? true : false;
-               info->data.ia32.ext3dnow = (cpuid & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW)? true : false;
-               info->data.ia32.extmmx = (cpuid & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX)? true : false;
+               flags_edx = FLAC__cpu_info_extended_amd_asm_ia32();
+               info->data.ia32._3dnow   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW   )? true : false;
+               info->data.ia32.ext3dnow = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW)? true : false;
+               info->data.ia32.extmmx   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX  )? true : false;
 #else
                info->data.ia32._3dnow = info->data.ia32.ext3dnow = info->data.ia32.extmmx = false;
 #endif
 
+#ifdef DEBUG
+               fprintf(stderr, "CPU info (IA-32):\n");
+               fprintf(stderr, "  CPUID ...... %c\n", info->data.ia32.cpuid   ? 'Y' : 'n');
+               fprintf(stderr, "  BSWAP ...... %c\n", info->data.ia32.bswap   ? 'Y' : 'n');
+               fprintf(stderr, "  CMOV ....... %c\n", info->data.ia32.cmov    ? 'Y' : 'n');
+               fprintf(stderr, "  MMX ........ %c\n", info->data.ia32.mmx     ? 'Y' : 'n');
+               fprintf(stderr, "  FXSR ....... %c\n", info->data.ia32.fxsr    ? 'Y' : 'n');
+               fprintf(stderr, "  SSE ........ %c\n", info->data.ia32.sse     ? 'Y' : 'n');
+               fprintf(stderr, "  SSE2 ....... %c\n", info->data.ia32.sse2    ? 'Y' : 'n');
+               fprintf(stderr, "  SSE3 ....... %c\n", info->data.ia32.sse3    ? 'Y' : 'n');
+               fprintf(stderr, "  SSSE3 ...... %c\n", info->data.ia32.ssse3   ? 'Y' : 'n');
+               fprintf(stderr, "  3DNow! ..... %c\n", info->data.ia32._3dnow  ? 'Y' : 'n');
+               fprintf(stderr, "  3DNow!-ext . %c\n", info->data.ia32.ext3dnow? 'Y' : 'n');
+               fprintf(stderr, "  3DNow!-MMX . %c\n", info->data.ia32.extmmx  ? 'Y' : 'n');
+#endif
+
                /*
                 * now have to check for OS support of SSE/SSE2
                 */
                if(info->data.ia32.fxsr || info->data.ia32.sse || info->data.ia32.sse2) {
 #if defined FLAC__NO_SSE_OS
                        /* assume user knows better than us; turn it off */
-                       info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false;
+                       info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
 #elif defined FLAC__SSE_OS
                        /* assume user knows better than us; leave as detected above */
 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__)
@@ -190,21 +226,21 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
                        len = sizeof(sse); sse = sse || (sysctlbyname("hw.instruction_sse", &sse, &len, NULL, 0) == 0 && sse);
                        len = sizeof(sse); sse = sse || (sysctlbyname("hw.optional.sse"   , &sse, &len, NULL, 0) == 0 && sse); /* __APPLE__ ? */
                        if(!sse)
-                               info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false;
+                               info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
 #elif defined(__NetBSD__) || defined (__OpenBSD__)
 # if __NetBSD_Version__ >= 105250000 || (defined __OpenBSD__)
                        int val = 0, mib[2] = { CTL_MACHDEP, CPU_SSE };
                        size_t len = sizeof(val);
                        if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val)
-                               info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false;
+                               info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
                        else { /* double-check SSE2 */
                                mib[1] = CPU_SSE2;
                                len = sizeof(val);
                                if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val)
-                                       info->data.ia32.sse2 = false;
+                                       info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
                        }
 # else
-                       info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false;
+                       info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
 # endif
 #elif defined(__linux__)
                        int sse = 0;
@@ -243,7 +279,7 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
                        }
 
                        if(!sse)
-                               info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false;
+                               info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
 #elif defined(_MSC_VER)
 # ifdef USE_TRY_CATCH_FLAVOR
                        _try {
@@ -260,7 +296,7 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
                        }
                        _except(EXCEPTION_EXECUTE_HANDLER) {
                                if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION)
-                                       info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false;
+                                       info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
                        }
 # else
                        int sse = 0;
@@ -291,12 +327,16 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
                        }
                        SetUnhandledExceptionFilter(save);
                        if(!sse)
-                               info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false;
+                               info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
 # endif
 #else
                        /* no way to test, disable to be safe */
-                       info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false;
+                       info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+#endif
+#ifdef DEBUG
+               fprintf(stderr, "  SSE OS sup . %c\n", info->data.ia32.sse     ? 'Y' : 'n');
 #endif
+
                }
        }
 #else
index 2f669a2..ff92090 100644 (file)
@@ -32,6 +32,7 @@
 
        data_section
 
+cglobal FLAC__cpu_have_cpuid_asm_ia32
 cglobal FLAC__cpu_info_asm_ia32
 cglobal FLAC__cpu_info_extended_amd_asm_ia32
 
@@ -39,8 +40,11 @@ cglobal FLAC__cpu_info_extended_amd_asm_ia32
 
 ; **********************************************************************
 ;
+; FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32()
+;
 
-have_cpuid:
+cident FLAC__cpu_have_cpuid_asm_ia32
+       push    ebx
        pushfd
        pop     eax
        mov     edx, eax
@@ -56,26 +60,42 @@ have_cpuid:
 .no_cpuid:
        xor     eax, eax
 .end:
+       pop     ebx
        ret
 
+; **********************************************************************
+;
+; void FLAC__cpu_info_asm_ia32(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx)
+;
+
 cident FLAC__cpu_info_asm_ia32
+       ;[esp + 8] == flags_edx
+       ;[esp + 12] == flags_ecx
+
        push    ebx
-       call    have_cpuid
+       call    FLAC__cpu_have_cpuid_asm_ia32
        test    eax, eax
        jz      .no_cpuid
        mov     eax, 1
        cpuid
-       mov     eax, edx
+       mov     ebx, [esp + 8]
+       mov     [ebx], edx
+       mov     ebx, [esp + 12]
+       mov     [ebx], ecx
        jmp     .end
-.no_cpuid:
+.no_cpuid
        xor     eax, eax
+       mov     ebx, [esp + 8]
+       mov     [ebx], eax
+       mov     ebx, [esp + 12]
+       mov     [ebx], eax
 .end
        pop     ebx
        ret
 
 cident FLAC__cpu_info_extended_amd_asm_ia32
        push    ebx
-       call    have_cpuid
+       call    FLAC__cpu_have_cpuid_asm_ia32
        test    eax, eax
        jz      .no_cpuid
        mov     eax, 0x80000000
index 092f20b..651bb22 100644 (file)
@@ -45,11 +45,15 @@ typedef enum {
 } FLAC__CPUInfo_Type;
 
 typedef struct {
+       FLAC__bool cpuid;
+       FLAC__bool bswap;
        FLAC__bool cmov;
        FLAC__bool mmx;
        FLAC__bool fxsr;
        FLAC__bool sse;
        FLAC__bool sse2;
+       FLAC__bool sse3;
+       FLAC__bool ssse3;
        FLAC__bool _3dnow;
        FLAC__bool ext3dnow;
        FLAC__bool extmmx;
@@ -60,16 +64,6 @@ typedef struct {
        FLAC__bool ppc64;
 } FLAC__CPUInfo_PPC;
 
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV;
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_MMX;
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR;
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_SSE;
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2;
-
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW;
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW;
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX;
-
 typedef struct {
        FLAC__bool use_asm;
        FLAC__CPUInfo_Type type;
@@ -84,8 +78,9 @@ void FLAC__cpu_info(FLAC__CPUInfo *info);
 #ifndef FLAC__NO_ASM
 #ifdef FLAC__CPU_IA32
 #ifdef FLAC__HAS_NASM
-unsigned FLAC__cpu_info_asm_ia32(void);
-unsigned FLAC__cpu_info_extended_amd_asm_ia32(void);
+FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32(void);
+void         FLAC__cpu_info_asm_ia32(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx);
+FLAC__uint32 FLAC__cpu_info_extended_amd_asm_ia32(void);
 #endif
 #endif
 #endif