From 278459fcff30f5547b9d5cd3eb512048edd5d5cb Mon Sep 17 00:00:00 2001 From: David Schleef Date: Tue, 28 Apr 2009 22:00:43 -0700 Subject: [PATCH] Detect CPU flags for SSE --- orc-float/orcfloat-sse.c | 2 +- orc-pixel/orcpixel-sse.c | 2 +- orc/orccpu-x86.c | 150 +++++++++++++++++++++++------------------------ orc/orccpu.c | 2 +- orc/orccpu.h | 2 +- orc/orcexecutor.c | 4 +- orc/orcopcodes.c | 4 +- orc/orcprogram-c.c | 2 +- orc/orcprogram-powerpc.c | 2 +- orc/orcprogram-sse.c | 11 +++- orc/orcprogram.h | 24 ++++---- orc/orcrules-arm.c | 2 +- orc/orcrules-mmx.c | 2 +- orc/orcrules-sse.c | 15 ++--- 14 files changed, 115 insertions(+), 109 deletions(-) diff --git a/orc-float/orcfloat-sse.c b/orc-float/orcfloat-sse.c index 009f8bc..0703907 100644 --- a/orc-float/orcfloat-sse.c +++ b/orc-float/orcfloat-sse.c @@ -180,7 +180,7 @@ orc_float_sse_register_rules (OrcTarget *target) OrcRuleSet *rule_set; rule_set = orc_rule_set_new (orc_opcode_set_get("float"), - orc_target_get_by_name ("sse")); + orc_target_get_by_name ("sse"), ORC_TARGET_SSE_SSE2); orc_rule_register (rule_set, "addf", sse_rule_addf, NULL); orc_rule_register (rule_set, "subf", sse_rule_subf, NULL); diff --git a/orc-pixel/orcpixel-sse.c b/orc-pixel/orcpixel-sse.c index 439a77b..9badcc4 100644 --- a/orc-pixel/orcpixel-sse.c +++ b/orc-pixel/orcpixel-sse.c @@ -124,7 +124,7 @@ orc_pixel_sse_register_rules (OrcTarget *target) OrcRuleSet *rule_set; rule_set = orc_rule_set_new (orc_opcode_set_get("pixel"), - orc_target_get_by_name ("sse")); + orc_target_get_by_name ("sse"), ORC_TARGET_SSE_SSE2); orc_rule_register (rule_set, "compin", sse_rule_compin, NULL); orc_rule_register (rule_set, "compadd", sse_rule_compadd, NULL); diff --git a/orc/orccpu-x86.c b/orc/orccpu-x86.c index b723523..4a96f53 100644 --- a/orc/orccpu-x86.c +++ b/orc/orccpu-x86.c @@ -30,6 +30,7 @@ #endif #include #include +#include #include #include @@ -61,9 +62,10 @@ #ifdef USE_I386_CPUINFO -static void -orc_cpu_i386_getflags_cpuinfo (char *cpuinfo) +static unsigned int +orc_sse_getflags_cpuinfo (char *cpuinfo) { + unsigned int sse_flags; char *cpuinfo_flags; char **flags; char **f; @@ -76,47 +78,33 @@ orc_cpu_i386_getflags_cpuinfo (char *cpuinfo) flags = strsplit(cpuinfo_flags); for (f = flags; *f; f++) { - if (strcmp (*f, "cmov") == 0) { - ORC_DEBUG ("cpu flag %s", *f); - orc_cpu_flags |= ORC_CPU_FLAG_CMOV; - } - if (strcmp (*f, "mmx") == 0) { - ORC_DEBUG ("cpu flag %s", *f); - orc_cpu_flags |= ORC_CPU_FLAG_MMX; - } - if (strcmp (*f, "sse") == 0) { + if (strcmp (*f, "sse2") == 0) { ORC_DEBUG ("cpu flag %s", *f); - orc_cpu_flags |= ORC_CPU_FLAG_SSE; + sse_flags |= ORC_TARGET_SSE_SSE2; } - if (strcmp (*f, "mmxext") == 0) { + if (strcmp (*f, "pni") == 0) { ORC_DEBUG ("cpu flag %s", *f); - orc_cpu_flags |= ORC_CPU_FLAG_MMXEXT; + sse_flags |= ORC_TARGET_SSE_SSE3; } - if (strcmp (*f, "sse2") == 0) { + if (strcmp (*f, "ssse3") == 0) { ORC_DEBUG ("cpu flag %s", *f); - orc_cpu_flags |= ORC_CPU_FLAG_SSE2; - orc_cpu_flags |= ORC_CPU_FLAG_MMXEXT; + sse_flags |= ORC_TARGET_SSE_SSSE3; } - if (strcmp (*f, "3dnow") == 0) { + if (strcmp (*f, "sse4_1") == 0) { ORC_DEBUG ("cpu flag %s", *f); - orc_cpu_flags |= ORC_CPU_FLAG_3DNOW; + sse_flags |= ORC_TARGET_SSE_SSSE4_1; } - if (strcmp (*f, "3dnowext") == 0) { + if (strcmp (*f, "sse4_2") == 0) { ORC_DEBUG ("cpu flag %s", *f); - orc_cpu_flags |= ORC_CPU_FLAG_3DNOWEXT; + sse_flags |= ORC_TARGET_SSE_SSSE4_2; } - if (strcmp (*f, "sse3") == 0) { + if (strcmp (*f, "sse4a") == 0) { ORC_DEBUG ("cpu flag %s", *f); - orc_cpu_flags |= ORC_CPU_FLAG_SSE3; - orc_cpu_flags |= ORC_CPU_FLAG_SSE2; - orc_cpu_flags |= ORC_CPU_FLAG_MMXEXT; + sse_flags |= ORC_TARGET_SSE_SSSE4A; } - if (strcmp (*f, "ssse3") == 0) { + if (strcmp (*f, "sse5") == 0) { ORC_DEBUG ("cpu flag %s", *f); - orc_cpu_flags |= ORC_CPU_FLAG_SSSE3; - orc_cpu_flags |= ORC_CPU_FLAG_SSE3; - orc_cpu_flags |= ORC_CPU_FLAG_SSE2; - orc_cpu_flags |= ORC_CPU_FLAG_MMXEXT; + orc_cpu_flags |= ORC_CPU_FLAG_SSE5; } free (*f); @@ -124,6 +112,8 @@ orc_cpu_i386_getflags_cpuinfo (char *cpuinfo) free (flags); free (cpuinfo); free (cpuinfo_flags); + + return sse_flags; } #endif @@ -166,12 +156,13 @@ test_cpuid (void *ignored) } #endif -static void -orc_cpu_detect_cpuid (void) +static unsigned int +orc_sse_detect_cpuid (void) { uint32_t eax, ebx, ecx, edx; uint32_t level; char vendor[13] = { 0 }; + unsigned int sse_flags = 0; #if 0 int ret; @@ -190,7 +181,7 @@ orc_cpu_detect_cpuid (void) ORC_DEBUG("cpuid %d %s", level, vendor); if (level < 1) { - return; + return 0; } get_cpuid (0x00000001, &eax, &ebx, &ecx, &edx); @@ -202,37 +193,34 @@ orc_cpu_detect_cpuid (void) #endif /* Intel flags */ - if (edx & (1<<15)) { - orc_cpu_flags |= ORC_CPU_FLAG_CMOV; + if (edx & (1<<26)) { + sse_flags |= ORC_TARGET_SSE_SSE2; } - if (edx & (1<<23)) { - orc_cpu_flags |= ORC_CPU_FLAG_MMX; + if (ecx & (1<<0)) { + sse_flags |= ORC_TARGET_SSE_SSE3; } - if (edx & (1<<25)) { - orc_cpu_flags |= ORC_CPU_FLAG_SSE; + if (ecx & (1<<9)) { + sse_flags |= ORC_TARGET_SSE_SSSE3; } - if (edx & (1<<26)) { - orc_cpu_flags |= ORC_CPU_FLAG_SSE2; - orc_cpu_flags |= ORC_CPU_FLAG_MMXEXT; + if (ecx & (1<<19)) { + sse_flags |= ORC_TARGET_SSE_SSE4_1; } - if (ecx & (1<<0)) { - orc_cpu_flags |= ORC_CPU_FLAG_SSE3; + if (ecx & (1<<20)) { + sse_flags |= ORC_TARGET_SSE_SSE4_2; } if (memcmp (vendor, "AuthenticAMD", 12) == 0) { get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx); /* AMD flags */ - if (edx & (1<<22)) { - orc_cpu_flags |= ORC_CPU_FLAG_MMXEXT; + if (ecx & (1<<6)) { + sse_flags |= ORC_TARGET_SSE_SSE4A; } - if (edx & (1<<31)) { - orc_cpu_flags |= ORC_CPU_FLAG_3DNOW; - } - if (edx & (1<<30)) { - orc_cpu_flags |= ORC_CPU_FLAG_3DNOWEXT; + if (ecx & (1<<11)) { + sse_flags |= ORC_TARGET_SSE_SSE5; } +#if 0 get_cpuid (0x80000005, &eax, &ebx, &ecx, &edx); ORC_INFO("L1 D-cache: %d kbytes, %d-way, %d lines/tag, %d line size", @@ -243,46 +231,51 @@ orc_cpu_detect_cpuid (void) get_cpuid (0x80000006, &eax, &ebx, &ecx, &edx); ORC_INFO("L2 cache: %d kbytes, %d assoc, %d lines/tag, %d line size", (ecx>>16)&0xffff, (ecx>>12)&0xf, (ecx>>8)&0xf, ecx&0xff); +#endif } + + return sse_flags; } #endif #ifdef USE_I386_GETISAX -static void -orc_cpu_detect_getisax (void) +static unsigned int +orc_sse_detect_getisax (void) { + unsigned int sse_flags; uint_t ui; getisax (&ui, 1); - if (ui & AV_386_CMOV) { - orc_cpu_flags |= ORC_CPU_FLAG_CMOV; - } - if (ui & AV_386_MMX) { - orc_cpu_flags |= ORC_CPU_FLAG_MMX; - } - if (ui & AV_386_SSE) { - orc_cpu_flags |= ORC_CPU_FLAG_SSE; - } if (ui & AV_386_SSE2) { - orc_cpu_flags |= ORC_CPU_FLAG_SSE2; - orc_cpu_flags |= ORC_CPU_FLAG_MMXEXT; + sse_flags |= ORC_TARGET_SSE_SSE2; } if (ui & AV_386_SSE3) { - orc_cpu_flags |= ORC_CPU_FLAG_SSE3; + sse_flags |= ORC_TARGET_SSE_SSE3; } - if (ui & AV_386_AMD_3DNow) { - orc_cpu_flags |= ORC_CPU_FLAG_3DNOW; + + /* guesses. if these fail to compile, please fix */ + if (ui & AV_386_SSSE3) { + sse_flags |= ORC_TARGET_SSE_SSSE3; + } + if (ui & AV_386_SSE4_1) { + sse_flags |= ORC_TARGET_SSE_SSE4_1; } - if (ui & AV_386_AMD_3DNowx) { - orc_cpu_flags |= ORC_CPU_FLAG_3DNOWEXT; + if (ui & AV_386_SSE4_2) { + sse_flags |= ORC_TARGET_SSE_SSE4_2; } - if (ui & AV_386_AMD_MMX) { - orc_cpu_flags |= ORC_CPU_FLAG_MMXEXT; + if (ui & AV_386_SSE4A) { + sse_flags |= ORC_TARGET_SSE_SSE4A; } + if (ui & AV_386_SSE5) { + sse_flags |= ORC_TARGET_SSE_SSE5; + } + + return sse_flags; } #endif +#if 0 /* Reduce the set of CPU capabilities detected by whatever detection mechanism * was chosen, according to kernel limitations. SSE requires kernel support for * use. @@ -320,21 +313,22 @@ orc_cpu_detect_kernel_support (void) #endif #endif } +#endif -void -orc_cpu_detect_arch(void) +unsigned int +orc_sse_get_cpu_flags(void) { + //orc_cpu_detect_kernel_support (); + #ifdef USE_I386_CPUID - orc_cpu_detect_cpuid (); + return orc_sse_detect_cpuid (); #endif #ifdef USE_I386_GETISAX - orc_cpu_detect_getisax (); + return orc_sse_detect_getisax (); #endif #ifdef USE_I386_CPUINFO - orc_cpu_detect_cpuinfo (); + return orc_sse_detect_cpuinfo (); #endif - - orc_cpu_detect_kernel_support (); } diff --git a/orc/orccpu.c b/orc/orccpu.c index 1a2ccd4..ee5aaaf 100644 --- a/orc/orccpu.c +++ b/orc/orccpu.c @@ -105,7 +105,7 @@ _orc_cpu_init (void) { const char *envvar; - orc_cpu_detect_arch(); + //orc_cpu_detect_arch(); envvar = getenv ("ORC_CPU_FLAGS"); if (envvar != NULL) { diff --git a/orc/orccpu.h b/orc/orccpu.h index fda3e34..9bfa378 100644 --- a/orc/orccpu.h +++ b/orc/orccpu.h @@ -50,7 +50,7 @@ typedef enum { ORC_CPU_FLAG_SSSE3 = (1<<12) } OrcCpuFlag; -unsigned int orc_cpu_get_flags (void); +unsigned int orc_sse_get_cpu_flags (void); void _orc_cpu_init (void); diff --git a/orc/orcexecutor.c b/orc/orcexecutor.c index 74c928e..a142330 100644 --- a/orc/orcexecutor.c +++ b/orc/orcexecutor.c @@ -77,13 +77,13 @@ orc_executor_set_param_str (OrcExecutor *ex, const char *name, int value) } int -orc_executor_get_accumulator (OrcExecutor *ex, int var, int value) +orc_executor_get_accumulator (OrcExecutor *ex, int var) { return ex->accumulators[var]; } int -orc_executor_get_accumulator_str (OrcExecutor *ex, const char *name, int value) +orc_executor_get_accumulator_str (OrcExecutor *ex, const char *name) { int var; var = orc_program_find_var_by_name (ex->program, name); diff --git a/orc/orcopcodes.c b/orc/orcopcodes.c index 712784a..67e537a 100644 --- a/orc/orcopcodes.c +++ b/orc/orcopcodes.c @@ -103,7 +103,8 @@ orc_opcode_register (const char *name, int n_dest, int n_src, #endif OrcRuleSet * -orc_rule_set_new (OrcOpcodeSet *opcode_set, OrcTarget *target) +orc_rule_set_new (OrcOpcodeSet *opcode_set, OrcTarget *target, + unsigned int required_flags) { OrcRuleSet *rule_set; @@ -113,6 +114,7 @@ orc_rule_set_new (OrcOpcodeSet *opcode_set, OrcTarget *target) memset (rule_set, 0, sizeof(OrcRuleSet)); rule_set->opcode_set = opcode_set; + rule_set->required_target_flags = required_flags; rule_set->rules = malloc (sizeof(OrcRule) * opcode_set->n_opcodes); memset (rule_set->rules, 0, sizeof(OrcRule) * opcode_set->n_opcodes); diff --git a/orc/orcprogram-c.c b/orc/orcprogram-c.c index 71234b5..f9b6089 100644 --- a/orc/orcprogram-c.c +++ b/orc/orcprogram-c.c @@ -256,7 +256,7 @@ orc_c_init (void) orc_target_register (&c_target); - rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), &c_target); + rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), &c_target, 0); #define BINARY_SB(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); #define BINARY_UB(a,b) orc_rule_register (rule_set, #a , c_rule_ ## a, NULL); diff --git a/orc/orcprogram-powerpc.c b/orc/orcprogram-powerpc.c index 3bfa698..b5b4b04 100644 --- a/orc/orcprogram-powerpc.c +++ b/orc/orcprogram-powerpc.c @@ -667,7 +667,7 @@ orc_compiler_powerpc_register_rules (OrcTarget *target) { OrcRuleSet *rule_set; - rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target); + rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target, 0); orc_rule_register (rule_set, "addw", powerpc_rule_addw, NULL); orc_rule_register (rule_set, "subw", powerpc_rule_subw, NULL); diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index 4658bb1..d725e86 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -12,6 +12,7 @@ #include #include #include +#include #define SIZE 65536 @@ -58,6 +59,14 @@ orc_compiler_sse_init (OrcCompiler *compiler) compiler->is_64bit = FALSE; #endif +#if defined(HAVE_AMD64) || defined(HAVE_I386) + compiler->target_flags = orc_sse_get_cpu_flags (); +#else + compiler->target_flags = ORC_TARGET_SSE_SSE2; + compiler->target_flags |= ORC_TARGET_SSE_SSE3; + compiler->target_flags |= ORC_TARGET_SSE_SSSE3; +#endif + if (compiler->is_64bit) { for(i=ORC_GP_REG_BASE;ivalid_regs[i] = 1; @@ -124,7 +133,7 @@ orc_compiler_sse_init (OrcCompiler *compiler) break; } - //compiler->long_jumps = TRUE; + compiler->long_jumps = TRUE; } void diff --git a/orc/orcprogram.h b/orc/orcprogram.h index f97523b..7360ced 100644 --- a/orc/orcprogram.h +++ b/orc/orcprogram.h @@ -63,15 +63,15 @@ typedef void (*OrcRuleEmitFunc)(OrcCompiler *p, void *user, OrcInstruction *insn orc_debug_print(ORC_DEBUG_ERROR, __FILE__, ORC_FUNCTION, __LINE__, __VA_ARGS__); \ } while (0) -#if 0 enum { - ORC_TARGET_C = 0, - ORC_TARGET_ALTIVEC = 1, - ORC_TARGET_MMX = 2, - ORC_TARGET_SSE = 3, - ORC_TARGET_ARM = 4 + ORC_TARGET_SSE_SSE2 = (1<<0), + ORC_TARGET_SSE_SSE3 = (1<<1), + ORC_TARGET_SSE_SSSE3 = (1<<2), + ORC_TARGET_SSE_SSE4_1 = (1<<3), + ORC_TARGET_SSE_SSE4_2 = (1<<4), + ORC_TARGET_SSE_SSE4A = (1<<5), + ORC_TARGET_SSE_SSE5 = (1<<6) }; -#endif typedef enum { ORC_VAR_TYPE_TEMP, @@ -155,6 +155,7 @@ struct _OrcRule { struct _OrcRuleSet { OrcOpcodeSet *opcode_set; + int required_target_flags; OrcRule *rules; int n_rules; @@ -218,6 +219,8 @@ struct _OrcCompiler { OrcProgram *program; OrcTarget *target; + unsigned int target_flags; + OrcInstruction insns[ORC_N_INSNS]; int n_insns; @@ -333,8 +336,8 @@ void orc_executor_set_array (OrcExecutor *ex, int var, void *ptr); void orc_executor_set_array_str (OrcExecutor *ex, const char *name, void *ptr); void orc_executor_set_param (OrcExecutor *ex, int var, int value); void orc_executor_set_param_str (OrcExecutor *ex, const char *name, int value); -int orc_executor_get_accumulator (OrcExecutor *ex, int var, int value); -int orc_executor_get_accumulator_str (OrcExecutor *ex, const char *name, int value); +int orc_executor_get_accumulator (OrcExecutor *ex, int var); +int orc_executor_get_accumulator_str (OrcExecutor *ex, const char *name); void orc_executor_set_n (OrcExecutor *ex, int n); void orc_executor_emulate (OrcExecutor *ex); void orc_executor_run (OrcExecutor *ex); @@ -343,7 +346,8 @@ OrcOpcodeSet *orc_opcode_set_get (const char *name); int orc_opcode_set_find_by_name (OrcOpcodeSet *opcode_set, const char *name); int orc_opcode_register_static (OrcStaticOpcode *sopcode, char *prefix); -OrcRuleSet * orc_rule_set_new (OrcOpcodeSet *opcode_set, OrcTarget *target); +OrcRuleSet * orc_rule_set_new (OrcOpcodeSet *opcode_set, OrcTarget *target, + unsigned int required_flags); void orc_rule_register (OrcRuleSet *rule_set, const char *opcode_name, OrcRuleEmitFunc emit, void *emit_user); OrcRule * orc_target_get_rule (OrcTarget *target, OrcStaticOpcode *opcode); diff --git a/orc/orcrules-arm.c b/orc/orcrules-arm.c index c733735..1544188 100644 --- a/orc/orcrules-arm.c +++ b/orc/orcrules-arm.c @@ -492,7 +492,7 @@ orc_compiler_arm_register_rules (OrcTarget *target) { OrcRuleSet *rule_set; - rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target); + rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target, 0); #if 0 #define REG(x) \ diff --git a/orc/orcrules-mmx.c b/orc/orcrules-mmx.c index 7868c54..263140e 100644 --- a/orc/orcrules-mmx.c +++ b/orc/orcrules-mmx.c @@ -140,7 +140,7 @@ orc_compiler_mmx_register_rules (OrcTarget *target) { OrcRuleSet *rule_set; - rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target); + rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target, 0); orc_rule_register (rule_set, "addw", mmx_rule_addw, NULL); orc_rule_register (rule_set, "subw", mmx_rule_subw, NULL); diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index 514df2d..f38ddcc 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -861,7 +861,8 @@ orc_compiler_sse_register_rules (OrcTarget *target) orc_rule_register (rule_set, #x , sse_rule_ ## x, NULL) /* SSE 2 */ - rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target); + rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target, + ORC_TARGET_SSE_SSE2); REG(addb); REG(addssb); @@ -949,9 +950,9 @@ orc_compiler_sse_register_rules (OrcTarget *target) /* SSE 3 -- no rules */ -if (0) { /* SSSE 3 */ - rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target); + rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target, + ORC_TARGET_SSE_SSSE3); orc_rule_register (rule_set, "signb", sse_rule_signX, (void *)0); orc_rule_register (rule_set, "signw", sse_rule_signX, (void *)1); @@ -959,11 +960,10 @@ if (0) { REG(absb); REG(absw); REG(absl); -} -if (0) { /* SSE 4.1 */ - rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target); + rule_set = orc_rule_set_new (orc_opcode_set_get("sys"), target, + ORC_TARGET_SSE_SSE4_1); REG(maxsb); REG(minsb); @@ -975,12 +975,9 @@ if (0) { REG(minul); REG(mulll); orc_rule_register (rule_set, "convsuslw", sse_rule_convsuslw, NULL); -} /* SSE 4.2 -- no rules */ /* SSE 4a -- no rules */ - - /* SSE 5 -- no rules */ } -- 2.7.4