From b042aae70d4c1790a42033d23816bc092347d846 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Wed, 10 Jul 2013 07:56:17 +0100 Subject: [PATCH] util/u_math: Use xmmintrin.h whenever possible. It seems __builtin_ia32_ldmxcsr is only available on gcc and only when -msse is used. xmmintrin.h/pmmintrin.h provide portable intrinsics, but these too are only available with gcc when -msse/-msse3 are set. scons build always sets -msse on x86 builds, but autotools doesn't seem to. We could try to get this working on gcc x86 without -msse by emitting assembly, but I believe that in this day and age we really should be building Mesa with -msse and -msse2. --- src/gallium/auxiliary/util/u_math.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c index 2487bc7..f3fe392 100644 --- a/src/gallium/auxiliary/util/u_math.c +++ b/src/gallium/auxiliary/util/u_math.c @@ -27,9 +27,17 @@ +#include "pipe/p_config.h" #include "util/u_math.h" #include "util/u_cpu_detect.h" +#if defined(PIPE_ARCH_SSE) +#include <xmmintrin.h> +/* This is defined in pmmintrin.h, but it can only be included when -msse3 is + * used, so just define it here to avoid requiring -msse3.
*/ +#define _MM_DENORMALS_ZERO_MASK 0x0040 +#endif + /** 2^x, for x in [-1.0, 1.0) */ float pow2_table[POW2_TABLE_SIZE]; @@ -81,9 +89,9 @@ util_fpstate_get(void) { unsigned mxcsr = 0; -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#if defined(PIPE_ARCH_SSE) if (util_cpu_caps.has_sse) { - mxcsr = __builtin_ia32_stmxcsr(); + mxcsr = _mm_getcsr(); } #endif @@ -99,13 +107,13 @@ util_fpstate_get(void) unsigned util_fpstate_set_denorms_to_zero(unsigned current_mxcsr) { -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) -#define MXCSR_DAZ (1 << 6) /* Enable denormals are zero mode */ -#define MXCSR_FTZ (1 << 15) /* Enable flush to zero mode */ +#if defined(PIPE_ARCH_SSE) if (util_cpu_caps.has_sse) { - current_mxcsr |= MXCSR_FTZ; + /* Enable flush to zero mode */ + current_mxcsr |= _MM_FLUSH_ZERO_MASK; if (util_cpu_caps.has_sse3) { - current_mxcsr |= MXCSR_DAZ; + /* Enable denormals are zero mode */ + current_mxcsr |= _MM_DENORMALS_ZERO_MASK; } util_fpstate_set(current_mxcsr); } @@ -121,9 +129,9 @@ util_fpstate_set_denorms_to_zero(unsigned current_mxcsr) void util_fpstate_set(unsigned mxcsr) { -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#if defined(PIPE_ARCH_SSE) if (util_cpu_caps.has_sse) { - __builtin_ia32_ldmxcsr(mxcsr); + _mm_setcsr(mxcsr); } #endif } -- 2.7.4