From: Liubov Dmitrieva Date: Fri, 28 Jun 2013 22:28:50 +0000 (-0700) Subject: Skip SSE4.2 versions on Intel Silvermont X-Git-Tag: upstream/2.20~1887 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6308fd9a46a2f4aa550886e6f58190fb209ef027;p=platform%2Fupstream%2Flinaro-glibc.git Skip SSE4.2 versions on Intel Silvermont SSE2/SSSE3 versions are faster than SSE4.2 versions on Intel Silvermont. --- diff --git a/ChangeLog b/ChangeLog index c5551b8..406ca28 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2013-06-28 Liubov Dmitrieva + + * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): Set + bit_Slow_SSE4_2 and bit_Prefer_PMINUB_for_stringop for Intel + Silvermont. + * sysdeps/x86_64/multiarch/init-arch.h (bit_Slow_SSE4_2): New + macro. + (index_Slow_SSE4_2): Likewise. + (index_Prefer_PMINUB_for_stringop): Likewise. + * sysdeps/x86_64/multiarch/strchr.S: Skip SSE4.2 version if + bit_Slow_SSE4_2 is set. + * sysdeps/x86_64/multiarch/strcmp.S: Likewise. + * sysdeps/x86_64/multiarch/strrchr.S: Likewise. + 2013-06-28 Ryan S. Arnold * sysdeps/powerpc/Makefile: Add comment about generating an offset to diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c index 9524aee..5583961 100644 --- a/sysdeps/x86_64/multiarch/init-arch.c +++ b/sysdeps/x86_64/multiarch/init-arch.c @@ -81,8 +81,16 @@ __init_cpu_features (void) case 0x37: /* Unaligned load versions are faster than SSSE3 on Silvermont. */ +#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop +# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop +#endif +#if index_Fast_Unaligned_Load != index_Slow_SSE4_2 +# error index_Fast_Unaligned_Load != index_Slow_SSE4_2 +#endif __cpu_features.feature[index_Fast_Unaligned_Load] - |= bit_Fast_Unaligned_Load; + |= (bit_Fast_Unaligned_Load + | bit_Prefer_PMINUB_for_stringop + | bit_Slow_SSE4_2); break; default: diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h index 28edbf7..0cb5f5b 100644 --- a/sysdeps/x86_64/multiarch/init-arch.h +++ b/sysdeps/x86_64/multiarch/init-arch.h @@ -23,6 +23,7 @@ #define bit_AVX_Usable (1 << 6) #define bit_FMA_Usable (1 << 7) #define bit_FMA4_Usable (1 << 8) +#define bit_Slow_SSE4_2 (1 << 9) /* CPUID Feature flags. */ @@ -62,6 +63,7 @@ # define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE # define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE # define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE #else /* __ASSEMBLER__ */ @@ -156,9 +158,11 @@ extern const struct cpu_features *__get_cpu_features (void) # define index_Fast_Copy_Backward FEATURE_INDEX_1 # define index_Slow_BSF FEATURE_INDEX_1 # define index_Fast_Unaligned_Load FEATURE_INDEX_1 +# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1 # define index_AVX_Usable FEATURE_INDEX_1 # define index_FMA_Usable FEATURE_INDEX_1 # define index_FMA4_Usable FEATURE_INDEX_1 +# define index_Slow_SSE4_2 FEATURE_INDEX_1 # define HAS_ARCH_FEATURE(name) \ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S index 6860329..f170238 100644 --- a/sysdeps/x86_64/multiarch/strchr.S +++ b/sysdeps/x86_64/multiarch/strchr.S @@ -29,6 +29,8 @@ ENTRY(strchr) jne 1f call __init_cpu_features 1: leaq __strchr_sse2(%rip), %rax + testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip) + jnz 2f testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) jz 2f leaq __strchr_sse42(%rip), %rax diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S index f69aaf4..1d4d711 100644 --- a/sysdeps/x86_64/multiarch/strcmp.S +++ b/sysdeps/x86_64/multiarch/strcmp.S @@ -88,14 +88,16 @@ ENTRY(STRCMP) jne 1f call __init_cpu_features 1: + testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip) + jnz 2f leaq STRCMP_SSE42(%rip), %rax testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) - jnz 2f - leaq STRCMP_SSSE3(%rip), %rax + jnz 3f +2: leaq STRCMP_SSSE3(%rip), %rax testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) - jnz 2f + jnz 3f leaq STRCMP_SSE2(%rip), %rax -2: ret +3: ret END(STRCMP) # ifdef USE_AS_STRCASECMP_L @@ -109,16 +111,18 @@ ENTRY(__strcasecmp) # ifdef HAVE_AVX_SUPPORT leaq __strcasecmp_avx(%rip), %rax testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip) - jnz 2f + jnz 3f # endif + testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip) + jnz 2f leaq __strcasecmp_sse42(%rip), %rax testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) - jnz 2f - leaq __strcasecmp_ssse3(%rip), %rax + jnz 3f +2: leaq __strcasecmp_ssse3(%rip), %rax testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) - jnz 2f + jnz 3f leaq __strcasecmp_sse2(%rip), %rax -2: ret +3: ret END(__strcasecmp) weak_alias (__strcasecmp, strcasecmp) # endif @@ -133,16 +137,18 @@ ENTRY(__strncasecmp) # ifdef HAVE_AVX_SUPPORT leaq __strncasecmp_avx(%rip), %rax testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip) - jnz 2f + jnz 3f # endif + testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip) + jnz 2f leaq __strncasecmp_sse42(%rip), %rax testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) - jnz 2f - leaq __strncasecmp_ssse3(%rip), %rax + jnz 3f +2: leaq __strncasecmp_ssse3(%rip), %rax testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) - jnz 2f + jnz 3f leaq __strncasecmp_sse2(%rip), %rax -2: ret +3: ret END(__strncasecmp) weak_alias (__strncasecmp, strncasecmp) # endif diff --git a/sysdeps/x86_64/multiarch/strrchr.S b/sysdeps/x86_64/multiarch/strrchr.S index ee6af6e..3f92a41 100644 --- a/sysdeps/x86_64/multiarch/strrchr.S +++ b/sysdeps/x86_64/multiarch/strrchr.S @@ -32,6 +32,8 @@ ENTRY(strrchr) jne 1f call __init_cpu_features 1: leaq __strrchr_sse2(%rip), %rax + testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip) + jnz 2f testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) jz 2f leaq __strrchr_sse42(%rip), %rax