From 0186c6e97ecd58e91a45cbb74e5643325777fd6f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Ond=C5=99ej=20B=C3=ADlka?= Date: Fri, 30 Aug 2013 10:14:37 +0200 Subject: [PATCH] Fix rawmemchr regression on bulldozer. --- ChangeLog | 5 ++ sysdeps/x86_64/multiarch/ifunc-impl-list.c | 6 -- sysdeps/x86_64/multiarch/rawmemchr.S | 103 ----------------------------- 3 files changed, 5 insertions(+), 109 deletions(-) delete mode 100644 sysdeps/x86_64/multiarch/rawmemchr.S diff --git a/ChangeLog b/ChangeLog index 5fa802c..ac8c4b2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2013-08-30 Ondřej Bílka + + * sysdeps/x86_64/multiarch/rawmemchr.S: Delete. + * sysdeps/x86_64/multiarch/ifunc-impl-list.c: Remove rawmemchr ifunc. + 2013-08-29 Ondřej Bílka * sysdeps/unix/sysv/linux/powerpc/aix/aix-dirent.h: Remove diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 28d3579..d0992e1 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -61,12 +61,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memmove_ssse3) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2)) - /* Support sysdeps/x86_64/multiarch/rawmemchr.S. */ - IFUNC_IMPL (i, name, rawmemchr, - IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_SSE4_2, - __rawmemchr_sse42) - IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2)) - /* Support sysdeps/x86_64/multiarch/stpncpy.S. */ IFUNC_IMPL (i, name, stpncpy, IFUNC_IMPL_ADD (array, i, stpncpy, HAS_SSSE3, diff --git a/sysdeps/x86_64/multiarch/rawmemchr.S b/sysdeps/x86_64/multiarch/rawmemchr.S deleted file mode 100644 index 50de38f..0000000 --- a/sysdeps/x86_64/multiarch/rawmemchr.S +++ /dev/null @@ -1,103 +0,0 @@ -/* Multiple versions of rawmemchr - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2009-2013 Free Software Foundation, Inc. - Contributed by Ulrich Drepper . - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - -/* Define multiple versions only for the definition in lib. */ -#ifndef NOT_IN_libc - .text -ENTRY(rawmemchr) - .type rawmemchr, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: testl $bit_Prefer_PMINUB_for_stringop, __cpu_features+FEATURE_OFFSET+index_Prefer_PMINUB_for_stringop(%rip) - jnz 2f - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) - jz 2f - leaq __rawmemchr_sse42(%rip), %rax - ret -2: leaq __rawmemchr_sse2(%rip), %rax - ret - -END(rawmemchr) -strong_alias (rawmemchr, __rawmemchr) - - - .section .text.sse4.2,"ax",@progbits - .align 16 - .type __rawmemchr_sse42, @function - .globl __rawmemchr_sse42 - .hidden __rawmemchr_sse42 -__rawmemchr_sse42: - cfi_startproc - CALL_MCOUNT - movd %esi, %xmm1 - movq %rdi, %rcx - pxor %xmm2, %xmm2 - andq $~15, %rdi - orl $0xffffffff, %esi - pshufb %xmm2, %xmm1 - movdqa (%rdi), %xmm0 - subq %rdi, %rcx - pcmpeqb %xmm1, %xmm0 - shl %cl, %esi - pmovmskb %xmm0, %ecx - movl $16, %eax - movl $16, %edx - andl %esi, %ecx - jnz 1f - -2: pcmpestri $0x08, 16(%rdi), %xmm1 - leaq 16(%rdi), %rdi - jnc 2b - - leaq (%rdi,%rcx), %rax - ret - -1: bsfl %ecx, %eax - addq %rdi, %rax - ret - cfi_endproc - .size __rawmemchr_sse42, .-__rawmemchr_sse42 - - -# undef ENTRY -# define ENTRY(name) \ - .type 
__rawmemchr_sse2, @function; \ - .align 16; \ - .globl __rawmemchr_sse2; \ - .hidden __rawmemchr_sse2; \ - __rawmemchr_sse2: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __rawmemchr_sse2, .-__rawmemchr_sse2 -# undef libc_hidden_builtin_def -/* It doesn't make sense to send libc-internal rawmemchr calls through a PLT. - The speedup we get from using SSE4.2 instruction is likely eaten away - by the indirect call in the PLT. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI___rawmemchr; __GI___rawmemchr = __rawmemchr_sse2 -#endif - -#include "../rawmemchr.S" -- 2.7.4