Use p2align instead of ALIGN

author Ondřej Bílka <neleai@seznam.cz>

Tue, 8 Oct 2013 13:46:48 +0000 (15:46 +0200)

committer Ondřej Bílka <neleai@seznam.cz>

Tue, 8 Oct 2013 13:46:48 +0000 (15:46 +0200)
author Ondřej Bílka <neleai@seznam.cz>
Tue, 8 Oct 2013 13:46:48 +0000 (15:46 +0200)
committer Ondřej Bílka <neleai@seznam.cz>
Tue, 8 Oct 2013 13:46:48 +0000 (15:46 +0200)
diff --git a/ChangeLog b/ChangeLog

index 297ff42..8a39723 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2013-10-08  Ondřej Bílka  <neleai@seznam.cz>
+
+       * sysdeps/x86_64/memset.S (ALIGN): Macro removed.
+       Use .p2align directive instead, throughout.
+       * sysdeps/x86_64/multiarch/memcmp-sse4.S: Likewise.
+       * sysdeps/x86_64/multiarch/memcmp-ssse3.S: Likewise.
+       * sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S: Likewise.
+       * sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: Likewise.
+       * sysdeps/x86_64/multiarch/memcpy-ssse3.S: Likewise.
+       * sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S: Likewise.
+       * sysdeps/x86_64/strchr.S: Likewise.
+       * sysdeps/x86_64/strrchr.S: Likewise.
+
  2013-10-08  Siddhesh Poyarekar  <siddhesh@redhat.com>
  
         * sysdeps/ieee754/dbl-64/e_pow.c: Fix code formatting.
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S

index 6c69f4b..9b1de89 100644 (file)
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -19,10 +19,6 @@
  
  #include <sysdep.h>
  
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
-#endif
-
         .text
  #if !defined NOT_IN_libc
  ENTRY(__bzero)
@@ -71,12 +67,12 @@ L(entry_from_bzero):
  L(return):
         rep
         ret
-       ALIGN (4)
+       .p2align 4
  L(between_32_64_bytes):
         movdqu  %xmm8, 16(%rdi)
         movdqu  %xmm8, -32(%rdi,%rdx)
         ret
-       ALIGN (4)
+       .p2align 4
  L(loop_start):
         leaq    64(%rdi), %rcx
         movdqu  %xmm8, (%rdi)
@@ -92,7 +88,7 @@ L(loop_start):
         andq    $-64, %rdx
         cmpq    %rdx, %rcx
         je      L(return)
-       ALIGN (4)
+       .p2align 4
  L(loop):
         movdqa  %xmm8, (%rcx)
         movdqa  %xmm8, 16(%rcx)
diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S

index 1ed4200..d7b147e 100644 (file)
--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S
+++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S
@@ -25,10 +25,6 @@
  #  define MEMCMP       __memcmp_sse4_1
  # endif
  
-# ifndef ALIGN
-#  define ALIGN(n)     .p2align n
-# endif
-
  # define JMPTBL(I, B)  (I - B)
  
  # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)           \
@@ -60,7 +56,7 @@ ENTRY (MEMCMP)
         BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
  
  # ifndef USE_AS_WMEMCMP
-       ALIGN (4)
+       .p2align 4
  L(firstbyte):
         movzbl  (%rdi), %eax
         movzbl  (%rsi), %ecx
@@ -68,7 +64,7 @@ L(firstbyte):
         ret
  # endif
  
-       ALIGN (4)
+       .p2align 4
  L(79bytesormore):
         movdqu  (%rsi), %xmm1
         movdqu  (%rdi), %xmm2
@@ -316,7 +312,7 @@ L(less32bytesin256):
         add     %rdx, %rdi
         BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(512bytesormore):
  # ifdef DATA_CACHE_SIZE_HALF
         mov     $DATA_CACHE_SIZE_HALF, %R8_LP
@@ -329,7 +325,7 @@ L(512bytesormore):
         cmp     %r8, %rdx
         ja      L(L2_L3_cache_unaglined)
         sub     $64, %rdx
-       ALIGN (4)
+       .p2align 4
  L(64bytesormore_loop):
         movdqu  (%rdi), %xmm2
         pxor    (%rsi), %xmm2
@@ -361,7 +357,7 @@ L(64bytesormore_loop):
  
  L(L2_L3_cache_unaglined):
         sub     $64, %rdx
-       ALIGN (4)
+       .p2align 4
  L(L2_L3_unaligned_128bytes_loop):
         prefetchnta 0x1c0(%rdi)
         prefetchnta 0x1c0(%rsi)
@@ -396,7 +392,7 @@ L(L2_L3_unaligned_128bytes_loop):
  /*
   * This case is for machines which are sensitive for unaligned instructions.
   */
-       ALIGN (4)
+       .p2align 4
  L(2aligned):
         cmp     $128, %rdx
         ja      L(128bytesormorein2aligned)
@@ -444,7 +440,7 @@ L(less32bytesin64in2alinged):
         add     %rdx, %rdi
         BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(128bytesormorein2aligned):
         cmp     $512, %rdx
         ja      L(512bytesormorein2aligned)
@@ -519,7 +515,7 @@ L(less32bytesin128in2aligned):
         add     %rdx, %rdi
         BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(256bytesormorein2aligned):
  
         sub     $256, %rdx
@@ -632,7 +628,7 @@ L(less32bytesin256in2alinged):
         add     %rdx, %rdi
         BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(512bytesormorein2aligned):
  # ifdef DATA_CACHE_SIZE_HALF
         mov     $DATA_CACHE_SIZE_HALF, %R8_LP
@@ -646,7 +642,7 @@ L(512bytesormorein2aligned):
         ja      L(L2_L3_cache_aglined)
  
         sub     $64, %rdx
-       ALIGN (4)
+       .p2align 4
  L(64bytesormore_loopin2aligned):
         movdqa  (%rdi), %xmm2
         pxor    (%rsi), %xmm2
@@ -678,7 +674,7 @@ L(64bytesormore_loopin2aligned):
  L(L2_L3_cache_aglined):
         sub     $64, %rdx
  
-       ALIGN (4)
+       .p2align 4
  L(L2_L3_aligned_128bytes_loop):
         prefetchnta 0x1c0(%rdi)
         prefetchnta 0x1c0(%rsi)
@@ -711,7 +707,7 @@ L(L2_L3_aligned_128bytes_loop):
         BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
  
  
-       ALIGN (4)
+       .p2align 4
  L(64bytesormore_loop_end):
         add     $16, %rdi
         add     $16, %rsi
@@ -806,7 +802,7 @@ L(8bytes):
         xor     %eax, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(12bytes):
         mov     -12(%rdi), %rax
         mov     -12(%rsi), %rcx
@@ -827,7 +823,7 @@ L(0bytes):
  
  # ifndef USE_AS_WMEMCMP
  /* unreal case for wmemcmp */
-       ALIGN (4)
+       .p2align 4
  L(65bytes):
         movdqu  -65(%rdi), %xmm1
         movdqu  -65(%rsi), %xmm2
@@ -864,7 +860,7 @@ L(9bytes):
         sub     %edx, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(13bytes):
         mov     -13(%rdi), %rax
         mov     -13(%rsi), %rcx
@@ -877,7 +873,7 @@ L(13bytes):
         xor     %eax, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(5bytes):
         mov     -5(%rdi), %eax
         mov     -5(%rsi), %ecx
@@ -888,7 +884,7 @@ L(5bytes):
         sub     %edx, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(66bytes):
         movdqu  -66(%rdi), %xmm1
         movdqu  -66(%rsi), %xmm2
@@ -929,7 +925,7 @@ L(10bytes):
         sub     %ecx, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(14bytes):
         mov     -14(%rdi), %rax
         mov     -14(%rsi), %rcx
@@ -942,7 +938,7 @@ L(14bytes):
         xor     %eax, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(6bytes):
         mov     -6(%rdi), %eax
         mov     -6(%rsi), %ecx
@@ -958,7 +954,7 @@ L(2bytes):
         sub     %ecx, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(67bytes):
         movdqu  -67(%rdi), %xmm2
         movdqu  -67(%rsi), %xmm1
@@ -997,7 +993,7 @@ L(11bytes):
         xor     %eax, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(15bytes):
         mov     -15(%rdi), %rax
         mov     -15(%rsi), %rcx
@@ -1010,7 +1006,7 @@ L(15bytes):
         xor     %eax, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(7bytes):
         mov     -7(%rdi), %eax
         mov     -7(%rsi), %ecx
@@ -1023,7 +1019,7 @@ L(7bytes):
         xor     %eax, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(3bytes):
         movzwl  -3(%rdi), %eax
         movzwl  -3(%rsi), %ecx
@@ -1036,7 +1032,7 @@ L(1bytes):
         ret
  # endif
  
-       ALIGN (4)
+       .p2align 4
  L(68bytes):
         movdqu  -68(%rdi), %xmm2
         movdqu  -68(%rsi), %xmm1
@@ -1079,7 +1075,7 @@ L(20bytes):
  
  # ifndef USE_AS_WMEMCMP
  /* unreal cases for wmemcmp */
-       ALIGN (4)
+       .p2align 4
  L(69bytes):
         movdqu  -69(%rsi), %xmm1
         movdqu  -69(%rdi), %xmm2
@@ -1115,7 +1111,7 @@ L(21bytes):
         xor     %eax, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(70bytes):
         movdqu  -70(%rsi), %xmm1
         movdqu  -70(%rdi), %xmm2
@@ -1151,7 +1147,7 @@ L(22bytes):
         xor     %eax, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(71bytes):
         movdqu  -71(%rsi), %xmm1
         movdqu  -71(%rdi), %xmm2
@@ -1188,7 +1184,7 @@ L(23bytes):
         ret
  # endif
  
-       ALIGN (4)
+       .p2align 4
  L(72bytes):
         movdqu  -72(%rsi), %xmm1
         movdqu  -72(%rdi), %xmm2
@@ -1227,7 +1223,7 @@ L(24bytes):
  
  # ifndef USE_AS_WMEMCMP
  /* unreal cases for wmemcmp */
-       ALIGN (4)
+       .p2align 4
  L(73bytes):
         movdqu  -73(%rsi), %xmm1
         movdqu  -73(%rdi), %xmm2
@@ -1265,7 +1261,7 @@ L(25bytes):
         sub     %ecx, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(74bytes):
         movdqu  -74(%rsi), %xmm1
         movdqu  -74(%rdi), %xmm2
@@ -1302,7 +1298,7 @@ L(26bytes):
         movzwl  -2(%rsi), %ecx
         jmp     L(diffin2bytes)
  
-       ALIGN (4)
+       .p2align 4
  L(75bytes):
         movdqu  -75(%rsi), %xmm1
         movdqu  -75(%rdi), %xmm2
@@ -1342,7 +1338,7 @@ L(27bytes):
         xor     %eax, %eax
         ret
  # endif
-       ALIGN (4)
+       .p2align 4
  L(76bytes):
         movdqu  -76(%rsi), %xmm1
         movdqu  -76(%rdi), %xmm2
@@ -1388,7 +1384,7 @@ L(28bytes):
  
  # ifndef USE_AS_WMEMCMP
  /* unreal cases for wmemcmp */
-       ALIGN (4)
+       .p2align 4
  L(77bytes):
         movdqu  -77(%rsi), %xmm1
         movdqu  -77(%rdi), %xmm2
@@ -1430,7 +1426,7 @@ L(29bytes):
         xor     %eax, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(78bytes):
         movdqu  -78(%rsi), %xmm1
         movdqu  -78(%rdi), %xmm2
@@ -1470,7 +1466,7 @@ L(30bytes):
         xor     %eax, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(79bytes):
         movdqu  -79(%rsi), %xmm1
         movdqu  -79(%rdi), %xmm2
@@ -1510,7 +1506,7 @@ L(31bytes):
         xor     %eax, %eax
         ret
  # endif
-       ALIGN (4)
+       .p2align 4
  L(64bytes):
         movdqu  -64(%rdi), %xmm2
         movdqu  -64(%rsi), %xmm1
@@ -1548,7 +1544,7 @@ L(32bytes):
  /*
   * Aligned 8 bytes to avoid 2 branch "taken" in one 16 alinged code block.
   */
-       ALIGN (3)
+       .p2align 3
  L(less16bytes):
         movsbq  %dl, %rdx
         mov     (%rsi, %rdx), %rcx
@@ -1585,7 +1581,7 @@ L(diffin2bytes):
         sub     %ecx, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(end):
         and     $0xff, %eax
         and     $0xff, %ecx
@@ -1599,7 +1595,7 @@ L(end):
         neg     %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(nequal_bigger):
         ret
  
@@ -1611,7 +1607,7 @@ L(unreal_case):
  END (MEMCMP)
  
         .section .rodata.sse4.1,"a",@progbits
-       ALIGN (3)
+       .p2align 3
  # ifndef USE_AS_WMEMCMP
  L(table_64bytes):
         .int    JMPTBL (L(0bytes), L(table_64bytes))
diff --git a/sysdeps/x86_64/multiarch/memcmp-ssse3.S b/sysdeps/x86_64/multiarch/memcmp-ssse3.S

index e319df9..e04f918 100644 (file)
--- a/sysdeps/x86_64/multiarch/memcmp-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcmp-ssse3.S
@@ -25,10 +25,6 @@
  #  define MEMCMP       __memcmp_ssse3
  # endif
  
-# ifndef ALIGN
-#  define ALIGN(n)     .p2align n
-# endif
-
  /* Warning!
            wmemcmp has to use SIGNED comparison for elements.
            memcmp has to use UNSIGNED comparison for elemnts.
@@ -50,7 +46,7 @@ ENTRY (MEMCMP)
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  /* ECX >= 32.  */
  L(48bytesormore):
         movdqu  (%rdi), %xmm3
@@ -90,7 +86,7 @@ L(48bytesormore):
         je      L(shr_6)
         jmp     L(shr_7)
  
-       ALIGN   (2)
+       .p2align 2
  L(next_unaligned_table):
         cmp     $8, %edx
         je      L(shr_8)
@@ -117,7 +113,7 @@ L(next_unaligned_table):
         jmp     L(shr_12)
  # endif
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_0):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -137,7 +133,7 @@ L(shr_0):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_0_gobble):
         movdqa  (%rsi), %xmm0
         xor     %eax, %eax
@@ -180,7 +176,7 @@ L(next):
  
  # ifndef USE_AS_WMEMCMP
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_1):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -207,7 +203,7 @@ L(shr_1):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_1_gobble):
         sub     $32, %rcx
         movdqa  16(%rsi), %xmm0
@@ -258,7 +254,7 @@ L(shr_1_gobble_next):
         jmp     L(less48bytes)
  
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_2):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -285,7 +281,7 @@ L(shr_2):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_2_gobble):
         sub     $32, %rcx
         movdqa  16(%rsi), %xmm0
@@ -335,7 +331,7 @@ L(shr_2_gobble_next):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_3):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -362,7 +358,7 @@ L(shr_3):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_3_gobble):
         sub     $32, %rcx
         movdqa  16(%rsi), %xmm0
@@ -414,7 +410,7 @@ L(shr_3_gobble_next):
  
  # endif
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_4):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -441,7 +437,7 @@ L(shr_4):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_4_gobble):
         sub     $32, %rcx
         movdqa  16(%rsi), %xmm0
@@ -493,7 +489,7 @@ L(shr_4_gobble_next):
  
  # ifndef USE_AS_WMEMCMP
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_5):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -520,7 +516,7 @@ L(shr_5):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_5_gobble):
         sub     $32, %rcx
         movdqa  16(%rsi), %xmm0
@@ -570,7 +566,7 @@ L(shr_5_gobble_next):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_6):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -597,7 +593,7 @@ L(shr_6):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_6_gobble):
         sub     $32, %rcx
         movdqa  16(%rsi), %xmm0
@@ -647,7 +643,7 @@ L(shr_6_gobble_next):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_7):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -674,7 +670,7 @@ L(shr_7):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_7_gobble):
         sub     $32, %rcx
         movdqa  16(%rsi), %xmm0
@@ -726,7 +722,7 @@ L(shr_7_gobble_next):
  
  # endif
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_8):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -753,7 +749,7 @@ L(shr_8):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_8_gobble):
         sub     $32, %rcx
         movdqa  16(%rsi), %xmm0
@@ -805,7 +801,7 @@ L(shr_8_gobble_next):
  
  # ifndef USE_AS_WMEMCMP
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_9):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -832,7 +828,7 @@ L(shr_9):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_9_gobble):
         sub     $32, %rcx
         movdqa  16(%rsi), %xmm0
@@ -882,7 +878,7 @@ L(shr_9_gobble_next):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_10):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -909,7 +905,7 @@ L(shr_10):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_10_gobble):
         sub     $32, %rcx
         movdqa  16(%rsi), %xmm0
@@ -959,7 +955,7 @@ L(shr_10_gobble_next):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_11):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -986,7 +982,7 @@ L(shr_11):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_11_gobble):
         sub     $32, %rcx
         movdqa  16(%rsi), %xmm0
@@ -1038,7 +1034,7 @@ L(shr_11_gobble_next):
  
  # endif
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_12):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -1065,7 +1061,7 @@ L(shr_12):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_12_gobble):
         sub     $32, %rcx
         movdqa  16(%rsi), %xmm0
@@ -1117,7 +1113,7 @@ L(shr_12_gobble_next):
  
  # ifndef USE_AS_WMEMCMP
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_13):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -1144,7 +1140,7 @@ L(shr_13):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_13_gobble):
         sub     $32, %rcx
         movdqa  16(%rsi), %xmm0
@@ -1194,7 +1190,7 @@ L(shr_13_gobble_next):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_14):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -1221,7 +1217,7 @@ L(shr_14):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_14_gobble):
         sub     $32, %rcx
         movdqa  16(%rsi), %xmm0
@@ -1271,7 +1267,7 @@ L(shr_14_gobble_next):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_15):
         cmp     $80, %rcx
         lea     -48(%rcx), %rcx
@@ -1298,7 +1294,7 @@ L(shr_15):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(shr_15_gobble):
         sub     $32, %rcx
         movdqa  16(%rsi), %xmm0
@@ -1348,7 +1344,7 @@ L(shr_15_gobble_next):
         add     %rcx, %rdi
         jmp     L(less48bytes)
  # endif
-       ALIGN   (4)
+       .p2align 4
  L(exit):
         pmovmskb %xmm1, %r8d
         sub     $0xffff, %r8d
@@ -1389,56 +1385,56 @@ L(less16bytes):
         sub     %edx, %eax
         ret
  
-       ALIGN   (4)
+       .p2align 4
  L(Byte16):
         movzbl  -16(%rdi), %eax
         movzbl  -16(%rsi), %edx
         sub     %edx, %eax
         ret
  
-       ALIGN   (4)
+       .p2align 4
  L(Byte17):
         movzbl  -15(%rdi), %eax
         movzbl  -15(%rsi), %edx
         sub     %edx, %eax
         ret
  
-       ALIGN   (4)
+       .p2align 4
  L(Byte18):
         movzbl  -14(%rdi), %eax
         movzbl  -14(%rsi), %edx
         sub     %edx, %eax
         ret
  
-       ALIGN   (4)
+       .p2align 4
  L(Byte19):
         movzbl  -13(%rdi), %eax
         movzbl  -13(%rsi), %edx
         sub     %edx, %eax
         ret
  
-       ALIGN   (4)
+       .p2align 4
  L(Byte20):
         movzbl  -12(%rdi), %eax
         movzbl  -12(%rsi), %edx
         sub     %edx, %eax
         ret
  
-       ALIGN   (4)
+       .p2align 4
  L(Byte21):
         movzbl  -11(%rdi), %eax
         movzbl  -11(%rsi), %edx
         sub     %edx, %eax
         ret
  
-       ALIGN   (4)
+       .p2align 4
  L(Byte22):
         movzbl  -10(%rdi), %eax
         movzbl  -10(%rsi), %edx
         sub     %edx, %eax
         ret
  
-       ALIGN   (4)
+       .p2align 4
  L(next_24_bytes):
         lea     8(%rdi), %rdi
         lea     8(%rsi), %rsi
@@ -1479,14 +1475,14 @@ L(next_24_bytes):
         jne     L(find_diff)
         ret
  
-       ALIGN   (4)
+       .p2align 4
  L(second_double_word):
         mov     -12(%rdi), %eax
         cmp     -12(%rsi), %eax
         jne     L(find_diff)
         ret
  
-       ALIGN   (4)
+       .p2align 4
  L(next_two_double_words):
         and     $15, %dh
         jz      L(fourth_double_word)
@@ -1495,7 +1491,7 @@ L(next_two_double_words):
         jne     L(find_diff)
         ret
  
-       ALIGN   (4)
+       .p2align 4
  L(fourth_double_word):
         mov     -4(%rdi), %eax
         cmp     -4(%rsi), %eax
@@ -1503,7 +1499,7 @@ L(fourth_double_word):
         ret
  # endif
  
-       ALIGN   (4)
+       .p2align 4
  L(less48bytes):
         cmp     $8, %ecx
         jae     L(more8bytes)
@@ -1527,7 +1523,7 @@ L(less48bytes):
         jmp     L(4bytes)
  # endif
  
-       ALIGN   (4)
+       .p2align 4
  L(more8bytes):
         cmp     $16, %ecx
         jae     L(more16bytes)
@@ -1551,7 +1547,7 @@ L(more8bytes):
         jmp     L(12bytes)
  # endif
  
-       ALIGN   (4)
+       .p2align 4
  L(more16bytes):
         cmp     $24, %ecx
         jae     L(more24bytes)
@@ -1575,7 +1571,7 @@ L(more16bytes):
         jmp     L(20bytes)
  # endif
  
-       ALIGN   (4)
+       .p2align 4
  L(more24bytes):
         cmp     $32, %ecx
         jae     L(more32bytes)
@@ -1599,7 +1595,7 @@ L(more24bytes):
         jmp     L(28bytes)
  # endif
  
-       ALIGN   (4)
+       .p2align 4
  L(more32bytes):
         cmp     $40, %ecx
         jae     L(more40bytes)
@@ -1623,7 +1619,7 @@ L(more32bytes):
         jmp     L(36bytes)
  # endif
  
-       ALIGN   (4)
+       .p2align 4
  L(more40bytes):
         cmp     $40, %ecx
         je      L(40bytes)
@@ -1642,7 +1638,7 @@ L(more40bytes):
         je      L(46bytes)
         jmp     L(47bytes)
  
-       ALIGN   (4)
+       .p2align 4
  L(44bytes):
         movl    -44(%rdi), %eax
         movl    -44(%rsi), %ecx
@@ -1702,7 +1698,7 @@ L(0bytes):
         xor     %eax, %eax
         ret
  # else
-       ALIGN   (4)
+       .p2align 4
  L(44bytes):
         movl    -44(%rdi), %eax
         cmp     -44(%rsi), %eax
@@ -1753,7 +1749,7 @@ L(0bytes):
  # endif
  
  # ifndef USE_AS_WMEMCMP
-       ALIGN   (4)
+       .p2align 4
  L(45bytes):
         movl    -45(%rdi), %eax
         movl    -45(%rsi), %ecx
@@ -1816,7 +1812,7 @@ L(1bytes):
         xor     %eax, %eax
         ret
  
-       ALIGN   (4)
+       .p2align 4
  L(46bytes):
         movl    -46(%rdi), %eax
         movl    -46(%rsi), %ecx
@@ -1882,7 +1878,7 @@ L(2bytes):
         xor     %eax, %eax
         ret
  
-       ALIGN   (4)
+       .p2align 4
  L(47bytes):
         movl    -47(%rdi), %eax
         movl    -47(%rsi), %ecx
@@ -1951,7 +1947,7 @@ L(3bytes):
         xor     %eax, %eax
         ret
  
-       ALIGN   (4)
+       .p2align 4
  L(find_diff):
         cmpb    %cl, %al
         jne     L(set)
@@ -1973,19 +1969,19 @@ L(set):
  # else
  
  /* for wmemcmp */
-       ALIGN   (4)
+       .p2align 4
  L(find_diff):
         mov     $1, %eax
         jg      L(find_diff_bigger)
         neg     %eax
         ret
  
-       ALIGN   (4)
+       .p2align 4
  L(find_diff_bigger):
         ret
  # endif
  
-       ALIGN   (4)
+       .p2align 4
  L(equal):
         xor     %eax, %eax
         ret
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S

index efdfea2..df6578e 100644 (file)
--- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
@@ -20,10 +20,6 @@
  
  #include "asm-syntax.h"
  
-#ifndef ALIGN
-# define ALIGN(n)      .p2align n
-#endif
-
  
  ENTRY(__memcpy_sse2_unaligned)
         movq    %rsi, %rax
@@ -44,7 +40,7 @@ L(return):
         movq    %rdi, %rax
         ret
         .p2align 4,,10
-       ALIGN(4)
+       .p2align 4
  .L31:
         movdqu  16(%rsi), %xmm8
         cmpq    $64, %rdx
@@ -77,7 +73,7 @@ L(return):
         leaq    32(%r10), %r8
         leaq    48(%r10), %rax
         .p2align 4,,10
-       ALIGN(4)
+       .p2align 4
  L(loop):
         movdqu  (%rcx,%r10), %xmm8
         movdqa  %xmm8, (%rcx)
@@ -151,7 +147,7 @@ L(less_16):
  .L3:
         leaq    -1(%rdx), %rax
         .p2align 4,,10
-       ALIGN(4)
+       .p2align 4
  .L11:
         movzbl  (%rsi,%rax), %edx
         movb    %dl, (%rdi,%rax)
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S

index fc9fcef..0eb7d9b 100644 (file)
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -31,10 +31,6 @@
  # define MEMCPY_CHK    __memcpy_chk_ssse3_back
  #endif
  
-#ifndef ALIGN
-# define ALIGN(n)      .p2align n
-#endif
-
  #define JMPTBL(I, B)   I - B
  
  /* Branch to an entry in a jump table.  TABLE is a jump table with
@@ -87,7 +83,7 @@ L(bk_write):
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  #endif
  
-       ALIGN (4)
+       .p2align 4
  L(144bytesormore):
  
  #ifndef USE_AS_MEMMOVE
@@ -119,7 +115,7 @@ L(144bytesormore):
         jmp     *%r9
         ud2
  
-       ALIGN (4)
+       .p2align 4
  L(copy_backward):
  #ifdef DATA_CACHE_SIZE
         mov     $DATA_CACHE_SIZE, %RCX_LP
@@ -149,7 +145,7 @@ L(copy_backward):
         jmp     *%r9
         ud2
  
-       ALIGN (4)
+       .p2align 4
  L(shl_0):
  
         mov     %rdx, %r9
@@ -162,7 +158,7 @@ L(shl_0):
  #endif
         jae     L(gobble_mem_fwd)
         sub     $0x80, %rdx
-       ALIGN (4)
+       .p2align 4
  L(shl_0_loop):
         movdqa  (%rsi), %xmm1
         movdqa  %xmm1, (%rdi)
@@ -190,7 +186,7 @@ L(shl_0_loop):
         add     %rdx, %rdi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_0_bwd):
         sub     $0x80, %rdx
  L(copy_backward_loop):
@@ -221,7 +217,7 @@ L(copy_backward_loop):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_1):
         sub     $0x80, %rdx
         movaps  -0x01(%rsi), %xmm1
@@ -258,7 +254,7 @@ L(shl_1):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_1_bwd):
         movaps  -0x01(%rsi), %xmm1
  
@@ -304,7 +300,7 @@ L(shl_1_bwd):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_2):
         sub     $0x80, %rdx
         movaps  -0x02(%rsi), %xmm1
@@ -341,7 +337,7 @@ L(shl_2):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_2_bwd):
         movaps  -0x02(%rsi), %xmm1
  
@@ -387,7 +383,7 @@ L(shl_2_bwd):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_3):
         sub     $0x80, %rdx
         movaps -0x03(%rsi), %xmm1
@@ -424,7 +420,7 @@ L(shl_3):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_3_bwd):
         movaps  -0x03(%rsi), %xmm1
  
@@ -470,7 +466,7 @@ L(shl_3_bwd):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_4):
         sub     $0x80, %rdx
         movaps  -0x04(%rsi), %xmm1
@@ -507,7 +503,7 @@ L(shl_4):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_4_bwd):
         movaps  -0x04(%rsi), %xmm1
  
@@ -553,7 +549,7 @@ L(shl_4_bwd):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_5):
         sub     $0x80, %rdx
         movaps  -0x05(%rsi), %xmm1
@@ -590,7 +586,7 @@ L(shl_5):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_5_bwd):
         movaps  -0x05(%rsi), %xmm1
  
@@ -636,7 +632,7 @@ L(shl_5_bwd):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_6):
         sub     $0x80, %rdx
         movaps  -0x06(%rsi), %xmm1
@@ -673,7 +669,7 @@ L(shl_6):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_6_bwd):
         movaps  -0x06(%rsi), %xmm1
  
@@ -719,7 +715,7 @@ L(shl_6_bwd):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_7):
         sub     $0x80, %rdx
         movaps  -0x07(%rsi), %xmm1
@@ -756,7 +752,7 @@ L(shl_7):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_7_bwd):
         movaps  -0x07(%rsi), %xmm1
  
@@ -802,7 +798,7 @@ L(shl_7_bwd):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_8):
         sub     $0x80, %rdx
         movaps  -0x08(%rsi), %xmm1
@@ -839,7 +835,7 @@ L(shl_8):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_8_bwd):
         movaps  -0x08(%rsi), %xmm1
  
@@ -886,7 +882,7 @@ L(shl_8_end_bwd):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_9):
         sub     $0x80, %rdx
         movaps  -0x09(%rsi), %xmm1
@@ -923,7 +919,7 @@ L(shl_9):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_9_bwd):
         movaps  -0x09(%rsi), %xmm1
  
@@ -969,7 +965,7 @@ L(shl_9_bwd):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_10):
         sub     $0x80, %rdx
         movaps  -0x0a(%rsi), %xmm1
@@ -1006,7 +1002,7 @@ L(shl_10):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_10_bwd):
         movaps  -0x0a(%rsi), %xmm1
  
@@ -1052,7 +1048,7 @@ L(shl_10_bwd):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_11):
         sub     $0x80, %rdx
         movaps  -0x0b(%rsi), %xmm1
@@ -1089,7 +1085,7 @@ L(shl_11):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_11_bwd):
         movaps  -0x0b(%rsi), %xmm1
  
@@ -1135,7 +1131,7 @@ L(shl_11_bwd):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_12):
         sub     $0x80, %rdx
         movdqa  -0x0c(%rsi), %xmm1
@@ -1173,7 +1169,7 @@ L(shl_12):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_12_bwd):
         movaps  -0x0c(%rsi), %xmm1
  
@@ -1219,7 +1215,7 @@ L(shl_12_bwd):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_13):
         sub     $0x80, %rdx
         movaps  -0x0d(%rsi), %xmm1
@@ -1256,7 +1252,7 @@ L(shl_13):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_13_bwd):
         movaps  -0x0d(%rsi), %xmm1
  
@@ -1302,7 +1298,7 @@ L(shl_13_bwd):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_14):
         sub     $0x80, %rdx
         movaps  -0x0e(%rsi), %xmm1
@@ -1339,7 +1335,7 @@ L(shl_14):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_14_bwd):
         movaps  -0x0e(%rsi), %xmm1
  
@@ -1385,7 +1381,7 @@ L(shl_14_bwd):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_15):
         sub     $0x80, %rdx
         movaps  -0x0f(%rsi), %xmm1
@@ -1422,7 +1418,7 @@ L(shl_15):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_15_bwd):
         movaps  -0x0f(%rsi), %xmm1
  
@@ -1468,7 +1464,7 @@ L(shl_15_bwd):
         sub     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(gobble_mem_fwd):
         movdqu  (%rsi), %xmm1
         movdqu  %xmm0, (%r8)
@@ -1570,7 +1566,7 @@ L(gobble_mem_fwd_end):
         add     %rdx, %rdi
         BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(gobble_mem_bwd):
         add     %rdx, %rsi
         add     %rdx, %rdi
@@ -2833,7 +2829,7 @@ L(bwd_write_1bytes):
  END (MEMCPY)
  
         .section .rodata.ssse3,"a",@progbits
-       ALIGN (3)
+       .p2align 3
  L(table_144_bytes_bwd):
         .int    JMPTBL (L(bwd_write_0bytes), L(table_144_bytes_bwd))
         .int    JMPTBL (L(bwd_write_1bytes), L(table_144_bytes_bwd))
@@ -2980,7 +2976,7 @@ L(table_144_bytes_bwd):
         .int    JMPTBL (L(bwd_write_142bytes), L(table_144_bytes_bwd))
         .int    JMPTBL (L(bwd_write_143bytes), L(table_144_bytes_bwd))
  
-       ALIGN (3)
+       .p2align 3
  L(table_144_bytes_fwd):
         .int    JMPTBL (L(fwd_write_0bytes), L(table_144_bytes_fwd))
         .int    JMPTBL (L(fwd_write_1bytes), L(table_144_bytes_fwd))
@@ -3127,7 +3123,7 @@ L(table_144_bytes_fwd):
         .int    JMPTBL (L(fwd_write_142bytes), L(table_144_bytes_fwd))
         .int    JMPTBL (L(fwd_write_143bytes), L(table_144_bytes_fwd))
  
-       ALIGN (3)
+       .p2align 3
  L(shl_table_fwd):
         .int    JMPTBL (L(shl_0), L(shl_table_fwd))
         .int    JMPTBL (L(shl_1), L(shl_table_fwd))
@@ -3146,7 +3142,7 @@ L(shl_table_fwd):
         .int    JMPTBL (L(shl_14), L(shl_table_fwd))
         .int    JMPTBL (L(shl_15), L(shl_table_fwd))
  
-       ALIGN (3)
+       .p2align 3
  L(shl_table_bwd):
         .int    JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
         .int    JMPTBL (L(shl_1_bwd), L(shl_table_bwd))
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index 9642cee..0cedab2 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -31,10 +31,6 @@
  # define MEMCPY_CHK    __memcpy_chk_ssse3
  #endif
  
-#ifndef ALIGN
-# define ALIGN(n)      .p2align n
-#endif
-
  #define JMPTBL(I, B)   I - B
  
  /* Branch to an entry in a jump table.  TABLE is a jump table with
@@ -80,7 +76,7 @@ L(copy_forward):
         jmp     *%r9
         ud2
  
-       ALIGN (4)
+       .p2align 4
  L(80bytesormore):
  #ifndef USE_AS_MEMMOVE
         cmp     %dil, %sil
@@ -113,7 +109,7 @@ L(80bytesormore):
  #endif
         BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %r9, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(copy_backward):
         movdqu  -16(%rsi, %rdx), %xmm0
         add     %rdx, %rsi
@@ -144,7 +140,7 @@ L(copy_backward):
  #endif
         BRANCH_TO_JMPTBL_ENTRY (L(shl_table_bwd), %r9, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_0):
         sub     $16, %rdx
         movdqa  (%rsi), %xmm1
@@ -172,7 +168,7 @@ L(shl_0_less_64bytes):
         add     %rdx, %rdi
         BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_0_gobble):
  #ifdef DATA_CACHE_SIZE_HALF
         cmp     $DATA_CACHE_SIZE_HALF, %RDX_LP
@@ -228,7 +224,7 @@ L(shl_0_cache_less_64bytes):
         add     %rdx, %rdi
         BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_0_gobble_mem_loop):
         prefetcht0 0x1c0(%rsi)
         prefetcht0 0x280(%rsi)
@@ -287,7 +283,7 @@ L(shl_0_mem_less_32bytes):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_0_bwd):
         sub     $16, %rdx
         movdqa  -0x10(%rsi), %xmm1
@@ -313,7 +309,7 @@ L(shl_0_bwd):
  L(shl_0_less_64bytes_bwd):
         BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_0_gobble_bwd):
  #ifdef DATA_CACHE_SIZE_HALF
         cmp     $DATA_CACHE_SIZE_HALF, %RDX_LP
@@ -367,7 +363,7 @@ L(shl_0_gobble_bwd_loop):
  L(shl_0_gobble_bwd_less_64bytes):
         BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_0_gobble_mem_bwd_loop):
         prefetcht0 -0x1c0(%rsi)
         prefetcht0 -0x280(%rsi)
@@ -423,7 +419,7 @@ L(shl_0_mem_bwd_less_64bytes):
  L(shl_0_mem_bwd_less_32bytes):
         BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_1):
         lea     (L(shl_1_loop_L1)-L(shl_1))(%r9), %r9
         cmp     %rcx, %rdx
@@ -466,7 +462,7 @@ L(shl_1_end):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_1_bwd):
         lea     (L(shl_1_bwd_loop_L1)-L(shl_1_bwd))(%r9), %r9
         cmp     %rcx, %rdx
@@ -508,7 +504,7 @@ L(shl_1_bwd_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_2):
         lea     (L(shl_2_loop_L1)-L(shl_2))(%r9), %r9
         cmp     %rcx, %rdx
@@ -551,7 +547,7 @@ L(shl_2_end):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_2_bwd):
         lea     (L(shl_2_bwd_loop_L1)-L(shl_2_bwd))(%r9), %r9
         cmp     %rcx, %rdx
@@ -593,7 +589,7 @@ L(shl_2_bwd_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_3):
         lea     (L(shl_3_loop_L1)-L(shl_3))(%r9), %r9
         cmp     %rcx, %rdx
@@ -636,7 +632,7 @@ L(shl_3_end):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_3_bwd):
         lea     (L(shl_3_bwd_loop_L1)-L(shl_3_bwd))(%r9), %r9
         cmp     %rcx, %rdx
@@ -678,7 +674,7 @@ L(shl_3_bwd_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_4):
         lea     (L(shl_4_loop_L1)-L(shl_4))(%r9), %r9
         cmp     %rcx, %rdx
@@ -721,7 +717,7 @@ L(shl_4_end):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_4_bwd):
         lea     (L(shl_4_bwd_loop_L1)-L(shl_4_bwd))(%r9), %r9
         cmp     %rcx, %rdx
@@ -763,7 +759,7 @@ L(shl_4_bwd_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_5):
         lea     (L(shl_5_loop_L1)-L(shl_5))(%r9), %r9
         cmp     %rcx, %rdx
@@ -806,7 +802,7 @@ L(shl_5_end):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_5_bwd):
         lea     (L(shl_5_bwd_loop_L1)-L(shl_5_bwd))(%r9), %r9
         cmp     %rcx, %rdx
@@ -848,7 +844,7 @@ L(shl_5_bwd_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_6):
         lea     (L(shl_6_loop_L1)-L(shl_6))(%r9), %r9
         cmp     %rcx, %rdx
@@ -891,7 +887,7 @@ L(shl_6_end):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_6_bwd):
         lea     (L(shl_6_bwd_loop_L1)-L(shl_6_bwd))(%r9), %r9
         cmp     %rcx, %rdx
@@ -933,7 +929,7 @@ L(shl_6_bwd_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_7):
         lea     (L(shl_7_loop_L1)-L(shl_7))(%r9), %r9
         cmp     %rcx, %rdx
@@ -976,7 +972,7 @@ L(shl_7_end):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_7_bwd):
         lea     (L(shl_7_bwd_loop_L1)-L(shl_7_bwd))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1018,7 +1014,7 @@ L(shl_7_bwd_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_8):
         lea     (L(shl_8_loop_L1)-L(shl_8))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1051,7 +1047,7 @@ L(shl_8_loop_L1):
         movaps  %xmm5, -0x10(%rdi)
         jmp     *%r9
         ud2
-       ALIGN (4)
+       .p2align 4
  L(shl_8_end):
         lea     64(%rdx), %rdx
         movaps  %xmm4, -0x20(%rdi)
@@ -1061,7 +1057,7 @@ L(shl_8_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_8_bwd):
         lea     (L(shl_8_bwd_loop_L1)-L(shl_8_bwd))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1103,7 +1099,7 @@ L(shl_8_bwd_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_9):
         lea     (L(shl_9_loop_L1)-L(shl_9))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1146,7 +1142,7 @@ L(shl_9_end):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_9_bwd):
         lea     (L(shl_9_bwd_loop_L1)-L(shl_9_bwd))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1188,7 +1184,7 @@ L(shl_9_bwd_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_10):
         lea     (L(shl_10_loop_L1)-L(shl_10))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1231,7 +1227,7 @@ L(shl_10_end):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_10_bwd):
         lea     (L(shl_10_bwd_loop_L1)-L(shl_10_bwd))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1273,7 +1269,7 @@ L(shl_10_bwd_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_11):
         lea     (L(shl_11_loop_L1)-L(shl_11))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1316,7 +1312,7 @@ L(shl_11_end):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_11_bwd):
         lea     (L(shl_11_bwd_loop_L1)-L(shl_11_bwd))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1358,7 +1354,7 @@ L(shl_11_bwd_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_12):
         lea     (L(shl_12_loop_L1)-L(shl_12))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1401,7 +1397,7 @@ L(shl_12_end):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_12_bwd):
         lea     (L(shl_12_bwd_loop_L1)-L(shl_12_bwd))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1443,7 +1439,7 @@ L(shl_12_bwd_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_13):
         lea     (L(shl_13_loop_L1)-L(shl_13))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1486,7 +1482,7 @@ L(shl_13_end):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_13_bwd):
         lea     (L(shl_13_bwd_loop_L1)-L(shl_13_bwd))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1528,7 +1524,7 @@ L(shl_13_bwd_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_14):
         lea     (L(shl_14_loop_L1)-L(shl_14))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1571,7 +1567,7 @@ L(shl_14_end):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_14_bwd):
         lea     (L(shl_14_bwd_loop_L1)-L(shl_14_bwd))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1613,7 +1609,7 @@ L(shl_14_bwd_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_15):
         lea     (L(shl_15_loop_L1)-L(shl_15))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1656,7 +1652,7 @@ L(shl_15_end):
         add     %rdx, %rsi
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(shl_15_bwd):
         lea     (L(shl_15_bwd_loop_L1)-L(shl_15_bwd))(%r9), %r9
         cmp     %rcx, %rdx
@@ -1698,7 +1694,7 @@ L(shl_15_bwd_end):
         movdqu  %xmm0, (%r8)
         BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
  
-       ALIGN (4)
+       .p2align 4
  L(write_72bytes):
         movdqu  -72(%rsi), %xmm0
         movdqu  -56(%rsi), %xmm1
@@ -1716,7 +1712,7 @@ L(write_72bytes):
         mov      %rcx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_64bytes):
         movdqu  -64(%rsi), %xmm0
         mov     -48(%rsi), %rcx
@@ -1734,7 +1730,7 @@ L(write_64bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_56bytes):
         movdqu  -56(%rsi), %xmm0
         mov     -40(%rsi), %r8
@@ -1750,7 +1746,7 @@ L(write_56bytes):
         mov      %rcx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_48bytes):
         mov     -48(%rsi), %rcx
         mov     -40(%rsi), %r8
@@ -1766,7 +1762,7 @@ L(write_48bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_40bytes):
         mov     -40(%rsi), %r8
         mov     -32(%rsi), %r9
@@ -1780,7 +1776,7 @@ L(write_40bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_32bytes):
         mov     -32(%rsi), %r9
         mov     -24(%rsi), %r10
@@ -1792,7 +1788,7 @@ L(write_32bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_24bytes):
         mov     -24(%rsi), %r10
         mov     -16(%rsi), %r11
@@ -1802,7 +1798,7 @@ L(write_24bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_16bytes):
         mov     -16(%rsi), %r11
         mov     -8(%rsi), %rdx
@@ -1810,14 +1806,14 @@ L(write_16bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_8bytes):
         mov     -8(%rsi), %rdx
         mov      %rdx, -8(%rdi)
  L(write_0bytes):
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_73bytes):
         movdqu  -73(%rsi), %xmm0
         movdqu  -57(%rsi), %xmm1
@@ -1837,7 +1833,7 @@ L(write_73bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_65bytes):
         movdqu  -65(%rsi), %xmm0
         movdqu  -49(%rsi), %xmm1
@@ -1855,7 +1851,7 @@ L(write_65bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_57bytes):
         movdqu  -57(%rsi), %xmm0
         mov     -41(%rsi), %r8
@@ -1873,7 +1869,7 @@ L(write_57bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_49bytes):
         movdqu  -49(%rsi), %xmm0
         mov     -33(%rsi), %r9
@@ -1889,7 +1885,7 @@ L(write_49bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_41bytes):
         mov     -41(%rsi), %r8
         mov     -33(%rsi), %r9
@@ -1905,7 +1901,7 @@ L(write_41bytes):
         mov      %dl, -1(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_33bytes):
         mov     -33(%rsi), %r9
         mov     -25(%rsi), %r10
@@ -1919,7 +1915,7 @@ L(write_33bytes):
         mov      %dl, -1(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_25bytes):
         mov     -25(%rsi), %r10
         mov     -17(%rsi), %r11
@@ -1931,7 +1927,7 @@ L(write_25bytes):
         mov      %dl, -1(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_17bytes):
         mov     -17(%rsi), %r11
         mov     -9(%rsi), %rcx
@@ -1941,7 +1937,7 @@ L(write_17bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_9bytes):
         mov     -9(%rsi), %rcx
         mov     -4(%rsi), %edx
@@ -1949,13 +1945,13 @@ L(write_9bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_1bytes):
         mov     -1(%rsi), %dl
         mov      %dl, -1(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_74bytes):
         movdqu  -74(%rsi), %xmm0
         movdqu  -58(%rsi), %xmm1
@@ -1975,7 +1971,7 @@ L(write_74bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_66bytes):
         movdqu  -66(%rsi), %xmm0
         movdqu  -50(%rsi), %xmm1
@@ -1995,7 +1991,7 @@ L(write_66bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_58bytes):
         movdqu  -58(%rsi), %xmm1
         mov     -42(%rsi), %r8
@@ -2013,7 +2009,7 @@ L(write_58bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_50bytes):
         movdqu  -50(%rsi), %xmm0
         mov     -34(%rsi), %r9
@@ -2029,7 +2025,7 @@ L(write_50bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_42bytes):
         mov     -42(%rsi), %r8
         mov     -34(%rsi), %r9
@@ -2045,7 +2041,7 @@ L(write_42bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_34bytes):
         mov     -34(%rsi), %r9
         mov     -26(%rsi), %r10
@@ -2059,7 +2055,7 @@ L(write_34bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_26bytes):
         mov     -26(%rsi), %r10
         mov     -18(%rsi), %r11
@@ -2071,7 +2067,7 @@ L(write_26bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_18bytes):
         mov     -18(%rsi), %r11
         mov     -10(%rsi), %rcx
@@ -2081,7 +2077,7 @@ L(write_18bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_10bytes):
         mov     -10(%rsi), %rcx
         mov     -4(%rsi), %edx
@@ -2089,13 +2085,13 @@ L(write_10bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_2bytes):
         mov     -2(%rsi), %dx
         mov      %dx, -2(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_75bytes):
         movdqu  -75(%rsi), %xmm0
         movdqu  -59(%rsi), %xmm1
@@ -2115,7 +2111,7 @@ L(write_75bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_67bytes):
         movdqu  -67(%rsi), %xmm0
         movdqu  -59(%rsi), %xmm1
@@ -2135,7 +2131,7 @@ L(write_67bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_59bytes):
         movdqu  -59(%rsi), %xmm0
         mov     -43(%rsi), %r8
@@ -2153,7 +2149,7 @@ L(write_59bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_51bytes):
         movdqu  -51(%rsi), %xmm0
         mov     -35(%rsi), %r9
@@ -2169,7 +2165,7 @@ L(write_51bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_43bytes):
         mov     -43(%rsi), %r8
         mov     -35(%rsi), %r9
@@ -2185,7 +2181,7 @@ L(write_43bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_35bytes):
         mov     -35(%rsi), %r9
         mov     -27(%rsi), %r10
@@ -2199,7 +2195,7 @@ L(write_35bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_27bytes):
         mov     -27(%rsi), %r10
         mov     -19(%rsi), %r11
@@ -2211,7 +2207,7 @@ L(write_27bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_19bytes):
         mov     -19(%rsi), %r11
         mov     -11(%rsi), %rcx
@@ -2221,7 +2217,7 @@ L(write_19bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_11bytes):
         mov     -11(%rsi), %rcx
         mov     -4(%rsi), %edx
@@ -2229,7 +2225,7 @@ L(write_11bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_3bytes):
         mov     -3(%rsi), %dx
         mov     -2(%rsi), %cx
@@ -2237,7 +2233,7 @@ L(write_3bytes):
         mov      %cx, -2(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_76bytes):
         movdqu  -76(%rsi), %xmm0
         movdqu  -60(%rsi), %xmm1
@@ -2257,7 +2253,7 @@ L(write_76bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_68bytes):
         movdqu  -68(%rsi), %xmm0
         movdqu  -52(%rsi), %xmm1
@@ -2275,7 +2271,7 @@ L(write_68bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_60bytes):
         movdqu  -60(%rsi), %xmm0
         mov     -44(%rsi), %r8
@@ -2293,7 +2289,7 @@ L(write_60bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_52bytes):
         movdqu  -52(%rsi), %xmm0
         mov     -36(%rsi), %r9
@@ -2309,7 +2305,7 @@ L(write_52bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_44bytes):
         mov     -44(%rsi), %r8
         mov     -36(%rsi), %r9
@@ -2325,7 +2321,7 @@ L(write_44bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_36bytes):
         mov     -36(%rsi), %r9
         mov     -28(%rsi), %r10
@@ -2339,7 +2335,7 @@ L(write_36bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_28bytes):
         mov     -28(%rsi), %r10
         mov     -20(%rsi), %r11
@@ -2351,7 +2347,7 @@ L(write_28bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_20bytes):
         mov     -20(%rsi), %r11
         mov     -12(%rsi), %rcx
@@ -2361,7 +2357,7 @@ L(write_20bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_12bytes):
         mov     -12(%rsi), %rcx
         mov     -4(%rsi), %edx
@@ -2369,13 +2365,13 @@ L(write_12bytes):
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_4bytes):
         mov     -4(%rsi), %edx
         mov      %edx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_77bytes):
         movdqu  -77(%rsi), %xmm0
         movdqu  -61(%rsi), %xmm1
@@ -2395,7 +2391,7 @@ L(write_77bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_69bytes):
         movdqu  -69(%rsi), %xmm0
         movdqu  -53(%rsi), %xmm1
@@ -2413,7 +2409,7 @@ L(write_69bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_61bytes):
         movdqu  -61(%rsi), %xmm0
         mov     -45(%rsi), %r8
@@ -2431,7 +2427,7 @@ L(write_61bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_53bytes):
         movdqu  -53(%rsi), %xmm0
         mov     -45(%rsi), %r8
@@ -2448,7 +2444,7 @@ L(write_53bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_45bytes):
         mov     -45(%rsi), %r8
         mov     -37(%rsi), %r9
@@ -2464,7 +2460,7 @@ L(write_45bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_37bytes):
         mov     -37(%rsi), %r9
         mov     -29(%rsi), %r10
@@ -2478,7 +2474,7 @@ L(write_37bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_29bytes):
         mov     -29(%rsi), %r10
         mov     -21(%rsi), %r11
@@ -2490,7 +2486,7 @@ L(write_29bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_21bytes):
         mov     -21(%rsi), %r11
         mov     -13(%rsi), %rcx
@@ -2500,7 +2496,7 @@ L(write_21bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_13bytes):
         mov     -13(%rsi), %rcx
         mov     -8(%rsi), %rdx
@@ -2508,7 +2504,7 @@ L(write_13bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_5bytes):
         mov     -5(%rsi), %edx
         mov     -4(%rsi), %ecx
@@ -2516,7 +2512,7 @@ L(write_5bytes):
         mov      %ecx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_78bytes):
         movdqu  -78(%rsi), %xmm0
         movdqu  -62(%rsi), %xmm1
@@ -2536,7 +2532,7 @@ L(write_78bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_70bytes):
         movdqu  -70(%rsi), %xmm0
         movdqu  -54(%rsi), %xmm1
@@ -2554,7 +2550,7 @@ L(write_70bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_62bytes):
         movdqu  -62(%rsi), %xmm0
         mov     -46(%rsi), %r8
@@ -2572,7 +2568,7 @@ L(write_62bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_54bytes):
         movdqu  -54(%rsi), %xmm0
         mov     -38(%rsi), %r9
@@ -2588,7 +2584,7 @@ L(write_54bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_46bytes):
         mov     -46(%rsi), %r8
         mov     -38(%rsi), %r9
@@ -2604,7 +2600,7 @@ L(write_46bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_38bytes):
         mov     -38(%rsi), %r9
         mov     -30(%rsi), %r10
@@ -2618,7 +2614,7 @@ L(write_38bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_30bytes):
         mov     -30(%rsi), %r10
         mov     -22(%rsi), %r11
@@ -2630,7 +2626,7 @@ L(write_30bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_22bytes):
         mov     -22(%rsi), %r11
         mov     -14(%rsi), %rcx
@@ -2640,7 +2636,7 @@ L(write_22bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_14bytes):
         mov     -14(%rsi), %rcx
         mov     -8(%rsi), %rdx
@@ -2648,7 +2644,7 @@ L(write_14bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_6bytes):
         mov     -6(%rsi), %edx
         mov     -4(%rsi), %ecx
@@ -2656,7 +2652,7 @@ L(write_6bytes):
         mov      %ecx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_79bytes):
         movdqu  -79(%rsi), %xmm0
         movdqu  -63(%rsi), %xmm1
@@ -2676,7 +2672,7 @@ L(write_79bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_71bytes):
         movdqu  -71(%rsi), %xmm0
         movdqu  -55(%rsi), %xmm1
@@ -2694,7 +2690,7 @@ L(write_71bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_63bytes):
         movdqu  -63(%rsi), %xmm0
         mov     -47(%rsi), %r8
@@ -2712,7 +2708,7 @@ L(write_63bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_55bytes):
         movdqu  -55(%rsi), %xmm0
         mov     -39(%rsi), %r9
@@ -2728,7 +2724,7 @@ L(write_55bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_47bytes):
         mov     -47(%rsi), %r8
         mov     -39(%rsi), %r9
@@ -2744,7 +2740,7 @@ L(write_47bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_39bytes):
         mov     -39(%rsi), %r9
         mov     -31(%rsi), %r10
@@ -2758,7 +2754,7 @@ L(write_39bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_31bytes):
         mov     -31(%rsi), %r10
         mov     -23(%rsi), %r11
@@ -2770,7 +2766,7 @@ L(write_31bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_23bytes):
         mov     -23(%rsi), %r11
         mov     -15(%rsi), %rcx
@@ -2780,7 +2776,7 @@ L(write_23bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_15bytes):
         mov     -15(%rsi), %rcx
         mov     -8(%rsi), %rdx
@@ -2788,7 +2784,7 @@ L(write_15bytes):
         mov      %rdx, -8(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(write_7bytes):
         mov     -7(%rsi), %edx
         mov     -4(%rsi), %ecx
@@ -2796,7 +2792,7 @@ L(write_7bytes):
         mov      %ecx, -4(%rdi)
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(large_page_fwd):
         movdqu  (%rsi), %xmm1
         lea     16(%rsi), %rsi
@@ -2859,7 +2855,7 @@ L(large_page_less_64bytes):
         BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
  
  #ifdef USE_AS_MEMMOVE
-       ALIGN (4)
+       .p2align 4
  L(ll_cache_copy_fwd_start):
         prefetcht0 0x1c0(%rsi)
         prefetcht0 0x200(%rsi)
@@ -2906,7 +2902,7 @@ L(large_page_ll_less_fwd_64bytes):
         BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
  
  #endif
-       ALIGN (4)
+       .p2align 4
  L(large_page_bwd):
         movdqu  -0x10(%rsi), %xmm1
         lea     -16(%rsi), %rsi
@@ -2966,7 +2962,7 @@ L(large_page_less_bwd_64bytes):
         BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
  
  #ifdef USE_AS_MEMMOVE
-       ALIGN (4)
+       .p2align 4
  L(ll_cache_copy_bwd_start):
         prefetcht0 -0x1c0(%rsi)
         prefetcht0 -0x200(%rsi)
@@ -3014,7 +3010,7 @@ L(large_page_ll_less_bwd_64bytes):
  END (MEMCPY)
  
         .section .rodata.ssse3,"a",@progbits
-       ALIGN (3)
+       .p2align 3
  L(table_less_80bytes):
         .int    JMPTBL (L(write_0bytes), L(table_less_80bytes))
         .int    JMPTBL (L(write_1bytes), L(table_less_80bytes))
@@ -3097,7 +3093,7 @@ L(table_less_80bytes):
         .int    JMPTBL (L(write_78bytes), L(table_less_80bytes))
         .int    JMPTBL (L(write_79bytes), L(table_less_80bytes))
  
-       ALIGN (3)
+       .p2align 3
  L(shl_table):
         .int    JMPTBL (L(shl_0), L(shl_table))
         .int    JMPTBL (L(shl_1), L(shl_table))
@@ -3116,7 +3112,7 @@ L(shl_table):
         .int    JMPTBL (L(shl_14), L(shl_table))
         .int    JMPTBL (L(shl_15), L(shl_table))
  
-       ALIGN (3)
+       .p2align 3
  L(shl_table_bwd):
         .int    JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
         .int    JMPTBL (L(shl_1_bwd), L(shl_table_bwd))
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S

index eed8432..4a8e57a 100644 (file)
--- a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
@@ -17,7 +17,6 @@
     <http://www.gnu.org/licenses/>.  */
  
  #include "sysdep.h"
-#define ALIGN(x)       .p2align x
  
  ENTRY ( __strcmp_sse2_unaligned)
         movl    %edi, %eax
@@ -43,7 +42,7 @@ L(return):
         subl    %edx, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(next_48_bytes):
         movdqu  16(%rdi), %xmm6
         movdqu  16(%rsi), %xmm3
@@ -85,7 +84,7 @@ L(main_loop_header):
         movq    %rcx, %rsi
         jmp     L(loop_start)
  
-       ALIGN   (4)
+       .p2align 4
  L(loop):
         addq    $64, %rax
         addq    $64, %rdx
@@ -141,7 +140,7 @@ L(back_to_loop):
         subl    %edx, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(loop_cross_page):
         xor     %r10, %r10
         movq    %rdx, %r9
@@ -191,7 +190,7 @@ L(loop_cross_page):
         subl    %edx, %eax
         ret
  
-       ALIGN (4)
+       .p2align 4
  L(cross_page_loop):
         cmpb    %cl, %al
         jne     L(different)
diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S

index 1900b37..7440500 100644 (file)
--- a/sysdeps/x86_64/strchr.S
+++ b/sysdeps/x86_64/strchr.S
@@ -19,11 +19,6 @@
  
  #include <sysdep.h>
  
-# ifndef ALIGN
-#  define ALIGN(n)     .p2align n
-# endif
-
-
         .text
  ENTRY (strchr)
         movd    %esi, %xmm1
@@ -54,7 +49,7 @@ ENTRY (strchr)
  #endif
         ret
  
-       ALIGN(3)
+       .p2align 3
         L(next_48_bytes):
         movdqu  16(%rdi), %xmm0
         movdqa  %xmm0, %xmm4
@@ -83,10 +78,10 @@ ENTRY (strchr)
  L(loop_start):
         /* We use this alignment to force the loop to be aligned to 8 but not
            16 bytes.  This gives better scheduling on AMD processors.  */
-       ALIGN(4)
+       .p2align 4
         pxor    %xmm6, %xmm6
         andq    $-64, %rdi
-       ALIGN(3)
+       .p2align 3
  L(loop64):
         addq    $64, %rdi
         movdqa  (%rdi), %xmm5
@@ -129,7 +124,7 @@ L(loop64):
         orq     %rcx, %rax
         salq    $48, %rdx
         orq     %rdx, %rax
-       ALIGN(3)
+       .p2align 3
  L(return):
         bsfq    %rax, %rax
  #ifdef AS_STRCHRNUL
@@ -141,7 +136,7 @@ L(return):
         cmovne  %rdx, %rax
  #endif
         ret
-       ALIGN(4)
+       .p2align 4
  
  L(cross_page):
         movq    %rdi, %rdx
diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S

index 514765b..2a07ff7 100644 (file)
--- a/sysdeps/x86_64/strrchr.S
+++ b/sysdeps/x86_64/strrchr.S
@@ -19,11 +19,6 @@
  
  #include <sysdep.h>
  
-# ifndef ALIGN
-#  define ALIGN(n)     .p2align n
-# endif
-
-
         .text
  ENTRY (strrchr)
         movd    %esi, %xmm1
@@ -51,7 +46,7 @@ ENTRY (strrchr)
         addq    %rdi, %rax
         ret
  
-       ALIGN(4)
+       .p2align 4
  L(next_48_bytes):
         movdqu  16(%rdi), %xmm4
         movdqa  %xmm4, %xmm5
@@ -91,7 +86,7 @@ L(next_48_bytes):
         leaq    (%rdi,%rsi), %rax
         ret
  
-       ALIGN(4)
+       .p2align 4
  L(loop_header2):
         testq   %rsi, %rsi
         movq    %rdi, %rcx
@@ -102,7 +97,7 @@ L(loop_header):
         andq    $-64, %rdi
         jmp     L(loop_entry)
  
-       ALIGN(4)
+       .p2align 4
  L(loop64):
         testq   %rdx, %rdx
         cmovne  %rdx, %rsi
@@ -163,18 +158,18 @@ L(loop_entry):
         leaq    (%rcx,%rsi), %rax
         ret
  
-       ALIGN(4)
+       .p2align 4
  L(no_c_found):
         movl    $1, %esi
         xorl    %ecx, %ecx
         jmp     L(loop_header)
  
-       ALIGN(4)
+       .p2align 4
  L(exit):
         xorl    %eax, %eax
         ret
  
-       ALIGN(4)
+       .p2align 4
  L(cross_page):
         movq    %rdi, %rax
         pxor    %xmm0, %xmm0
author	Ondřej Bílka <neleai@seznam.cz>
	Tue, 8 Oct 2013 13:46:48 +0000 (15:46 +0200)
committer	Ondřej Bílka <neleai@seznam.cz>
	Tue, 8 Oct 2013 13:46:48 +0000 (15:46 +0200)
ChangeLog		patch \| blob \| history
sysdeps/x86_64/memset.S		patch \| blob \| history
sysdeps/x86_64/multiarch/memcmp-sse4.S		patch \| blob \| history
sysdeps/x86_64/multiarch/memcmp-ssse3.S		patch \| blob \| history
sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S		patch \| blob \| history
sysdeps/x86_64/multiarch/memcpy-ssse3-back.S		patch \| blob \| history
sysdeps/x86_64/multiarch/memcpy-ssse3.S		patch \| blob \| history
sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S		patch \| blob \| history
sysdeps/x86_64/strchr.S		patch \| blob \| history
sysdeps/x86_64/strrchr.S		patch \| blob \| history