2009-02-05 Paul Brook <paul@codesourcery.com>
author     Joseph Myers <joseph@codesourcery.com>
           Thu, 5 Feb 2009 14:46:41 +0000 (14:46 +0000)
committer  Joseph Myers <joseph@codesourcery.com>
           Thu, 5 Feb 2009 14:46:41 +0000 (14:46 +0000)
            Joseph Myers  <joseph@codesourcery.com>

* sysdeps/arm/dl-machine.h (elf_machine_dynamic): Add Thumb-2 implementation.
(elf_machine_load_address): Clear T bit of PLT entry contents.
(RTLD_START): Mark function symbols as such.  Tweak pc-relative
addressing to avoid depending on pc read pipeline offset.
* sysdeps/arm/machine-gmon.h (MCOUNT): Add Thumb-2 implementation.
* sysdeps/arm/tls-macros.h: Add alignment for Thumb-2.
(ARM_PC_OFFSET): Define.
(TLS_IE): Define differently for Thumb-2.
(TLS_LE, TLS_LD, TLS_GD): Use ARM_PC_OFFSET.
* sysdeps/arm/elf/start.S: Switch to thumb mode for Thumb-2.
* sysdeps/unix/sysv/linux/arm/eabi/sysdep.h (INTERNAL_SYSCALL_RAW):
Add Thumb implementation.
* sysdeps/unix/sysv/linux/arm/eabi/nptl/aio_misc.h: New.
* sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c: Enforce
alignment for Thumb-2.  Adjust offset from PC for Thumb-2.
* sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c: Ditto.
* sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h (atomic_full_barrier,
__arch_compare_and_exchange_val_32_acq): Add Thumb-2 implementation.

ChangeLog.arm
sysdeps/arm/dl-machine.h
sysdeps/arm/elf/start.S
sysdeps/arm/machine-gmon.h
sysdeps/arm/tls-macros.h
sysdeps/unix/sysv/linux/arm/eabi/nptl/aio_misc.h [new file with mode: 0644]
sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c
sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c
sysdeps/unix/sysv/linux/arm/eabi/sysdep.h
sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h

diff --git a/ChangeLog.arm b/ChangeLog.arm
index 52480ac..fd520bc 100644 (file)
@@ -1,3 +1,25 @@
+2009-02-05  Paul Brook  <paul@codesourcery.com>
+            Joseph Myers  <joseph@codesourcery.com>
+
+       * sysdeps/arm/dl-machine.h (elf_machine_dynamic): Add Thumb-2 implementation.
+       (elf_machine_load_address): Clear T bit of PLT entry contents.
+       (RTLD_START): Mark function symbols as such.  Tweak pc-relative
+       addressing to avoid depending on pc read pipeline offset.
+       * sysdeps/arm/machine-gmon.h (MCOUNT): Add Thumb-2 implementation.
+       * sysdeps/arm/tls-macros.h: Add alignment for Thumb-2.
+       (ARM_PC_OFFSET): Define.
+       (TLS_IE): Define differently for Thumb-2.
+       (TLS_LE, TLS_LD, TLS_GD): Use ARM_PC_OFFSET.
+       * sysdeps/arm/elf/start.S: Switch to thumb mode for Thumb-2.
+       * sysdeps/unix/sysv/linux/arm/eabi/sysdep.h (INTERNAL_SYSCALL_RAW):
+       Add Thumb implementation.
+       * sysdeps/unix/sysv/linux/arm/eabi/nptl/aio_misc.h: New.
+       * sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c: Enforce
+       alignment for Thumb-2.  Adjust offset from PC for Thumb-2.
+       * sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c: Ditto.
+       * sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h (atomic_full_barrier,
+       __arch_compare_and_exchange_val_32_acq): Add Thumb-2 implementation.
+
 2009-02-02  Joseph Myers  <joseph@codesourcery.com>
 
        * sysdeps/unix/sysv/linux/arm/bits/shm.h (SHM_EXEC): Define.
diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h
index 1a45a26..f839d97 100644 (file)
@@ -53,11 +53,22 @@ static inline Elf32_Addr __attribute__ ((unused))
 elf_machine_dynamic (void)
 {
   Elf32_Addr dynamic;
+#ifdef __thumb2__
+  long tmp;
+  asm ("ldr\t%0, 1f\n\t"
+       "adr\t%1, 1f\n\t"
+       "ldr\t%0, [%0, %1]\n\t"
+       "b 2f\n"
+       ".align 2\n"
+       "1: .word _GLOBAL_OFFSET_TABLE_ - 1b\n"
+       "2:" : "=r" (dynamic), "=r"(tmp));
+#else
   asm ("ldr %0, 2f\n"
        "1: ldr %0, [pc, %0]\n"
        "b 3f\n"
        "2: .word _GLOBAL_OFFSET_TABLE_ - (1b+8)\n"
        "3:" : "=r" (dynamic));
+#endif
   return dynamic;
 }
 
@@ -69,6 +80,10 @@ elf_machine_load_address (void)
   extern void __dl_start asm ("_dl_start");
   Elf32_Addr got_addr = (Elf32_Addr) &__dl_start;
   Elf32_Addr pcrel_addr;
+#ifdef __thumb__
+  /* Clear the low bit of the function address.  */
+  got_addr &= ~(Elf32_Addr) 1;
+#endif
   asm ("adr %0, _dl_start" : "=r" (pcrel_addr));
   return pcrel_addr - got_addr;
 }
@@ -140,7 +155,9 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 #define RTLD_START asm ("\
 .text\n\
 .globl _start\n\
+.type _start, %function\n\
 .globl _dl_start_user\n\
+.type _dl_start_user, %function\n\
 _start:\n\
        @ we are PIC code, so get global offset table\n\
        ldr     sl, .L_GET_GOT\n\
@@ -152,8 +169,8 @@ _start:\n\
        bl      _dl_start\n\
        @ returns user entry point in r0\n\
 _dl_start_user:\n\
-       add     sl, pc, sl\n\
-.L_GOT_GOT:\n\
+       adr     r6, .L_GET_GOT\n\
+       add     sl, sl, r6\n\
        ldr     r4, [sl, r4]\n\
        @ save the entry point in another register\n\
        mov     r6, r0\n\
@@ -210,7 +227,7 @@ _dl_start_user:\n\
        b       .L_done_fixup\n\
 \n\
 .L_GET_GOT:\n\
-       .word   _GLOBAL_OFFSET_TABLE_ - .L_GOT_GOT - 4\n\
+       .word   _GLOBAL_OFFSET_TABLE_ - .L_GET_GOT\n\
 .L_SKIP_ARGS:\n\
        .word   _dl_skip_args(GOTOFF)\n\
 .L_FINI_PROC:\n\
diff --git a/sysdeps/arm/elf/start.S b/sysdeps/arm/elf/start.S
index f63b3db..0cf4339 100644 (file)
                ...
                                        NULL
 */
+#if defined(__thumb2__)
+       .thumb
+       .syntax unified
+#endif
 
        .text
        .globl _start
diff --git a/sysdeps/arm/machine-gmon.h b/sysdeps/arm/machine-gmon.h
index fa3f652..dbda0dd 100644 (file)
@@ -50,6 +50,28 @@ static void mcount_internal (u_long frompc, u_long selfpc)
    }
 */
 
+#ifdef __thumb2__
+
+#define MCOUNT                                                         \
+void _mcount (void)                                                    \
+{                                                                      \
+  __asm__("push                {r0, r1, r2, r3};"                              \
+         "movs         fp, fp;"                                        \
+         "it           eq;"                                            \
+          "moveq       r1, #0;"                                        \
+         "itttt        ne;"                                            \
+         "ldrne        r1, [fp, $-4];"                                 \
+         "ldrne        r0, [fp, $-12];"                                \
+         "movnes       r0, r0;"                                        \
+         "ldrne        r0, [r0, $-4];"                                 \
+         "movs         r0, r0;"                                        \
+         "it           ne;"                                            \
+         "blne         mcount_internal;"                               \
+         "pop          {r0, r1, r2, r3}");                             \
+}
+
+#else
+
 #define MCOUNT                                                         \
 void _mcount (void)                                                    \
 {                                                                      \
@@ -65,3 +87,4 @@ void _mcount (void)                                                   \
          "ldmia        sp!, {r0, r1, r2, r3}");                        \
 }
 
+#endif
diff --git a/sysdeps/arm/tls-macros.h b/sysdeps/arm/tls-macros.h
index 94aa3a8..e41d3bc 100644 (file)
@@ -1,14 +1,36 @@
+#ifdef __thumb2__
+#define ARM_PC_OFFSET "4"
+#else
+#define ARM_PC_OFFSET "8"
+#endif
+
 #define TLS_LE(x)                                      \
   ({ int *__result;                                    \
      void *tp = __builtin_thread_pointer ();           \
      asm ("ldr %0, 1f; "                               \
          "add %0, %1, %0; "                            \
          "b 2f; "                                      \
+         ".align 2; "                                  \
          "1: .word " #x "(tpoff); "                    \
          "2: "                                         \
          : "=&r" (__result) : "r" (tp));               \
      __result; })
 
+#ifdef __thumb2__
+#define TLS_IE(x)                                      \
+  ({ int *__result;                                    \
+     void *tp = __builtin_thread_pointer ();           \
+     asm ("ldr %0, 1f; "                               \
+         "3: add %0, pc, %0;"                          \
+         "ldr %0, [%0];"                               \
+         "add %0, %1, %0; "                            \
+         "b 2f; "                                      \
+         ".align 2; "                                  \
+         "1: .word " #x "(gottpoff) + (. - 3b - 4); "  \
+         "2: "                                         \
+         : "=&r" (__result) : "r" (tp));               \
+     __result; })
+#else
 #define TLS_IE(x)                                      \
   ({ int *__result;                                    \
      void *tp = __builtin_thread_pointer ();           \
          "3: ldr %0, [pc, %0];"                        \
          "add %0, %1, %0; "                            \
          "b 2f; "                                      \
+         ".align 2; "                                  \
          "1: .word " #x "(gottpoff) + (. - 3b - 8); "  \
          "2: "                                         \
          : "=&r" (__result) : "r" (tp));               \
      __result; })
+#endif
 
 #define TLS_LD(x)                                      \
   ({ char *__result;                                   \
      asm ("ldr %0, 2f; "                               \
          "1: add %0, pc, %0; "                         \
          "b 3f; "                                      \
-         "2: .word " #x "(tlsldm) + (. - 1b - 8); "    \
+         ".align 2; "                                  \
+         "2: .word " #x "(tlsldm) + (. - 1b - "ARM_PC_OFFSET"); "      \
          "3: "                                         \
          : "=r" (__result));                           \
      __result = (char *)__tls_get_addr (__result);     \
      asm ("ldr %0, 1f; "                               \
          "b 2f; "                                      \
+         ".align 2; "                                  \
          "1: .word " #x "(tlsldo); "                   \
          "2: "                                         \
          : "=r" (__offset));                           \
@@ -45,7 +71,8 @@
      asm ("ldr %0, 2f; "                               \
          "1: add %0, pc, %0; "                         \
          "b 3f; "                                      \
-         "2: .word " #x "(tlsgd) + (. - 1b - 8); "     \
+         ".align 2; "                                  \
+         "2: .word " #x "(tlsgd) + (. - 1b - "ARM_PC_OFFSET"); "       \
          "3: "                                         \
          : "=r" (__result));                           \
      (int *)__tls_get_addr (__result); })
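
The tls-macros.h changes above all revolve around ARM_PC_OFFSET: reading pc yields the address of the current instruction plus 8 in ARM state but plus 4 in Thumb-2 state, so every literal of the form ". - label - offset" must subtract the matching amount. As a hedged illustration only (the function name is invented and the code is not part of the patch), the same ldr/add-pc/literal shape can locate any nearby code address position-independently:

/* Minimal sketch, assuming ARM_PC_OFFSET is defined as in tls-macros.h
   ("8" for ARM, "4" for Thumb-2): return the run-time address of the
   literal word embedded below.  */
static void *
pc_relative_here (void)
{
  void *result;
  asm ("ldr %0, 2f; "
       "1: add %0, pc, %0; "   /* pc reads as 1b + ARM_PC_OFFSET here */
       "b 3f; "
       ".align 2; "
       "2: .word 2b - (1b + " ARM_PC_OFFSET "); "
       "3: "
       : "=r" (result));
  return result;
}

Because the add sees pc = 1b + ARM_PC_OFFSET, adding the assembled constant 2b - 1b - ARM_PC_OFFSET lands exactly on label 2 wherever the code is loaded, which is the property the (tlsldm) and (tlsgd) literals rely on.
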
diff --git a/sysdeps/unix/sysv/linux/arm/eabi/nptl/aio_misc.h b/sysdeps/unix/sysv/linux/arm/eabi/nptl/aio_misc.h
new file mode 100644 (file)
index 0000000..3fb1ec9
--- /dev/null
@@ -0,0 +1,52 @@
+/* Copyright (C) 2008 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include_next <aio_misc.h>
+
+#ifdef __thumb2__
+
+#include <errno.h>
+
+/* The Thumb-2 definition of INTERNAL_SYSCALL_RAW has to hide the use
+   of r7 from the compiler because it cannot handle asm clobbering the
+   hard frame pointer.  In aio_suspend, GCC does not eliminate the
+   hard frame pointer because the function uses variable-length
+   arrays, so it generates unwind information using r7 as virtual
+   stack pointer.  During system calls, when r7 has been saved on the
+   stack, this means the unwind information is invalid.  Without extra
+   unwind directives, which would need to cause unwind information for
+   the asm to be generated separately from that for the parts of the
+   function before and after the asm (with three index table entries),
+   it is not possible to represent any temporary change to the virtual
+   stack pointer.  Instead, we move the problematic system calls out
+   of line into a function that does not require a frame pointer.  */
+
+static __attribute_noinline__ void
+aio_misc_wait (int *resultp,
+              volatile int *futexp,
+              const struct timespec *timeout,
+              int cancel)
+{
+  AIO_MISC_WAIT (*resultp, *futexp, timeout, cancel);
+}
+
+#undef AIO_MISC_WAIT
+#define AIO_MISC_WAIT(result, futex, timeout, cancel)  \
+  aio_misc_wait (&result, &futex, timeout, cancel)
+
+#endif
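
The comment above is the crux of the new file: inline asm that temporarily repurposes r7 must not sit in a function whose unwind information is expressed in terms of r7. A rough, self-contained sketch of the same shape (hypothetical names, not glibc code; assumes a Linux ARM EABI target) confines the r7-borrowing syscall to a noinline helper and keeps the frame-pointer-using caller clean:

#include <sys/syscall.h>   /* SYS_getpid; any zero-argument syscall works for the sketch */

/* Noinline helper: its frame needs no frame pointer, so borrowing r7 around
   the SVC cannot invalidate unwind information expressed in terms of r7.  */
static __attribute__ ((noinline)) int
raw_getpid (void)
{
  int buf[2];                              /* buf[0] = syscall number, buf[1] = saved r7 */
  register int result asm ("r0");
  register int *r6_buf asm ("r6") = buf;
  buf[0] = SYS_getpid;
  asm volatile ("str r7, [r6, #4]\n\t"     /* stash the caller-visible r7 */
                "ldr r7, [r6]\n\t"         /* EABI: syscall number goes in r7 */
                "swi 0\n\t"
                "ldr r7, [r6, #4]"         /* restore r7 before the compiler looks again */
                : "=r" (result)
                : "r" (r6_buf)
                : "memory");
  return result;
}

/* Caller: the variable-length array makes GCC keep r7 as the hard frame
   pointer and describe this frame relative to r7 in its unwind entry,
   which is exactly why the asm above has to live out of line.  */
int
pid_plus_scratch (int n)
{
  char scratch[n > 0 ? n : 1];
  scratch[0] = 0;
  return raw_getpid () + scratch[0];
}

In glibc itself the out-of-line helper is aio_misc_wait above, and the r7 save/restore lives in the Thumb INTERNAL_SYSCALL_RAW shown further down in this commit.
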
diff --git a/sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c b/sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-forcedunwind.c
index 24ce61b..71ab77c 100644 (file)
@@ -89,7 +89,12 @@ asm (
 "4:    bl      pthread_cancel_init\n"
 "      ldr     r3, [r4, r5]\n"
 "      b       5b\n"
+"      .align 2\n"
+#ifdef __thumb2__
+"1:    .word   _GLOBAL_OFFSET_TABLE_ - 3b - 4\n"
+#else
 "1:    .word   _GLOBAL_OFFSET_TABLE_ - 3b - 8\n"
+#endif
 "2:    .word   libgcc_s_resume(GOTOFF)\n"
 "      .size   _Unwind_Resume, .-_Unwind_Resume\n"
 );
diff --git a/sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c b/sysdeps/unix/sysv/linux/arm/eabi/nptl/unwind-resume.c
index a9c9d18..3c780b7 100644 (file)
@@ -66,7 +66,12 @@ asm (
 "4:    bl      init\n"
 "      ldr     r3, [r4, r5]\n"
 "      b       5b\n"
+"      .align 2\n"
+#ifdef __thumb2__
+"1:    .word   _GLOBAL_OFFSET_TABLE_ - 3b - 4\n"
+#else
 "1:    .word   _GLOBAL_OFFSET_TABLE_ - 3b - 8\n"
+#endif
 "2:    .word   libgcc_s_resume(GOTOFF)\n"
 "      .size   _Unwind_Resume, .-_Unwind_Resume\n"
 );
diff --git a/sysdeps/unix/sysv/linux/arm/eabi/sysdep.h b/sysdeps/unix/sysv/linux/arm/eabi/sysdep.h
index 1444f40..a7dd40d 100644 (file)
    argument; otherwise the (optional) compatibility code for APCS binaries
    may be invoked.  */
 
+#ifdef __thumb__
+/* Hide the use of r7 from the compiler; this would be a lot
+   easier but for the fact that syscall numbers can exceed 255.
+   For the moment, LOAD_ARGS_7 is sacrificed.
+   We can't use push/pop inside the asm because that breaks
+   unwinding (i.e. thread cancellation).  */
+#undef LOAD_ARGS_7
+#undef INTERNAL_SYSCALL_RAW
+#define INTERNAL_SYSCALL_RAW(name, err, nr, args...)           \
+  ({                                                           \
+      int _sys_buf[2];                                         \
+      register int _a1 asm ("a1");                             \
+      register int *_r6 asm ("r6") = _sys_buf;                 \
+      *_r6 = name;                                             \
+      LOAD_ARGS_##nr (args)                                    \
+      asm volatile ("str        r7, [r6, #4]\n\t"              \
+                    "ldr      r7, [r6]\n\t"                    \
+                    "swi      0       @ syscall " #name "\n\t" \
+                    "ldr      r7, [r6, #4]"                    \
+                   : "=r" (_a1)                                        \
+                    : "r" (_r6) ASM_ARGS_##nr                  \
+                    : "memory");                               \
+       _a1; })
+#else /* ARM */
 #undef INTERNAL_SYSCALL_RAW
 #define INTERNAL_SYSCALL_RAW(name, err, nr, args...)           \
   ({                                                           \
@@ -55,6 +79,7 @@
                     : "r" (_nr) ASM_ARGS_##nr                  \
                     : "memory");                               \
        _a1; })
+#endif
 
 /* For EABI, non-constant syscalls are actually pretty easy...  */
 #undef INTERNAL_SYSCALL_NCS
diff --git a/sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h b/sysdeps/unix/sysv/linux/arm/nptl/bits/atomic.h
index 247ddd3..b0586ea 100644 (file)
@@ -37,12 +37,21 @@ typedef uintmax_t uatomic_max_t;
 
 void __arm_link_error (void);
 
+#ifdef __thumb2__
+#define atomic_full_barrier() \
+     __asm__ __volatile__                                                    \
+            ("movw\tip, #0x0fa0\n\t"                                         \
+             "movt\tip, #0xffff\n\t"                                         \
+             "blx\tip"                                                       \
+             : : : "ip", "lr", "cc", "memory");
+#else
 #define atomic_full_barrier() \
      __asm__ __volatile__                                                    \
             ("mov\tip, #0xffff0fff\n\t"                                      \
              "mov\tlr, pc\n\t"                                               \
              "add\tpc, ip, #(0xffff0fa0 - 0xffff0fff)"                       \
              : : : "ip", "lr", "cc", "memory");
+#endif
 
 /* Atomic compare and exchange.  This sequence relies on the kernel to
    provide a compare and exchange operation which is atomic on the
@@ -59,6 +68,32 @@ void __arm_link_error (void);
    specify one to work around GCC PR rtl-optimization/21223.  Otherwise
    it may cause a_oldval or a_tmp to be moved to a different register.  */
 
+#ifdef __thumb2__
+/* Thumb-2 has ldrex/strex.  However it does not have barrier instructions,
+   so we still need to use the kernel helper.  */
+#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
+  ({ register __typeof (oldval) a_oldval asm ("r0");                         \
+     register __typeof (oldval) a_newval asm ("r1") = (newval);                      \
+     register __typeof (mem) a_ptr asm ("r2") = (mem);                       \
+     register __typeof (oldval) a_tmp asm ("r3");                            \
+     register __typeof (oldval) a_oldval2 asm ("r4") = (oldval);             \
+     __asm__ __volatile__                                                    \
+            ("0:\tldr\t%[tmp],[%[ptr]]\n\t"                                  \
+             "cmp\t%[tmp], %[old2]\n\t"                                      \
+             "bne\t1f\n\t"                                                   \
+             "mov\t%[old], %[old2]\n\t"                                      \
+             "movw\t%[tmp], #0x0fc0\n\t"                                     \
+             "movt\t%[tmp], #0xffff\n\t"                                     \
+             "blx\t%[tmp]\n\t"                                               \
+             "bcc\t0b\n\t"                                                   \
+             "mov\t%[tmp], %[old2]\n\t"                                      \
+             "1:"                                                            \
+             : [old] "=&r" (a_oldval), [tmp] "=&r" (a_tmp)                   \
+             : [new] "r" (a_newval), [ptr] "r" (a_ptr),                      \
+               [old2] "r" (a_oldval2)                                        \
+             : "ip", "lr", "cc", "memory");                                  \
+     a_tmp; })
+#else
 #define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
   ({ register __typeof (oldval) a_oldval asm ("r0");                         \
      register __typeof (oldval) a_newval asm ("r1") = (newval);                      \
@@ -81,6 +116,7 @@ void __arm_link_error (void);
                [old2] "r" (a_oldval2)                                        \
              : "ip", "lr", "cc", "memory");                                  \
      a_tmp; })
+#endif
 
 #define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
   ({ __arm_link_error (); oldval; })
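
Both new Thumb-2 sequences build the address of a fixed kernel entry point in the vectors page with movw/movt: 0xffff0fa0 is the kernel's memory-barrier helper and 0xffff0fc0 its compare-and-exchange helper (__kuser_cmpxchg). Purely as a hedged sketch of what that helper provides (invented names; assumes a Linux/ARM kernel that exposes the kuser helper page), it can be driven directly from C:

#include <stdint.h>

/* Kernel-provided compare-and-exchange: takes oldval in r0, newval in r1
   and the pointer in r2, and returns 0 if *ptr was changed, nonzero if
   the exchange did not happen.  */
typedef int (*kuser_cmpxchg_t) (int32_t oldval, int32_t newval,
                                volatile int32_t *ptr);
#define KUSER_CMPXCHG ((kuser_cmpxchg_t) 0xffff0fc0)

/* Hypothetical helper: atomically add one to *counter, retrying until the
   compare-and-exchange succeeds, and return the new value.  */
static int32_t
atomic_increment (volatile int32_t *counter)
{
  int32_t old;
  do
    old = *counter;
  while (KUSER_CMPXCHG (old, old + 1, counter) != 0);
  return old + 1;
}

The glibc macro above open-codes the same retry loop inline, pinning oldval, newval and the pointer to r0-r2 because that is the register interface the helper expects, and looping on bcc 0b until the exchange succeeds.
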