src/base/atomicops_internals_arm_gcc.h

   1 // Copyright 2010 the V8 project authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 // This file is an internal atomic implementation, use atomicops.h instead.
   6 //
   7 // LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.
   8
   9 #ifndef V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
  10 #define V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
  11
  12 #if defined(__QNXNTO__)
  13 #include <sys/cpuinline.h>
  14 #endif
  15
  16 namespace v8 {
  17 namespace base {
  18
  19 // Memory barriers on ARM are funky, but the kernel is here to help:
  20 //
  21 // * ARMv5 didn't support SMP, there is no memory barrier instruction at
  22 //   all on this architecture, or when targeting its machine code.
  23 //
  24 // * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by
  25 //   writing a random value to a very specific coprocessor register.
  26 //
  27 // * On ARMv7, the "dmb" instruction is used to perform a full memory
  28 //   barrier (though writing to the co-processor will still work).
  29 //   However, on single core devices (e.g. Nexus One, or Nexus S),
  30 //   this instruction will take up to 200 ns, which is huge, even though
  31 //   it's completely un-needed on these devices.
  32 //
  33 // * There is no easy way to determine at runtime if the device is
  34 //   single or multi-core. However, the kernel provides a useful helper
  35 //   function at a fixed memory address (0xffff0fa0), which will always
  36 //   perform a memory barrier in the most efficient way. I.e. on single
  37 //   core devices, this is an empty function that exits immediately.
  38 //   On multi-core devices, it implements a full memory barrier.
  39 //
  40 // * This source could be compiled to ARMv5 machine code that runs on a
  41 //   multi-core ARMv6 or ARMv7 device. In this case, memory barriers
  42 //   are needed for correct execution. Always call the kernel helper, even
  43 //   when targeting ARMv5TE.
  44 //
  45
  46 inline void MemoryBarrier() {
  47 #if defined(__linux__) || defined(__ANDROID__)
  48   // Note: This is a function call, which is also an implicit compiler barrier.
  49   typedef void (*KernelMemoryBarrierFunc)();
  50   ((KernelMemoryBarrierFunc)0xffff0fa0)();
  51 #elif defined(__QNXNTO__)
  52   __cpu_membarrier();
  53 #else
  54 #error MemoryBarrier() is not implemented on this platform.
  55 #endif
  56 }
  57
  58 // An ARM toolchain would only define one of these depending on which
  59 // variant of the target architecture is being used. This tests against
  60 // any known ARMv6 or ARMv7 variant, where it is possible to directly
  61 // use ldrex/strex instructions to implement fast atomic operations.
  62 #if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) ||  \
  63     defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
  64     defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) ||  \
  65     defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
  66     defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)
  67
  68 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
  69                                          Atomic32 old_value,
  70                                          Atomic32 new_value) {
  71   Atomic32 prev_value;
  72   int reloop;
  73   do {
  74     // The following is equivalent to:
  75     //
  76     //   prev_value = LDREX(ptr)
  77     //   reloop = 0
  78     //   if (prev_value != old_value)
  79     //      reloop = STREX(ptr, new_value)
  80     __asm__ __volatile__("    ldrex %0, [%3]\n"
  81                          "    mov %1, #0\n"
  82                          "    cmp %0, %4\n"
  83 #ifdef __thumb2__
  84                          "    it eq\n"
  85 #endif
  86                          "    strexeq %1, %5, [%3]\n"
  87                          : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
  88                          : "r"(ptr), "r"(old_value), "r"(new_value)
  89                          : "cc", "memory");
  90   } while (reloop != 0);
  91   return prev_value;
  92 }
  93
  94 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
  95                                        Atomic32 old_value,
  96                                        Atomic32 new_value) {
  97   Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  98   MemoryBarrier();
  99   return result;
 100 }
 101
 102 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
 103                                        Atomic32 old_value,
 104                                        Atomic32 new_value) {
 105   MemoryBarrier();
 106   return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
 107 }
 108
 109 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
 110                                           Atomic32 increment) {
 111   Atomic32 value;
 112   int reloop;
 113   do {
 114     // Equivalent to:
 115     //
 116     //  value = LDREX(ptr)
 117     //  value += increment
 118     //  reloop = STREX(ptr, value)
 119     //
 120     __asm__ __volatile__("    ldrex %0, [%3]\n"
 121                          "    add %0, %0, %4\n"
 122                          "    strex %1, %0, [%3]\n"
 123                          : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
 124                          : "r"(ptr), "r"(increment)
 125                          : "cc", "memory");
 126   } while (reloop);
 127   return value;
 128 }
 129
 130 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
 131                                         Atomic32 increment) {
 132   // TODO(digit): Investigate if it's possible to implement this with
 133   // a single MemoryBarrier() operation between the LDREX and STREX.
 134   // See http://crbug.com/246514
 135   MemoryBarrier();
 136   Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
 137   MemoryBarrier();
 138   return result;
 139 }
 140
 141 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
 142                                          Atomic32 new_value) {
 143   Atomic32 old_value;
 144   int reloop;
 145   do {
 146     // old_value = LDREX(ptr)
 147     // reloop = STREX(ptr, new_value)
 148     __asm__ __volatile__("   ldrex %0, [%3]\n"
 149                          "   strex %1, %4, [%3]\n"
 150                          : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
 151                          : "r"(ptr), "r"(new_value)
 152                          : "cc", "memory");
 153   } while (reloop != 0);
 154   return old_value;
 155 }
 156
 157 // This tests against any known ARMv5 variant.
 158 #elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
 159       defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
 160
 161 // The kernel also provides a helper function to perform an atomic
 162 // compare-and-swap operation at the hard-wired address 0xffff0fc0.
 163 // On ARMv5, this is implemented by a special code path that the kernel
 164 // detects and treats specially when thread pre-emption happens.
 165 // On ARMv6 and higher, it uses LDREX/STREX instructions instead.
 166 //
 167 // Note that this always perform a full memory barrier, there is no
 168 // need to add calls MemoryBarrier() before or after it. It also
 169 // returns 0 on success, and 1 on exit.
 170 //
 171 // Available and reliable since Linux 2.6.24. Both Android and ChromeOS
 172 // use newer kernel revisions, so this should not be a concern.
 173 namespace {
 174
 175 inline int LinuxKernelCmpxchg(Atomic32 old_value,
 176                               Atomic32 new_value,
 177                               volatile Atomic32* ptr) {
 178   typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*);
 179   return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
 180 }
 181
 182 }  // namespace
 183
 184 inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
 185                                          Atomic32 old_value,
 186                                          Atomic32 new_value) {
 187   Atomic32 prev_value;
 188   for (;;) {
 189     prev_value = *ptr;
 190     if (prev_value != old_value)
 191       return prev_value;
 192     if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
 193       return old_value;
 194   }
 195 }
 196
 197 inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
 198                                          Atomic32 new_value) {
 199   Atomic32 old_value;
 200   do {
 201     old_value = *ptr;
 202   } while (LinuxKernelCmpxchg(old_value, new_value, ptr));
 203   return old_value;
 204 }
 205
 206 inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
 207                                           Atomic32 increment) {
 208   return Barrier_AtomicIncrement(ptr, increment);
 209 }
 210
 211 inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
 212                                         Atomic32 increment) {
 213   for (;;) {
 214     // Atomic exchange the old value with an incremented one.
 215     Atomic32 old_value = *ptr;
 216     Atomic32 new_value = old_value + increment;
 217     if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
 218       // The exchange took place as expected.
 219       return new_value;
 220     }
 221     // Otherwise, *ptr changed mid-loop and we need to retry.
 222   }
 223 }
 224
 225 inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
 226                                        Atomic32 old_value,
 227                                        Atomic32 new_value) {
 228   Atomic32 prev_value;
 229   for (;;) {
 230     prev_value = *ptr;
 231     if (prev_value != old_value) {
 232       // Always ensure acquire semantics.
 233       MemoryBarrier();
 234       return prev_value;
 235     }
 236     if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
 237       return old_value;
 238   }
 239 }
 240
 241 inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
 242                                        Atomic32 old_value,
 243                                        Atomic32 new_value) {
 244   // This could be implemented as:
 245   //    MemoryBarrier();
 246   //    return NoBarrier_CompareAndSwap();
 247   //
 248   // But would use 3 barriers per succesful CAS. To save performance,
 249   // use Acquire_CompareAndSwap(). Its implementation guarantees that:
 250   // - A succesful swap uses only 2 barriers (in the kernel helper).
 251   // - An early return due to (prev_value != old_value) performs
 252   //   a memory barrier with no store, which is equivalent to the
 253   //   generic implementation above.
 254   return Acquire_CompareAndSwap(ptr, old_value, new_value);
 255 }
 256
 257 #else
 258 #  error "Your CPU's ARM architecture is not supported yet"
 259 #endif
 260
// NOTE: Atomicity of the following load and store operations is only
// guaranteed in case of 32-bit alignment of |ptr| values.
 263
 264 inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
 265   *ptr = value;
 266 }
 267
 268 inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
 269   *ptr = value;
 270   MemoryBarrier();
 271 }
 272
 273 inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
 274   MemoryBarrier();
 275   *ptr = value;
 276 }
 277
 278 inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }
 279
 280 inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
 281   Atomic32 value = *ptr;
 282   MemoryBarrier();
 283   return value;
 284 }
 285
 286 inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
 287   MemoryBarrier();
 288   return *ptr;
 289 }
 290
 291 // Byte accessors.
 292
 293 inline void NoBarrier_Store(volatile Atomic8* ptr, Atomic8 value) {
 294   *ptr = value;
 295 }
 296
 297 inline Atomic8 NoBarrier_Load(volatile const Atomic8* ptr) { return *ptr; }
 298
 299 } }  // namespace v8::base
 300
 301 #endif  // V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_