[ARM] Fix codegen of unaligned volatile load/store of i64
author Maurice Heumann <MauriceHeumann@gmail.com>
Mon, 26 Jun 2023 16:41:31 +0000 (09:41 -0700)
committer Eli Friedman <efriedma@quicinc.com>
Mon, 26 Jun 2023 17:45:41 +0000 (10:45 -0700)
Volatile loads/stores of i64 are lowered to LDRD/STRD on ARMv5TE and later.
However, these instructions require their addresses to be sufficiently aligned.
Insufficiently aligned loads/stores must therefore be excluded from this lowering.

Differential Revision: https://reviews.llvm.org/D152790

llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMSubtarget.h
llvm/test/CodeGen/ARM/i64_volatile_load_store.ll

index 265ad22..61743a3 100644 (file)
@@ -10083,7 +10083,8 @@ void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
   assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
 
   if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
-      !Subtarget->isThumb1Only() && LD->isVolatile()) {
+      !Subtarget->isThumb1Only() && LD->isVolatile() &&
+      LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
     SDLoc dl(N);
     SDValue Result = DAG.getMemIntrinsicNode(
         ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
@@ -10140,7 +10141,8 @@ static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
   assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
 
   if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
-      !Subtarget->isThumb1Only() && ST->isVolatile()) {
+      !Subtarget->isThumb1Only() && ST->isVolatile() &&
+      ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
     SDNode *N = Op.getNode();
     SDLoc dl(N);
 
index c6a845b..715b5be 100644 (file)
@@ -498,6 +498,11 @@ public:
   /// function for this subtarget.
   Align getStackAlignment() const { return stackAlignment; }
 
+  /// Alignment required for LDRD/STRD: 4 if v7 ops or unaligned memory access is allowed, else 8.
+  Align getDualLoadStoreAlignment() const {
+    return Align(hasV7Ops() || allowsUnalignedMem() ? 4 : 8);
+  }
+
   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
 
   unsigned getPartialUpdateClearance() const { return PartialUpdateClearance; }
index 5903134..ca5fd2b 100644 (file)
-; RUN: llc -mtriple=armv5e-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV5TE,CHECK
-; RUN: llc -mtriple=thumbv6t2-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-T2,CHECK
-; RUN: llc -mtriple=armv4t-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV4T,CHECK
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=armv5e-arm-none-eabi -mattr=+strict-align %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV5TE
+; RUN: llc -mtriple=thumbv6t2-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-T2
+; RUN: llc -mtriple=armv4t-arm-none-eabi -mattr=+strict-align %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV4T
+; RUN: llc -mtriple=armv7-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV7
+; RUN: llc -mtriple=armv7-arm-none-eabi -mattr=+strict-align %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV7-STRICT
+; RUN: llc -mtriple=armv6-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV6
+; RUN: llc -mtriple=armv6-arm-none-eabi -mattr=+strict-align %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV6-STRICT
 
 @x = common dso_local global i64 0, align 8
 @y = common dso_local global i64 0, align 8
 
+@x_unaligned = common dso_local global i64 0, align 1
+@y_unaligned = common dso_local global i64 0, align 1
+
+@x_aligned_4 = common dso_local global i64 0, align 4
+@y_aligned_4 = common dso_local global i64 0, align 4
+
 define void @test() {
+; CHECK-ARMV5TE-LABEL: test:
+; CHECK-ARMV5TE:       @ %bb.0: @ %entry
+; CHECK-ARMV5TE-NEXT:    ldr r0, .LCPI0_0
+; CHECK-ARMV5TE-NEXT:    ldr r2, .LCPI0_1
+; CHECK-ARMV5TE-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV5TE-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV5TE-NEXT:    bx lr
+; CHECK-ARMV5TE-NEXT:    .p2align 2
+; CHECK-ARMV5TE-NEXT:  @ %bb.1:
+; CHECK-ARMV5TE-NEXT:  .LCPI0_0:
+; CHECK-ARMV5TE-NEXT:    .long x
+; CHECK-ARMV5TE-NEXT:  .LCPI0_1:
+; CHECK-ARMV5TE-NEXT:    .long y
+;
+; CHECK-T2-LABEL: test:
+; CHECK-T2:       @ %bb.0: @ %entry
+; CHECK-T2-NEXT:    movw r0, :lower16:x
+; CHECK-T2-NEXT:    movw r2, :lower16:y
+; CHECK-T2-NEXT:    movt r0, :upper16:x
+; CHECK-T2-NEXT:    movt r2, :upper16:y
+; CHECK-T2-NEXT:    ldrd r0, r1, [r0]
+; CHECK-T2-NEXT:    strd r0, r1, [r2]
+; CHECK-T2-NEXT:    bx lr
+;
+; CHECK-ARMV4T-LABEL: test:
+; CHECK-ARMV4T:       @ %bb.0: @ %entry
+; CHECK-ARMV4T-NEXT:    ldr r0, .LCPI0_0
+; CHECK-ARMV4T-NEXT:    ldr r2, .LCPI0_1
+; CHECK-ARMV4T-NEXT:    ldr r1, [r0]
+; CHECK-ARMV4T-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARMV4T-NEXT:    str r0, [r2, #4]
+; CHECK-ARMV4T-NEXT:    str r1, [r2]
+; CHECK-ARMV4T-NEXT:    bx lr
+; CHECK-ARMV4T-NEXT:    .p2align 2
+; CHECK-ARMV4T-NEXT:  @ %bb.1:
+; CHECK-ARMV4T-NEXT:  .LCPI0_0:
+; CHECK-ARMV4T-NEXT:    .long x
+; CHECK-ARMV4T-NEXT:  .LCPI0_1:
+; CHECK-ARMV4T-NEXT:    .long y
+;
+; CHECK-ARMV7-LABEL: test:
+; CHECK-ARMV7:       @ %bb.0: @ %entry
+; CHECK-ARMV7-NEXT:    movw r0, :lower16:x
+; CHECK-ARMV7-NEXT:    movw r2, :lower16:y
+; CHECK-ARMV7-NEXT:    movt r0, :upper16:x
+; CHECK-ARMV7-NEXT:    movt r2, :upper16:y
+; CHECK-ARMV7-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV7-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV7-NEXT:    bx lr
+;
+; CHECK-ARMV7-STRICT-LABEL: test:
+; CHECK-ARMV7-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV7-STRICT-NEXT:    movw r0, :lower16:x
+; CHECK-ARMV7-STRICT-NEXT:    movw r2, :lower16:y
+; CHECK-ARMV7-STRICT-NEXT:    movt r0, :upper16:x
+; CHECK-ARMV7-STRICT-NEXT:    movt r2, :upper16:y
+; CHECK-ARMV7-STRICT-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV7-STRICT-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV7-STRICT-NEXT:    bx lr
+;
+; CHECK-ARMV6-LABEL: test:
+; CHECK-ARMV6:       @ %bb.0: @ %entry
+; CHECK-ARMV6-NEXT:    ldr r0, .LCPI0_0
+; CHECK-ARMV6-NEXT:    ldr r2, .LCPI0_1
+; CHECK-ARMV6-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV6-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV6-NEXT:    bx lr
+; CHECK-ARMV6-NEXT:    .p2align 2
+; CHECK-ARMV6-NEXT:  @ %bb.1:
+; CHECK-ARMV6-NEXT:  .LCPI0_0:
+; CHECK-ARMV6-NEXT:    .long x
+; CHECK-ARMV6-NEXT:  .LCPI0_1:
+; CHECK-ARMV6-NEXT:    .long y
+;
+; CHECK-ARMV6-STRICT-LABEL: test:
+; CHECK-ARMV6-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV6-STRICT-NEXT:    ldr r0, .LCPI0_0
+; CHECK-ARMV6-STRICT-NEXT:    ldr r2, .LCPI0_1
+; CHECK-ARMV6-STRICT-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV6-STRICT-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV6-STRICT-NEXT:    bx lr
+; CHECK-ARMV6-STRICT-NEXT:    .p2align 2
+; CHECK-ARMV6-STRICT-NEXT:  @ %bb.1:
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI0_0:
+; CHECK-ARMV6-STRICT-NEXT:    .long x
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI0_1:
+; CHECK-ARMV6-STRICT-NEXT:    .long y
 entry:
-; CHECK-LABEL: test:
-; CHECK-ARMV5TE:      ldr [[ADDR0:r[0-9]+]]
-; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]]]
-; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], [[[ADDR1]]]
-; CHECK-T2:           movw [[ADDR0:r[0-9]+]], :lower16:x
-; CHECK-T2-NEXT:      movw [[ADDR1:r[0-9]+]], :lower16:y
-; CHECK-T2-NEXT:      movt [[ADDR0]], :upper16:x
-; CHECK-T2-NEXT:      movt [[ADDR1]], :upper16:y
-; CHECK-T2-NEXT:      ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]]]
-; CHECK-T2-NEXT:      strd [[R0]], [[R1]], [[[ADDR1]]]
-; CHECK-ARMV4T:       ldr [[ADDR0:r[0-9]+]]
-; CHECK-ARMV4T-NEXT:  ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV4T-NEXT:  ldr [[R1:r[0-9]+]], [[[ADDR0]]]
-; CHECK-ARMV4T-NEXT:  ldr [[R0:r[0-9]+]], [[[ADDR0]], #4]
-; CHECK-ARMV4T-NEXT:  str [[R0]], [[[ADDR1]], #4]
-; CHECK-ARMV4T-NEXT:  str [[R1]], [[[ADDR1]]]
+
+
+
+
   %0 = load volatile i64, ptr @x, align 8
   store volatile i64 %0, ptr @y, align 8
   ret void
 }
 
+define void @test_unaligned() {
+; CHECK-ARMV5TE-LABEL: test_unaligned:
+; CHECK-ARMV5TE:       @ %bb.0: @ %entry
+; CHECK-ARMV5TE-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-ARMV5TE-NEXT:    push {r4, r5, r6, lr}
+; CHECK-ARMV5TE-NEXT:    ldr r0, .LCPI1_0
+; CHECK-ARMV5TE-NEXT:    ldr r6, .LCPI1_1
+; CHECK-ARMV5TE-NEXT:    mov r1, r0
+; CHECK-ARMV5TE-NEXT:    ldrb lr, [r1, #4]!
+; CHECK-ARMV5TE-NEXT:    ldrb r3, [r1, #2]
+; CHECK-ARMV5TE-NEXT:    ldrb r12, [r1, #3]
+; CHECK-ARMV5TE-NEXT:    ldrb r1, [r0]
+; CHECK-ARMV5TE-NEXT:    ldrb r2, [r0, #1]
+; CHECK-ARMV5TE-NEXT:    ldrb r4, [r0, #2]
+; CHECK-ARMV5TE-NEXT:    ldrb r5, [r0, #3]
+; CHECK-ARMV5TE-NEXT:    ldrb r0, [r0, #5]
+; CHECK-ARMV5TE-NEXT:    strb r0, [r6, #5]
+; CHECK-ARMV5TE-NEXT:    strb r4, [r6, #2]
+; CHECK-ARMV5TE-NEXT:    strb r5, [r6, #3]
+; CHECK-ARMV5TE-NEXT:    strb r1, [r6]
+; CHECK-ARMV5TE-NEXT:    strb r2, [r6, #1]
+; CHECK-ARMV5TE-NEXT:    strb lr, [r6, #4]!
+; CHECK-ARMV5TE-NEXT:    strb r3, [r6, #2]
+; CHECK-ARMV5TE-NEXT:    strb r12, [r6, #3]
+; CHECK-ARMV5TE-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-ARMV5TE-NEXT:    .p2align 2
+; CHECK-ARMV5TE-NEXT:  @ %bb.1:
+; CHECK-ARMV5TE-NEXT:  .LCPI1_0:
+; CHECK-ARMV5TE-NEXT:    .long x_unaligned
+; CHECK-ARMV5TE-NEXT:  .LCPI1_1:
+; CHECK-ARMV5TE-NEXT:    .long y_unaligned
+;
+; CHECK-T2-LABEL: test_unaligned:
+; CHECK-T2:       @ %bb.0: @ %entry
+; CHECK-T2-NEXT:    movw r0, :lower16:x_unaligned
+; CHECK-T2-NEXT:    movw r2, :lower16:y_unaligned
+; CHECK-T2-NEXT:    movt r0, :upper16:x_unaligned
+; CHECK-T2-NEXT:    movt r2, :upper16:y_unaligned
+; CHECK-T2-NEXT:    ldr r1, [r0]
+; CHECK-T2-NEXT:    ldr r0, [r0, #4]
+; CHECK-T2-NEXT:    str r0, [r2, #4]
+; CHECK-T2-NEXT:    str r1, [r2]
+; CHECK-T2-NEXT:    bx lr
+;
+; CHECK-ARMV4T-LABEL: test_unaligned:
+; CHECK-ARMV4T:       @ %bb.0: @ %entry
+; CHECK-ARMV4T-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-ARMV4T-NEXT:    push {r4, r5, r6, lr}
+; CHECK-ARMV4T-NEXT:    ldr r0, .LCPI1_0
+; CHECK-ARMV4T-NEXT:    ldr r6, .LCPI1_1
+; CHECK-ARMV4T-NEXT:    mov r1, r0
+; CHECK-ARMV4T-NEXT:    ldrb lr, [r1, #4]!
+; CHECK-ARMV4T-NEXT:    ldrb r3, [r1, #2]
+; CHECK-ARMV4T-NEXT:    ldrb r12, [r1, #3]
+; CHECK-ARMV4T-NEXT:    ldrb r1, [r0]
+; CHECK-ARMV4T-NEXT:    ldrb r2, [r0, #1]
+; CHECK-ARMV4T-NEXT:    ldrb r4, [r0, #2]
+; CHECK-ARMV4T-NEXT:    ldrb r5, [r0, #3]
+; CHECK-ARMV4T-NEXT:    ldrb r0, [r0, #5]
+; CHECK-ARMV4T-NEXT:    strb r0, [r6, #5]
+; CHECK-ARMV4T-NEXT:    strb r4, [r6, #2]
+; CHECK-ARMV4T-NEXT:    strb r5, [r6, #3]
+; CHECK-ARMV4T-NEXT:    strb r1, [r6]
+; CHECK-ARMV4T-NEXT:    strb r2, [r6, #1]
+; CHECK-ARMV4T-NEXT:    strb lr, [r6, #4]!
+; CHECK-ARMV4T-NEXT:    strb r3, [r6, #2]
+; CHECK-ARMV4T-NEXT:    strb r12, [r6, #3]
+; CHECK-ARMV4T-NEXT:    pop {r4, r5, r6, lr}
+; CHECK-ARMV4T-NEXT:    bx lr
+; CHECK-ARMV4T-NEXT:    .p2align 2
+; CHECK-ARMV4T-NEXT:  @ %bb.1:
+; CHECK-ARMV4T-NEXT:  .LCPI1_0:
+; CHECK-ARMV4T-NEXT:    .long x_unaligned
+; CHECK-ARMV4T-NEXT:  .LCPI1_1:
+; CHECK-ARMV4T-NEXT:    .long y_unaligned
+;
+; CHECK-ARMV7-LABEL: test_unaligned:
+; CHECK-ARMV7:       @ %bb.0: @ %entry
+; CHECK-ARMV7-NEXT:    movw r0, :lower16:x_unaligned
+; CHECK-ARMV7-NEXT:    movw r2, :lower16:y_unaligned
+; CHECK-ARMV7-NEXT:    movt r0, :upper16:x_unaligned
+; CHECK-ARMV7-NEXT:    movt r2, :upper16:y_unaligned
+; CHECK-ARMV7-NEXT:    ldr r1, [r0]
+; CHECK-ARMV7-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARMV7-NEXT:    str r0, [r2, #4]
+; CHECK-ARMV7-NEXT:    str r1, [r2]
+; CHECK-ARMV7-NEXT:    bx lr
+;
+; CHECK-ARMV7-STRICT-LABEL: test_unaligned:
+; CHECK-ARMV7-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV7-STRICT-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-ARMV7-STRICT-NEXT:    push {r4, r5, r6, lr}
+; CHECK-ARMV7-STRICT-NEXT:    movw r0, :lower16:x_unaligned
+; CHECK-ARMV7-STRICT-NEXT:    movw r6, :lower16:y_unaligned
+; CHECK-ARMV7-STRICT-NEXT:    movt r0, :upper16:x_unaligned
+; CHECK-ARMV7-STRICT-NEXT:    movt r6, :upper16:y_unaligned
+; CHECK-ARMV7-STRICT-NEXT:    mov r1, r0
+; CHECK-ARMV7-STRICT-NEXT:    ldrb r12, [r1, #4]!
+; CHECK-ARMV7-STRICT-NEXT:    ldrb r3, [r0]
+; CHECK-ARMV7-STRICT-NEXT:    ldrb lr, [r0, #1]
+; CHECK-ARMV7-STRICT-NEXT:    ldrb r2, [r0, #2]
+; CHECK-ARMV7-STRICT-NEXT:    ldrb r4, [r0, #3]
+; CHECK-ARMV7-STRICT-NEXT:    ldrb r0, [r0, #5]
+; CHECK-ARMV7-STRICT-NEXT:    ldrb r5, [r1, #2]
+; CHECK-ARMV7-STRICT-NEXT:    ldrb r1, [r1, #3]
+; CHECK-ARMV7-STRICT-NEXT:    strb r0, [r6, #5]
+; CHECK-ARMV7-STRICT-NEXT:    strb r2, [r6, #2]
+; CHECK-ARMV7-STRICT-NEXT:    strb r4, [r6, #3]
+; CHECK-ARMV7-STRICT-NEXT:    strb r3, [r6]
+; CHECK-ARMV7-STRICT-NEXT:    strb lr, [r6, #1]
+; CHECK-ARMV7-STRICT-NEXT:    strb r12, [r6, #4]!
+; CHECK-ARMV7-STRICT-NEXT:    strb r5, [r6, #2]
+; CHECK-ARMV7-STRICT-NEXT:    strb r1, [r6, #3]
+; CHECK-ARMV7-STRICT-NEXT:    pop {r4, r5, r6, pc}
+;
+; CHECK-ARMV6-LABEL: test_unaligned:
+; CHECK-ARMV6:       @ %bb.0: @ %entry
+; CHECK-ARMV6-NEXT:    ldr r0, .LCPI1_0
+; CHECK-ARMV6-NEXT:    ldr r2, .LCPI1_1
+; CHECK-ARMV6-NEXT:    ldr r1, [r0]
+; CHECK-ARMV6-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARMV6-NEXT:    str r0, [r2, #4]
+; CHECK-ARMV6-NEXT:    str r1, [r2]
+; CHECK-ARMV6-NEXT:    bx lr
+; CHECK-ARMV6-NEXT:    .p2align 2
+; CHECK-ARMV6-NEXT:  @ %bb.1:
+; CHECK-ARMV6-NEXT:  .LCPI1_0:
+; CHECK-ARMV6-NEXT:    .long x_unaligned
+; CHECK-ARMV6-NEXT:  .LCPI1_1:
+; CHECK-ARMV6-NEXT:    .long y_unaligned
+;
+; CHECK-ARMV6-STRICT-LABEL: test_unaligned:
+; CHECK-ARMV6-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV6-STRICT-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-ARMV6-STRICT-NEXT:    push {r4, r5, r6, lr}
+; CHECK-ARMV6-STRICT-NEXT:    ldr r0, .LCPI1_0
+; CHECK-ARMV6-STRICT-NEXT:    ldr r6, .LCPI1_1
+; CHECK-ARMV6-STRICT-NEXT:    mov r1, r0
+; CHECK-ARMV6-STRICT-NEXT:    ldrb lr, [r1, #4]!
+; CHECK-ARMV6-STRICT-NEXT:    ldrb r3, [r1, #2]
+; CHECK-ARMV6-STRICT-NEXT:    ldrb r12, [r1, #3]
+; CHECK-ARMV6-STRICT-NEXT:    ldrb r1, [r0]
+; CHECK-ARMV6-STRICT-NEXT:    ldrb r2, [r0, #1]
+; CHECK-ARMV6-STRICT-NEXT:    ldrb r4, [r0, #2]
+; CHECK-ARMV6-STRICT-NEXT:    ldrb r5, [r0, #3]
+; CHECK-ARMV6-STRICT-NEXT:    ldrb r0, [r0, #5]
+; CHECK-ARMV6-STRICT-NEXT:    strb r0, [r6, #5]
+; CHECK-ARMV6-STRICT-NEXT:    strb r4, [r6, #2]
+; CHECK-ARMV6-STRICT-NEXT:    strb r5, [r6, #3]
+; CHECK-ARMV6-STRICT-NEXT:    strb r1, [r6]
+; CHECK-ARMV6-STRICT-NEXT:    strb r2, [r6, #1]
+; CHECK-ARMV6-STRICT-NEXT:    strb lr, [r6, #4]!
+; CHECK-ARMV6-STRICT-NEXT:    strb r3, [r6, #2]
+; CHECK-ARMV6-STRICT-NEXT:    strb r12, [r6, #3]
+; CHECK-ARMV6-STRICT-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-ARMV6-STRICT-NEXT:    .p2align 2
+; CHECK-ARMV6-STRICT-NEXT:  @ %bb.1:
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI1_0:
+; CHECK-ARMV6-STRICT-NEXT:    .long x_unaligned
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI1_1:
+; CHECK-ARMV6-STRICT-NEXT:    .long y_unaligned
+entry:
+  %0 = load volatile i64, ptr @x_unaligned, align 1
+  store volatile i64 %0, ptr @y_unaligned, align 1
+  ret void
+}
+
+define void @test_align_4() {
+; CHECK-ARMV5TE-LABEL: test_align_4:
+; CHECK-ARMV5TE:       @ %bb.0: @ %entry
+; CHECK-ARMV5TE-NEXT:    ldr r0, .LCPI2_0
+; CHECK-ARMV5TE-NEXT:    ldr r2, .LCPI2_1
+; CHECK-ARMV5TE-NEXT:    ldr r1, [r0]
+; CHECK-ARMV5TE-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARMV5TE-NEXT:    str r0, [r2, #4]
+; CHECK-ARMV5TE-NEXT:    str r1, [r2]
+; CHECK-ARMV5TE-NEXT:    bx lr
+; CHECK-ARMV5TE-NEXT:    .p2align 2
+; CHECK-ARMV5TE-NEXT:  @ %bb.1:
+; CHECK-ARMV5TE-NEXT:  .LCPI2_0:
+; CHECK-ARMV5TE-NEXT:    .long x_aligned_4
+; CHECK-ARMV5TE-NEXT:  .LCPI2_1:
+; CHECK-ARMV5TE-NEXT:    .long y_aligned_4
+;
+; CHECK-T2-LABEL: test_align_4:
+; CHECK-T2:       @ %bb.0: @ %entry
+; CHECK-T2-NEXT:    movw r0, :lower16:x_aligned_4
+; CHECK-T2-NEXT:    movw r2, :lower16:y_aligned_4
+; CHECK-T2-NEXT:    movt r0, :upper16:x_aligned_4
+; CHECK-T2-NEXT:    movt r2, :upper16:y_aligned_4
+; CHECK-T2-NEXT:    ldrd r0, r1, [r0]
+; CHECK-T2-NEXT:    strd r0, r1, [r2]
+; CHECK-T2-NEXT:    bx lr
+;
+; CHECK-ARMV4T-LABEL: test_align_4:
+; CHECK-ARMV4T:       @ %bb.0: @ %entry
+; CHECK-ARMV4T-NEXT:    ldr r0, .LCPI2_0
+; CHECK-ARMV4T-NEXT:    ldr r2, .LCPI2_1
+; CHECK-ARMV4T-NEXT:    ldr r1, [r0]
+; CHECK-ARMV4T-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARMV4T-NEXT:    str r0, [r2, #4]
+; CHECK-ARMV4T-NEXT:    str r1, [r2]
+; CHECK-ARMV4T-NEXT:    bx lr
+; CHECK-ARMV4T-NEXT:    .p2align 2
+; CHECK-ARMV4T-NEXT:  @ %bb.1:
+; CHECK-ARMV4T-NEXT:  .LCPI2_0:
+; CHECK-ARMV4T-NEXT:    .long x_aligned_4
+; CHECK-ARMV4T-NEXT:  .LCPI2_1:
+; CHECK-ARMV4T-NEXT:    .long y_aligned_4
+;
+; CHECK-ARMV7-LABEL: test_align_4:
+; CHECK-ARMV7:       @ %bb.0: @ %entry
+; CHECK-ARMV7-NEXT:    movw r0, :lower16:x_aligned_4
+; CHECK-ARMV7-NEXT:    movw r2, :lower16:y_aligned_4
+; CHECK-ARMV7-NEXT:    movt r0, :upper16:x_aligned_4
+; CHECK-ARMV7-NEXT:    movt r2, :upper16:y_aligned_4
+; CHECK-ARMV7-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV7-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV7-NEXT:    bx lr
+;
+; CHECK-ARMV7-STRICT-LABEL: test_align_4:
+; CHECK-ARMV7-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV7-STRICT-NEXT:    movw r0, :lower16:x_aligned_4
+; CHECK-ARMV7-STRICT-NEXT:    movw r2, :lower16:y_aligned_4
+; CHECK-ARMV7-STRICT-NEXT:    movt r0, :upper16:x_aligned_4
+; CHECK-ARMV7-STRICT-NEXT:    movt r2, :upper16:y_aligned_4
+; CHECK-ARMV7-STRICT-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV7-STRICT-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV7-STRICT-NEXT:    bx lr
+;
+; CHECK-ARMV6-LABEL: test_align_4:
+; CHECK-ARMV6:       @ %bb.0: @ %entry
+; CHECK-ARMV6-NEXT:    ldr r0, .LCPI2_0
+; CHECK-ARMV6-NEXT:    ldr r2, .LCPI2_1
+; CHECK-ARMV6-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV6-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV6-NEXT:    bx lr
+; CHECK-ARMV6-NEXT:    .p2align 2
+; CHECK-ARMV6-NEXT:  @ %bb.1:
+; CHECK-ARMV6-NEXT:  .LCPI2_0:
+; CHECK-ARMV6-NEXT:    .long x_aligned_4
+; CHECK-ARMV6-NEXT:  .LCPI2_1:
+; CHECK-ARMV6-NEXT:    .long y_aligned_4
+;
+; CHECK-ARMV6-STRICT-LABEL: test_align_4:
+; CHECK-ARMV6-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV6-STRICT-NEXT:    ldr r0, .LCPI2_0
+; CHECK-ARMV6-STRICT-NEXT:    ldr r2, .LCPI2_1
+; CHECK-ARMV6-STRICT-NEXT:    ldr r1, [r0]
+; CHECK-ARMV6-STRICT-NEXT:    ldr r0, [r0, #4]
+; CHECK-ARMV6-STRICT-NEXT:    str r0, [r2, #4]
+; CHECK-ARMV6-STRICT-NEXT:    str r1, [r2]
+; CHECK-ARMV6-STRICT-NEXT:    bx lr
+; CHECK-ARMV6-STRICT-NEXT:    .p2align 2
+; CHECK-ARMV6-STRICT-NEXT:  @ %bb.1:
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI2_0:
+; CHECK-ARMV6-STRICT-NEXT:    .long x_aligned_4
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI2_1:
+; CHECK-ARMV6-STRICT-NEXT:    .long y_aligned_4
+entry:
+  %0 = load volatile i64, ptr @x_aligned_4, align 4
+  store volatile i64 %0, ptr @y_aligned_4, align 4
+  ret void
+}
+
 define void @test_offset() {
+; CHECK-ARMV5TE-LABEL: test_offset:
+; CHECK-ARMV5TE:       @ %bb.0: @ %entry
+; CHECK-ARMV5TE-NEXT:    ldr r0, .LCPI3_0
+; CHECK-ARMV5TE-NEXT:    ldr r2, .LCPI3_1
+; CHECK-ARMV5TE-NEXT:    ldrd r0, r1, [r0, #-4]
+; CHECK-ARMV5TE-NEXT:    strd r0, r1, [r2, #-4]
+; CHECK-ARMV5TE-NEXT:    bx lr
+; CHECK-ARMV5TE-NEXT:    .p2align 2
+; CHECK-ARMV5TE-NEXT:  @ %bb.1:
+; CHECK-ARMV5TE-NEXT:  .LCPI3_0:
+; CHECK-ARMV5TE-NEXT:    .long x
+; CHECK-ARMV5TE-NEXT:  .LCPI3_1:
+; CHECK-ARMV5TE-NEXT:    .long y
+;
+; CHECK-T2-LABEL: test_offset:
+; CHECK-T2:       @ %bb.0: @ %entry
+; CHECK-T2-NEXT:    movw r0, :lower16:x
+; CHECK-T2-NEXT:    movw r2, :lower16:y
+; CHECK-T2-NEXT:    movt r0, :upper16:x
+; CHECK-T2-NEXT:    movt r2, :upper16:y
+; CHECK-T2-NEXT:    ldrd r0, r1, [r0, #-4]
+; CHECK-T2-NEXT:    strd r0, r1, [r2, #-4]
+; CHECK-T2-NEXT:    bx lr
+;
+; CHECK-ARMV4T-LABEL: test_offset:
+; CHECK-ARMV4T:       @ %bb.0: @ %entry
+; CHECK-ARMV4T-NEXT:    ldr r0, .LCPI3_0
+; CHECK-ARMV4T-NEXT:    ldr r2, .LCPI3_1
+; CHECK-ARMV4T-NEXT:    ldr r1, [r0, #-4]
+; CHECK-ARMV4T-NEXT:    ldr r0, [r0]
+; CHECK-ARMV4T-NEXT:    str r0, [r2]
+; CHECK-ARMV4T-NEXT:    str r1, [r2, #-4]
+; CHECK-ARMV4T-NEXT:    bx lr
+; CHECK-ARMV4T-NEXT:    .p2align 2
+; CHECK-ARMV4T-NEXT:  @ %bb.1:
+; CHECK-ARMV4T-NEXT:  .LCPI3_0:
+; CHECK-ARMV4T-NEXT:    .long x
+; CHECK-ARMV4T-NEXT:  .LCPI3_1:
+; CHECK-ARMV4T-NEXT:    .long y
+;
+; CHECK-ARMV7-LABEL: test_offset:
+; CHECK-ARMV7:       @ %bb.0: @ %entry
+; CHECK-ARMV7-NEXT:    movw r0, :lower16:x
+; CHECK-ARMV7-NEXT:    movw r2, :lower16:y
+; CHECK-ARMV7-NEXT:    movt r0, :upper16:x
+; CHECK-ARMV7-NEXT:    movt r2, :upper16:y
+; CHECK-ARMV7-NEXT:    ldrd r0, r1, [r0, #-4]
+; CHECK-ARMV7-NEXT:    strd r0, r1, [r2, #-4]
+; CHECK-ARMV7-NEXT:    bx lr
+;
+; CHECK-ARMV7-STRICT-LABEL: test_offset:
+; CHECK-ARMV7-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV7-STRICT-NEXT:    movw r0, :lower16:x
+; CHECK-ARMV7-STRICT-NEXT:    movw r2, :lower16:y
+; CHECK-ARMV7-STRICT-NEXT:    movt r0, :upper16:x
+; CHECK-ARMV7-STRICT-NEXT:    movt r2, :upper16:y
+; CHECK-ARMV7-STRICT-NEXT:    ldrd r0, r1, [r0, #-4]
+; CHECK-ARMV7-STRICT-NEXT:    strd r0, r1, [r2, #-4]
+; CHECK-ARMV7-STRICT-NEXT:    bx lr
+;
+; CHECK-ARMV6-LABEL: test_offset:
+; CHECK-ARMV6:       @ %bb.0: @ %entry
+; CHECK-ARMV6-NEXT:    ldr r0, .LCPI3_0
+; CHECK-ARMV6-NEXT:    ldr r2, .LCPI3_1
+; CHECK-ARMV6-NEXT:    ldrd r0, r1, [r0, #-4]
+; CHECK-ARMV6-NEXT:    strd r0, r1, [r2, #-4]
+; CHECK-ARMV6-NEXT:    bx lr
+; CHECK-ARMV6-NEXT:    .p2align 2
+; CHECK-ARMV6-NEXT:  @ %bb.1:
+; CHECK-ARMV6-NEXT:  .LCPI3_0:
+; CHECK-ARMV6-NEXT:    .long x
+; CHECK-ARMV6-NEXT:  .LCPI3_1:
+; CHECK-ARMV6-NEXT:    .long y
+;
+; CHECK-ARMV6-STRICT-LABEL: test_offset:
+; CHECK-ARMV6-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV6-STRICT-NEXT:    ldr r0, .LCPI3_0
+; CHECK-ARMV6-STRICT-NEXT:    ldr r2, .LCPI3_1
+; CHECK-ARMV6-STRICT-NEXT:    ldrd r0, r1, [r0, #-4]
+; CHECK-ARMV6-STRICT-NEXT:    strd r0, r1, [r2, #-4]
+; CHECK-ARMV6-STRICT-NEXT:    bx lr
+; CHECK-ARMV6-STRICT-NEXT:    .p2align 2
+; CHECK-ARMV6-STRICT-NEXT:  @ %bb.1:
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI3_0:
+; CHECK-ARMV6-STRICT-NEXT:    .long x
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI3_1:
+; CHECK-ARMV6-STRICT-NEXT:    .long y
 entry:
-; CHECK-LABEL: test_offset:
-; CHECK-ARMV5TE:      ldr [[ADDR0:r[0-9]+]]
-; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]], #-4]
-; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], [[[ADDR1]], #-4]
-; CHECK-T2:           movw [[ADDR0:r[0-9]+]], :lower16:x
-; CHECK-T2-NEXT:      movw [[ADDR1:r[0-9]+]], :lower16:y
-; CHECK-T2-NEXT:      movt [[ADDR0]], :upper16:x
-; CHECK-T2-NEXT:      movt [[ADDR1]], :upper16:y
-; CHECK-T2-NEXT:      ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]], #-4]
-; CHECK-T2-NEXT:      strd [[R0]], [[R1]], [[[ADDR1]], #-4]
-; CHECK-ARMV4T:       ldr [[ADDR0:r[0-9]+]]
-; CHECK-ARMV4T-NEXT:  ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV4T-NEXT:  ldr [[R0:r[0-9]+]], [[[ADDR0]], #-4]
-; CHECK-ARMV4T-NEXT:  ldr [[R1:r[0-9]+]], [[[ADDR0]]]
-; CHECK-ARMV4T-NEXT:  str [[R1]], [[[ADDR1]]]
-; CHECK-ARMV4T-NEXT:  str [[R0]], [[[ADDR1]], #-4]
   %0 = load volatile i64, ptr getelementptr (i8, ptr @x, i32 -4), align 8
   store volatile i64 %0, ptr getelementptr (i8, ptr @y, i32 -4), align 8
   ret void
 }
 
 define void @test_offset_1() {
-; CHECK-LABEL: test_offset_1:
-; CHECK-ARMV5TE:      ldr [[ADDR0:r[0-9]+]]
-; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]], #255]
-; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], [[[ADDR1]], #255]
-; CHECK-T2:           adds [[ADDR0:r[0-9]+]], #255
-; CHECK-T2-NEXT:      adds [[ADDR1:r[0-9]+]], #255
-; CHECK-T2-NEXT:      ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]]]
-; CHECK-T2-NEXT:      strd [[R0]], [[R1]], [[[ADDR1]]]
-; CHECK-ARMV4T:       ldr [[ADDR0:r[0-9]+]]
-; CHECK-ARMV4T-NEXT:  ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV4T-NEXT:  ldr [[R0:r[0-9]+]], [[[ADDR0]], #255]
-; CHECK-ARMV4T-NEXT:  ldr [[R1:r[0-9]+]], [[[ADDR0]], #259]
-; CHECK-ARMV4T-NEXT:  str [[R1]], [[[ADDR1]], #259]
-; CHECK-ARMV4T-NEXT:  str [[R0]], [[[ADDR1]], #255]
+; CHECK-ARMV5TE-LABEL: test_offset_1:
+; CHECK-ARMV5TE:       @ %bb.0: @ %entry
+; CHECK-ARMV5TE-NEXT:    ldr r0, .LCPI4_0
+; CHECK-ARMV5TE-NEXT:    ldr r2, .LCPI4_1
+; CHECK-ARMV5TE-NEXT:    ldrd r0, r1, [r0, #255]
+; CHECK-ARMV5TE-NEXT:    strd r0, r1, [r2, #255]
+; CHECK-ARMV5TE-NEXT:    bx lr
+; CHECK-ARMV5TE-NEXT:    .p2align 2
+; CHECK-ARMV5TE-NEXT:  @ %bb.1:
+; CHECK-ARMV5TE-NEXT:  .LCPI4_0:
+; CHECK-ARMV5TE-NEXT:    .long x
+; CHECK-ARMV5TE-NEXT:  .LCPI4_1:
+; CHECK-ARMV5TE-NEXT:    .long y
+;
+; CHECK-T2-LABEL: test_offset_1:
+; CHECK-T2:       @ %bb.0: @ %entry
+; CHECK-T2-NEXT:    movw r0, :lower16:y
+; CHECK-T2-NEXT:    movw r1, :lower16:x
+; CHECK-T2-NEXT:    movt r0, :upper16:y
+; CHECK-T2-NEXT:    movt r1, :upper16:x
+; CHECK-T2-NEXT:    adds r1, #255
+; CHECK-T2-NEXT:    adds r0, #255
+; CHECK-T2-NEXT:    ldrd r1, r2, [r1]
+; CHECK-T2-NEXT:    strd r1, r2, [r0]
+; CHECK-T2-NEXT:    bx lr
+;
+; CHECK-ARMV4T-LABEL: test_offset_1:
+; CHECK-ARMV4T:       @ %bb.0: @ %entry
+; CHECK-ARMV4T-NEXT:    ldr r0, .LCPI4_0
+; CHECK-ARMV4T-NEXT:    ldr r2, .LCPI4_1
+; CHECK-ARMV4T-NEXT:    ldr r1, [r0, #255]
+; CHECK-ARMV4T-NEXT:    ldr r0, [r0, #259]
+; CHECK-ARMV4T-NEXT:    str r0, [r2, #259]
+; CHECK-ARMV4T-NEXT:    str r1, [r2, #255]
+; CHECK-ARMV4T-NEXT:    bx lr
+; CHECK-ARMV4T-NEXT:    .p2align 2
+; CHECK-ARMV4T-NEXT:  @ %bb.1:
+; CHECK-ARMV4T-NEXT:  .LCPI4_0:
+; CHECK-ARMV4T-NEXT:    .long x
+; CHECK-ARMV4T-NEXT:  .LCPI4_1:
+; CHECK-ARMV4T-NEXT:    .long y
+;
+; CHECK-ARMV7-LABEL: test_offset_1:
+; CHECK-ARMV7:       @ %bb.0: @ %entry
+; CHECK-ARMV7-NEXT:    movw r0, :lower16:x
+; CHECK-ARMV7-NEXT:    movw r2, :lower16:y
+; CHECK-ARMV7-NEXT:    movt r0, :upper16:x
+; CHECK-ARMV7-NEXT:    movt r2, :upper16:y
+; CHECK-ARMV7-NEXT:    ldrd r0, r1, [r0, #255]
+; CHECK-ARMV7-NEXT:    strd r0, r1, [r2, #255]
+; CHECK-ARMV7-NEXT:    bx lr
+;
+; CHECK-ARMV7-STRICT-LABEL: test_offset_1:
+; CHECK-ARMV7-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV7-STRICT-NEXT:    movw r0, :lower16:x
+; CHECK-ARMV7-STRICT-NEXT:    movw r2, :lower16:y
+; CHECK-ARMV7-STRICT-NEXT:    movt r0, :upper16:x
+; CHECK-ARMV7-STRICT-NEXT:    movt r2, :upper16:y
+; CHECK-ARMV7-STRICT-NEXT:    ldrd r0, r1, [r0, #255]
+; CHECK-ARMV7-STRICT-NEXT:    strd r0, r1, [r2, #255]
+; CHECK-ARMV7-STRICT-NEXT:    bx lr
+;
+; CHECK-ARMV6-LABEL: test_offset_1:
+; CHECK-ARMV6:       @ %bb.0: @ %entry
+; CHECK-ARMV6-NEXT:    ldr r0, .LCPI4_0
+; CHECK-ARMV6-NEXT:    ldr r2, .LCPI4_1
+; CHECK-ARMV6-NEXT:    ldrd r0, r1, [r0, #255]
+; CHECK-ARMV6-NEXT:    strd r0, r1, [r2, #255]
+; CHECK-ARMV6-NEXT:    bx lr
+; CHECK-ARMV6-NEXT:    .p2align 2
+; CHECK-ARMV6-NEXT:  @ %bb.1:
+; CHECK-ARMV6-NEXT:  .LCPI4_0:
+; CHECK-ARMV6-NEXT:    .long x
+; CHECK-ARMV6-NEXT:  .LCPI4_1:
+; CHECK-ARMV6-NEXT:    .long y
+;
+; CHECK-ARMV6-STRICT-LABEL: test_offset_1:
+; CHECK-ARMV6-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV6-STRICT-NEXT:    ldr r0, .LCPI4_0
+; CHECK-ARMV6-STRICT-NEXT:    ldr r2, .LCPI4_1
+; CHECK-ARMV6-STRICT-NEXT:    ldrd r0, r1, [r0, #255]
+; CHECK-ARMV6-STRICT-NEXT:    strd r0, r1, [r2, #255]
+; CHECK-ARMV6-STRICT-NEXT:    bx lr
+; CHECK-ARMV6-STRICT-NEXT:    .p2align 2
+; CHECK-ARMV6-STRICT-NEXT:  @ %bb.1:
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI4_0:
+; CHECK-ARMV6-STRICT-NEXT:    .long x
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI4_1:
+; CHECK-ARMV6-STRICT-NEXT:    .long y
+
 entry:
   %0 = load volatile i64, ptr getelementptr (i8, ptr @x, i32 255), align 8
   store volatile i64 %0, ptr getelementptr (i8, ptr @y, i32 255), align 8
@@ -76,25 +571,104 @@ entry:
 }
 
 define void @test_offset_2() {
-; CHECK-LABEL: test_offset_2:
-; CHECK-ARMV5TE:      ldr [[ADDR0:r[0-9]+]]
-; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV5TE-NEXT: add [[ADDR0]], [[ADDR0]], #256
-; CHECK-ARMV5TE-NEXT: add [[ADDR1]], [[ADDR1]], #256
-; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]]]
-; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], [[[ADDR1]]]
-; CHECK-T2:           movw [[ADDR0:r[0-9]+]], :lower16:x
-; CHECK-T2-NEXT:      movw [[ADDR1:r[0-9]+]], :lower16:y
-; CHECK-T2-NEXT:      movt [[ADDR0]], :upper16:x
-; CHECK-T2-NEXT:      movt [[ADDR1]], :upper16:y
-; CHECK-T2-NEXT:      ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]], #256]
-; CHECK-T2-NEXT:      strd [[R0]], [[R1]], [[[ADDR1]], #256]
-; CHECK-ARMV4T:       ldr [[ADDR0:r[0-9]+]]
-; CHECK-ARMV4T-NEXT:  ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV4T-NEXT:  ldr [[R0:r[0-9]+]], [[[ADDR0]], #256]
-; CHECK-ARMV4T-NEXT:  ldr [[R1:r[0-9]+]], [[[ADDR0]], #260]
-; CHECK-ARMV4T-NEXT:  str [[R1]], [[[ADDR1]], #260]
-; CHECK-ARMV4T-NEXT:  str [[R0]], [[[ADDR1]], #256]
+; CHECK-ARMV5TE-LABEL: test_offset_2:
+; CHECK-ARMV5TE:       @ %bb.0: @ %entry
+; CHECK-ARMV5TE-NEXT:    ldr r0, .LCPI5_0
+; CHECK-ARMV5TE-NEXT:    ldr r2, .LCPI5_1
+; CHECK-ARMV5TE-NEXT:    add r0, r0, #256
+; CHECK-ARMV5TE-NEXT:    add r2, r2, #256
+; CHECK-ARMV5TE-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV5TE-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV5TE-NEXT:    bx lr
+; CHECK-ARMV5TE-NEXT:    .p2align 2
+; CHECK-ARMV5TE-NEXT:  @ %bb.1:
+; CHECK-ARMV5TE-NEXT:  .LCPI5_0:
+; CHECK-ARMV5TE-NEXT:    .long x
+; CHECK-ARMV5TE-NEXT:  .LCPI5_1:
+; CHECK-ARMV5TE-NEXT:    .long y
+;
+; CHECK-T2-LABEL: test_offset_2:
+; CHECK-T2:       @ %bb.0: @ %entry
+; CHECK-T2-NEXT:    movw r0, :lower16:x
+; CHECK-T2-NEXT:    movw r2, :lower16:y
+; CHECK-T2-NEXT:    movt r0, :upper16:x
+; CHECK-T2-NEXT:    movt r2, :upper16:y
+; CHECK-T2-NEXT:    ldrd r0, r1, [r0, #256]
+; CHECK-T2-NEXT:    strd r0, r1, [r2, #256]
+; CHECK-T2-NEXT:    bx lr
+;
+; CHECK-ARMV4T-LABEL: test_offset_2:
+; CHECK-ARMV4T:       @ %bb.0: @ %entry
+; CHECK-ARMV4T-NEXT:    ldr r0, .LCPI5_0
+; CHECK-ARMV4T-NEXT:    ldr r2, .LCPI5_1
+; CHECK-ARMV4T-NEXT:    ldr r1, [r0, #256]
+; CHECK-ARMV4T-NEXT:    ldr r0, [r0, #260]
+; CHECK-ARMV4T-NEXT:    str r0, [r2, #260]
+; CHECK-ARMV4T-NEXT:    str r1, [r2, #256]
+; CHECK-ARMV4T-NEXT:    bx lr
+; CHECK-ARMV4T-NEXT:    .p2align 2
+; CHECK-ARMV4T-NEXT:  @ %bb.1:
+; CHECK-ARMV4T-NEXT:  .LCPI5_0:
+; CHECK-ARMV4T-NEXT:    .long x
+; CHECK-ARMV4T-NEXT:  .LCPI5_1:
+; CHECK-ARMV4T-NEXT:    .long y
+;
+; CHECK-ARMV7-LABEL: test_offset_2:
+; CHECK-ARMV7:       @ %bb.0: @ %entry
+; CHECK-ARMV7-NEXT:    movw r0, :lower16:x
+; CHECK-ARMV7-NEXT:    movw r2, :lower16:y
+; CHECK-ARMV7-NEXT:    movt r0, :upper16:x
+; CHECK-ARMV7-NEXT:    movt r2, :upper16:y
+; CHECK-ARMV7-NEXT:    add r0, r0, #256
+; CHECK-ARMV7-NEXT:    add r2, r2, #256
+; CHECK-ARMV7-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV7-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV7-NEXT:    bx lr
+;
+; CHECK-ARMV7-STRICT-LABEL: test_offset_2:
+; CHECK-ARMV7-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV7-STRICT-NEXT:    movw r0, :lower16:x
+; CHECK-ARMV7-STRICT-NEXT:    movw r2, :lower16:y
+; CHECK-ARMV7-STRICT-NEXT:    movt r0, :upper16:x
+; CHECK-ARMV7-STRICT-NEXT:    movt r2, :upper16:y
+; CHECK-ARMV7-STRICT-NEXT:    add r0, r0, #256
+; CHECK-ARMV7-STRICT-NEXT:    add r2, r2, #256
+; CHECK-ARMV7-STRICT-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV7-STRICT-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV7-STRICT-NEXT:    bx lr
+;
+; CHECK-ARMV6-LABEL: test_offset_2:
+; CHECK-ARMV6:       @ %bb.0: @ %entry
+; CHECK-ARMV6-NEXT:    ldr r0, .LCPI5_0
+; CHECK-ARMV6-NEXT:    ldr r2, .LCPI5_1
+; CHECK-ARMV6-NEXT:    add r0, r0, #256
+; CHECK-ARMV6-NEXT:    add r2, r2, #256
+; CHECK-ARMV6-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV6-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV6-NEXT:    bx lr
+; CHECK-ARMV6-NEXT:    .p2align 2
+; CHECK-ARMV6-NEXT:  @ %bb.1:
+; CHECK-ARMV6-NEXT:  .LCPI5_0:
+; CHECK-ARMV6-NEXT:    .long x
+; CHECK-ARMV6-NEXT:  .LCPI5_1:
+; CHECK-ARMV6-NEXT:    .long y
+;
+; CHECK-ARMV6-STRICT-LABEL: test_offset_2:
+; CHECK-ARMV6-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV6-STRICT-NEXT:    ldr r0, .LCPI5_0
+; CHECK-ARMV6-STRICT-NEXT:    ldr r2, .LCPI5_1
+; CHECK-ARMV6-STRICT-NEXT:    add r0, r0, #256
+; CHECK-ARMV6-STRICT-NEXT:    add r2, r2, #256
+; CHECK-ARMV6-STRICT-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV6-STRICT-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV6-STRICT-NEXT:    bx lr
+; CHECK-ARMV6-STRICT-NEXT:    .p2align 2
+; CHECK-ARMV6-STRICT-NEXT:  @ %bb.1:
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI5_0:
+; CHECK-ARMV6-STRICT-NEXT:    .long x
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI5_1:
+; CHECK-ARMV6-STRICT-NEXT:    .long y
+
 entry:
   %0 = load volatile i64, ptr getelementptr (i8, ptr @x, i32 256), align 8
   store volatile i64 %0, ptr getelementptr (i8, ptr @y, i32 256), align 8
@@ -102,25 +676,104 @@ entry:
 }
 
 define void @test_offset_3() {
-; CHECK-LABEL: test_offset_3:
-; CHECK-ARMV5TE:      ldr [[ADDR0:r[0-9]+]]
-; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV5TE-NEXT: add [[ADDR0]], [[ADDR0]], #1020
-; CHECK-ARMV5TE-NEXT: add [[ADDR1]], [[ADDR1]], #1020
-; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]]]
-; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], [[[ADDR1]]]
-; CHECK-T2:           movw [[ADDR0:r[0-9]+]], :lower16:x
-; CHECK-T2-NEXT:      movw [[ADDR1:r[0-9]+]], :lower16:y
-; CHECK-T2-NEXT:      movt [[ADDR0]], :upper16:x
-; CHECK-T2-NEXT:      movt [[ADDR1]], :upper16:y
-; CHECK-T2-NEXT:      ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]], #1020]
-; CHECK-T2-NEXT:      strd [[R0]], [[R1]], [[[ADDR1]], #1020]
-; CHECK-ARMV4T:       ldr [[ADDR0:r[0-9]+]]
-; CHECK-ARMV4T-NEXT:  ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV4T-NEXT:  ldr [[R0:r[0-9]+]], [[[ADDR0]], #1020]
-; CHECK-ARMV4T-NEXT:  ldr [[R1:r[0-9]+]], [[[ADDR0]], #1024]
-; CHECK-ARMV4T-NEXT:  str [[R1]], [[[ADDR1]], #1024]
-; CHECK-ARMV4T-NEXT:  str [[R0]], [[[ADDR1]], #1020]
+; CHECK-ARMV5TE-LABEL: test_offset_3:
+; CHECK-ARMV5TE:       @ %bb.0: @ %entry
+; CHECK-ARMV5TE-NEXT:    ldr r0, .LCPI6_0
+; CHECK-ARMV5TE-NEXT:    ldr r2, .LCPI6_1
+; CHECK-ARMV5TE-NEXT:    add r0, r0, #1020
+; CHECK-ARMV5TE-NEXT:    add r2, r2, #1020
+; CHECK-ARMV5TE-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV5TE-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV5TE-NEXT:    bx lr
+; CHECK-ARMV5TE-NEXT:    .p2align 2
+; CHECK-ARMV5TE-NEXT:  @ %bb.1:
+; CHECK-ARMV5TE-NEXT:  .LCPI6_0:
+; CHECK-ARMV5TE-NEXT:    .long x
+; CHECK-ARMV5TE-NEXT:  .LCPI6_1:
+; CHECK-ARMV5TE-NEXT:    .long y
+;
+; CHECK-T2-LABEL: test_offset_3:
+; CHECK-T2:       @ %bb.0: @ %entry
+; CHECK-T2-NEXT:    movw r0, :lower16:x
+; CHECK-T2-NEXT:    movw r2, :lower16:y
+; CHECK-T2-NEXT:    movt r0, :upper16:x
+; CHECK-T2-NEXT:    movt r2, :upper16:y
+; CHECK-T2-NEXT:    ldrd r0, r1, [r0, #1020]
+; CHECK-T2-NEXT:    strd r0, r1, [r2, #1020]
+; CHECK-T2-NEXT:    bx lr
+;
+; CHECK-ARMV4T-LABEL: test_offset_3:
+; CHECK-ARMV4T:       @ %bb.0: @ %entry
+; CHECK-ARMV4T-NEXT:    ldr r0, .LCPI6_0
+; CHECK-ARMV4T-NEXT:    ldr r2, .LCPI6_1
+; CHECK-ARMV4T-NEXT:    ldr r1, [r0, #1020]
+; CHECK-ARMV4T-NEXT:    ldr r0, [r0, #1024]
+; CHECK-ARMV4T-NEXT:    str r0, [r2, #1024]
+; CHECK-ARMV4T-NEXT:    str r1, [r2, #1020]
+; CHECK-ARMV4T-NEXT:    bx lr
+; CHECK-ARMV4T-NEXT:    .p2align 2
+; CHECK-ARMV4T-NEXT:  @ %bb.1:
+; CHECK-ARMV4T-NEXT:  .LCPI6_0:
+; CHECK-ARMV4T-NEXT:    .long x
+; CHECK-ARMV4T-NEXT:  .LCPI6_1:
+; CHECK-ARMV4T-NEXT:    .long y
+;
+; CHECK-ARMV7-LABEL: test_offset_3:
+; CHECK-ARMV7:       @ %bb.0: @ %entry
+; CHECK-ARMV7-NEXT:    movw r0, :lower16:x
+; CHECK-ARMV7-NEXT:    movw r2, :lower16:y
+; CHECK-ARMV7-NEXT:    movt r0, :upper16:x
+; CHECK-ARMV7-NEXT:    movt r2, :upper16:y
+; CHECK-ARMV7-NEXT:    add r0, r0, #1020
+; CHECK-ARMV7-NEXT:    add r2, r2, #1020
+; CHECK-ARMV7-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV7-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV7-NEXT:    bx lr
+;
+; CHECK-ARMV7-STRICT-LABEL: test_offset_3:
+; CHECK-ARMV7-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV7-STRICT-NEXT:    movw r0, :lower16:x
+; CHECK-ARMV7-STRICT-NEXT:    movw r2, :lower16:y
+; CHECK-ARMV7-STRICT-NEXT:    movt r0, :upper16:x
+; CHECK-ARMV7-STRICT-NEXT:    movt r2, :upper16:y
+; CHECK-ARMV7-STRICT-NEXT:    add r0, r0, #1020
+; CHECK-ARMV7-STRICT-NEXT:    add r2, r2, #1020
+; CHECK-ARMV7-STRICT-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV7-STRICT-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV7-STRICT-NEXT:    bx lr
+;
+; CHECK-ARMV6-LABEL: test_offset_3:
+; CHECK-ARMV6:       @ %bb.0: @ %entry
+; CHECK-ARMV6-NEXT:    ldr r0, .LCPI6_0
+; CHECK-ARMV6-NEXT:    ldr r2, .LCPI6_1
+; CHECK-ARMV6-NEXT:    add r0, r0, #1020
+; CHECK-ARMV6-NEXT:    add r2, r2, #1020
+; CHECK-ARMV6-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV6-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV6-NEXT:    bx lr
+; CHECK-ARMV6-NEXT:    .p2align 2
+; CHECK-ARMV6-NEXT:  @ %bb.1:
+; CHECK-ARMV6-NEXT:  .LCPI6_0:
+; CHECK-ARMV6-NEXT:    .long x
+; CHECK-ARMV6-NEXT:  .LCPI6_1:
+; CHECK-ARMV6-NEXT:    .long y
+;
+; CHECK-ARMV6-STRICT-LABEL: test_offset_3:
+; CHECK-ARMV6-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV6-STRICT-NEXT:    ldr r0, .LCPI6_0
+; CHECK-ARMV6-STRICT-NEXT:    ldr r2, .LCPI6_1
+; CHECK-ARMV6-STRICT-NEXT:    add r0, r0, #1020
+; CHECK-ARMV6-STRICT-NEXT:    add r2, r2, #1020
+; CHECK-ARMV6-STRICT-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV6-STRICT-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV6-STRICT-NEXT:    bx lr
+; CHECK-ARMV6-STRICT-NEXT:    .p2align 2
+; CHECK-ARMV6-STRICT-NEXT:  @ %bb.1:
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI6_0:
+; CHECK-ARMV6-STRICT-NEXT:    .long x
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI6_1:
+; CHECK-ARMV6-STRICT-NEXT:    .long y
+
 entry:
   %0 = load volatile i64, ptr getelementptr (i8, ptr @x, i32 1020), align 8
   store volatile i64 %0, ptr getelementptr (i8, ptr @y, i32 1020), align 8
@@ -128,27 +781,106 @@ entry:
 }
 
 define void @test_offset_4() {
-; CHECK-LABEL: test_offset_4:
-; CHECK-ARMV5TE:      ldr [[ADDR0:r[0-9]+]]
-; CHECK-ARMV5TE:      ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV5TE-NEXT: add [[ADDR0]], [[ADDR0]], #1024
-; CHECK-ARMV5TE-NEXT: add [[ADDR1]], [[ADDR1]], #1024
-; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]]]
-; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], [[[ADDR1]]]
-; CHECK-T2:           movw [[ADDR1:r[0-9]+]], :lower16:y
-; CHECK-T2-NEXT:      movw [[ADDR0:r[0-9]+]], :lower16:x
-; CHECK-T2-NEXT:      movt [[ADDR1]], :upper16:y
-; CHECK-T2-NEXT:      movt [[ADDR0]], :upper16:x
-; CHECK-T2-NEXT:      add.w [[ADDR0]], [[ADDR0]], #1024
-; CHECK-T2-NEXT:      add.w [[ADDR1]], [[ADDR1]], #1024
-; CHECK-T2-NEXT:      ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], [[[ADDR0]]]
-; CHECK-T2-NEXT:      strd [[R0]], [[R1]], [[[ADDR1]]]
-; CHECK-ARMV4T:       ldr [[ADDR0:r[0-9]+]]
-; CHECK-ARMV4T-NEXT:  ldr [[ADDR1:r[0-9]+]]
-; CHECK-ARMV4T-NEXT:  ldr [[R0:r[0-9]+]], [[[ADDR0]], #1024]
-; CHECK-ARMV4T-NEXT:  ldr [[R1:r[0-9]+]], [[[ADDR0]], #1028]
-; CHECK-ARMV4T-NEXT:  str [[R1]], [[[ADDR1]], #1028]
-; CHECK-ARMV4T-NEXT:  str [[R0]], [[[ADDR1]], #1024]
+; CHECK-ARMV5TE-LABEL: test_offset_4:
+; CHECK-ARMV5TE:       @ %bb.0: @ %entry
+; CHECK-ARMV5TE-NEXT:    ldr r0, .LCPI7_0
+; CHECK-ARMV5TE-NEXT:    ldr r2, .LCPI7_1
+; CHECK-ARMV5TE-NEXT:    add r0, r0, #1024
+; CHECK-ARMV5TE-NEXT:    add r2, r2, #1024
+; CHECK-ARMV5TE-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV5TE-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV5TE-NEXT:    bx lr
+; CHECK-ARMV5TE-NEXT:    .p2align 2
+; CHECK-ARMV5TE-NEXT:  @ %bb.1:
+; CHECK-ARMV5TE-NEXT:  .LCPI7_0:
+; CHECK-ARMV5TE-NEXT:    .long x
+; CHECK-ARMV5TE-NEXT:  .LCPI7_1:
+; CHECK-ARMV5TE-NEXT:    .long y
+;
+; CHECK-T2-LABEL: test_offset_4:
+; CHECK-T2:       @ %bb.0: @ %entry
+; CHECK-T2-NEXT:    movw r0, :lower16:y
+; CHECK-T2-NEXT:    movw r1, :lower16:x
+; CHECK-T2-NEXT:    movt r0, :upper16:y
+; CHECK-T2-NEXT:    movt r1, :upper16:x
+; CHECK-T2-NEXT:    add.w r1, r1, #1024
+; CHECK-T2-NEXT:    add.w r0, r0, #1024
+; CHECK-T2-NEXT:    ldrd r1, r2, [r1]
+; CHECK-T2-NEXT:    strd r1, r2, [r0]
+; CHECK-T2-NEXT:    bx lr
+;
+; CHECK-ARMV4T-LABEL: test_offset_4:
+; CHECK-ARMV4T:       @ %bb.0: @ %entry
+; CHECK-ARMV4T-NEXT:    ldr r0, .LCPI7_0
+; CHECK-ARMV4T-NEXT:    ldr r2, .LCPI7_1
+; CHECK-ARMV4T-NEXT:    ldr r1, [r0, #1024]
+; CHECK-ARMV4T-NEXT:    ldr r0, [r0, #1028]
+; CHECK-ARMV4T-NEXT:    str r0, [r2, #1028]
+; CHECK-ARMV4T-NEXT:    str r1, [r2, #1024]
+; CHECK-ARMV4T-NEXT:    bx lr
+; CHECK-ARMV4T-NEXT:    .p2align 2
+; CHECK-ARMV4T-NEXT:  @ %bb.1:
+; CHECK-ARMV4T-NEXT:  .LCPI7_0:
+; CHECK-ARMV4T-NEXT:    .long x
+; CHECK-ARMV4T-NEXT:  .LCPI7_1:
+; CHECK-ARMV4T-NEXT:    .long y
+;
+; CHECK-ARMV7-LABEL: test_offset_4:
+; CHECK-ARMV7:       @ %bb.0: @ %entry
+; CHECK-ARMV7-NEXT:    movw r0, :lower16:x
+; CHECK-ARMV7-NEXT:    movw r2, :lower16:y
+; CHECK-ARMV7-NEXT:    movt r0, :upper16:x
+; CHECK-ARMV7-NEXT:    movt r2, :upper16:y
+; CHECK-ARMV7-NEXT:    add r0, r0, #1024
+; CHECK-ARMV7-NEXT:    add r2, r2, #1024
+; CHECK-ARMV7-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV7-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV7-NEXT:    bx lr
+;
+; CHECK-ARMV7-STRICT-LABEL: test_offset_4:
+; CHECK-ARMV7-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV7-STRICT-NEXT:    movw r0, :lower16:x
+; CHECK-ARMV7-STRICT-NEXT:    movw r2, :lower16:y
+; CHECK-ARMV7-STRICT-NEXT:    movt r0, :upper16:x
+; CHECK-ARMV7-STRICT-NEXT:    movt r2, :upper16:y
+; CHECK-ARMV7-STRICT-NEXT:    add r0, r0, #1024
+; CHECK-ARMV7-STRICT-NEXT:    add r2, r2, #1024
+; CHECK-ARMV7-STRICT-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV7-STRICT-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV7-STRICT-NEXT:    bx lr
+;
+; CHECK-ARMV6-LABEL: test_offset_4:
+; CHECK-ARMV6:       @ %bb.0: @ %entry
+; CHECK-ARMV6-NEXT:    ldr r0, .LCPI7_0
+; CHECK-ARMV6-NEXT:    ldr r2, .LCPI7_1
+; CHECK-ARMV6-NEXT:    add r0, r0, #1024
+; CHECK-ARMV6-NEXT:    add r2, r2, #1024
+; CHECK-ARMV6-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV6-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV6-NEXT:    bx lr
+; CHECK-ARMV6-NEXT:    .p2align 2
+; CHECK-ARMV6-NEXT:  @ %bb.1:
+; CHECK-ARMV6-NEXT:  .LCPI7_0:
+; CHECK-ARMV6-NEXT:    .long x
+; CHECK-ARMV6-NEXT:  .LCPI7_1:
+; CHECK-ARMV6-NEXT:    .long y
+;
+; CHECK-ARMV6-STRICT-LABEL: test_offset_4:
+; CHECK-ARMV6-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV6-STRICT-NEXT:    ldr r0, .LCPI7_0
+; CHECK-ARMV6-STRICT-NEXT:    ldr r2, .LCPI7_1
+; CHECK-ARMV6-STRICT-NEXT:    add r0, r0, #1024
+; CHECK-ARMV6-STRICT-NEXT:    add r2, r2, #1024
+; CHECK-ARMV6-STRICT-NEXT:    ldrd r0, r1, [r0]
+; CHECK-ARMV6-STRICT-NEXT:    strd r0, r1, [r2]
+; CHECK-ARMV6-STRICT-NEXT:    bx lr
+; CHECK-ARMV6-STRICT-NEXT:    .p2align 2
+; CHECK-ARMV6-STRICT-NEXT:  @ %bb.1:
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI7_0:
+; CHECK-ARMV6-STRICT-NEXT:    .long x
+; CHECK-ARMV6-STRICT-NEXT:  .LCPI7_1:
+; CHECK-ARMV6-STRICT-NEXT:    .long y
+
 entry:
   %0 = load volatile i64, ptr getelementptr (i8, ptr @x, i32 1024), align 8
   store volatile i64 %0, ptr getelementptr (i8, ptr @y, i32 1024), align 8
@@ -156,30 +888,85 @@ entry:
 }
 
 define i64 @test_stack() {
-; CHECK-LABEL: test_stack:
-; CHECK-ARMV5TE:      sub sp, sp, #80
-; CHECK-ARMV5TE-NEXT: mov [[R0:r[0-9]+]], #0
-; CHECK-ARMV5TE-NEXT: mov [[R1:r[0-9]+]], #1
-; CHECK-ARMV5TE-NEXT: strd [[R1]], [[R0]], [sp, #8]
-; CHECK-ARMV5TE-NEXT: ldrd r0, r1, [sp, #8]
-; CHECK-ARMV5TE-NEXT: add sp, sp, #80
-; CHECK-ARMV5TE-NEXT: bx lr
-; CHECK-T2:      sub sp, #80
-; CHECK-T2-NEXT: movs [[R0:r[0-9]+]], #0
-; CHECK-T2-NEXT: movs [[R1:r[0-9]+]], #1
-; CHECK-T2-NEXT: strd [[R1]], [[R0]], [sp, #8]
-; CHECK-T2-NEXT: ldrd r0, r1, [sp, #8]
-; CHECK-T2-NEXT: add sp, #80
-; CHECK-T2-NEXT: bx lr
-; CHECK-ARMV4T:      sub sp, sp, #80
-; CHECK-ARMV4T-NEXT: mov [[R0:r[0-9]+]], #0
-; CHECK-ARMV4T-NEXT: str [[R0]], [sp, #12]
-; CHECK-ARMV4T-NEXT: mov [[R1:r[0-9]+]], #1
-; CHECK-ARMV4T-NEXT: str [[R1]], [sp, #8]
-; CHECK-ARMV4T-NEXT: ldr r0, [sp, #8]
-; CHECK-ARMV4T-NEXT: ldr r1, [sp, #12]
-; CHECK-ARMV4T-NEXT: add sp, sp, #80
-; CHECK-ARMV4T-NEXT: bx lr
+; CHECK-ARMV5TE-LABEL: test_stack:
+; CHECK-ARMV5TE:       @ %bb.0: @ %entry
+; CHECK-ARMV5TE-NEXT:    .pad #80
+; CHECK-ARMV5TE-NEXT:    sub sp, sp, #80
+; CHECK-ARMV5TE-NEXT:    mov r1, #0
+; CHECK-ARMV5TE-NEXT:    mov r0, #1
+; CHECK-ARMV5TE-NEXT:    strd r0, r1, [sp, #8]
+; CHECK-ARMV5TE-NEXT:    ldrd r0, r1, [sp, #8]
+; CHECK-ARMV5TE-NEXT:    add sp, sp, #80
+; CHECK-ARMV5TE-NEXT:    bx lr
+;
+; CHECK-T2-LABEL: test_stack:
+; CHECK-T2:       @ %bb.0: @ %entry
+; CHECK-T2-NEXT:    .pad #80
+; CHECK-T2-NEXT:    sub sp, #80
+; CHECK-T2-NEXT:    movs r0, #0
+; CHECK-T2-NEXT:    movs r1, #1
+; CHECK-T2-NEXT:    strd r1, r0, [sp, #8]
+; CHECK-T2-NEXT:    ldrd r0, r1, [sp, #8]
+; CHECK-T2-NEXT:    add sp, #80
+; CHECK-T2-NEXT:    bx lr
+;
+; CHECK-ARMV4T-LABEL: test_stack:
+; CHECK-ARMV4T:       @ %bb.0: @ %entry
+; CHECK-ARMV4T-NEXT:    .pad #80
+; CHECK-ARMV4T-NEXT:    sub sp, sp, #80
+; CHECK-ARMV4T-NEXT:    mov r0, #0
+; CHECK-ARMV4T-NEXT:    str r0, [sp, #12]
+; CHECK-ARMV4T-NEXT:    mov r0, #1
+; CHECK-ARMV4T-NEXT:    str r0, [sp, #8]
+; CHECK-ARMV4T-NEXT:    ldr r0, [sp, #8]
+; CHECK-ARMV4T-NEXT:    ldr r1, [sp, #12]
+; CHECK-ARMV4T-NEXT:    add sp, sp, #80
+; CHECK-ARMV4T-NEXT:    bx lr
+;
+; CHECK-ARMV7-LABEL: test_stack:
+; CHECK-ARMV7:       @ %bb.0: @ %entry
+; CHECK-ARMV7-NEXT:    .pad #80
+; CHECK-ARMV7-NEXT:    sub sp, sp, #80
+; CHECK-ARMV7-NEXT:    mov r1, #0
+; CHECK-ARMV7-NEXT:    mov r0, #1
+; CHECK-ARMV7-NEXT:    strd r0, r1, [sp, #8]
+; CHECK-ARMV7-NEXT:    ldrd r0, r1, [sp, #8]
+; CHECK-ARMV7-NEXT:    add sp, sp, #80
+; CHECK-ARMV7-NEXT:    bx lr
+;
+; CHECK-ARMV7-STRICT-LABEL: test_stack:
+; CHECK-ARMV7-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV7-STRICT-NEXT:    .pad #80
+; CHECK-ARMV7-STRICT-NEXT:    sub sp, sp, #80
+; CHECK-ARMV7-STRICT-NEXT:    mov r1, #0
+; CHECK-ARMV7-STRICT-NEXT:    mov r0, #1
+; CHECK-ARMV7-STRICT-NEXT:    strd r0, r1, [sp, #8]
+; CHECK-ARMV7-STRICT-NEXT:    ldrd r0, r1, [sp, #8]
+; CHECK-ARMV7-STRICT-NEXT:    add sp, sp, #80
+; CHECK-ARMV7-STRICT-NEXT:    bx lr
+;
+; CHECK-ARMV6-LABEL: test_stack:
+; CHECK-ARMV6:       @ %bb.0: @ %entry
+; CHECK-ARMV6-NEXT:    .pad #80
+; CHECK-ARMV6-NEXT:    sub sp, sp, #80
+; CHECK-ARMV6-NEXT:    mov r1, #0
+; CHECK-ARMV6-NEXT:    mov r0, #1
+; CHECK-ARMV6-NEXT:    strd r0, r1, [sp, #8]
+; CHECK-ARMV6-NEXT:    ldrd r0, r1, [sp, #8]
+; CHECK-ARMV6-NEXT:    add sp, sp, #80
+; CHECK-ARMV6-NEXT:    bx lr
+;
+; CHECK-ARMV6-STRICT-LABEL: test_stack:
+; CHECK-ARMV6-STRICT:       @ %bb.0: @ %entry
+; CHECK-ARMV6-STRICT-NEXT:    .pad #80
+; CHECK-ARMV6-STRICT-NEXT:    sub sp, sp, #80
+; CHECK-ARMV6-STRICT-NEXT:    mov r1, #0
+; CHECK-ARMV6-STRICT-NEXT:    mov r0, #1
+; CHECK-ARMV6-STRICT-NEXT:    strd r0, r1, [sp, #8]
+; CHECK-ARMV6-STRICT-NEXT:    ldrd r0, r1, [sp, #8]
+; CHECK-ARMV6-STRICT-NEXT:    add sp, sp, #80
+; CHECK-ARMV6-STRICT-NEXT:    bx lr
+
 entry:
   %a = alloca [10 x i64], align 8
   %arrayidx = getelementptr inbounds [10 x i64], ptr %a, i32 0, i32 1