From ab40e44ba1b5ae97f53e5a885dbc93a4bed78302 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne
Date: Fri, 13 Apr 2018 20:21:00 +0000
Subject: [PATCH] Revert r329956, "AArch64: Introduce a DAG combine for
 folding offsets into addresses."

Caused a hang and eventually an assertion failure in LTO builds of
7zip-benchmark on aarch64 iOS targets.
http://green.lab.llvm.org/green/job/lnt-ctmark-aarch64-O3-flto/2024/

llvm-svn: 330063
---
 llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp    |  18 ++-
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp    |  65 ++---
 llvm/test/CodeGen/AArch64/arm64-addrmode.ll        |  31 ++---
 llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll     | 129 +++++--------
 llvm/test/CodeGen/AArch64/fold-global-offsets.ll   |  59 ----------
 llvm/test/CodeGen/AArch64/global-merge-3.ll        |   4 +-
 .../global-merge-ignore-single-use-minsize.ll      |  11 +-
 .../AArch64/global-merge-ignore-single-use.ll      |   6 +-
 8 files changed, 83 insertions(+), 240 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AArch64/fold-global-offsets.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index d44eee0..eee59f1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -743,16 +743,14 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
     if (!GAN)
       return true;
 
-    if (GAN->getOffset() % Size == 0) {
-      const GlobalValue *GV = GAN->getGlobal();
-      unsigned Alignment = GV->getAlignment();
-      Type *Ty = GV->getValueType();
-      if (Alignment == 0 && Ty->isSized())
-        Alignment = DL.getABITypeAlignment(Ty);
-
-      if (Alignment >= Size)
-        return true;
-    }
+    const GlobalValue *GV = GAN->getGlobal();
+    unsigned Alignment = GV->getAlignment();
+    Type *Ty = GV->getValueType();
+    if (Alignment == 0 && Ty->isSized())
+      Alignment = DL.getABITypeAlignment(Ty);
+
+    if (Alignment >= Size)
+      return true;
   }
 
   if (CurDAG->isBaseWithConstantOffset(N)) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9687894..19573e1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -577,8 +577,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
   setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
 
-  setTargetDAGCombine(ISD::GlobalAddress);
-
   MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
   MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
   MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
@@ -3679,8 +3677,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
 SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
                                              SelectionDAG &DAG,
                                              unsigned Flag) const {
-  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
-                                    N->getOffset(), Flag);
+  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
 }
 
 SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
@@ -3755,9 +3752,8 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
   unsigned char OpFlags =
       Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
 
-  if (OpFlags != AArch64II::MO_NO_FLAG)
-    assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
-           "unexpected offset in global node");
+  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
+         "unexpected offset in global node");
 
   // This also catches the large code model case for Darwin.
   if ((OpFlags & AArch64II::MO_GOT) != 0) {
@@ -4995,8 +4991,10 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
 
 bool AArch64TargetLowering::isOffsetFoldingLegal(
     const GlobalAddressSDNode *GA) const {
-  // Offsets are folded in the DAG combine rather than here so that we can
-  // intelligently choose an offset based on the uses.
+  DEBUG(dbgs() << "Skipping offset folding global address: ");
+  DEBUG(GA->dump());
+  DEBUG(dbgs() << "AArch64 doesn't support folding offsets into global "
+                  "addresses\n");
   return false;
 }
 
@@ -10619,53 +10617,6 @@ static SDValue performNVCASTCombine(SDNode *N) {
   return SDValue();
 }
 
-// If all users of the globaladdr are of the form (globaladdr + constant), find
-// the smallest constant, fold it into the globaladdr's offset and rewrite the
-// globaladdr as (globaladdr + constant) - constant.
-static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
-                                           const AArch64Subtarget *Subtarget,
-                                           const TargetMachine &TM) {
-  auto *GN = dyn_cast<GlobalAddressSDNode>(N);
-  if (!GN || Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
-                 AArch64II::MO_NO_FLAG)
-    return SDValue();
-
-  uint64_t MinOffset = -1ull;
-  for (SDNode *N : GN->uses()) {
-    if (N->getOpcode() != ISD::ADD)
-      return SDValue();
-    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
-    if (!C)
-      C = dyn_cast<ConstantSDNode>(N->getOperand(1));
-    if (!C)
-      return SDValue();
-    MinOffset = std::min(MinOffset, C->getZExtValue());
-  }
-  uint64_t Offset = MinOffset + GN->getOffset();
-
-  // Check whether folding this offset is legal. It must not go out of bounds of
-  // the referenced object to avoid violating the code model, and must be
-  // smaller than 2^21 because this is the largest offset expressible in all
-  // object formats.
-  //
-  // This check also prevents us from folding negative offsets, which will end
-  // up being treated in the same way as large positive ones. They could also
-  // cause code model violations, and aren't really common enough to matter.
-  if (Offset >= (1 << 21))
-    return SDValue();
-
-  const GlobalValue *GV = GN->getGlobal();
-  Type *T = GV->getValueType();
-  if (!T->isSized() ||
-      Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
-    return SDValue();
-
-  SDLoc DL(GN);
-  SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
-  return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
-                     DAG.getConstant(MinOffset, DL, MVT::i64));
-}
-
 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -10753,8 +10704,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     default:
       break;
     }
-  case ISD::GlobalAddress:
-    return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
   }
   return SDValue();
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
index 16f8d01..6da7679 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -5,31 +5,32 @@
 ; base + offset (imm9)
 ; CHECK: @t1
-; CHECK: ldr xzr, [x0, #8]
+; CHECK: ldr xzr, [x{{[0-9]+}}, #8]
 ; CHECK: ret
-define void @t1(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 1
+define void @t1() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 1
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
 ; base + offset (> imm9)
 ; CHECK: @t2
-; CHECK: sub [[ADDREG:x[0-9]+]], x0, #264
+; CHECK: sub [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264
 ; CHECK: ldr xzr, [
+; CHECK: [[ADDREG]]]
 ; CHECK: ret
-define void @t2(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 -33
+define void @t2() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 -33
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
 ; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes)
 ; CHECK: @t3
-; CHECK: ldr xzr, [x0, #32760]
+; CHECK: ldr xzr, [x{{[0-9]+}}, #32760]
 ; CHECK: ret
-define void @t3(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4095
+define void @t3() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4095
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
@@ -37,10 +38,10 @@ define void @t3(i64* %object) {
 ; base + unsigned offset (> imm12 * size of type in bytes)
 ; CHECK: @t4
 ; CHECK: orr w[[NUM:[0-9]+]], wzr, #0x8000
-; CHECK: ldr xzr, [x0, x[[NUM]]]
+; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
 ; CHECK: ret
-define void @t4(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4096
+define void @t4() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4096
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
@@ -57,12 +58,12 @@ define void @t5(i64 %a) {
 
 ; base + reg + imm
 ; CHECK: @t6
-; CHECK: add [[ADDREG:x[0-9]+]], x1, x0, lsl #3
+; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3
 ; CHECK-NEXT: orr w[[NUM:[0-9]+]], wzr, #0x8000
 ; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
 ; CHECK: ret
-define void @t6(i64 %a, i64* %object) {
-  %tmp1 = getelementptr inbounds i64, i64* %object, i64 %a
+define void @t6(i64 %a) {
+  %tmp1 = getelementptr inbounds i64, i64* @object, i64 %a
   %incdec.ptr = getelementptr inbounds i64, i64* %tmp1, i64 4096
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
diff --git a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
index 6e530cb..938b3d1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
@@ -264,196 +264,149 @@ entry:
 ; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
 ; registers for unscaled vector accesses
+@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1
 
-define <1 x i64> @fct0(i8* %str) nounwind readonly ssp {
+define <1 x i64> @fct0() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct0:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <1 x i64>*
-  %0 = load <1 x i64>, <1 x i64>* %q, align 8
+  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
   ret <1 x i64> %0
 }
 
-define <2 x i32> @fct1(i8* %str) nounwind readonly ssp {
+define <2 x i32> @fct1() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct1:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <2 x i32>*
-  %0 = load <2 x i32>, <2 x i32>* %q, align 8
+  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
   ret <2 x i32> %0
 }
 
-define <4 x i16> @fct2(i8* %str) nounwind readonly ssp {
+define <4 x i16> @fct2() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct2:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <4 x i16>*
-  %0 = load <4 x i16>, <4 x i16>* %q, align 8
+  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
   ret <4 x i16> %0
 }
 
-define <8 x i8> @fct3(i8* %str) nounwind readonly ssp {
+define <8 x i8> @fct3() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct3:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <8 x i8>*
-  %0 = load <8 x i8>, <8 x i8>* %q, align 8
+  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
   ret <8 x i8> %0
 }
 
-define <2 x i64> @fct4(i8* %str) nounwind readonly ssp {
+define <2 x i64> @fct4() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct4:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <2 x i64>*
-  %0 = load <2 x i64>, <2 x i64>* %q, align 16
+  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
   ret <2 x i64> %0
 }
 
-define <4 x i32> @fct5(i8* %str) nounwind readonly ssp {
+define <4 x i32> @fct5() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct5:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <4 x i32>*
-  %0 = load <4 x i32>, <4 x i32>* %q, align 16
+  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
   ret <4 x i32> %0
 }
 
-define <8 x i16> @fct6(i8* %str) nounwind readonly ssp {
+define <8 x i16> @fct6() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct6:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <8 x i16>*
-  %0 = load <8 x i16>, <8 x i16>* %q, align 16
+  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
   ret <8 x i16> %0
 }
 
-define <16 x i8> @fct7(i8* %str) nounwind readonly ssp {
+define <16 x i8> @fct7() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct7:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <16 x i8>*
-  %0 = load <16 x i8>, <16 x i8>* %q, align 16
+  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
   ret <16 x i8> %0
 }
 
-define void @fct8(i8* %str) nounwind ssp {
+define void @fct8() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct8:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <1 x i64>*
-  %0 = load <1 x i64>, <1 x i64>* %q, align 8
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <1 x i64>*
-  store <1 x i64> %0, <1 x i64>* %q2, align 8
+  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
+  store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
   ret void
 }
 
-define void @fct9(i8* %str) nounwind ssp {
+define void @fct9() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct9:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <2 x i32>*
-  %0 = load <2 x i32>, <2 x i32>* %q, align 8
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <2 x i32>*
-  store <2 x i32> %0, <2 x i32>* %q2, align 8
+  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
+  store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
   ret void
 }
 
-define void @fct10(i8* %str) nounwind ssp {
+define void @fct10() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct10:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <4 x i16>*
-  %0 = load <4 x i16>, <4 x i16>* %q, align 8
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <4 x i16>*
-  store <4 x i16> %0, <4 x i16>* %q2, align 8
+  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
+  store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
   ret void
 }
 
-define void @fct11(i8* %str) nounwind ssp {
+define void @fct11() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct11:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <8 x i8>*
-  %0 = load <8 x i8>, <8 x i8>* %q, align 8
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <8 x i8>*
-  store <8 x i8> %0, <8 x i8>* %q2, align 8
+  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
+  store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
   ret void
 }
 
-define void @fct12(i8* %str) nounwind ssp {
+define void @fct12() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct12:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <2 x i64>*
-  %0 = load <2 x i64>, <2 x i64>* %q, align 16
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <2 x i64>*
-  store <2 x i64> %0, <2 x i64>* %q2, align 16
+  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
+  store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
   ret void
 }
 
-define void @fct13(i8* %str) nounwind ssp {
+define void @fct13() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct13:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <4 x i32>*
-  %0 = load <4 x i32>, <4 x i32>* %q, align 16
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <4 x i32>*
-  store <4 x i32> %0, <4 x i32>* %q2, align 16
+  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
+  store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
   ret void
 }
 
-define void @fct14(i8* %str) nounwind ssp {
+define void @fct14() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct14:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <8 x i16>*
-  %0 = load <8 x i16>, <8 x i16>* %q, align 16
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <8 x i16>*
-  store <8 x i16> %0, <8 x i16>* %q2, align 16
+  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
+  store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
   ret void
 }
 
-define void @fct15(i8* %str) nounwind ssp {
+define void @fct15() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct15:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <16 x i8>*
-  %0 = load <16 x i8>, <16 x i8>* %q, align 16
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <16 x i8>*
-  store <16 x i8> %0, <16 x i8>* %q2, align 16
+  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
+  store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
   ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/fold-global-offsets.ll b/llvm/test/CodeGen/AArch64/fold-global-offsets.ll
deleted file mode 100644
index 4fb9cfd..0000000
--- a/llvm/test/CodeGen/AArch64/fold-global-offsets.ll
+++ /dev/null
@@ -1,59 +0,0 @@
-; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
-
-@x1 = external hidden global [2 x i64]
-@x2 = external hidden global [16777216 x i64]
-
-define i64 @f1() {
-  ; CHECK: f1:
-  ; CHECK: adrp x8, x1+16
-  ; CHECK: ldr x0, [x8, :lo12:x1+16]
-  %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 2)
-  ret i64 %l
-}
-
-define i64 @f2() {
-  ; CHECK: f2:
-  ; CHECK: adrp x8, x1
-  ; CHECK: add x8, x8, :lo12:x1
-  ; CHECK: ldr x0, [x8, #24]
-  %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 3)
-  ret i64 %l
-}
-
-define i64 @f3() {
-  ; CHECK: f3:
-  ; CHECK: adrp x8, x1+1
-  ; CHECK: add x8, x8, :lo12:x1+1
-  ; CHECK: ldr x0, [x8]
-  %l = load i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast ([2 x i64]* @x1 to i8*), i64 1) to i64*)
-  ret i64 %l
-}
-
-define [2 x i64] @f4() {
-  ; CHECK: f4:
-  ; CHECK: adrp x8, x2+8
-  ; CHECK: add x8, x8, :lo12:x2+8
-  ; CHECK: ldp x0, x1, [x8]
-  %l = load [2 x i64], [2 x i64]* bitcast (i8* getelementptr (i8, i8* bitcast ([16777216 x i64]* @x2 to i8*), i64 8) to [2 x i64]*)
-  ret [2 x i64] %l
-}
-
-define i64 @f5() {
-  ; CHECK: f5:
-  ; CHECK: adrp x8, x2+2097144
-  ; CHECK: ldr x0, [x8, :lo12:x2+2097144]
-  ; CHECK: ret
-  %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262143)
-  ret i64 %l
-}
-
-define i64 @f6() {
-  ; CHECK: f6:
-  ; CHECK: adrp x8, x2
-  ; CHECK: add x8, x8, :lo12:x2
-  ; CHECK: orr w9, wzr, #0x200000
-  ; CHECK: ldr x0, [x8, x9]
-  ; CHECK: ret
-  %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262144)
-  ret i64 %l
-}
diff --git a/llvm/test/CodeGen/AArch64/global-merge-3.ll b/llvm/test/CodeGen/AArch64/global-merge-3.ll
index 4844d96..106d6da 100644
--- a/llvm/test/CodeGen/AArch64/global-merge-3.ll
+++ b/llvm/test/CodeGen/AArch64/global-merge-3.ll
@@ -10,8 +10,8 @@ define void @f1(i32 %a1, i32 %a2, i32 %a3) {
 ;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE
 ;CHECK-APPLE-IOS-NOT: adrp
 ;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF
-;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE+12
-;CHECK-APPLE-IOS: str w1, [x9, __MergedGlobals_y@PAGEOFF+12]
+;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE
+;CHECK-APPLE-IOS: add x9, x9, __MergedGlobals_y@PAGEOFF
   %x3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @x, i32 0, i64 3
   %y3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @y, i32 0, i64 3
   store i32 %a1, i32* %x3, align 4
diff --git a/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll b/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
index 8207f8c..1c1b4f6 100644
--- a/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
+++ b/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
@@ -44,9 +44,9 @@ define void @f2(i32 %a1, i32 %a2) nounwind {
 
 ; CHECK-LABEL: f3:
 define void @f3(i32 %a1, i32 %a2) minsize nounwind {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+8
-; CHECK-NEXT: stp w0, w1, [x8]
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8, #8]
 ; CHECK-NEXT: ret
   store i32 %a1, i32* @m3, align 4
   store i32 %a2, i32* @n3, align 4
@@ -57,9 +57,10 @@
 
 ; CHECK-LABEL: f4:
 define void @f4(i32 %a1, i32 %a2) nounwind {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
 ; CHECK-NEXT: adrp x9, _n4@PAGE
-; CHECK-NEXT: str w0, [x8, [[SET]]@PAGEOFF+8]
+; CHECK-NEXT: str w0, [x8, #8]
 ; CHECK-NEXT: str w1, [x9, _n4@PAGEOFF]
 ; CHECK-NEXT: ret
   store i32 %a1, i32* @m3, align 4
diff --git a/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use.ll b/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
index b3b8406..97e283c 100644
--- a/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
+++ b/llvm/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
@@ -38,9 +38,9 @@ define void @f2(i32 %a1, i32 %a2, i32 %a3) #0 {
 
 ; CHECK-LABEL: f3:
 define void @f3(i32 %a1, i32 %a2) #0 {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE+12
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+12
-; CHECK-NEXT: stp w0, w1, [x8]
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8, #12]
 ; CHECK-NEXT: ret
   store i32 %a1, i32* @m2, align 4
   store i32 %a2, i32* @n2, align 4
-- 
2.7.4
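Postscript on what is being reverted, for readers of this patch in isolation:
the deleted fold-global-offsets.ll test above captures both lowering
strategies side by side. Sketching from its @f1 and @f2 cases (the x8/x0
register numbers come from that test's CHECK lines, not from a fresh build):

  ; %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 2)
  ;
  ; With the combine (r329956), the +16 rides along in the relocations,
  ; saving the address-materializing add:
  ;   adrp x8, x1+16
  ;   ldr  x0, [x8, :lo12:x1+16]
  ;
  ; After this revert, the global is materialized bare and the offset is
  ; folded into the load's immediate instead:
  ;   adrp x8, x1
  ;   add  x8, x8, :lo12:x1
  ;   ldr  x0, [x8, #16]

Per the comment in the deleted performGlobalAddressCombine, the combine only
fired for offsets that stayed inside the referenced object and below 1 << 21,
the largest addend expressible in all object formats; that is why @f5 in the
deleted test folded its offset (2097144) while @f6 (offset 2097152) fell back
to a register offset even before this revert.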
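To exercise the tests touched here after applying the revert, the usual
llvm-lit invocation works; the build directory name below is an assumption,
substitute your own:

  ./build/bin/llvm-lit -v \
      llvm/test/CodeGen/AArch64/arm64-addrmode.ll \
      llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll \
      llvm/test/CodeGen/AArch64/global-merge-3.ll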