From 18c8ed54160bec12344288c6993b0f548fbe809e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 25 Sep 2021 18:35:39 +0100
Subject: [PATCH] [DAG] ReduceLoadOpStoreWidth - replace getABITypeAlign with
 allowsMemoryAccess (PR45116)

One of the cases identified in PR45116 - we don't need to limit store
narrowing to the ABI type alignment. Instead we can use
allowsMemoryAccess, which tests the ABI alignment via getABITypeAlign
but also falls back to allowsMisalignedMemoryAccesses to check whether
the target permits (fast) misaligned memory access.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  7 +++++--
 llvm/test/CodeGen/X86/i1narrowfail.ll         | 11 +++++++++++
 llvm/test/CodeGen/X86/narrow_op-1.ll          | 12 +++++++++++-
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c7a5b8c..b290fb0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16767,9 +16767,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
     if (DAG.getDataLayout().isBigEndian())
       PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
 
+    bool IsFast = false;
     Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
-    Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
-    if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
+    if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
+                                LD->getAddressSpace(), NewAlign,
+                                LD->getMemOperand()->getFlags(), &IsFast) ||
+        !IsFast)
       return SDValue();
 
     SDValue NewPtr =
diff --git a/llvm/test/CodeGen/X86/i1narrowfail.ll b/llvm/test/CodeGen/X86/i1narrowfail.ll
index 282d1ac..61c25bd 100644
--- a/llvm/test/CodeGen/X86/i1narrowfail.ll
+++ b/llvm/test/CodeGen/X86/i1narrowfail.ll
@@ -11,3 +11,14 @@ define void @foo(i64* %ptr) {
   store i64 %r12, i64* %ptr, align 8
   ret void
 }
+
+define void @foo_noalign(i64* %ptr) {
+; CHECK-LABEL: foo_noalign:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    orb $16, (%rdi)
+; CHECK-NEXT:    retq
+  %r11 = load i64, i64* %ptr, align 1
+  %r12 = or i64 16, %r11
+  store i64 %r12, i64* %ptr, align 1
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/narrow_op-1.ll b/llvm/test/CodeGen/X86/narrow_op-1.ll
index 0254c11..459f69e 100644
--- a/llvm/test/CodeGen/X86/narrow_op-1.ll
+++ b/llvm/test/CodeGen/X86/narrow_op-1.ll
@@ -14,7 +14,18 @@ entry:
   %1 = or i32 %0, 65536
   store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf, %struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
   ret void
+}
 
+define dso_local void @t1_noalign() nounwind optsize ssp {
+; CHECK-LABEL: t1_noalign:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    orb $1, bfi+10(%rip)
+; CHECK-NEXT:    retq
+entry:
+  %0 = load i32, i32* bitcast (i16* getelementptr (%struct.bf, %struct.bf* @bfi, i32 0, i32 1) to i32*), align 1
+  %1 = or i32 %0, 65536
+  store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf, %struct.bf* @bfi, i32 0, i32 1) to i32*), align 1
+  ret void
 }
 
 define dso_local void @t2() nounwind optsize ssp {
@@ -27,5 +38,4 @@ entry:
   %1 = or i32 %0, 16842752
   store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf, %struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
   ret void
-
 }
-- 
2.7.4
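
Note: the allowsMemoryAccess query adopted above behaves roughly as
sketched below. This is a simplified illustration of the fallback
behaviour described in the commit message, not the actual
TargetLoweringBase implementation; the free-standing function name and
parameter names are invented here for illustration:

    #include "llvm/CodeGen/MachineMemOperand.h"
    #include "llvm/CodeGen/TargetLowering.h"
    #include "llvm/CodeGen/ValueTypes.h"
    #include "llvm/IR/DataLayout.h"

    using namespace llvm;

    // Simplified sketch: an access at or above the ABI type alignment is
    // always legal (and fast); below that, the target is consulted via
    // allowsMisalignedMemoryAccesses.
    static bool allowsMemoryAccessSketch(const TargetLowering &TLI,
                                         LLVMContext &Ctx,
                                         const DataLayout &DL, EVT VT,
                                         unsigned AddrSpace, Align Alignment,
                                         MachineMemOperand::Flags Flags,
                                         bool *IsFast) {
      // Meets the ABI alignment requirement: always OK, always fast.
      if (Alignment >= DL.getABITypeAlign(VT.getTypeForEVT(Ctx))) {
        if (IsFast)
          *IsFast = true;
        return true;
      }
      // Under-aligned: defer to the target's misaligned-access hook. X86
      // typically reports such accesses as legal and fast.
      return TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment,
                                                Flags, IsFast);
    }

With this combined query (plus the IsFast result), ReduceLoadOpStoreWidth
no longer bails out merely because NewAlign is below the ABI alignment,
which is what lets the new align 1 tests still narrow to a single orb.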