From 18c8ed54160bec12344288c6993b0f548fbe809e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 25 Sep 2021 18:35:39 +0100
Subject: [PATCH] [DAG] ReduceLoadOpStoreWidth - replace getABITypeAlign with
 allowsMemoryAccess (PR45116)

One of the cases identified in PR45116 - we don't need to limit store
narrowing to the ABI type alignment. Instead we can use
allowsMemoryAccess, which tests the ABI alignment via getABITypeAlign
but also falls back to allowsMisalignedMemoryAccesses to check whether
the target permits (fast) misaligned memory access.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  7 +++++--
 llvm/test/CodeGen/X86/i1narrowfail.ll         | 11 +++++++++++
 llvm/test/CodeGen/X86/narrow_op-1.ll          | 12 +++++++++++-
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c7a5b8c..b290fb0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16767,9 +16767,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
     if (DAG.getDataLayout().isBigEndian())
       PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
 
+    bool IsFast = false;
     Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
-    Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
-    if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
+    if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
+                                LD->getAddressSpace(), NewAlign,
+                                LD->getMemOperand()->getFlags(), &IsFast) ||
+        !IsFast)
       return SDValue();
 
     SDValue NewPtr =
diff --git a/llvm/test/CodeGen/X86/i1narrowfail.ll b/llvm/test/CodeGen/X86/i1narrowfail.ll
index 282d1ac..61c25bd 100644
--- a/llvm/test/CodeGen/X86/i1narrowfail.ll
+++ b/llvm/test/CodeGen/X86/i1narrowfail.ll
@@ -11,3 +11,14 @@ define void @foo(i64* %ptr) {
   store i64 %r12, i64* %ptr, align 8
   ret void
 }
+
+define void @foo_noalign(i64* %ptr) {
+; CHECK-LABEL: foo_noalign:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    orb $16, (%rdi)
+; CHECK-NEXT:    retq
+  %r11 = load i64, i64* %ptr, align 1
+  %r12 = or i64 16, %r11
+  store i64 %r12, i64* %ptr, align 1
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/narrow_op-1.ll b/llvm/test/CodeGen/X86/narrow_op-1.ll
index 0254c11..459f69e 100644
--- a/llvm/test/CodeGen/X86/narrow_op-1.ll
+++ b/llvm/test/CodeGen/X86/narrow_op-1.ll
@@ -14,7 +14,18 @@ entry:
   %1 = or i32 %0, 65536
   store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf, %struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
   ret void
+}
 
+define dso_local void @t1_noalign() nounwind optsize ssp {
+; CHECK-LABEL: t1_noalign:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    orb $1, bfi+10(%rip)
+; CHECK-NEXT:    retq
+entry:
+  %0 = load i32, i32* bitcast (i16* getelementptr (%struct.bf, %struct.bf* @bfi, i32 0, i32 1) to i32*), align 1
+  %1 = or i32 %0, 65536
+  store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf, %struct.bf* @bfi, i32 0, i32 1) to i32*), align 1
+  ret void
 }
 
 define dso_local void @t2() nounwind optsize ssp {
@@ -27,5 +38,4 @@ entry:
   %1 = or i32 %0, 16842752
   store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf, %struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
   ret void
-
 }
-- 
2.7.4
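
Note: the allowsMemoryAccess query adopted above behaves roughly as
sketched below. This is a simplified illustration of the fallback
behaviour described in the commit message, not the actual
TargetLoweringBase implementation; the free-standing function name and
parameter names are invented here for illustration:

    #include "llvm/CodeGen/MachineMemOperand.h"
    #include "llvm/CodeGen/TargetLowering.h"
    #include "llvm/CodeGen/ValueTypes.h"
    #include "llvm/IR/DataLayout.h"

    using namespace llvm;

    // Simplified sketch: an access at or above the ABI type alignment is
    // always legal (and fast); below that, the target is consulted via
    // allowsMisalignedMemoryAccesses.
    static bool allowsMemoryAccessSketch(const TargetLowering &TLI,
                                         LLVMContext &Ctx,
                                         const DataLayout &DL, EVT VT,
                                         unsigned AddrSpace, Align Alignment,
                                         MachineMemOperand::Flags Flags,
                                         bool *IsFast) {
      // Meets the ABI alignment requirement: always OK, always fast.
      if (Alignment >= DL.getABITypeAlign(VT.getTypeForEVT(Ctx))) {
        if (IsFast)
          *IsFast = true;
        return true;
      }
      // Under-aligned: defer to the target's misaligned-access hook. X86
      // typically reports such accesses as legal and fast.
      return TLI.allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment,
                                                Flags, IsFast);
    }

With this combined query (plus the IsFast result), ReduceLoadOpStoreWidth
no longer bails out merely because NewAlign is below the ABI alignment,
which is what lets the new align 1 tests still narrow to a single orb.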