From c8903125cd7c7766ef9c8eef3ed3a9d89a6fe554 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 14 Nov 2017 23:46:42 +0000
Subject: [PATCH] AMDGPU: Handle or in multi-use shl ptr combine

llvm-svn: 318223
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  4 ++--
 llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll   | 34 +++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0be8e81..f7fe652 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5199,8 +5199,8 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
 
   // We only do this to handle cases where it's profitable when there are
   // multiple uses of the add, so defer to the standard combine.
-  // TODO: Support or
-  if (N0.getOpcode() != ISD::ADD || N0->hasOneUse())
+  if ((N0.getOpcode() != ISD::ADD && N0.getOpcode() != ISD::OR) ||
+      N0->hasOneUse())
     return SDValue();
 
   const ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N1);
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
index 60fdab66..cee5708 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
@@ -384,5 +384,39 @@ define void @shl_add_ptr_combine_2use_both_max_private_offset(i16 zeroext %idx.a
   ret void
 }
 
+; GCN-LABEL: {{^}}shl_or_ptr_combine_2use_lds:
+; GCN: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
+; GCN: ds_write_b32 [[SCALE0]], v{{[0-9]+}} offset:32
+
+; GCN: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
+; GCN: ds_write_b32 [[SCALE1]], v{{[0-9]+}} offset:64
+define void @shl_or_ptr_combine_2use_lds(i32 %idx) #0 {
+  %idx.or = or i32 %idx, 4 ; NOTE(review): or == add only if bit 2 of %idx is 0 - confirm the combine checks this
+  %scaled.by8 = shl i32 %idx.or, 3 ; first use of the or; constant part is 4 << 3 = 32
+  %scaled.by16 = shl i32 %idx.or, 4 ; second use of the or; constant part is 4 << 4 = 64
+  %addr.by8 = inttoptr i32 %scaled.by8 to i32 addrspace(3)*
+  %addr.by16 = inttoptr i32 %scaled.by16 to i32 addrspace(3)*
+  store volatile i32 9, i32 addrspace(3)* %addr.by8 ; CHECK lines above expect offset:32 here
+  store volatile i32 10, i32 addrspace(3)* %addr.by16 ; CHECK lines above expect offset:64 here
+  ret void
+}
+
+; GCN-LABEL: {{^}}shl_or_ptr_combine_2use_max_lds_offset:
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
+; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+}} offset:65528
+; GCN-DAG: v_or_b32_e32 [[ADD1:v[0-9]+]], 0x1fff0, [[SCALE1]]
+; GCN: ds_write_b32 [[ADD1]], v{{[0-9]+$}}
+define void @shl_or_ptr_combine_2use_max_lds_offset(i32 %idx) #0 {
+  %idx.or = or i32 %idx, 8191 ; NOTE(review): or == add only if the low 13 bits of %idx are 0 - confirm
+  %scaled.by8 = shl i32 %idx.or, 3 ; constant part is 8191 << 3 = 65528
+  %scaled.by16 = shl i32 %idx.or, 4 ; constant part is 8191 << 4 = 0x1fff0
+  %addr.by8 = inttoptr i32 %scaled.by8 to i32 addrspace(3)*
+  %addr.by16 = inttoptr i32 %scaled.by16 to i32 addrspace(3)*
+  store volatile i32 9, i32 addrspace(3)* %addr.by8 ; CHECK lines above expect offset:65528 here
+  store volatile i32 10, i32 addrspace(3)* %addr.by16 ; CHECK lines above expect a v_or_b32 and no offset here
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
-- 
2.7.4