From 443a7f97882db4479866a11d0cc67fb2f1b1b4d2 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Tue, 27 Nov 2018 15:13:37 +0000
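Subject: [PATCH] [AMDGPU] Disable DAG combine at -O0

SITargetLowering::PerformDAGCombine now returns an empty SDValue up front
when the target machine's optimization level is CodeGenOpt::None, so no
combine is attempted through this hook at -O0. The per-case optimization
level check that guarded performMinMaxCombine is removed because the early
return makes it redundant.

Tests: dagcombine-setcc-select.ll no longer passes -O0 on its RUN line and
its checks are updated accordingly; the expected output in
indirect-addressing-si-noopt.ll changes from v_cndmask_b32 sequences to
s_mov_b32 m0 / v_movrels_b32.

Differential Revision: https://reviews.llvm.org/D54358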

llvm-svn: 347659
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp          | 11 ++--
 .../test/CodeGen/AMDGPU/dagcombine-setcc-select.ll | 60 ++++------------------
 .../CodeGen/AMDGPU/indirect-addressing-si-noopt.ll | 10 ++--
 3 files changed, 20 insertions(+), 61 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 230acbc..9f51980 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8637,6 +8637,9 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
 
 SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
+  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+    return SDValue();
+
   switch (N->getOpcode()) {
   default:
     return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
@@ -8662,12 +8665,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::UMAX:
   case ISD::UMIN:
   case AMDGPUISD::FMIN_LEGACY:
-  case AMDGPUISD::FMAX_LEGACY: {
-    if (//DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
-        getTargetMachine().getOptLevel() > CodeGenOpt::None)
-      return performMinMaxCombine(N, DCI);
-    break;
-  }
+  case AMDGPUISD::FMAX_LEGACY:
+    return performMinMaxCombine(N, DCI);
   case ISD::FMA:
     return performFMACombine(N, DCI);
   case ISD::LOAD: {
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll
index 5bcdaa0..9e1c58b 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll
@@ -1,19 +1,12 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -O0 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}eq_t:
 ; GCN-DAG: s_load_dword [[X:s[0-9]+]]
-; GCN-DAG: s_mov_b32 [[SONE:s[0-9]+]], 1.0
-; GCN-DAG: v_mov_b32_e32 [[VONE:v[0-9]+]], [[SONE]]
-; GCN:     v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[VONE]]{{$}}
+; GCN:     v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], 1.0{{$}}
 ; GCN-NOT: 0xddd5
 ; GCN-NOT: v_cndmask_b32
 ; GCN-NOT: v_cmp_eq_u32
-; GCN-NOT: v_cndmask_b32
-; GCN-DAG: s_mov_b32 [[STWO:s[0-9]+]], 2.0
-; GCN-DAG: v_mov_b32_e32 [[VTWO:v[0-9]+]], [[STWO]]
-; GCN-DAG: s_mov_b32 [[SFOUR:s[0-9]+]], 4.0
-; GCN-DAG: v_mov_b32_e32 [[VFOUR:v[0-9]+]], [[SFOUR]]
-; GCN:     v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[VTWO]], [[VFOUR]], [[CC]]
+; GCN:     v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], 2.0, 4.0, [[CC]]
 ; GCN:     store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
 define amdgpu_kernel void @eq_t(float %x) {
   %c1 = fcmp olt float %x, 1.0
@@ -26,18 +19,11 @@ define amdgpu_kernel void @eq_t(float %x) {
 
 ; GCN-LABEL: {{^}}ne_t:
 ; GCN-DAG: s_load_dword [[X:s[0-9]+]]
-; GCN-DAG: s_mov_b32 [[SONE:s[0-9]+]], 1.0
-; GCN-DAG: v_mov_b32_e32 [[VONE:v[0-9]+]], [[SONE]]
-; GCN:     v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[VONE]]{{$}}
+; GCN:     v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], 1.0{{$}}
 ; GCN-NOT: 0xddd5
 ; GCN-NOT: v_cndmask_b32
 ; GCN-NOT: v_cmp_eq_u32
-; GCN-NOT: v_cndmask_b32
-; GCN-DAG: s_mov_b32 [[STWO:s[0-9]+]], 2.0
-; GCN-DAG: v_mov_b32_e32 [[VTWO:v[0-9]+]], [[STWO]]
-; GCN-DAG: s_mov_b32 [[SFOUR:s[0-9]+]], 4.0
-; GCN-DAG: v_mov_b32_e32 [[VFOUR:v[0-9]+]], [[SFOUR]]
-; GCN:     v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[VFOUR]], [[VTWO]], [[CC]]
+; GCN:     v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], 4.0, 2.0, [[CC]]
 ; GCN:     store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
 define amdgpu_kernel void @ne_t(float %x) {
   %c1 = fcmp olt float %x, 1.0
@@ -50,18 +36,11 @@ define amdgpu_kernel void @ne_t(float %x) {
 
 ; GCN-LABEL: {{^}}eq_f:
 ; GCN-DAG: s_load_dword [[X:s[0-9]+]]
-; GCN-DAG: s_mov_b32 [[SONE:s[0-9]+]], 1.0
-; GCN-DAG: v_mov_b32_e32 [[VONE:v[0-9]+]], [[SONE]]
-; GCN:     v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[VONE]]{{$}}
+; GCN:     v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], 1.0{{$}}
 ; GCN-NOT: 0xddd5
 ; GCN-NOT: v_cndmask_b32
 ; GCN-NOT: v_cmp_eq_u32
-; GCN-NOT: v_cndmask_b32
-; GCN-DAG: s_mov_b32 [[STWO:s[0-9]+]], 2.0
-; GCN-DAG: v_mov_b32_e32 [[VTWO:v[0-9]+]], [[STWO]]
-; GCN-DAG: s_mov_b32 [[SFOUR:s[0-9]+]], 4.0
-; GCN-DAG: v_mov_b32_e32 [[VFOUR:v[0-9]+]], [[SFOUR]]
-; GCN:     v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[VFOUR]], [[VTWO]], [[CC]]
+; GCN:     v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], 4.0, 2.0, [[CC]]
 ; GCN:     store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
 define amdgpu_kernel void @eq_f(float %x) {
   %c1 = fcmp olt float %x, 1.0
@@ -74,18 +53,11 @@ define amdgpu_kernel void @eq_f(float %x) {
 
 ; GCN-LABEL: {{^}}ne_f:
 ; GCN-DAG: s_load_dword [[X:s[0-9]+]]
-; GCN-DAG: s_mov_b32 [[SONE:s[0-9]+]], 1.0
-; GCN-DAG: v_mov_b32_e32 [[VONE:v[0-9]+]], [[SONE]]
-; GCN:     v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[VONE]]{{$}}
+; GCN:     v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], 1.0{{$}}
 ; GCN-NOT: 0xddd5
 ; GCN-NOT: v_cndmask_b32
 ; GCN-NOT: v_cmp_eq_u32
-; GCN-NOT: v_cndmask_b32
-; GCN-DAG: s_mov_b32 [[STWO:s[0-9]+]], 2.0
-; GCN-DAG: v_mov_b32_e32 [[VTWO:v[0-9]+]], [[STWO]]
-; GCN-DAG: s_mov_b32 [[SFOUR:s[0-9]+]], 4.0
-; GCN-DAG: v_mov_b32_e32 [[VFOUR:v[0-9]+]], [[SFOUR]]
-; GCN:     v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[VTWO]], [[VFOUR]], [[CC]]
+; GCN:     v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], 2.0, 4.0, [[CC]]
 ; GCN:     store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
 define amdgpu_kernel void @ne_f(float %x) {
   %c1 = fcmp olt float %x, 1.0
@@ -97,18 +69,8 @@ define amdgpu_kernel void @ne_f(float %x) {
 }
 
 ; GCN-LABEL: {{^}}different_constants:
-; GCN-DAG: s_load_dword [[X:s[0-9]+]]
-; GCN-DAG: s_mov_b32 [[SONE:s[0-9]+]], 1.0
-; GCN-DAG: v_mov_b32_e32 [[VONE:v[0-9]+]], [[SONE]]
-; GCN-DAG: v_cmp_lt_f32_e{{32|64}} [[CC1:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[VONE]]{{$}}
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[CND1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, [[CC1]]
-; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC2:s\[[0-9]+:[0-9]+\]|vcc]], s{{[0-9]+}}, v{{[0-9]+}}{{$}}
-; GCN-DAG: s_mov_b32 [[STWO:s[0-9]+]], 2.0
-; GCN-DAG: v_mov_b32_e32 [[VTWO:v[0-9]+]], [[STWO]]
-; GCN-DAG: s_mov_b32 [[SFOUR:s[0-9]+]], 4.0
-; GCN-DAG: v_mov_b32_e32 [[VFOUR:v[0-9]+]], [[SFOUR]]
-; GCN:     v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[VTWO]], [[VFOUR]], [[CC2]]
-; GCN:     store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
+; GCN: v_mov_b32_e32 [[RES:v[0-9]+]], 2.0
+; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
 define amdgpu_kernel void @different_constants(float %x) {
   %c1 = fcmp olt float %x, 1.0
   %s1 = select i1 %c1, i32 56789, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
index 59abe69..693d06c 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
@@ -29,17 +29,15 @@ entry:
 ; CHECK: s_cbranch_scc1 [[BB4:BB[0-9]+_[0-9]+]]
 
 ; CHECK: buffer_load_dwordx4
-; CHECK: v_cndmask_b32_e64
-; CHECK: v_cndmask_b32_e64
-; CHECK: v_cndmask_b32_e64
+; CHECK: s_mov_b32 m0,
+; CHECK: v_movrels_b32_e32
 
 ; CHECK: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]
 
 ; CHECK: [[BB4]]:
 ; CHECK: buffer_load_dwordx4
-; CHECK: v_cndmask_b32_e64
-; CHECK: v_cndmask_b32_e64
-; CHECK: v_cndmask_b32_e64
+; CHECK: s_mov_b32 m0,
+; CHECK: v_movrels_b32_e32
 
 ; CHECK: [[ENDBB]]:
 ; CHECK: buffer_store_dword
-- 
2.7.4