From 443a7f97882db4479866a11d0cc67fb2f1b1b4d2 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 27 Nov 2018 15:13:37 +0000 Subject: [PATCH] [AMDGPU] Disable DAG combine at -O0 Differential Revision: https://reviews.llvm.org/D54358 llvm-svn: 347659 --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 11 ++-- .../test/CodeGen/AMDGPU/dagcombine-setcc-select.ll | 60 ++++------------------ .../CodeGen/AMDGPU/indirect-addressing-si-noopt.ll | 10 ++-- 3 files changed, 20 insertions(+), 61 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 230acbc..9f51980 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8637,6 +8637,9 @@ SDValue SITargetLowering::performClampCombine(SDNode *N, SDValue SITargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return SDValue(); + switch (N->getOpcode()) { default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); @@ -8662,12 +8665,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, case ISD::UMAX: case ISD::UMIN: case AMDGPUISD::FMIN_LEGACY: - case AMDGPUISD::FMAX_LEGACY: { - if (//DCI.getDAGCombineLevel() >= AfterLegalizeDAG && - getTargetMachine().getOptLevel() > CodeGenOpt::None) - return performMinMaxCombine(N, DCI); - break; - } + case AMDGPUISD::FMAX_LEGACY: + return performMinMaxCombine(N, DCI); case ISD::FMA: return performFMACombine(N, DCI); case ISD::LOAD: { diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll index 5bcdaa0..9e1c58b 100644 --- a/llvm/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll @@ -1,19 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -O0 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}eq_t: ; GCN-DAG: s_load_dword [[X:s[0-9]+]] -; GCN-DAG: s_mov_b32 [[SONE:s[0-9]+]], 1.0 -; GCN-DAG: v_mov_b32_e32 [[VONE:v[0-9]+]], [[SONE]] -; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[VONE]]{{$}} +; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], 1.0{{$}} ; GCN-NOT: 0xddd5 ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp_eq_u32 -; GCN-NOT: v_cndmask_b32 -; GCN-DAG: s_mov_b32 [[STWO:s[0-9]+]], 2.0 -; GCN-DAG: v_mov_b32_e32 [[VTWO:v[0-9]+]], [[STWO]] -; GCN-DAG: s_mov_b32 [[SFOUR:s[0-9]+]], 4.0 -; GCN-DAG: v_mov_b32_e32 [[VFOUR:v[0-9]+]], [[SFOUR]] -; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[VTWO]], [[VFOUR]], [[CC]] +; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], 2.0, 4.0, [[CC]] ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @eq_t(float %x) { %c1 = fcmp olt float %x, 1.0 @@ -26,18 +19,11 @@ define amdgpu_kernel void @eq_t(float %x) { ; GCN-LABEL: {{^}}ne_t: ; GCN-DAG: s_load_dword [[X:s[0-9]+]] -; GCN-DAG: s_mov_b32 [[SONE:s[0-9]+]], 1.0 -; GCN-DAG: v_mov_b32_e32 [[VONE:v[0-9]+]], [[SONE]] -; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[VONE]]{{$}} +; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], 1.0{{$}} ; GCN-NOT: 0xddd5 ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp_eq_u32 -; GCN-NOT: v_cndmask_b32 -; GCN-DAG: s_mov_b32 [[STWO:s[0-9]+]], 2.0 -; GCN-DAG: v_mov_b32_e32 [[VTWO:v[0-9]+]], [[STWO]] -; GCN-DAG: s_mov_b32 [[SFOUR:s[0-9]+]], 4.0 -; GCN-DAG: v_mov_b32_e32 [[VFOUR:v[0-9]+]], [[SFOUR]] -; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[VFOUR]], [[VTWO]], [[CC]] +; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], 4.0, 2.0, [[CC]] ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @ne_t(float %x) { %c1 = fcmp olt float %x, 1.0 @@ -50,18 +36,11 @@ define amdgpu_kernel void @ne_t(float %x) { ; GCN-LABEL: {{^}}eq_f: ; GCN-DAG: s_load_dword [[X:s[0-9]+]] -; GCN-DAG: s_mov_b32 [[SONE:s[0-9]+]], 1.0 -; GCN-DAG: v_mov_b32_e32 [[VONE:v[0-9]+]], [[SONE]] -; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[VONE]]{{$}} +; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], 1.0{{$}} ; GCN-NOT: 0xddd5 ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp_eq_u32 -; GCN-NOT: v_cndmask_b32 -; GCN-DAG: s_mov_b32 [[STWO:s[0-9]+]], 2.0 -; GCN-DAG: v_mov_b32_e32 [[VTWO:v[0-9]+]], [[STWO]] -; GCN-DAG: s_mov_b32 [[SFOUR:s[0-9]+]], 4.0 -; GCN-DAG: v_mov_b32_e32 [[VFOUR:v[0-9]+]], [[SFOUR]] -; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[VFOUR]], [[VTWO]], [[CC]] +; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], 4.0, 2.0, [[CC]] ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @eq_f(float %x) { %c1 = fcmp olt float %x, 1.0 @@ -74,18 +53,11 @@ define amdgpu_kernel void @eq_f(float %x) { ; GCN-LABEL: {{^}}ne_f: ; GCN-DAG: s_load_dword [[X:s[0-9]+]] -; GCN-DAG: s_mov_b32 [[SONE:s[0-9]+]], 1.0 -; GCN-DAG: v_mov_b32_e32 [[VONE:v[0-9]+]], [[SONE]] -; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[VONE]]{{$}} +; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], 1.0{{$}} ; GCN-NOT: 0xddd5 ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp_eq_u32 -; GCN-NOT: v_cndmask_b32 -; GCN-DAG: s_mov_b32 [[STWO:s[0-9]+]], 2.0 -; GCN-DAG: v_mov_b32_e32 [[VTWO:v[0-9]+]], [[STWO]] -; GCN-DAG: s_mov_b32 [[SFOUR:s[0-9]+]], 4.0 -; GCN-DAG: v_mov_b32_e32 [[VFOUR:v[0-9]+]], [[SFOUR]] -; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[VTWO]], [[VFOUR]], [[CC]] +; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], 2.0, 4.0, [[CC]] ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @ne_f(float %x) { %c1 = fcmp olt float %x, 1.0 @@ -97,18 +69,8 @@ define amdgpu_kernel void @ne_f(float %x) { } ; GCN-LABEL: {{^}}different_constants: -; GCN-DAG: s_load_dword [[X:s[0-9]+]] -; GCN-DAG: s_mov_b32 [[SONE:s[0-9]+]], 1.0 -; GCN-DAG: v_mov_b32_e32 [[VONE:v[0-9]+]], [[SONE]] -; GCN-DAG: v_cmp_lt_f32_e{{32|64}} [[CC1:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[VONE]]{{$}} -; GCN-DAG: v_cndmask_b32_e{{32|64}} [[CND1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, [[CC1]] -; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC2:s\[[0-9]+:[0-9]+\]|vcc]], s{{[0-9]+}}, v{{[0-9]+}}{{$}} -; GCN-DAG: s_mov_b32 [[STWO:s[0-9]+]], 2.0 -; GCN-DAG: v_mov_b32_e32 [[VTWO:v[0-9]+]], [[STWO]] -; GCN-DAG: s_mov_b32 [[SFOUR:s[0-9]+]], 4.0 -; GCN-DAG: v_mov_b32_e32 [[VFOUR:v[0-9]+]], [[SFOUR]] -; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[VTWO]], [[VFOUR]], [[CC2]] -; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} +; GCN: v_mov_b32_e32 [[RES:v[0-9]+]], 2.0 +; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @different_constants(float %x) { %c1 = fcmp olt float %x, 1.0 %s1 = select i1 %c1, i32 56789, i32 1 diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll index 59abe69..693d06c 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll @@ -29,17 +29,15 @@ entry: ; CHECK: s_cbranch_scc1 [[BB4:BB[0-9]+_[0-9]+]] ; CHECK: buffer_load_dwordx4 -; CHECK: v_cndmask_b32_e64 -; CHECK: v_cndmask_b32_e64 -; CHECK: v_cndmask_b32_e64 +; CHECK: s_mov_b32 m0, +; CHECK: v_movrels_b32_e32 ; CHECK: s_branch [[ENDBB:BB[0-9]+_[0-9]+]] ; CHECK: [[BB4]]: ; CHECK: buffer_load_dwordx4 -; CHECK: v_cndmask_b32_e64 -; CHECK: v_cndmask_b32_e64 -; CHECK: v_cndmask_b32_e64 +; CHECK: s_mov_b32 m0, +; CHECK: v_movrels_b32_e32 ; CHECK: [[ENDBB]]: ; CHECK: buffer_store_dword -- 2.7.4