From 045a8921d74d99e21b454ee8ef4f97d6c81b8cc1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 6 Feb 2020 21:54:41 -0500 Subject: [PATCH] AMDGPU/GlobalISel: Select G_CTLZ_ZERO_UNDEF Directly select this rather than going through the intermediate instruction, which may provide some combine value in the future. --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 2 +- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 8 ++- llvm/lib/Target/AMDGPU/SOPInstructions.td | 4 +- .../GlobalISel/inst-select-ctlz-zero-undef.mir | 82 ++++++++++++++++++++++ 4 files changed, 92 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 6396e87..3dcef2f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -140,7 +140,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; -def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index aaeace0..03a62e9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -283,8 +283,8 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>; def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>; def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>; -def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>; -def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>; +def AMDGPUffbh_u32_impl : SDNode<"AMDGPUISD::FFBH_U32", SDTIntBitCountUnaryOp>; +def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>; def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>; @@ -421,6 +421,10 @@ def AMDGPUffbh_i32 : PatFrags<(ops node:$src), [(int_amdgcn_sffbh node:$src), (AMDGPUffbh_i32_impl node:$src)]>; +def AMDGPUffbh_u32 : PatFrags<(ops node:$src), + [(ctlz_zero_undef node:$src), + (AMDGPUffbh_u32_impl node:$src)]>; + def AMDGPUpkrtz_f16_f32 : PatFrags<(ops node:$src0, node:$src1), [(int_amdgcn_cvt_pkrtz node:$src0, node:$src1), (AMDGPUpkrtz_f16_f32_impl node:$src0, node:$src1)]>; diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 93e6458..760078d 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -220,7 +220,9 @@ def S_FLBIT_I32_B32 : SOP1_32 <"s_flbit_i32_b32", [(set i32:$sdst, (AMDGPUffbh_u32 i32:$src0))] >; -def S_FLBIT_I32_B64 : SOP1_32_64 <"s_flbit_i32_b64">; +def S_FLBIT_I32_B64 : SOP1_32_64 <"s_flbit_i32_b64", + [(set i32:$sdst, (AMDGPUffbh_u32 i64:$src0))] +>; def S_FLBIT_I32 : SOP1_32 <"s_flbit_i32", [(set i32:$sdst, (AMDGPUffbh_i32 i32:$src0))] >; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir new file mode 100644 index 0000000..8f51caa --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir @@ -0,0 +1,82 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: ctlz_zero_undef_s32_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ctlz_zero_undef_s32_ss + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK: [[S_FLBIT_I32_B32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B32 [[COPY]] + ; CHECK: S_ENDPGM 0, implicit [[S_FLBIT_I32_B32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_CTLZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: ctlz_zero_undef_s32_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: ctlz_zero_undef_s32_vs + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = G_CTLZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: ctlz_zero_undef_s32_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: ctlz_zero_undef_s32_vv + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_CTLZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: ctlz_zero_undef_s64_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: ctlz_zero_undef_s64_ss + ; CHECK: liveins: $sgpr0_sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[S_FLBIT_I32_B64_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B64 [[COPY]] + ; CHECK: S_ENDPGM 0, implicit [[S_FLBIT_I32_B64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s32) = G_CTLZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 +... -- 2.7.4