From 182f9248e8f2c11e5aeeb08263c5b56dbf1ea9c6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 9 Sep 2019 17:04:18 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Legalize G_BUILD_VECTOR_TRUNC Treat this as legal on gfx9 since it can use S_PACK_* instructions for this. This isn't used by anything yet. The same will probably apply to 16-bit G_BUILD_VECTOR without the trunc. llvm-svn: 371423 --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 9 +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 57 +++++++++++++++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4 ++ .../GlobalISel/legalize-build-vector-trunc.mir | 19 +++++ .../regbankselect-build-vector-trunc.mir | 83 ++++++++++++++++++++++ 5 files changed, 172 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector-trunc.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 6a08f04..82c738d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -719,6 +719,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .legalIf(isRegisterType(0)) .minScalarOrElt(0, S32); + if (ST.hasScalarPackInsts()) { + getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC) + .legalFor({V2S16, S32}) + .lower(); + } else { + getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC) + .lower(); + } + getActionDefinitionsBuilder(G_CONCAT_VECTORS) .legalIf(isRegisterType(0)); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index c573111..4a245099 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1305,6 +1305,44 @@ void AMDGPURegisterBankInfo::applyMappingImpl( MI.eraseFromParent(); return; } + case AMDGPU::G_BUILD_VECTOR_TRUNC: { + assert(MI.getNumOperands() == 3 && empty(OpdMapper.getVRegs(0))); + substituteSimpleCopyRegs(OpdMapper, 1); + substituteSimpleCopyRegs(OpdMapper, 2); + + Register DstReg = MI.getOperand(0).getReg(); + const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI); + if (DstBank == &AMDGPU::SGPRRegBank) + break; // Can use S_PACK_* instructions. + + MachineIRBuilder B(MI); + + Register Lo = MI.getOperand(1).getReg(); + Register Hi = MI.getOperand(2).getReg(); + + const RegisterBank *BankLo = getRegBank(Lo, MRI, *TRI); + const RegisterBank *BankHi = getRegBank(Hi, MRI, *TRI); + + const LLT S32 = LLT::scalar(32); + auto MaskLo = B.buildConstant(S32, 0xffff); + MRI.setRegBank(MaskLo.getReg(0), *BankLo); + + auto ShiftAmt = B.buildConstant(S32, 16); + MRI.setRegBank(ShiftAmt.getReg(0), *BankHi); + + auto ShiftHi = B.buildShl(S32, Hi, ShiftAmt); + MRI.setRegBank(ShiftHi.getReg(0), *BankHi); + + auto Masked = B.buildAnd(S32, Lo, MaskLo); + MRI.setRegBank(Masked.getReg(0), *BankLo); + + auto Or = B.buildOr(S32, Masked, ShiftHi); + MRI.setRegBank(Or.getReg(0), *DstBank); + + B.buildBitcast(DstReg, Or); + MI.eraseFromParent(); + return; + } case AMDGPU::G_EXTRACT_VECTOR_ELT: applyDefaultMapping(OpdMapper); executeInWaterfallLoop(MI, MRI, { 2 }); @@ -1513,6 +1551,11 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg, return Bank ? Bank->getID() : Default; } +static unsigned regBankUnion(unsigned RB0, unsigned RB1) { + return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ? + AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; +} + /// /// This function must return a legal mapping, because /// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called @@ -1774,6 +1817,20 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize); break; } + case AMDGPU::G_BUILD_VECTOR_TRUNC: { + assert(MI.getNumOperands() == 3); + + unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI); + unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); + unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID); + + OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize); + OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize); + OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize); + break; + } case AMDGPU::G_BITCAST: case AMDGPU::G_INTTOPTR: case AMDGPU::G_PTRTOINT: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 2da723a..c9d305a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -551,6 +551,10 @@ public: return GFX9Insts; } + bool hasScalarPackInsts() const { + return GFX9Insts; + } + TrapHandlerAbi getTrapHandlerAbi() const { return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir new file mode 100644 index 0000000..b57c036 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir @@ -0,0 +1,19 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: legal_s32_to_v2s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: legal_s32_to_v2s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) + ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 + S_NOP 0, implicit %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector-trunc.mir new file mode 100644 index 0000000..9a9ef25 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-build-vector-trunc.mir @@ -0,0 +1,83 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: build_vector_trunc_v2s16_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_ss + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 +... + +--- +name: build_vector_trunc_v2s16_s32_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_sv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 + ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY]], [[C]] + ; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 +... + +--- +name: build_vector_trunc_v2s16_s32_vs +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + ; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_vs + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 65535 + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]] + ; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 +... + +--- +name: build_vector_trunc_v2s16_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: build_vector_trunc_v2s16_s32_vv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 65535 + ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:vgpr(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; CHECK: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY]], [[C]] + ; CHECK: [[OR:%[0-9]+]]:vgpr(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(<2 x s16>) = G_BITCAST [[OR]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1 +... -- 2.7.4