From a2202f6a3f1c596d805755c2221a5b5527a1416b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 20 Feb 2020 14:00:56 -0500 Subject: [PATCH] AMDGPU/GlobalISel: Manually RegBankSelect copies This was failing on any pre-assigned copy to the VCC bank. This is something of a workaround for the default implementation in getInstrMappingImpl, and how it treats copy-like operations in general. Copy-like operations are considered to only have one result register bank, rather than separate banks for each source like a normal instruction. To avoid potentially mishandling reg_sequence with impossible operand combinations, the generic implementation errors on impossible costs. If the bank was already assigned, it is treated as if it were an unsatisfiable REG_SEQUENCE mapping. We really don't get any value from any of what getInstrMappingImpl tries to do for copies, so just directly emit the simple mapping we really want. --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 21 +++ .../AMDGPU/GlobalISel/regbankselect-copy.mir | 181 +++++++++++++++++++++ 2 files changed, 202 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index abea934..da0f6b0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3080,6 +3080,27 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (MI.isCopy()) { + // The default logic bothers to analyze impossible alternative mappings. We + // want the most straightforward mapping, so just directly handle this. 
+ const RegisterBank *DstBank = getRegBank(MI.getOperand(0).getReg(), MRI, + *TRI); + const RegisterBank *SrcBank = getRegBank(MI.getOperand(1).getReg(), MRI, + *TRI); + assert(SrcBank && "src bank should have been assigned already"); + if (!DstBank) + DstBank = SrcBank; + + unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); + if (cannotCopy(*DstBank, *SrcBank, Size)) + return getInvalidInstructionMapping(); + + const ValueMapping &ValMap = getValueMapping(0, Size, *DstBank); + return getInstructionMapping( + 1, /*Cost*/ 1, + /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1); + } + if (MI.isRegSequence()) { // If any input is a VGPR, the result must be a VGPR. The default handling // assumes any copy between banks is legal. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir new file mode 100644 index 0000000..00dd8f0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir @@ -0,0 +1,181 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: copy_s32_vgpr_to_vgpr +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: copy_s32_vgpr_to_vgpr + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: $vgpr0 = COPY [[COPY]](s32) + %0:_(s32) = COPY $vgpr0 + $vgpr0 = COPY %0 + +... + +--- +name: copy_s32_sgpr_to_sgpr +legalized: true + +body: | + bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: copy_s32_sgpr_to_sgpr + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: $sgpr0 = COPY [[COPY]](s32) + %0:_(s32) = COPY $sgpr0 + $sgpr0 = COPY %0 + +... 
+ +--- +name: copy_s32_sgpr_to_vgpr +legalized: true + +body: | + bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: copy_s32_sgpr_to_vgpr + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: $vgpr0 = COPY [[COPY]](s32) + %0:_(s32) = COPY $sgpr0 + $vgpr0 = COPY %0 + +... + +--- +name: copy_s32_vgpr_to_agpr +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: copy_s32_vgpr_to_agpr + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: $agpr0 = COPY [[COPY]](s32) + %0:_(s32) = COPY $vgpr0 + $agpr0 = COPY %0 + +... + +--- +name: copy_s32_sgpr_to_agpr +legalized: true + +body: | + bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: copy_s32_sgpr_to_agpr + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: $agpr0 = COPY [[COPY]](s32) + %0:_(s32) = COPY $sgpr0 + $agpr0 = COPY %0 + +... + +--- +name: copy_s32_agpr_to_vgpr +legalized: true + +body: | + bb.0: + liveins: $agpr0 + ; CHECK-LABEL: name: copy_s32_agpr_to_vgpr + ; CHECK: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 + ; CHECK: $vgpr0 = COPY [[COPY]](s32) + %0:_(s32) = COPY $agpr0 + $vgpr0 = COPY %0 + +... + +--- +name: copy_s32_agpr_to_agpr +legalized: true + +body: | + bb.0: + liveins: $agpr0 + ; CHECK-LABEL: name: copy_s32_agpr_to_agpr + ; CHECK: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0 + ; CHECK: $agpr0 = COPY [[COPY]](s32) + %0:_(s32) = COPY $agpr0 + $agpr0 = COPY %0 + +... + +--- +name: copy_s1_sgpr_to_vcc_preassigned +legalized: true + +body: | + bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: copy_s1_sgpr_to_vcc_preassigned + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; CHECK: S_ENDPGM 0, implicit [[COPY1]](s1) + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s1) = G_TRUNC %0 + %2:vcc(s1) = COPY %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: copy_s1_vgpr_to_vcc_preassigned +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: copy_s1_vgpr_to_vcc_preassigned + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; CHECK: S_ENDPGM 0, implicit [[COPY1]](s1) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s1) = G_TRUNC %0 + %2:vcc(s1) = COPY %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: copy_s1_sgpr_to_vcc +legalized: true + +body: | + bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: copy_s1_sgpr_to_vcc + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; CHECK: S_ENDPGM 0, implicit [[COPY1]](s1) + %0:_(s32) = COPY $sgpr0 + %1:_(s1) = G_TRUNC %0 + %2:vcc(s1) = COPY %1 + S_ENDPGM 0, implicit %2 +... + + +--- +name: copy_s1_vgpr_to_vcc +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: copy_s1_vgpr_to_vcc + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; CHECK: S_ENDPGM 0, implicit [[COPY1]](s1) + %0:_(s32) = COPY $vgpr0 + %1:_(s1) = G_TRUNC %0 + %2:vcc(s1) = COPY %1 + S_ENDPGM 0, implicit %2 +... -- 2.7.4