From 41f32196a0b3850fb578b741fb81f44dbd30a68b Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Thu, 28 Feb 2019 23:37:48 +0000
Subject: [PATCH] AMDGPU/GlobalISel: Implement select for G_EXTRACT

Reviewers: arsenm

Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D49714

llvm-svn: 355156
---
Reviewer note: this revision differs from the committed change in
selectG_EXTRACT only. The selector now returns false for results that are not
32 bits wide or for offsets that are not 32-bit aligned, instead of relying on
an assert that disappears in release builds. Hunk line counts and the diffstat
were updated to match; the blob ids on the index lines were not regenerated.

 .../Target/AMDGPU/AMDGPUInstructionSelector.cpp    | 40 +++++++++++
 llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h |  1 +
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp          |  7 ++
 .../AMDGPU/GlobalISel/inst-select-extract.mir      | 77 ++++++++++++++++++++++
 4 files changed, 125 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 399ade5..0619f1f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -160,6 +160,44 @@ bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
   return true;
 }
 
+/// Select G_EXTRACT as a subregister COPY from the wide source register.
+///
+/// Only extracts of a single 32-bit value at a 32-bit aligned offset map onto
+/// a 32-bit subregister index; anything else is rejected so that selection
+/// fails cleanly instead of emitting a COPY of the wrong bits.
+bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  const unsigned DstReg = I.getOperand(0).getReg();
+  const unsigned SrcReg = I.getOperand(1).getReg();
+  const unsigned Offset = I.getOperand(2).getImm();
+
+  // An assert alone would let release builds pick the wrong subregister.
+  // TODO: Handle results wider than 32 bits with multi-dword subregisters.
+  if (Offset % 32 != 0 || MRI.getType(DstReg).getSizeInBits() != 32)
+    return false;
+
+  const unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32);
+  const DebugLoc &DL = I.getDebugLoc();
+  MachineInstr *Copy =
+      BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
+          .addReg(SrcReg, 0, SubReg);
+
+  // Give both the result and the source a concrete register class for their
+  // assigned bank; operands without a known bank are left untouched.
+  for (const MachineOperand &MO : Copy->operands()) {
+    const TargetRegisterClass *RC =
+        TRI.getConstrainedRegClassForOperand(MO, MRI);
+    if (!RC)
+      continue;
+    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
+  }
+  I.eraseFromParent();
+  return true;
+}
+
 bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
   return selectG_ADD(I);
 }
@@ -509,6 +547,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
   case TargetOpcode::G_CONSTANT:
   case TargetOpcode::G_FCONSTANT:
     return selectG_CONSTANT(I);
+  case TargetOpcode::G_EXTRACT:
+    return selectG_EXTRACT(I);
   case TargetOpcode::G_GEP:
     return selectG_GEP(I);
   case TargetOpcode::G_IMPLICIT_DEF:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 639585a..dea13b9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -65,6 +65,7 @@ private:
   bool selectCOPY(MachineInstr &I) const;
   bool selectG_CONSTANT(MachineInstr &I) const;
   bool selectG_ADD(MachineInstr &I) const;
+  bool selectG_EXTRACT(MachineInstr &I) const;
   bool selectG_GEP(MachineInstr &I) const;
   bool selectG_IMPLICIT_DEF(MachineInstr &I) const;
   bool selectG_INTRINSIC(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index fdbafd9..11b03c2 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1587,6 +1587,7 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
   if (!RB)
     return nullptr;
 
+  Size = PowerOf2Ceil(Size);
   switch (Size) {
   case 32:
     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
@@ -1600,6 +1601,12 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
   case 128:
     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
                                                   &AMDGPU::SReg_128RegClass;
+  case 256:
+    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
+                                                  &AMDGPU::SReg_256RegClass;
+  case 512:
+    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
+                                                  &AMDGPU::SReg_512RegClass;
   default:
     llvm_unreachable("not implemented");
   }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
new file mode 100644
index 0000000..5f4a1b4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
@@ -0,0 +1,77 @@
+# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
+---
+name: extract512
+legalized: true
+regBankSelected: true
+
+# CHECK-LABEL: extract512
+# CHECK: [[BASE:%[0-9]+]]:sreg_512 = IMPLICIT_DEF
+# CHECK: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub0
+# CHECK: [[SGPR1:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub1
+# CHECK: [[SGPR2:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub2
+# CHECK: [[SGPR3:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub3
+# CHECK: [[SGPR4:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub4
+# CHECK: [[SGPR5:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub5
+# CHECK: [[SGPR6:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub6
+# CHECK: [[SGPR7:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub7
+# CHECK: [[SGPR8:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub8
+# CHECK: [[SGPR9:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub9
+# CHECK: [[SGPR10:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub10
+# CHECK: [[SGPR11:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub11
+# CHECK: [[SGPR12:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub12
+# CHECK: [[SGPR13:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub13
+# CHECK: [[SGPR14:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub14
+# CHECK: [[SGPR15:%[0-9]+]]:sreg_32_xm0 = COPY [[BASE]].sub15
+# CHECK: $sgpr0 = COPY [[SGPR0]]
+# CHECK: $sgpr1 = COPY [[SGPR1]]
+# CHECK: $sgpr2 = COPY [[SGPR2]]
+# CHECK: $sgpr3 = COPY [[SGPR3]]
+# CHECK: $sgpr4 = COPY [[SGPR4]]
+# CHECK: $sgpr5 = COPY [[SGPR5]]
+# CHECK: $sgpr6 = COPY [[SGPR6]]
+# CHECK: $sgpr7 = COPY [[SGPR7]]
+# CHECK: $sgpr8 = COPY [[SGPR8]]
+# CHECK: $sgpr9 = COPY [[SGPR9]]
+# CHECK: $sgpr10 = COPY [[SGPR10]]
+# CHECK: $sgpr11 = COPY [[SGPR11]]
+# CHECK: $sgpr12 = COPY [[SGPR12]]
+# CHECK: $sgpr13 = COPY [[SGPR13]]
+# CHECK: $sgpr14 = COPY [[SGPR14]]
+# CHECK: $sgpr15 = COPY [[SGPR15]]
+
+body: |
+  bb.0:
+    %0:sgpr(s512) = G_IMPLICIT_DEF
+    %1:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 0
+    %2:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 32
+    %3:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 64
+    %4:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 96
+    %5:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 128
+    %6:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 160
+    %7:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 192
+    %8:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 224
+    %9:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 256
+    %10:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 288
+    %11:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 320
+    %12:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 352
+    %13:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 384
+    %14:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 416
+    %15:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 448
+    %16:sgpr(s32) = G_EXTRACT %0:sgpr(s512), 480
+    $sgpr0 = COPY %1:sgpr(s32)
+    $sgpr1 = COPY %2:sgpr(s32)
+    $sgpr2 = COPY %3:sgpr(s32)
+    $sgpr3 = COPY %4:sgpr(s32)
+    $sgpr4 = COPY %5:sgpr(s32)
+    $sgpr5 = COPY %6:sgpr(s32)
+    $sgpr6 = COPY %7:sgpr(s32)
+    $sgpr7 = COPY %8:sgpr(s32)
+    $sgpr8 = COPY %9:sgpr(s32)
+    $sgpr9 = COPY %10:sgpr(s32)
+    $sgpr10 = COPY %11:sgpr(s32)
+    $sgpr11 = COPY %12:sgpr(s32)
+    $sgpr12 = COPY %13:sgpr(s32)
+    $sgpr13 = COPY %14:sgpr(s32)
+    $sgpr14 = COPY %15:sgpr(s32)
+    $sgpr15 = COPY %16:sgpr(s32)
+    SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15
-- 
2.7.4