From d1bfc8d0c3a89124464e92fddd8f5991e7975dd2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 31 Jan 2019 02:34:03 +0000 Subject: [PATCH] GlobalISel: Implement widenScalar for bswap llvm-svn: 352719 --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 25 +++++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 6 +- .../CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir | 125 +++++++++++++++++++++ 3 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index f8f39be..08ea4d9 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -948,7 +948,31 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_BSWAP: { + Observer.changingInstr(MI); + unsigned DstReg = MI.getOperand(0).getReg(); + + unsigned ShrReg = MRI.createGenericVirtualRegister(WideTy); + unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); + unsigned ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + + MI.getOperand(0).setReg(DstExt); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + + LLT Ty = MRI.getType(DstReg); + unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits(); + MIRBuilder.buildConstant(ShiftAmtReg, DiffBits); + MIRBuilder.buildInstr(TargetOpcode::G_LSHR) + .addDef(ShrReg) + .addUse(DstExt) + .addUse(ShiftAmtReg); + + MIRBuilder.buildTrunc(DstReg, ShrReg); + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_ADD: case TargetOpcode::G_AND: case TargetOpcode::G_MUL: @@ -1879,6 +1903,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FCOS: case G_FSIN: case G_FSQRT: + case G_BSWAP: return
fewerElementsVectorBasic(MI, TypeIdx, NarrowTy); case G_ZEXT: case G_SEXT: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index cf1c075..7f39295 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -220,8 +220,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, .clampScalar(1, S32, S64); // TODO: Scalarize + // TODO: Expand for > s32 + getActionDefinitionsBuilder(G_BSWAP) + .legalFor({S32}) + .clampScalar(0, S32, S32) + .scalarize(0); - setAction({G_BSWAP, S32}, Legal); getActionDefinitionsBuilder(G_INTTOPTR) .legalIf([](const LegalityQuery &Query) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir new file mode 100644 index 0000000..ed45be9 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir @@ -0,0 +1,125 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: bswap_s8 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: bswap_s8 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_TRUNC %0 + %2:_(s8) = G_BSWAP %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... 
+ +--- +name: bswap_s16 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: bswap_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s16) = G_BSWAP %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: bswap_s24 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: bswap_s24 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s24) = G_TRUNC %0 + %2:_(s24) = G_BSWAP %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: bswap_s32 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: bswap_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]] + ; CHECK: $vgpr0 = COPY [[BSWAP]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_BSWAP %0 + $vgpr0 = COPY %1 +... 
+ +--- +name: bswap_v2s16 + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: bswap_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[ANYEXT]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[ANYEXT1]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP1]], [[C1]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = G_BSWAP %0 + $vgpr0 = COPY %1 +... + +--- +name: bswap_v2s32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: bswap_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV]] + ; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_BSWAP %0 + $vgpr0_vgpr1 = COPY %1 +... + -- 2.7.4