GlobalISel: Implement widenScalar for bswap
author Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 31 Jan 2019 02:34:03 +0000 (02:34 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 31 Jan 2019 02:34:03 +0000 (02:34 +0000)
llvm-svn: 352719

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir [new file with mode: 0644]

index f8f39be..08ea4d9 100644 (file)
@@ -948,7 +948,31 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     Observer.changedInstr(MI);
     return Legalized;
   }
+  case TargetOpcode::G_BSWAP: {
+    Observer.changingInstr(MI);
+    unsigned DstReg = MI.getOperand(0).getReg();
+
+    unsigned ShrReg = MRI.createGenericVirtualRegister(WideTy);
+    unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+    unsigned ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
+    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+
+    MI.getOperand(0).setReg(DstExt);
 
+    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
+    LLT Ty = MRI.getType(DstReg);
+    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
+    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
+    MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
+      .addDef(ShrReg)
+      .addUse(DstExt)
+      .addUse(ShiftAmtReg);
+
+    MIRBuilder.buildTrunc(DstReg, ShrReg);
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
   case TargetOpcode::G_ADD:
   case TargetOpcode::G_AND:
   case TargetOpcode::G_MUL:
@@ -1879,6 +1903,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case G_FCOS:
   case G_FSIN:
   case G_FSQRT:
+  case G_BSWAP:
     return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
   case G_ZEXT:
   case G_SEXT:
index cf1c075..7f39295 100644 (file)
@@ -220,8 +220,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
     .clampScalar(1, S32, S64);
   // TODO: Scalarize
 
+  // TODO: Expand for > s32
+  getActionDefinitionsBuilder(G_BSWAP)
+    .legalFor({S32})
+    .clampScalar(0, S32, S32)
+    .scalarize(0);
 
-  setAction({G_BSWAP, S32}, Legal);
 
   getActionDefinitionsBuilder(G_INTTOPTR)
     .legalIf([](const LegalityQuery &Query) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bswap.mir
new file mode 100644 (file)
index 0000000..ed45be9
--- /dev/null
@@ -0,0 +1,125 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
+
+---
+name: bswap_s8
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: bswap_s8
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]]
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s8) = G_TRUNC %0
+    %2:_(s8) = G_BSWAP %1
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: bswap_s16
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: bswap_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]]
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s16) = G_BSWAP %1
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: bswap_s24
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: bswap_s24
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY1]]
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s24) = G_TRUNC %0
+    %2:_(s24) = G_BSWAP %1
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: bswap_s32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: bswap_s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]]
+    ; CHECK: $vgpr0 = COPY [[BSWAP]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_BSWAP %0
+    $vgpr0 = COPY %1
+...
+
+---
+name: bswap_v2s16
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: bswap_v2s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+    ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[ANYEXT]]
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s32)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+    ; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[ANYEXT1]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP1]], [[C1]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+    ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = G_BSWAP %0
+    $vgpr0 = COPY %1
+...
+
+---
+name: bswap_v2s32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: bswap_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[UV]]
+    ; CHECK: [[BSWAP1:%[0-9]+]]:_(s32) = G_BSWAP [[UV1]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BSWAP]](s32), [[BSWAP1]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s32>) = G_BSWAP %0
+    $vgpr0_vgpr1 = COPY %1
+...
+