AMDGPU/GlobalISel: RegBankSelect for some DS intrinsics
author: Matt Arsenault <Matthew.Arsenault@amd.com>
Sat, 29 Jun 2019 00:33:13 +0000 (00:33 +0000)
committer: Matt Arsenault <Matthew.Arsenault@amd.com>
Sat, 29 Jun 2019 00:33:13 +0000 (00:33 +0000)
llvm-svn: 364698

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir [new file with mode: 0644]
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir [new file with mode: 0644]
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir [new file with mode: 0644]
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmax.mir [new file with mode: 0644]
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmin.mir [new file with mode: 0644]
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir [new file with mode: 0644]

index d2e7e4d..a58b995 100644 (file)
@@ -1042,7 +1042,11 @@ AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
   SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
 
   for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
-    unsigned Size = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
+    const MachineOperand &Op = MI.getOperand(I);
+    if (!Op.isReg())
+      continue;
+
+    unsigned Size = getSizeInBits(Op.getReg(), MRI, *TRI);
     OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
   }
 
@@ -1503,6 +1507,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     case Intrinsic::amdgcn_udot8:
     case Intrinsic::amdgcn_fdiv_fast:
       return getDefaultMappingVOP(MI);
+    case Intrinsic::amdgcn_ds_permute:
+    case Intrinsic::amdgcn_ds_bpermute:
+      return getDefaultMappingAllVGPR(MI);
     case Intrinsic::amdgcn_kernarg_segment_ptr:
     case Intrinsic::amdgcn_s_getpc:
     case Intrinsic::amdgcn_groupstaticsize: {
@@ -1592,6 +1599,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
       break;
     }
+    case Intrinsic::amdgcn_ds_append:
+    case Intrinsic::amdgcn_ds_consume:
+    case Intrinsic::amdgcn_ds_fadd:
+    case Intrinsic::amdgcn_ds_fmin:
+    case Intrinsic::amdgcn_ds_fmax:
+      return getDefaultMappingAllVGPR(MI);
+    case Intrinsic::amdgcn_ds_ordered_add:
+    case Intrinsic::amdgcn_ds_ordered_swap:
+      return getInvalidInstructionMapping();
     case Intrinsic::amdgcn_exp_compr:
       OpdsMapping[0] = nullptr; // IntrinsicID
       // FIXME: These are immediate values which can't be read from registers.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir
new file mode 100644 (file)
index 0000000..0267f76
--- /dev/null
@@ -0,0 +1,36 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast  -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy  -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: ds_append_s
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; CHECK-LABEL: name: ds_append_s
+    ; CHECK: liveins: $sgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY1]](p3), 0
+    %0:_(p3) = COPY $sgpr0
+    %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
+
+...
+
+---
+name: ds_append_v
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: ds_append_v
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0
+    %0:_(p3) = COPY $vgpr0
+    %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir
new file mode 100644 (file)
index 0000000..996a01a
--- /dev/null
@@ -0,0 +1,24 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast  -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy  -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: ds_bpermute_ss
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: ds_bpermute_ss
+    ; CHECK: liveins: $sgpr0, $sgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), [[COPY2]](s32), [[COPY3]](s32)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), %0, %1
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir
new file mode 100644 (file)
index 0000000..50dd920
--- /dev/null
@@ -0,0 +1,36 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast  -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy  -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: ds_consume_s
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; CHECK-LABEL: name: ds_consume_s
+    ; CHECK: liveins: $sgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY1]](p3), 0
+    %0:_(p3) = COPY $sgpr0
+    %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
+
+...
+
+---
+name: ds_consume_v
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: ds_consume_v
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0
+    %0:_(p3) = COPY $vgpr0
+    %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmax.mir
new file mode 100644 (file)
index 0000000..5415dbf
--- /dev/null
@@ -0,0 +1,83 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: ds_fmax_ss
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: ds_fmax_ss
+    ; CHECK: liveins: $sgpr0, $sgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0
+    %0:_(p3) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), %0, %1, 0, 0, 0
+
+...
+
+---
+name: ds_fmax_sv
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: ds_fmax_sv
+    ; CHECK: liveins: $sgpr0, $vgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0
+    %0:_(p3) = COPY $sgpr0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), %0, %1, 0, 0, 0
+
+...
+
+---
+name: ds_fmax_vs
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0
+
+    ; CHECK-LABEL: name: ds_fmax_vs
+    ; CHECK: liveins: $vgpr0, $sgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0
+    %0:_(p3) = COPY $vgpr0
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), %0, %1, 0, 0, 0
+
+...
+
+---
+name: ds_fmax_vv
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: ds_fmax_vv
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0
+    %0:_(p3) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), %0, %1, 0, 0, 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmin.mir
new file mode 100644 (file)
index 0000000..0043de5
--- /dev/null
@@ -0,0 +1,83 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: ds_fmin_ss
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: ds_fmin_ss
+    ; CHECK: liveins: $sgpr0, $sgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0
+    %0:_(p3) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), %0, %1, 0, 0, 0
+
+...
+
+---
+name: ds_fmin_sv
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: ds_fmin_sv
+    ; CHECK: liveins: $sgpr0, $vgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0
+    %0:_(p3) = COPY $sgpr0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), %0, %1, 0, 0, 0
+
+...
+
+---
+name: ds_fmin_vs
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0
+
+    ; CHECK-LABEL: name: ds_fmin_vs
+    ; CHECK: liveins: $vgpr0, $sgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0
+    %0:_(p3) = COPY $vgpr0
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), %0, %1, 0, 0, 0
+
+...
+
+---
+name: ds_fmin_vv
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: ds_fmin_vv
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0
+    %0:_(p3) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), %0, %1, 0, 0, 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir
new file mode 100644 (file)
index 0000000..f622905
--- /dev/null
@@ -0,0 +1,24 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast  -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy  -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: ds_permute_ss
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: ds_permute_ss
+    ; CHECK: liveins: $sgpr0, $sgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), [[COPY2]](s32), [[COPY3]](s32)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), %0, %1
+
+...