From af162ac785dd6aa371afc0f111544c82d72c5242 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 12 Aug 2020 12:34:46 -0400 Subject: [PATCH] AMDGPU/GlobalISel: Fix using readfirstlane with ballot intrinsics This should use the default mapping and insert a copy to the vcc bank, and not try to insert a readfirstlane. --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 4 +- .../GlobalISel/regbankselect-amdgcn.ballot.i64.mir | 64 ++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index eb41e56..8f0f53d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2987,7 +2987,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl( constrainOpWithReadfirstlane(MI, MRI, 3); // Index return; } - case Intrinsic::amdgcn_ballot: case Intrinsic::amdgcn_interp_p1: case Intrinsic::amdgcn_interp_p2: case Intrinsic::amdgcn_interp_mov: @@ -3015,6 +3014,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl( case Intrinsic::amdgcn_ubfe: applyMappingBFEIntrinsic(OpdMapper, false); return; + case Intrinsic::amdgcn_ballot: + // Use default handling and insert copy to vcc source. + break; } break; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir new file mode 100644 index 0000000..15e422c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ballot.i64.mir @@ -0,0 +1,64 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: ballot_sgpr_src +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: ballot_sgpr_src + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; CHECK: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1) + ; CHECK: S_ENDPGM 0, implicit [[INT]](s64) + %0:_(s32) = COPY $sgpr0 + %1:_(s1) = G_TRUNC %0 + %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: ballot_vgpr_src +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ballot_vgpr_src + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; CHECK: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1) + ; CHECK: S_ENDPGM 0, implicit [[INT]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s1) = G_TRUNC %0 + %2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: ballot_vcc_src +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: ballot_vcc_src + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] + ; CHECK: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1) + ; CHECK: S_ENDPGM 0, implicit [[INT]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s1) = G_ICMP intpred(eq), %0, %1 + %3:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), %2 + S_ENDPGM 0, implicit %3 +... -- 2.7.4