From 8de2dad9e0d729663f83d80f9a9e9b98ef41992e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 6 Feb 2020 21:11:52 -0500 Subject: [PATCH] GlobalISel: Fix lowering of G_CTLZ/G_CTTZ The type passed to lower was invalid, so I'm not sure how this was even working before. The source and destination type also do not have to match, so make sure to use the right ones. --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 50 +++++---- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 27 ++++- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 - .../CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir | 123 +++++++++++++++------ .../CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir | 102 ++++++++++++----- .../AMDGPU/GlobalISel/regbankselect-ctlz.mir | 31 ------ .../AMDGPU/GlobalISel/regbankselect-cttz.mir | 31 ------ .../ARM/GlobalISel/arm-legalize-bitcounts.mir | 4 +- .../CodeGen/GlobalISel/LegalizerHelperTest.cpp | 30 ++--- 9 files changed, 234 insertions(+), 166 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz.mir delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz.mir diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 4a9b295..88da57b 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4077,16 +4077,20 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return Legalized; } case TargetOpcode::G_CTLZ: { + Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); - unsigned Len = Ty.getSizeInBits(); - if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) { + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + unsigned Len = SrcTy.getSizeInBits(); + + if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) { // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero. - auto MIBCtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(Ty, SrcReg); - auto MIBZero = MIRBuilder.buildConstant(Ty, 0); - auto MIBLen = MIRBuilder.buildConstant(Ty, Len); - auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), - SrcReg, MIBZero); - MIRBuilder.buildSelect(MI.getOperand(0), MIBICmp, MIBLen, MIBCtlzZU); + auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg); + auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0); + auto ICmp = MIRBuilder.buildICmp( + CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc); + auto LenConst = MIRBuilder.buildConstant(DstTy, Len); + MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU); MI.eraseFromParent(); return Legalized; } @@ -4104,13 +4108,13 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { Register Op = SrcReg; unsigned NewLen = PowerOf2Ceil(Len); for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) { - auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i); - auto MIBOp = - MIRBuilder.buildOr(Ty, Op, MIRBuilder.buildLShr(Ty, Op, MIBShiftAmt)); + auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i); + auto MIBOp = MIRBuilder.buildOr( + SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt)); Op = MIBOp.getReg(0); } - auto MIBPop = MIRBuilder.buildCTPOP(Ty, Op); - MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(Ty, Len), + auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op); + MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len), MIBPop); MI.eraseFromParent(); return Legalized; @@ -4123,17 +4127,21 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return Legalized; } case TargetOpcode::G_CTTZ: { + Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); - unsigned Len = Ty.getSizeInBits(); - if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) { + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + + unsigned Len = SrcTy.getSizeInBits(); + if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) { // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with // zero. - auto MIBCttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(Ty, SrcReg); - auto MIBZero = MIRBuilder.buildConstant(Ty, 0); - auto MIBLen = MIRBuilder.buildConstant(Ty, Len); - auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), - SrcReg, MIBZero); - MIRBuilder.buildSelect(MI.getOperand(0), MIBICmp, MIBLen, MIBCttzZU); + auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg); + auto Zero = MIRBuilder.buildConstant(SrcTy, 0); + auto ICmp = MIRBuilder.buildICmp( + CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero); + auto LenConst = MIRBuilder.buildConstant(DstTy, Len); + MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU); MI.eraseFromParent(); return Legalized; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 78eaab4..88ace05 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -465,7 +465,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .legalFor({{S64, S32}, {S32, S16}, {S64, S16}, {S32, S1}, {S64, S1}, {S16, S1}}) .scalarize(0) - .clampScalar(0, S32, S64); + .clampScalar(0, S32, S64) + .widenScalarToNextPow2(1, 32); // TODO: Split s1->s64 during regbankselect for VALU. auto &IToFP = getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) @@ -566,9 +567,27 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0); // The 64-bit versions produce 32-bit results, but only on the SALU. - getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF, - G_CTTZ, G_CTTZ_ZERO_UNDEF, - G_CTPOP}) + getActionDefinitionsBuilder(G_CTPOP) + .legalFor({{S32, S32}, {S32, S64}}) + .clampScalar(0, S32, S32) + .clampScalar(1, S32, S64) + .scalarize(0) + .widenScalarToNextPow2(0, 32) + .widenScalarToNextPow2(1, 32); + + // The hardware instructions return a different result on 0 than the generic + // instructions expect. The hardware produces -1, but these produce the + // bitwidth. + getActionDefinitionsBuilder({G_CTLZ, G_CTTZ}) + .scalarize(0) + .clampScalar(0, S32, S32) + .clampScalar(1, S32, S64) + .widenScalarToNextPow2(0, 32) + .widenScalarToNextPow2(1, 32) + .lower(); + + // The 64-bit versions produce 32-bit results, but only on the SALU. + getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF}) .legalFor({{S32, S32}, {S32, S64}}) .clampScalar(0, S32, S32) .clampScalar(1, S32, S64) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index dedb7b0..8523fad 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2947,9 +2947,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_BITCAST: case AMDGPU::G_INTTOPTR: case AMDGPU::G_PTRTOINT: - case AMDGPU::G_CTLZ: case AMDGPU::G_CTLZ_ZERO_UNDEF: - case AMDGPU::G_CTTZ: case AMDGPU::G_CTTZ_ZERO_UNDEF: case AMDGPU::G_CTPOP: case AMDGPU::G_BSWAP: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir index c7dcb54..373fe97 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir @@ -9,8 +9,12 @@ body: | liveins: $vgpr0 ; CHECK-LABEL: name: ctlz_s32_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32) - ; CHECK: $vgpr0 = COPY [[CTLZ]](s32) + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTLZ_ZERO_UNDEF]] + ; CHECK: $vgpr0 = COPY [[SELECT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CTLZ %0 $vgpr0 = COPY %1 @@ -24,8 +28,12 @@ body: | liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: ctlz_s32_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s64) - ; CHECK: $vgpr0 = COPY [[CTLZ]](s32) + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTLZ_ZERO_UNDEF]] + ; CHECK: $vgpr0 = COPY [[SELECT]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CTLZ %0 $vgpr0 = COPY %1 @@ -39,8 +47,12 @@ body: | liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: ctlz_s64_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s64) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTLZ]](s32) + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTLZ_ZERO_UNDEF]] + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[SELECT]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_CTLZ %0 @@ -55,10 +67,14 @@ body: | liveins: $vgpr0 ; CHECK-LABEL: name: ctlz_s16_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTLZ]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTLZ_ZERO_UNDEF]] + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; CHECK: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_CTLZ %0 @@ -77,9 +93,13 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]] + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[CTLZ_ZERO_UNDEF]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C3]] ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] @@ -100,9 +120,15 @@ body: | ; CHECK-LABEL: name: ctlz_v2s32_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[UV]](s32) - ; CHECK: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[UV1]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTLZ]](s32), [[CTLZ1]](s32) + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTLZ_ZERO_UNDEF]] + ; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s32) + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C]] + ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[CTLZ_ZERO_UNDEF1]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_CTLZ %0 @@ -118,9 +144,15 @@ body: | ; CHECK-LABEL: name: ctlz_v2s32_v2s64 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[UV]](s64) - ; CHECK: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[UV1]](s64) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTLZ]](s32), [[CTLZ1]](s32) + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s64), [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTLZ_ZERO_UNDEF]] + ; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s64) + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s64), [[C]] + ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[CTLZ_ZERO_UNDEF1]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = G_CTLZ %0 @@ -141,13 +173,19 @@ body: | ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C]] + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C2]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTLZ_ZERO_UNDEF]] + ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C]] ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; CHECK: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[AND1]](s32) - ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ1]], [[C]] + ; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND1]](s32) + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s32), [[C2]] + ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C3]], [[CTLZ_ZERO_UNDEF1]] + ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SELECT1]], [[C]] ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32) ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] @@ -174,9 +212,13 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]] + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[CTLZ_ZERO_UNDEF]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[C3]] ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] @@ -200,17 +242,21 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] - ; CHECK: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s64) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTLZ]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s64) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s64), [[C1]] + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[CTLZ_ZERO_UNDEF]] + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[SELECT]](s32) + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64) + ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64) ; CHECK: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] ; CHECK: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64) - ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C2]](s64) + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C4]](s64) ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]] ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[AND1]](s64) ; CHECK: $vgpr0_vgpr1 = COPY [[COPY4]](s64) @@ -220,3 +266,16 @@ body: | %3:_(s64) = G_ANYEXT %2 $vgpr0_vgpr1 = COPY %3 ... + +# --- +# name: ctlz_v2s7_v2s7 + +# body: | +# bb.0: +# liveins: $vgpr0 +# %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 +# %1:_(<2 x s7>) = G_TRUNC %0 +# %2:_(<2 x s7>) = G_CTLZ %1 +# %3:_(<2 x s32>) = G_ANYEXT %2 +# $vgpr0_vgpr1 = COPY %3 +# ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir index 0edd6e8..88d05f9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir @@ -9,8 +9,12 @@ body: | liveins: $vgpr0 ; CHECK-LABEL: name: cttz_s32_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[COPY]](s32) - ; CHECK: $vgpr0 = COPY [[CTTZ]](s32) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTTZ_ZERO_UNDEF]] + ; CHECK: $vgpr0 = COPY [[SELECT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CTTZ %0 $vgpr0 = COPY %1 @@ -24,8 +28,12 @@ body: | liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: cttz_s32_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[COPY]](s64) - ; CHECK: $vgpr0 = COPY [[CTTZ]](s32) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTTZ_ZERO_UNDEF]] + ; CHECK: $vgpr0 = COPY [[SELECT]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CTTZ %0 $vgpr0 = COPY %1 @@ -39,8 +47,12 @@ body: | liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: cttz_s64_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[COPY]](s64) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ]](s32) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s64), [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTTZ_ZERO_UNDEF]] + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[SELECT]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_CTTZ %0 @@ -55,10 +67,14 @@ body: | liveins: $vgpr0 ; CHECK-LABEL: name: cttz_s16_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[COPY]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTTZ_ZERO_UNDEF]] + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] ; CHECK: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_CTTZ %0 @@ -79,8 +95,12 @@ body: | ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C1]] - ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s32), [[C2]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTTZ_ZERO_UNDEF]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; CHECK: $vgpr0 = COPY [[AND1]](s32) @@ -100,9 +120,15 @@ body: | ; CHECK-LABEL: name: cttz_v2s32_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[UV]](s32) - ; CHECK: [[CTTZ1:%[0-9]+]]:_(s32) = G_CTTZ [[UV1]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTTZ]](s32), [[CTTZ1]](s32) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s32), [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTTZ_ZERO_UNDEF]] + ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s32) + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C]] + ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[CTTZ_ZERO_UNDEF1]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_CTTZ %0 @@ -118,9 +144,15 @@ body: | ; CHECK-LABEL: name: cttz_v2s32_v2s64 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[UV]](s64) - ; CHECK: [[CTTZ1:%[0-9]+]]:_(s32) = G_CTTZ [[UV1]](s64) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[CTTZ]](s32), [[CTTZ1]](s32) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV]](s64), [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[CTTZ_ZERO_UNDEF]] + ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s64) + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s64), [[C]] + ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C1]], [[CTTZ_ZERO_UNDEF1]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = G_CTTZ %0 @@ -143,13 +175,19 @@ body: | ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C2]] - ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s32), [[C3]] + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[CTTZ_ZERO_UNDEF]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[C2]] - ; CHECK: [[CTTZ1:%[0-9]+]]:_(s32) = G_CTTZ [[OR1]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[CTTZ1]](s32) + ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32) + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR1]](s32), [[C3]] + ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C4]], [[CTTZ_ZERO_UNDEF1]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) @@ -177,8 +215,12 @@ body: | ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C1]] - ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ]](s32) + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s32), [[C2]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTTZ_ZERO_UNDEF]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; CHECK: $vgpr0 = COPY [[AND1]](s32) @@ -203,11 +245,15 @@ body: | ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]] ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934592 ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[C1]] - ; CHECK: [[CTTZ:%[0-9]+]]:_(s32) = G_CTTZ [[OR]](s64) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s64) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s64), [[C2]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTTZ_ZERO_UNDEF]] + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[SELECT]](s32) + ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ZEXT]](s64) - ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C2]](s64) + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C4]](s64) ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]] ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[AND1]](s64) ; CHECK: $vgpr0_vgpr1 = COPY [[COPY4]](s64) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz.mir deleted file mode 100644 index 441bc89..0000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz.mir +++ /dev/null @@ -1,31 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s - ---- -name: ctlz_i32_s -legalized: true - -body: | - bb.0: - liveins: $sgpr0 - ; CHECK-LABEL: name: ctlz_i32_s - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[CTLZ:%[0-9]+]]:sgpr(s32) = G_CTLZ [[COPY]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_CTLZ %0 -... - ---- -name: ctlz_i32_v -legalized: true - -body: | - bb.0: - liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: ctlz_i32_v - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[CTLZ:%[0-9]+]]:vgpr(s32) = G_CTLZ [[COPY]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CTLZ %0 -... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz.mir deleted file mode 100644 index c75d89b..0000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz.mir +++ /dev/null @@ -1,31 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s - ---- -name: cttz_i32_s -legalized: true - -body: | - bb.0: - liveins: $sgpr0 - ; CHECK-LABEL: name: cttz_i32_s - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[CTTZ:%[0-9]+]]:sgpr(s32) = G_CTTZ [[COPY]] - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = G_CTTZ %0 -... - ---- -name: cttz_i32_v -legalized: true - -body: | - bb.0: - liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: cttz_i32_v - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[CTTZ:%[0-9]+]]:vgpr(s32) = G_CTTZ [[COPY]] - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = G_CTTZ %0 -... diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir index ca41f45..68a0088 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir @@ -36,8 +36,8 @@ body: | ; LIBCALLS: ADJCALLSTACKUP ; LIBCALLS-NOT: G_CTLZ ; LIBCALLS: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; LIBCALLS: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; LIBCALLS: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[X]](s32), [[ZERO]] + ; LIBCALLS: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; LIBCALLS: [[R:%[0-9]+]]:_(s32) = G_SELECT [[CMP]](s1), [[BITS]], [[COUNT]] ; LIBCALLS-NOT: G_CTLZ %1(s32) = G_CTLZ %0 @@ -112,8 +112,8 @@ body: | ; LIBCALLS: ADJCALLSTACKUP ; LIBCALLS-NOT: G_CTLZ ; LIBCALLS: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; LIBCALLS: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; LIBCALLS: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), {{%[0-9]+}}(s32), [[ZERO]] + ; LIBCALLS: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; LIBCALLS: [[COUNT:%[0-9]+]]:_(s32) = G_SELECT [[CMP]](s1), [[BITS]], [[UNDEFCOUNT]] ; LIBCALLS-NOT: G_CTLZ ; CHECK: [[BITDIFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 5dfe741..5386f61 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -32,24 +32,24 @@ TEST_F(GISelMITest, LowerBitCountingCTTZ0) { // Declare your legalization info DefineLegalizerInfo(A, { - getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).legalFor({{s64, s64}}); + getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).legalFor({{s32, s64}}); }); // Build Instr auto MIBCTTZ = - B.buildInstr(TargetOpcode::G_CTTZ, {LLT::scalar(64)}, {Copies[0]}); + B.buildInstr(TargetOpcode::G_CTTZ, {LLT::scalar(32)}, {Copies[0]}); AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); // Perform Legalization - EXPECT_TRUE(Helper.lower(*MIBCTTZ, 0, LLT::scalar(64)) == - LegalizerHelper::LegalizeResult::Legalized); + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.lower(*MIBCTTZ, 0, LLT::scalar(64))); auto CheckStr = R"( - CHECK: [[CZU:%[0-9]+]]:_(s64) = G_CTTZ_ZERO_UNDEF %0 + CHECK: [[CZU:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF %0 CHECK: [[ZERO:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - CHECK: [[SIXTY4:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 CHECK: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), %0:_(s64), [[ZERO]] - CHECK: [[SEL:%[0-9]+]]:_(s64) = G_SELECT [[CMP]]:_(s1), [[SIXTY4]]:_, [[CZU]] + CHECK: [[SIXTY4:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + CHECK: [[SEL:%[0-9]+]]:_(s32) = G_SELECT [[CMP]]:_(s1), [[SIXTY4]]:_, [[CZU]] )"; // Check @@ -235,8 +235,8 @@ TEST_F(GISelMITest, LowerBitCountingCTLZ0) { auto CheckStr = R"( CHECK: [[CZU:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF %0 CHECK: [[ZERO:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - CHECK: [[SIXTY4:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 CHECK: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), %0:_(s64), [[ZERO]] + CHECK: [[SIXTY4:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 CHECK: [[SEL:%[0-9]+]]:_(s64) = G_SELECT [[CMP]]:_(s1), [[SIXTY4]]:_, [[CZU]] )"; @@ -252,23 +252,23 @@ TEST_F(GISelMITest, LowerBitCountingCTLZLibcall) { // Declare your legalization info DefineLegalizerInfo(A, { - getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).libcallFor({{s64, s64}}); + getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).libcallFor({{s32, s64}}); }); // Build auto MIBCTLZ = - B.buildInstr(TargetOpcode::G_CTLZ, {LLT::scalar(64)}, {Copies[0]}); + B.buildInstr(TargetOpcode::G_CTLZ, {LLT::scalar(32)}, {Copies[0]}); AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); - EXPECT_TRUE(Helper.lower(*MIBCTLZ, 0, LLT::scalar(64)) == - LegalizerHelper::LegalizeResult::Legalized); + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.lower(*MIBCTLZ, 0, LLT::scalar(32))); auto CheckStr = R"( - CHECK: [[CZU:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF %0 + CHECK: [[CZU:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF %0 CHECK: [[ZERO:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - CHECK: [[THIRTY2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 CHECK: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), %0:_(s64), [[ZERO]] - CHECK: [[SEL:%[0-9]+]]:_(s64) = G_SELECT [[CMP]]:_(s1), [[THIRTY2]]:_, [[CZU]] + CHECK: [[THIRTY2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + CHECK: [[SEL:%[0-9]+]]:_(s32) = G_SELECT [[CMP]]:_(s1), [[THIRTY2]]:_, [[CZU]] )"; // Check -- 2.7.4