From 47d0780f454d3f7c42bdba13c0682bf2a0095bca Mon Sep 17 00:00:00 2001
From: Jessica Paquette
Date: Tue, 29 Jun 2021 17:01:28 -0700
Subject: [PATCH] [GlobalISel] Handle more types in narrowScalar for eq/ne
 G_ICMP

Generalize the existing eq/ne case using `extractParts`.

The original code only handled narrowings for types of width 2n->n. This
generalization allows for any type that can be broken down by
`extractParts`.

The general overview is:

- Loop over each narrow-sized part and do exactly what the 2-register case
  did.
- Loop over the leftover-sized parts and do the same thing.
- Widen the leftover-sized XOR results to the desired narrow size.
- OR all of that together, then do the comparison against 0 (just like the
  old code did).

This shows up a lot when building clang for AArch64 using GlobalISel, so
it's worth fixing. For the sake of simplicity, this doesn't handle the
non-eq/ne case yet.

Also remove the code in this case that notifies the observer; we're just
going to delete MI anyway, so talking to the observer shouldn't be
necessary.

Differential Revision: https://reviews.llvm.org/D105161
---
 llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp  |  85 ++++++++---
 .../CodeGen/AArch64/GlobalISel/legalize-cmp.mir  | 166 +++++++++++++++++++++
 2 files changed, 230 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 40f6f4a..b31cf3f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1063,38 +1063,81 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     return Legalized;
   }
   case TargetOpcode::G_ICMP: {
-    uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
-    if (NarrowSize * 2 != SrcSize)
+    Register LHS = MI.getOperand(2).getReg();
+    LLT SrcTy = MRI.getType(LHS);
+    uint64_t SrcSize = SrcTy.getSizeInBits();
+    CmpInst::Predicate Pred =
+        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+    // TODO: Handle the non-equality case for weird sizes.
+    if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
       return UnableToLegalize;
-    Observer.changingInstr(MI);
-    Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
-    Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
-    MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
+    LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
+    SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
+    if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
+                      LHSLeftoverRegs))
+      return UnableToLegalize;
+
+    LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
+    SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
+    if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
+                      RHSPartRegs, RHSLeftoverRegs))
+      return UnableToLegalize;
 
-    Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
-    Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
-    MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
+    // We now have the LHS and RHS of the compare split into narrow-type
+    // registers, plus potentially some leftover type.
+    Register Dst = MI.getOperand(0).getReg();
+    LLT ResTy = MRI.getType(Dst);
+    if (ICmpInst::isEquality(Pred)) {
+      // For each part on the LHS and RHS, keep track of the result of XOR-ing
+      // them together. For each equal part, the result should be all 0s. For
+      // each non-equal part, we'll get at least one 1.
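+      //
+      // For example, an s128 eq compare narrowed to s64 conceptually becomes
+      // (register names here are illustrative, not actual output):
+      //
+      //   %xor_lo:_(s64) = G_XOR %lhs_lo, %rhs_lo
+      //   %xor_hi:_(s64) = G_XOR %lhs_hi, %rhs_hi
+      //   %or:_(s64) = G_OR %xor_lo, %xor_hi
+      //   %cmp:_(s1) = G_ICMP intpred(eq), %or(s64), %zero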
+      auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
+      SmallVector<Register, 4> Xors;
+      for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
+        auto LHS = std::get<0>(LHSAndRHS);
+        auto RHS = std::get<1>(LHSAndRHS);
+        auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
+        Xors.push_back(Xor);
+      }
 
-    CmpInst::Predicate Pred =
-        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
-    LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
-
-    if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
-      MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
-      MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
-      MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
-      MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
-      MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
+      // Build a G_XOR for each leftover register. Each G_XOR must be widened
+      // to the desired narrow type so that we can OR them together later.
+      SmallVector<Register, 4> WidenedXors;
+      for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
+        auto LHS = std::get<0>(LHSAndRHS);
+        auto RHS = std::get<1>(LHSAndRHS);
+        auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
+        LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
+        buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
+                            /* PadStrategy = */ TargetOpcode::G_ZEXT);
+        Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
+      }
+
+      // Now, for each part we broke up, we know if they are equal/not equal
+      // based off the G_XOR. We can OR these all together and compare against
+      // 0 to get the result.
+      assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
+      auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
+      for (unsigned I = 2, E = Xors.size(); I < E; ++I)
+        Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
+      MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
     } else {
+      // TODO: Handle non-power-of-two types.
+      assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
+      assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
+      Register LHSL = LHSPartRegs[0];
+      Register LHSH = LHSPartRegs[1];
+      Register RHSL = RHSPartRegs[0];
+      Register RHSH = RHSPartRegs[1];
       MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
       MachineInstrBuilder CmpHEQ =
           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
       MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
           ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
-      MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
+      MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
     }
-    Observer.changedInstr(MI);
     MI.eraseFromParent();
     return Legalized;
   }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
index d4c3d43..8a9bf79 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
@@ -79,3 +79,169 @@ body: |
     RET_ReallyLR
 ...
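+# s128 splits evenly into two s64 parts: XOR each pair of parts, OR the XOR
+# results together, then compare the combined value against 0.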
+--- +name: test_s128_eq +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: test_s128_eq + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] + ; CHECK: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[XOR1]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]] + ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK: G_BRCOND %cmp(s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + bb.1: + %lhs:_(s128) = G_IMPLICIT_DEF + %rhs:_(s128) = G_IMPLICIT_DEF + %cmp:_(s1) = G_ICMP intpred(eq), %lhs(s128), %rhs + G_BRCOND %cmp(s1), %bb.2 + G_BR %bb.3 + bb.2: + successors: + bb.3: + RET_ReallyLR + +... +--- +name: test_s88_eq +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: test_s88_eq + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8), [[UV2]](s8), [[UV3]](s8), [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) + ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8), [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[UV15]](s8) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64) + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV1]](s64) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) + ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY3]] + ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[XOR1]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8) + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[MV2]] + ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[C]] + ; CHECK: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[C]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR2]](s64), [[C]] + ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK: G_BRCOND %cmp(s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + bb.1: + %lhs:_(s88) = G_IMPLICIT_DEF + %rhs:_(s88) = G_IMPLICIT_DEF + %cmp:_(s1) = G_ICMP intpred(eq), %lhs(s88), %rhs + G_BRCOND %cmp(s1), %bb.2 + G_BR %bb.3 + bb.2: + successors: + bb.3: + RET_ReallyLR + +... 
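+# Same decomposition as the s88 eq case above; only the final predicate
+# changes to intpred(ne).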
+--- +name: test_s88_ne +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: test_s88_ne + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8), [[UV2]](s8), [[UV3]](s8), [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) + ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF]](s64) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8), [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[UV15]](s8) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64) + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV1]](s64) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC1]](s32) + ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY3]] + ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[XOR1]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8) + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[MV2]] + ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[C]] + ; CHECK: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[C]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR2]](s64), [[C]] + ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK: G_BRCOND %cmp(s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + bb.1: + %lhs:_(s88) = G_IMPLICIT_DEF + %rhs:_(s88) = G_IMPLICIT_DEF + %cmp:_(s1) = G_ICMP intpred(ne), %lhs(s88), %rhs + G_BRCOND %cmp(s1), %bb.2 + G_BR %bb.3 + bb.2: + successors: + bb.3: + RET_ReallyLR + +... 
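+# s96 splits into one s64 part plus an s32 leftover. The leftover XOR is
+# zero-extended to s64 (a G_MERGE_VALUES with a zero constant) before the
+# final OR.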
+--- +name: test_s96_eq +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: test_s96_eq + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) + ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s64), 0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV1]](s64) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s64), 0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[COPY1]] + ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT]], [[EXTRACT1]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[XOR1]](s32), [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[MV2]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s64), [[C]] + ; CHECK: %cmp:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK: G_BRCOND %cmp(s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + bb.1: + %lhs:_(s96) = G_IMPLICIT_DEF + %rhs:_(s96) = G_IMPLICIT_DEF + %cmp:_(s1) = G_ICMP intpred(eq), %lhs(s96), %rhs + G_BRCOND %cmp(s1), %bb.2 + G_BR %bb.3 + bb.2: + successors: + bb.3: + RET_ReallyLR -- 2.7.4