From 95ac3d15e9fe86d9b51b51d02cb3c1640bf30dee Mon Sep 17 00:00:00 2001
From: Amara Emerson
Date: Wed, 18 Aug 2021 00:19:58 -0700
Subject: [PATCH] [AArch64][GlobalISel] Add G_VECREDUCE fewerElements support for full scalarization.

For some reductions like G_VECREDUCE_OR on AArch64, we need to scalarize
completely if the source is <= 64b. This change adds support for that in
the legalizer. If the source has a power-of-2 number of elements, then we
can do a tree reduction using the scalar operation on the individual
elements. Otherwise, we just create a sequential chain of operations.

For AArch64, we only need to scalarize if the input is <= 64b. If it's
greater than 64b then we can first do a fewerElements step to 64b, taking
advantage of vector instructions until we reach the point of scalarization.

I also had to relax the verifier checks for reductions because the
intrinsics support <1 x EltTy> types, which we lower to scalars for
GlobalISel.

Differential Revision: https://reviews.llvm.org/D108276
---
 .../llvm/CodeGen/GlobalISel/LegalizerHelper.h      |   1 +
 llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp    | 124 +++-
 llvm/lib/CodeGen/MachineVerifier.cpp               |   3 -
 .../Target/AArch64/GISel/AArch64LegalizerInfo.cpp  |  21 +
 .../AArch64/GlobalISel/legalize-reduce-or.mir      | 701 +++++++++++++++++++++
 .../GlobalISel/legalizer-info-validation.mir       |   4 +-
 llvm/test/CodeGen/AArch64/reduce-or.ll             | 318 ++++++++++
 .../MachineVerifier/test_vector_reductions.mir     |   2 -
 8 files changed, 1137 insertions(+), 37 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-or.mir

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index e804c9d..678efda 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -403,6 +403,7 @@ public:
   LegalizeResult lowerAbsToAddXor(MachineInstr &MI);
   LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI);
   LegalizeResult lowerIsNaN(MachineInstr &MI);
+  LegalizeResult lowerVectorReduction(MachineInstr &MI);
 };

 /// Helper function that creates a libcall to the given \p Name using the given
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 611cf10..463437a 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3489,6 +3489,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
     return lowerRotate(MI);
   case G_ISNAN:
     return lowerIsNaN(MI);
+  GISEL_VECREDUCE_CASES_NONSEQ
+    return lowerVectorReduction(MI);
   }
 }

@@ -4637,35 +4639,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
   return Legalized;
 }

-LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
-    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
-  unsigned Opc = MI.getOpcode();
-  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
-         Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
-         "Sequential reductions not expected");
-
-  if (TypeIdx != 1)
-    return UnableToLegalize;
-
-  // The semantics of the normal non-sequential reductions allow us to freely
-  // re-associate the operation.
-  Register SrcReg = MI.getOperand(1).getReg();
-  LLT SrcTy = MRI.getType(SrcReg);
-  Register DstReg = MI.getOperand(0).getReg();
-  LLT DstTy = MRI.getType(DstReg);
-
-  if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)
-    return UnableToLegalize;
-
-  SmallVector<Register> SplitSrcs;
-  const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements();
-  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
-  SmallVector<Register> PartialReductions;
-  for (unsigned Part = 0; Part < NumParts; ++Part) {
-    PartialReductions.push_back(
-        MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
-  }
-
+static unsigned getScalarOpcForReduction(unsigned Opc) {
   unsigned ScalarOpc;
   switch (Opc) {
   case TargetOpcode::G_VECREDUCE_FADD:
@@ -4708,10 +4682,81 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
     ScalarOpc = TargetOpcode::G_UMIN;
     break;
   default:
-    LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n");
+    llvm_unreachable("Unhandled reduction");
+  }
+  return ScalarOpc;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
+    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
+  unsigned Opc = MI.getOpcode();
+  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
+         Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
+         "Sequential reductions not expected");
+
+  if (TypeIdx != 1)
     return UnableToLegalize;
+
+  // The semantics of the normal non-sequential reductions allow us to freely
+  // re-associate the operation.
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT SrcTy = MRI.getType(SrcReg);
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+
+  if (NarrowTy.isVector() &&
+      (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
+    return UnableToLegalize;
+
+  unsigned ScalarOpc = getScalarOpcForReduction(Opc);
+  SmallVector<Register> SplitSrcs;
+  // If NarrowTy is a scalar then we're being asked to scalarize.
+  const unsigned NumParts =
+      NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
+                          : SrcTy.getNumElements();
+
+  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
+  if (NarrowTy.isScalar()) {
+    if (DstTy != NarrowTy)
+      return UnableToLegalize; // FIXME: handle implicit extensions.
+
+    if (isPowerOf2_32(NumParts)) {
+      // Generate a tree of scalar operations to reduce the critical path.
+      SmallVector<Register> PartialResults;
+      unsigned NumPartsLeft = NumParts;
+      while (NumPartsLeft > 1) {
+        for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
+          PartialResults.emplace_back(
+              MIRBuilder
+                  .buildInstr(ScalarOpc, {NarrowTy},
+                              {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
+                  .getReg(0));
+        }
+        SplitSrcs = PartialResults;
+        PartialResults.clear();
+        NumPartsLeft = SplitSrcs.size();
+      }
+      assert(SplitSrcs.size() == 1);
+      MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
+      MI.eraseFromParent();
+      return Legalized;
+    }
+    // If we can't generate a tree, then just do sequential operations.
+    Register Acc = SplitSrcs[0];
+    for (unsigned Idx = 1; Idx < NumParts; ++Idx)
+      Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
+                .getReg(0);
+    MIRBuilder.buildCopy(DstReg, Acc);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+  SmallVector<Register> PartialReductions;
+  for (unsigned Part = 0; Part < NumParts; ++Part) {
+    PartialReductions.push_back(
+        MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
   }
+
   // If the types involved are powers of 2, we can generate intermediate vector
   // ops, before generating a final reduction operation.
   if (isPowerOf2_32(SrcTy.getNumElements()) &&
@@ -7389,3 +7434,22 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerIsNaN(MachineInstr &MI) {
   MI.eraseFromParent();
   return Legalized;
 }
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT SrcTy = MRI.getType(SrcReg);
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+  // The source could be a scalar if the IR type was <1 x sN>.
+  if (SrcTy.isScalar()) {
+    if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
+      return UnableToLegalize; // FIXME: handle extension.
+    // This can be just a plain copy.
+    Observer.changingInstr(MI);
+    MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
+  return UnableToLegalize;
+}
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 0c49504..b527833 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1589,11 +1589,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
   case TargetOpcode::G_VECREDUCE_UMAX:
   case TargetOpcode::G_VECREDUCE_UMIN: {
     LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
-    LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
     if (!DstTy.isScalar())
       report("Vector reduction requires a scalar destination type", MI);
-    if (!SrcTy.isVector())
-      report("Vector reduction requires vector source=", MI);
     break;
   }

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 5320f44..d68467b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -691,6 +691,27 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .clampMaxNumElements(1, s32, 4)
       .lower();

+  getActionDefinitionsBuilder(G_VECREDUCE_OR)
+      // Try to break down into smaller vectors as long as they're at least 64
+      // bits. This lets us use vector operations for some parts of the
+      // reduction.
+      .fewerElementsIf(
+          [=](const LegalityQuery &Q) {
+            LLT SrcTy = Q.Types[1];
+            if (SrcTy.isScalar())
+              return false;
+            if (!isPowerOf2_32(SrcTy.getNumElements()))
+              return false;
+            // We can usually perform 64b vector operations.
+ return SrcTy.getSizeInBits() > 64; + }, + [=](const LegalityQuery &Q) { + LLT SrcTy = Q.Types[1]; + return std::make_pair(1, SrcTy.divide(2)); + }) + .scalarize(1) + .lower(); + getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT}) .lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); }); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-or.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-or.mir new file mode 100644 index 0000000..2ebd88a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-or.mir @@ -0,0 +1,701 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -march=aarch64 -run-pass=legalizer -global-isel-abort=1 -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: test_redor_v1i1 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: test_redor_v1i1 + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: $w0 = COPY [[AND]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1:_(s32) = COPY $w0 + %0:_(s1) = G_TRUNC %1(s32) + %2:_(s1) = G_VECREDUCE_OR %0(s1) + %4:_(s32) = G_ZEXT %2(s1) + $w0 = COPY %4(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test_redor_v2i1 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +body: | + bb.1: + liveins: $d0 + + ; CHECK-LABEL: name: test_redor_v2i1 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY2]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; CHECK: $w0 = COPY [[AND]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1:_(<2 x s32>) = COPY $d0 + %0:_(<2 x s1>) = G_TRUNC %1(<2 x s32>) + %2:_(s1) = G_VECREDUCE_OR %0(<2 x s1>) + %4:_(s32) = G_ZEXT %2(s1) + $w0 = COPY %4(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: test_redor_v4i1 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +body: | + bb.1: + liveins: $d0 + + ; CHECK-LABEL: name: test_redor_v4i1 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY2]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; CHECK: $w0 = COPY [[AND]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1:_(<4 x s16>) = COPY $d0 + %0:_(<4 x s1>) = G_TRUNC %1(<4 x s16>) + %2:_(s1) = G_VECREDUCE_OR %0(<4 x s1>) + %4:_(s32) = G_ZEXT %2(s1) + $w0 = COPY %4(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test_redor_v8i1 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +body: | + bb.1: + liveins: $d0 + + ; CHECK-LABEL: name: test_redor_v8i1 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<8 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR3]](s32) + ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[COPY4]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR4]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[OR5]](s32) + ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR6]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C]] + ; CHECK: $w0 = COPY [[AND]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1:_(<8 x s8>) = COPY $d0 + %0:_(<8 x s1>) = G_TRUNC %1(<8 x s8>) + %2:_(s1) = G_VECREDUCE_OR %0(<8 x s1>) + %4:_(s32) = G_ZEXT %2(s1) + $w0 = COPY %4(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: test_redor_v16i1 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: test_redor_v16i1 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<16 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR3]](s32) + ; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[COPY4]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR4]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[OR5]](s32) + ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR6]](s32) + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[OR7]](s32) + ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[COPY8]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[OR8]](s32) + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[OR9]](s32) + ; CHECK: [[OR12:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[COPY10]] + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[OR10]](s32) + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[OR11]](s32) + ; CHECK: [[OR13:%[0-9]+]]:_(s32) = G_OR [[COPY11]], [[COPY12]] + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[OR12]](s32) + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[OR13]](s32) + ; CHECK: [[OR14:%[0-9]+]]:_(s32) = G_OR [[COPY13]], [[COPY14]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: 
[[COPY15:%[0-9]+]]:_(s32) = COPY [[OR14]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]] + ; CHECK: $w0 = COPY [[AND]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1:_(<16 x s8>) = COPY $q0 + %0:_(<16 x s1>) = G_TRUNC %1(<16 x s8>) + %2:_(s1) = G_VECREDUCE_OR %0(<16 x s1>) + %4:_(s32) = G_ZEXT %2(s1) + $w0 = COPY %4(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test_redor_v1i8 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +body: | + bb.1: + liveins: $d0 + + ; CHECK-LABEL: name: test_redor_v1i8 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<8 x s8>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64) + ; CHECK: $w0 = COPY [[TRUNC]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1:_(<8 x s8>) = COPY $d0 + %11:_(s64) = G_BITCAST %1(<8 x s8>) + %0:_(s8) = G_TRUNC %11(s64) + %9:_(s8) = G_VECREDUCE_OR %0(s8) + %10:_(s32) = G_ANYEXT %9(s8) + $w0 = COPY %10(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test_redor_v3i8 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } + - { reg: '$w1' } + - { reg: '$w2' } +body: | + bb.1: + liveins: $w0, $w1, $w2 + + ; CHECK-LABEL: name: test_redor_v3i8 + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[COPY4]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CHECK: $w0 = COPY [[COPY7]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1:_(s32) = COPY $w0 + %2:_(s32) = COPY $w1 + %3:_(s32) = COPY $w2 + %4:_(<3 x s32>) = G_BUILD_VECTOR %1(s32), %2(s32), %3(s32) + %0:_(<3 x s8>) = G_TRUNC %4(<3 x s32>) + %5:_(s8) = G_VECREDUCE_OR %0(<3 x s8>) + %6:_(s32) = G_ANYEXT %5(s8) + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test_redor_v4i8 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +body: | + bb.1: + liveins: $d0 + + ; CHECK-LABEL: name: test_redor_v4i8 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: $w0 = COPY [[COPY3]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1:_(<4 x s16>) = COPY $d0 + %0:_(<4 x s8>) = G_TRUNC %1(<4 x s16>) + %2:_(s8) = G_VECREDUCE_OR %0(<4 x s8>) + %3:_(s32) = G_ANYEXT %2(s8) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: test_redor_v8i8 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +body: | + bb.1: + liveins: $d0 + + ; CHECK-LABEL: name: test_redor_v8i8 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<8 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR3]](s32) + ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[COPY4]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR4]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[OR5]](s32) + ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR6]](s32) + ; CHECK: $w0 = COPY [[COPY7]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(<8 x s8>) = COPY $d0 + %1:_(s8) = G_VECREDUCE_OR %0(<8 x s8>) + %2:_(s32) = G_ANYEXT %1(s8) + $w0 = COPY %2(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: test_redor_v16i8 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: test_redor_v16i8 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 + ; CHECK: [[UV:%[0-9]+]]:_(<8 x s8>), [[UV1:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[COPY]](<16 x s8>) + ; CHECK: [[OR:%[0-9]+]]:_(<8 x s8>) = G_OR [[UV]], [[UV1]] + ; CHECK: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[OR]](<8 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR4]](s32) + ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[COPY4]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR5]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[OR6]](s32) + ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR7]](s32) + ; CHECK: $w0 = COPY [[COPY7]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(<16 x s8>) = COPY $q0 + %1:_(s8) = G_VECREDUCE_OR %0(<16 x s8>) + %2:_(s32) = G_ANYEXT %1(s8) + $w0 = COPY %2(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: test_redor_v32i8 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } + - { reg: '$q1' } +body: | + bb.1: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: test_redor_v32i8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 + ; CHECK: [[OR:%[0-9]+]]:_(<16 x s8>) = G_OR [[COPY]], [[COPY1]] + ; CHECK: [[UV:%[0-9]+]]:_(<8 x s8>), [[UV1:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[OR]](<16 x s8>) + ; CHECK: [[OR1:%[0-9]+]]:_(<8 x s8>) = G_OR [[UV]], [[UV1]] + ; CHECK: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[OR1]](<8 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR3]](s32) + ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[COPY3]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR4]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR5]](s32) + ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[COPY5]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[OR6]](s32) + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR7]](s32) + ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[COPY7]] + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[OR8]](s32) + ; CHECK: $w0 = COPY [[COPY8]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1:_(<16 x s8>) = COPY $q0 + %2:_(<16 x s8>) = COPY $q1 + %0:_(<32 x s8>) = G_CONCAT_VECTORS %1(<16 x s8>), %2(<16 x s8>) + %3:_(s8) = G_VECREDUCE_OR %0(<32 x s8>) + %4:_(s32) = G_ANYEXT %3(s8) + $w0 = COPY %4(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: test_redor_v4i16 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +body: | + bb.1: + liveins: $d0 + + ; CHECK-LABEL: name: test_redor_v4i16 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: $w0 = COPY [[COPY3]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = G_VECREDUCE_OR %0(<4 x s16>) + %2:_(s32) = G_ANYEXT %1(s16) + $w0 = COPY %2(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test_redor_v8i16 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: test_redor_v8i16 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; CHECK: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; CHECK: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[UV]], [[UV1]] + ; CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR]](<4 x s16>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR3]](s32) + ; CHECK: $w0 = COPY [[COPY3]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(<8 x s16>) = COPY $q0 + %1:_(s16) = G_VECREDUCE_OR %0(<8 x s16>) + %2:_(s32) = G_ANYEXT %1(s16) + $w0 = COPY %2(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: test_redor_v16i16 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } + - { reg: '$q1' } +body: | + bb.1: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: test_redor_v16i16 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1 + ; CHECK: [[OR:%[0-9]+]]:_(<8 x s16>) = G_OR [[COPY]], [[COPY1]] + ; CHECK: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[OR]](<8 x s16>) + ; CHECK: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[UV]], [[UV1]] + ; CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR1]](<4 x s16>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR3]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[COPY3]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR4]](s32) + ; CHECK: $w0 = COPY [[COPY4]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1:_(<8 x s16>) = COPY $q0 + %2:_(<8 x s16>) = COPY $q1 + %0:_(<16 x s16>) = G_CONCAT_VECTORS %1(<8 x s16>), %2(<8 x s16>) + %3:_(s16) = G_VECREDUCE_OR %0(<16 x s16>) + %4:_(s32) = G_ANYEXT %3(s16) + $w0 = COPY %4(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test_redor_v2i32 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +body: | + bb.1: + liveins: $d0 + + ; CHECK-LABEL: name: test_redor_v2i32 + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV]], [[UV1]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CHECK: $w0 = COPY [[COPY1]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(s32) = G_VECREDUCE_OR %0(<2 x s32>) + $w0 = COPY %1(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test_redor_v4i32 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: test_redor_v4i32 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[UV]], [[UV1]] + ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV3]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CHECK: $w0 = COPY [[COPY1]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(s32) = G_VECREDUCE_OR %0(<4 x s32>) + $w0 = COPY %1(s32) + RET_ReallyLR implicit $w0 + +... 
+--- +name: test_redor_v8i32 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } + - { reg: '$q1' } +body: | + bb.1: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: test_redor_v8i32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[COPY]], [[COPY1]] + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[OR]](<4 x s32>) + ; CHECK: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[UV]], [[UV1]] + ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR1]](<2 x s32>) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV3]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: $w0 = COPY [[COPY2]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1:_(<4 x s32>) = COPY $q0 + %2:_(<4 x s32>) = COPY $q1 + %0:_(<8 x s32>) = G_CONCAT_VECTORS %1(<4 x s32>), %2(<4 x s32>) + %3:_(s32) = G_VECREDUCE_OR %0(<8 x s32>) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: test_redor_v2i64 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: test_redor_v2i64 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[UV]], [[UV1]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[OR]](s64) + ; CHECK: $x0 = COPY [[COPY1]](s64) + ; CHECK: RET_ReallyLR implicit $x0 + %0:_(<2 x s64>) = COPY $q0 + %1:_(s64) = G_VECREDUCE_OR %0(<2 x s64>) + $x0 = COPY %1(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: test_redor_v4i64 +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$q0' } + - { reg: '$q1' } +body: | + bb.1: + liveins: $q0, $q1 + + ; CHECK-LABEL: name: test_redor_v4i64 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; CHECK: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[COPY]], [[COPY1]] + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[OR]](<2 x s64>) + ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV]], [[UV1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[OR1]](s64) + ; CHECK: $x0 = COPY [[COPY2]](s64) + ; CHECK: RET_ReallyLR implicit $x0 + %1:_(<2 x s64>) = COPY $q0 + %2:_(<2 x s64>) = COPY $q1 + %0:_(<4 x s64>) = G_CONCAT_VECTORS %1(<2 x s64>), %2(<2 x s64>) + %3:_(s64) = G_VECREDUCE_OR %0(<4 x s64>) + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index f5a6202..ea35308 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -673,8 +673,8 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_VECREDUCE_OR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined -# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. 
imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_VECREDUCE_XOR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined diff --git a/llvm/test/CodeGen/AArch64/reduce-or.ll b/llvm/test/CodeGen/AArch64/reduce-or.ll index e832352..659b062 100644 --- a/llvm/test/CodeGen/AArch64/reduce-or.ll +++ b/llvm/test/CodeGen/AArch64/reduce-or.ll @@ -1,11 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=GISEL + define i1 @test_redor_v1i1(<1 x i1> %a) { ; CHECK-LABEL: test_redor_v1i1: ; CHECK: // %bb.0: ; CHECK-NEXT: and w0, w0, #0x1 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v1i1: +; GISEL: // %bb.0: +; GISEL-NEXT: and w0, w0, #0x1 +; GISEL-NEXT: ret %or_result = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %a) ret i1 %or_result } @@ -19,6 +26,16 @@ define i1 @test_redor_v2i1(<2 x i1> %a) { ; CHECK-NEXT: orr w8, w9, w8 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v2i1: +; GISEL: // %bb.0: +; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 +; GISEL-NEXT: mov s1, v0.s[1] +; GISEL-NEXT: fmov w8, s0 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: and w0, w8, #0x1 +; GISEL-NEXT: ret %or_result = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %a) ret i1 %or_result } @@ -36,6 +53,22 @@ define i1 @test_redor_v4i1(<4 x i1> %a) { ; CHECK-NEXT: orr w8, w9, w8 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v4i1: +; GISEL: // %bb.0: +; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 +; GISEL-NEXT: mov h1, v0.h[1] +; GISEL-NEXT: mov h2, v0.h[2] +; GISEL-NEXT: mov h3, v0.h[3] +; GISEL-NEXT: fmov w8, s0 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fmov w9, s2 +; GISEL-NEXT: fmov w10, s3 +; GISEL-NEXT: orr w9, w9, w10 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: and w0, w8, #0x1 +; GISEL-NEXT: ret %or_result = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a) ret i1 %or_result } @@ -61,6 +94,34 @@ define i1 @test_redor_v8i1(<8 x i1> %a) { ; CHECK-NEXT: orr w8, w9, w8 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v8i1: +; GISEL: // %bb.0: +; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 +; GISEL-NEXT: mov b1, v0.b[1] +; GISEL-NEXT: mov b2, v0.b[2] +; GISEL-NEXT: mov b3, v0.b[3] +; GISEL-NEXT: fmov w8, s0 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fmov w9, s2 +; GISEL-NEXT: fmov w10, s3 +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: orr w9, w9, w10 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: orr w10, w10, w11 +; GISEL-NEXT: fmov w11, s6 +; GISEL-NEXT: fmov w12, s7 +; GISEL-NEXT: orr w11, w11, w12 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: and w0, w8, #0x1 +; GISEL-NEXT: ret %or_result = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a) ret i1 %or_result } @@ -87,6 +148,57 @@ define i1 @test_redor_v16i1(<16 x i1> %a) { ; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v16i1: +; GISEL: // %bb.0: +; GISEL-NEXT: mov 
b1, v0.b[1] +; GISEL-NEXT: mov b2, v0.b[2] +; GISEL-NEXT: mov b3, v0.b[3] +; GISEL-NEXT: fmov w8, s0 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fmov w9, s2 +; GISEL-NEXT: fmov w10, s3 +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: orr w9, w9, w10 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: mov b16, v0.b[8] +; GISEL-NEXT: mov b17, v0.b[9] +; GISEL-NEXT: orr w10, w10, w11 +; GISEL-NEXT: fmov w11, s6 +; GISEL-NEXT: fmov w12, s7 +; GISEL-NEXT: mov b18, v0.b[10] +; GISEL-NEXT: mov b19, v0.b[11] +; GISEL-NEXT: orr w11, w11, w12 +; GISEL-NEXT: fmov w12, s16 +; GISEL-NEXT: fmov w13, s17 +; GISEL-NEXT: mov b20, v0.b[12] +; GISEL-NEXT: mov b21, v0.b[13] +; GISEL-NEXT: orr w12, w12, w13 +; GISEL-NEXT: fmov w13, s18 +; GISEL-NEXT: fmov w14, s19 +; GISEL-NEXT: mov b22, v0.b[14] +; GISEL-NEXT: mov b23, v0.b[15] +; GISEL-NEXT: orr w13, w13, w14 +; GISEL-NEXT: fmov w14, s20 +; GISEL-NEXT: fmov w15, s21 +; GISEL-NEXT: orr w14, w14, w15 +; GISEL-NEXT: fmov w15, s22 +; GISEL-NEXT: fmov w16, s23 +; GISEL-NEXT: orr w15, w15, w16 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: orr w10, w12, w13 +; GISEL-NEXT: orr w11, w14, w15 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: and w0, w8, #0x1 +; GISEL-NEXT: ret %or_result = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a) ret i1 %or_result } @@ -97,6 +209,12 @@ define i8 @test_redor_v1i8(<1 x i8> %a) { ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w0, v0.b[0] ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v1i8: +; GISEL: // %bb.0: +; GISEL-NEXT: fmov x0, d0 +; GISEL-NEXT: // kill: def $w0 killed $w0 killed $x0 +; GISEL-NEXT: ret %or_result = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> %a) ret i8 %or_result } @@ -107,6 +225,12 @@ define i8 @test_redor_v3i8(<3 x i8> %a) { ; CHECK-NEXT: orr w8, w0, w1 ; CHECK-NEXT: orr w0, w8, w2 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v3i8: +; GISEL: // %bb.0: +; GISEL-NEXT: orr w8, w0, w1 +; GISEL-NEXT: orr w0, w8, w2 +; GISEL-NEXT: ret %or_result = call i8 @llvm.vector.reduce.or.v3i8(<3 x i8> %a) ret i8 %or_result } @@ -123,6 +247,21 @@ define i8 @test_redor_v4i8(<4 x i8> %a) { ; CHECK-NEXT: orr w9, w10, w9 ; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v4i8: +; GISEL: // %bb.0: +; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 +; GISEL-NEXT: mov h1, v0.h[1] +; GISEL-NEXT: mov h2, v0.h[2] +; GISEL-NEXT: mov h3, v0.h[3] +; GISEL-NEXT: fmov w8, s0 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fmov w9, s2 +; GISEL-NEXT: fmov w10, s3 +; GISEL-NEXT: orr w9, w9, w10 +; GISEL-NEXT: orr w0, w8, w9 +; GISEL-NEXT: ret %or_result = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %a) ret i8 %or_result } @@ -147,6 +286,33 @@ define i8 @test_redor_v8i8(<8 x i8> %a) { ; CHECK-NEXT: orr w9, w10, w9 ; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v8i8: +; GISEL: // %bb.0: +; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 +; GISEL-NEXT: mov b1, v0.b[1] +; GISEL-NEXT: mov b2, v0.b[2] +; GISEL-NEXT: mov b3, v0.b[3] +; GISEL-NEXT: fmov w8, s0 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fmov w9, s2 +; GISEL-NEXT: fmov w10, s3 +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: orr w9, w9, 
w10 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: orr w10, w10, w11 +; GISEL-NEXT: fmov w11, s6 +; GISEL-NEXT: fmov w12, s7 +; GISEL-NEXT: orr w11, w11, w12 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: orr w0, w8, w9 +; GISEL-NEXT: ret %or_result = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a) ret i8 %or_result } @@ -172,6 +338,34 @@ define i8 @test_redor_v16i8(<16 x i8> %a) { ; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v16i8: +; GISEL: // %bb.0: +; GISEL-NEXT: mov d1, v0.d[1] +; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b +; GISEL-NEXT: mov b1, v0.b[1] +; GISEL-NEXT: mov b2, v0.b[2] +; GISEL-NEXT: mov b3, v0.b[3] +; GISEL-NEXT: fmov w8, s0 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fmov w9, s2 +; GISEL-NEXT: fmov w10, s3 +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: orr w9, w9, w10 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: orr w10, w10, w11 +; GISEL-NEXT: fmov w11, s6 +; GISEL-NEXT: fmov w12, s7 +; GISEL-NEXT: orr w11, w11, w12 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: orr w0, w8, w9 +; GISEL-NEXT: ret %or_result = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a) ret i8 %or_result } @@ -198,6 +392,35 @@ define i8 @test_redor_v32i8(<32 x i8> %a) { ; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v32i8: +; GISEL: // %bb.0: +; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b +; GISEL-NEXT: mov d1, v0.d[1] +; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b +; GISEL-NEXT: mov b1, v0.b[1] +; GISEL-NEXT: mov b2, v0.b[2] +; GISEL-NEXT: mov b3, v0.b[3] +; GISEL-NEXT: fmov w8, s0 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fmov w9, s2 +; GISEL-NEXT: fmov w10, s3 +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: orr w9, w9, w10 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: orr w10, w10, w11 +; GISEL-NEXT: fmov w11, s6 +; GISEL-NEXT: fmov w12, s7 +; GISEL-NEXT: orr w11, w11, w12 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: orr w0, w8, w9 +; GISEL-NEXT: ret %or_result = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %a) ret i8 %or_result } @@ -214,6 +437,21 @@ define i16 @test_redor_v4i16(<4 x i16> %a) { ; CHECK-NEXT: orr w9, w10, w9 ; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v4i16: +; GISEL: // %bb.0: +; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 +; GISEL-NEXT: mov h1, v0.h[1] +; GISEL-NEXT: mov h2, v0.h[2] +; GISEL-NEXT: mov h3, v0.h[3] +; GISEL-NEXT: fmov w8, s0 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fmov w9, s2 +; GISEL-NEXT: fmov w10, s3 +; GISEL-NEXT: orr w9, w9, w10 +; GISEL-NEXT: orr w0, w8, w9 +; GISEL-NEXT: ret %or_result = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a) ret i16 %or_result } @@ -231,6 +469,22 @@ define i16 @test_redor_v8i16(<8 x i16> %a) { ; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v8i16: +; GISEL: // %bb.0: +; GISEL-NEXT: mov d1, v0.d[1] +; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b +; GISEL-NEXT: mov h1, v0.h[1] +; GISEL-NEXT: mov h2, v0.h[2] +; GISEL-NEXT: mov h3, v0.h[3] +; GISEL-NEXT: fmov w8, s0 +; GISEL-NEXT: fmov w9, s1 +; 
GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fmov w9, s2 +; GISEL-NEXT: fmov w10, s3 +; GISEL-NEXT: orr w9, w9, w10 +; GISEL-NEXT: orr w0, w8, w9 +; GISEL-NEXT: ret %or_result = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a) ret i16 %or_result } @@ -249,6 +503,23 @@ define i16 @test_redor_v16i16(<16 x i16> %a) { ; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v16i16: +; GISEL: // %bb.0: +; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b +; GISEL-NEXT: mov d1, v0.d[1] +; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b +; GISEL-NEXT: mov h1, v0.h[1] +; GISEL-NEXT: mov h2, v0.h[2] +; GISEL-NEXT: mov h3, v0.h[3] +; GISEL-NEXT: fmov w8, s0 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fmov w9, s2 +; GISEL-NEXT: fmov w10, s3 +; GISEL-NEXT: orr w9, w9, w10 +; GISEL-NEXT: orr w0, w8, w9 +; GISEL-NEXT: ret %or_result = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %a) ret i16 %or_result } @@ -261,6 +532,15 @@ define i32 @test_redor_v2i32(<2 x i32> %a) { ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v2i32: +; GISEL: // %bb.0: +; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 +; GISEL-NEXT: mov s1, v0.s[1] +; GISEL-NEXT: fmov w8, s0 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: orr w0, w8, w9 +; GISEL-NEXT: ret %or_result = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a) ret i32 %or_result } @@ -274,6 +554,16 @@ define i32 @test_redor_v4i32(<4 x i32> %a) { ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v4i32: +; GISEL: // %bb.0: +; GISEL-NEXT: mov d1, v0.d[1] +; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b +; GISEL-NEXT: mov s1, v0.s[1] +; GISEL-NEXT: fmov w8, s0 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: orr w0, w8, w9 +; GISEL-NEXT: ret %or_result = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a) ret i32 %or_result } @@ -288,6 +578,17 @@ define i32 @test_redor_v8i32(<8 x i32> %a) { ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v8i32: +; GISEL: // %bb.0: +; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b +; GISEL-NEXT: mov d1, v0.d[1] +; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b +; GISEL-NEXT: mov s1, v0.s[1] +; GISEL-NEXT: fmov w8, s0 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: orr w0, w8, w9 +; GISEL-NEXT: ret %or_result = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a) ret i32 %or_result } @@ -299,6 +600,14 @@ define i64 @test_redor_v2i64(<2 x i64> %a) { ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v2i64: +; GISEL: // %bb.0: +; GISEL-NEXT: mov d1, v0.d[1] +; GISEL-NEXT: fmov x8, d0 +; GISEL-NEXT: fmov x9, d1 +; GISEL-NEXT: orr x0, x8, x9 +; GISEL-NEXT: ret %or_result = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a) ret i64 %or_result } @@ -311,6 +620,15 @@ define i64 @test_redor_v4i64(<4 x i64> %a) { ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; GISEL-LABEL: test_redor_v4i64: +; GISEL: // %bb.0: +; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b +; GISEL-NEXT: mov d1, v0.d[1] +; GISEL-NEXT: fmov x8, d0 +; GISEL-NEXT: fmov x9, d1 +; GISEL-NEXT: orr x0, x8, x9 +; GISEL-NEXT: ret %or_result = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %a) ret i64 %or_result } diff --git a/llvm/test/MachineVerifier/test_vector_reductions.mir b/llvm/test/MachineVerifier/test_vector_reductions.mir index d66d1ed..6ea611e 100644 --- a/llvm/test/MachineVerifier/test_vector_reductions.mir +++ 
b/llvm/test/MachineVerifier/test_vector_reductions.mir @@ -30,6 +30,4 @@ body: | %dst:_(s64) = G_VECREDUCE_SEQ_FADD %scalar_s64, %scalar_s64 ; CHECK: Bad machine code: Sequential FADD/FMUL vector reduction must have a vector 2nd operand - %dst2:_(s64) = G_VECREDUCE_MUL %scalar_s64 - ; CHECK: Bad machine code: Vector reduction requires vector source ... -- 2.7.4
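
For readers who want to see the shape of the expansion the new scalarization path produces, the following standalone C++ sketch mirrors the strategy on plain integers: a pairwise tree reduction when the element count is a power of two, and a sequential chain otherwise. It is illustrative only; it does not use the LegalizerHelper/MIRBuilder APIs from the patch, and the helper names are made up for the example.

```cpp
// Illustrative sketch of the reduction-lowering strategy added above.
// Not LLVM code: plain C++ over integers instead of MIR registers.
#include <cstdint>
#include <iostream>
#include <vector>

static bool isPowerOf2(size_t N) { return N && (N & (N - 1)) == 0; }

static uint64_t reduceOr(std::vector<uint64_t> Elts) {
  if (isPowerOf2(Elts.size())) {
    // Tree reduction: combine adjacent pairs until one value remains,
    // which keeps the critical path at log2(N) operations.
    while (Elts.size() > 1) {
      std::vector<uint64_t> Partial;
      for (size_t I = 0; I + 1 < Elts.size(); I += 2)
        Partial.push_back(Elts[I] | Elts[I + 1]);
      Elts = std::move(Partial);
    }
    return Elts[0];
  }
  // Non-power-of-2 element count: fall back to a sequential chain.
  uint64_t Acc = Elts[0];
  for (size_t I = 1; I < Elts.size(); ++I)
    Acc |= Elts[I];
  return Acc;
}

int main() {
  std::cout << reduceOr({1, 2, 4, 8}) << "\n"; // tree shape, prints 15
  std::cout << reduceOr({1, 2, 4}) << "\n";    // sequential chain, prints 7
  return 0;
}
```

The tree shape matches what the legalize-reduce-or.mir tests check for power-of-2 cases (pairs of G_OR feeding further G_ORs), while the v3i8 test shows the sequential chain taken for non-power-of-2 element counts.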