From 8d70e6425c7b782729ed9461d11928ca0f045fd2 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 28 Feb 2019 18:47:29 +0000 Subject: [PATCH] Revert "[AArch64][GlobalISel] Add support for 64 bit vector shuffle using TBL1." Seems to break some neon intrinsics tests. llvm-svn: 355115 --- .../Target/AArch64/AArch64InstructionSelector.cpp | 144 ++++----------------- .../AArch64/GlobalISel/select-shuffle-vector.mir | 72 ++++------- 2 files changed, 51 insertions(+), 165 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index ebc2d9c..83d61c8 100644 --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -82,8 +82,6 @@ private: unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const; MachineInstr *emitLoadFromConstantPool(Constant *CPVal, MachineIRBuilder &MIRBuilder) const; - MachineInstr *emitVectorConcat(unsigned Op1, unsigned Op2, - MachineIRBuilder &MIRBuilder) const; ComplexRendererFns selectArithImmed(MachineOperand &Root) const; @@ -1967,98 +1965,6 @@ MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool( return &*Load; } -/// Return an pair to do an vector elt insert of a given -/// size and RB. -static std::pair -getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { - unsigned Opc, SubregIdx; - if (RB.getID() == AArch64::GPRRegBankID) { - if (EltSize == 32) { - Opc = AArch64::INSvi32gpr; - SubregIdx = AArch64::ssub; - } else if (EltSize == 64) { - Opc = AArch64::INSvi64gpr; - SubregIdx = AArch64::dsub; - } else { - llvm_unreachable("invalid elt size!"); - } - } else { - if (EltSize == 8) { - Opc = AArch64::INSvi8lane; - SubregIdx = AArch64::bsub; - } else if (EltSize == 16) { - Opc = AArch64::INSvi16lane; - SubregIdx = AArch64::hsub; - } else if (EltSize == 32) { - Opc = AArch64::INSvi32lane; - SubregIdx = AArch64::ssub; - } else if (EltSize == 64) { - Opc = AArch64::INSvi64lane; - SubregIdx = AArch64::dsub; - } else { - llvm_unreachable("invalid elt size!"); - } - } - return std::make_pair(Opc, SubregIdx); -} - -MachineInstr *AArch64InstructionSelector::emitVectorConcat( - unsigned Op1, unsigned Op2, MachineIRBuilder &MIRBuilder) const { - // We implement a vector concat by: - // 1. Use scalar_to_vector to insert the lower vector into the larger dest - // 2. Insert the upper vector into the destination's upper element - // TODO: some of this code is common with G_BUILD_VECTOR handling. - MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); - - const LLT Op1Ty = MRI.getType(Op1); - const LLT Op2Ty = MRI.getType(Op2); - - if (Op1Ty != Op2Ty) { - LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys"); - return nullptr; - } - assert(Op1Ty.isVector() && "Expected a vector for vector concat"); - - if (Op1Ty.getSizeInBits() >= 128) { - LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors"); - return nullptr; - } - - // At the moment we just support 64 bit vector concats. - if (Op1Ty.getSizeInBits() != 64) { - LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors"); - return nullptr; - } - - const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits()); - const LLT &DstTy = LLT::vector(2, ScalarTy); - const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI); - const TargetRegisterClass *DstRC = - getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2); - - MachineInstr *WidenedOp1 = emitScalarToVector(DstTy, DstRC, Op1, MIRBuilder); - MachineInstr *WidenedOp2 = emitScalarToVector(DstTy, DstRC, Op2, MIRBuilder); - if (!WidenedOp1 || !WidenedOp2) { - LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value"); - return nullptr; - } - - // Now do the insert of the upper element. - unsigned InsertOpc, InsSubRegIdx; - std::tie(InsertOpc, InsSubRegIdx) = - getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits()); - - auto InsElt = - MIRBuilder - .buildInstr(InsertOpc, {DstRC}, {WidenedOp1->getOperand(0).getReg()}) - .addImm(1) /* Lane index */ - .addUse(WidenedOp2->getOperand(0).getReg()) - .addImm(0); - - constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); - return &*InsElt; -} - bool AArch64InstructionSelector::selectShuffleVector( MachineInstr &I, MachineRegisterInfo &MRI) const { const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); @@ -2096,37 +2002,21 @@ bool AArch64InstructionSelector::selectShuffleVector( } } - MachineIRBuilder MIRBuilder(I); + if (DstTy.getSizeInBits() != 128) { + assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty"); + // This case can be done with TBL1. + return false; + } // Use a constant pool to load the index vector for TBL. Constant *CPVal = ConstantVector::get(CstIdxs); + MachineIRBuilder MIRBuilder(I); MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder); if (!IndexLoad) { LLVM_DEBUG(dbgs() << "Could not load from a constant pool"); return false; } - if (DstTy.getSizeInBits() != 128) { - assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty"); - // This case can be done with TBL1. - MachineInstr *Concat = emitVectorConcat(Src1Reg, Src2Reg, MIRBuilder); - if (!Concat) { - LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1"); - return false; - } - auto TBL1 = MIRBuilder.buildInstr( - AArch64::TBLv16i8One, {&AArch64::FPR128RegClass}, - {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()}); - constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI); - - auto Copy = BuildMI(*I.getParent(), I, I.getDebugLoc(), - TII.get(TargetOpcode::COPY), I.getOperand(0).getReg()) - .addUse(TBL1->getOperand(0).getReg(), 0, AArch64::dsub); - RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI); - I.eraseFromParent(); - return true; - } - // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive // Q registers for regalloc. auto RegSeq = MIRBuilder @@ -2158,8 +2048,26 @@ bool AArch64InstructionSelector::selectBuildVector( const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); unsigned Opc; unsigned SubregIdx; - - std::tie(Opc, SubregIdx) = getInsertVecEltOpInfo(RB, EltSize); + if (RB.getID() == AArch64::GPRRegBankID) { + if (EltSize == 32) { + Opc = AArch64::INSvi32gpr; + SubregIdx = AArch64::ssub; + } else { + Opc = AArch64::INSvi64gpr; + SubregIdx = AArch64::dsub; + } + } else { + if (EltSize == 16) { + Opc = AArch64::INSvi16lane; + SubregIdx = AArch64::hsub; + } else if (EltSize == 32) { + Opc = AArch64::INSvi32lane; + SubregIdx = AArch64::ssub; + } else { + Opc = AArch64::INSvi64lane; + SubregIdx = AArch64::dsub; + } + } MachineIRBuilder MIRBuilder(I); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-vector.mir index f03e480..b78c7a5 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-shuffle-vector.mir @@ -1,17 +1,11 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# WARNING: update_mir_test_checks.py does not include the constant pools output, -# so this test requires manual fixing up after running the script. - # RUN: llc -mtriple=aarch64-- -O0 -run-pass=instruction-select -verify-machineinstrs %s -global-isel-abort=1 -o - | FileCheck %s --- | + ; ModuleID = 'shufflevec-only-legal.ll' + source_filename = "shufflevec-only-legal.ll" target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64" - define <2 x float> @shuffle_v2f32(<2 x float> %a, <2 x float> %b) { - %shuf = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> - ret <2 x float> %shuf - } - define <4 x i32> @shuffle_v4i32(<4 x i32> %a, <4 x i32> %b) { %shuf = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuf @@ -29,50 +23,19 @@ ... --- -name: shuffle_v2f32 -alignment: 2 -legalized: true -regBankSelected: true -tracksRegLiveness: true -body: | - bb.1 (%ir-block.0): - liveins: $d0, $d1 - - ; CHECK-LABEL: name: shuffle_v2f32 - ; CHECK: constants: - ; CHECK: - id: 0 - ; CHECK: value: '<8 x i8> ' - ; CHECK: alignment: 8 - ; CHECK: liveins: $d0, $d1 - ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 - ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0 - ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 - ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub - ; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF - ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.dsub - ; CHECK: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG]], 1, [[INSERT_SUBREG1]], 0 - ; CHECK: [[TBLv16i8One:%[0-9]+]]:fpr128 = TBLv16i8One [[INSvi64lane]], [[LDRQui]] - ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY [[TBLv16i8One]].dsub - ; CHECK: $d0 = COPY [[COPY2]] - ; CHECK: RET_ReallyLR implicit $d0 - %0:fpr(<2 x s32>) = COPY $d0 - %1:fpr(<2 x s32>) = COPY $d1 - %4:gpr(s32) = G_CONSTANT i32 1 - %5:gpr(s32) = G_CONSTANT i32 0 - %3:fpr(<2 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32) - %2:fpr(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, %3(<2 x s32>) - $d0 = COPY %2(<2 x s32>) - RET_ReallyLR implicit $d0 - -... ---- name: shuffle_v4i32 alignment: 2 legalized: true regBankSelected: true tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } + - { id: 6, class: gpr } body: | bb.1 (%ir-block.0): liveins: $q0, $q1 @@ -108,6 +71,15 @@ alignment: 2 legalized: true regBankSelected: true tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } + - { id: 6, class: gpr } + - { id: 7, class: gpr } body: | bb.1 (%ir-block.0): liveins: $q0, $q1 @@ -144,6 +116,12 @@ alignment: 2 legalized: true regBankSelected: true tracksRegLiveness: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: gpr } body: | bb.1 (%ir-block.0): liveins: $q0, $q1 -- 2.7.4