def : GINodeEquiv<G_CTTZ_ZERO_UNDEF, cttz_zero_undef>;
def : GINodeEquiv<G_CTPOP, ctpop>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
+def : GINodeEquiv<G_CONCAT_VECTORS, concat_vectors>;
def : GINodeEquiv<G_FCEIL, fceil>;
def : GINodeEquiv<G_FCOS, fcos>;
def : GINodeEquiv<G_FSIN, fsin>;
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
SmallVectorImpl<int> &Idxs) const;
bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
MachineIRBuilder &MIRBuilder) const;
- MachineInstr *emitVectorConcat(unsigned Op1, unsigned Op2,
+
+ // Emit a vector concat operation.
+ MachineInstr *emitVectorConcat(Optional<unsigned> Dst, unsigned Op1,
+ unsigned Op2,
MachineIRBuilder &MIRBuilder) const;
ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
return selectExtractElt(I, MRI);
case TargetOpcode::G_INSERT_VECTOR_ELT:
return selectInsertElt(I, MRI);
+ case TargetOpcode::G_CONCAT_VECTORS:
+ return selectConcatVectors(I, MRI);
}
return false;
return true;
}
+/// Select a G_CONCAT_VECTORS by forwarding its destination register and its
+/// first two source registers to emitVectorConcat.
+///
+/// \returns true after the concat has been emitted and \p I has been erased;
+/// false (leaving \p I in place) when emitVectorConcat produces no
+/// instruction.
+bool AArch64InstructionSelector::selectConcatVectors(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
+         "Unexpected opcode");
+  // Operand 0 is the result; operands 1 and 2 are the vectors being joined.
+  const unsigned ResultReg = I.getOperand(0).getReg();
+  const unsigned FirstSrcReg = I.getOperand(1).getReg();
+  const unsigned SecondSrcReg = I.getOperand(2).getReg();
+
+  // Build the replacement immediately before the generic instruction.
+  MachineIRBuilder Builder(I);
+  if (!emitVectorConcat(ResultReg, FirstSrcReg, SecondSrcReg, Builder))
+    return false;
+
+  // The concat now defines ResultReg directly, so the generic instruction is
+  // no longer needed.
+  I.eraseFromParent();
+  return true;
+}
+
void AArch64InstructionSelector::collectShuffleMaskIndices(
MachineInstr &I, MachineRegisterInfo &MRI,
SmallVectorImpl<int> &Idxs) const {
}
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
- unsigned Op1, unsigned Op2, MachineIRBuilder &MIRBuilder) const {
+ Optional<unsigned> Dst, unsigned Op1, unsigned Op2,
+ MachineIRBuilder &MIRBuilder) const {
// We implement a vector concat by:
// 1. Use scalar_to_vector to insert the lower vector into the larger dest
// 2. Insert the upper vector into the destination's upper element
std::tie(InsertOpc, InsSubRegIdx) =
getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
+ if (!Dst)
+ Dst = MRI.createVirtualRegister(DstRC);
auto InsElt =
MIRBuilder
- .buildInstr(InsertOpc, {DstRC}, {WidenedOp1->getOperand(0).getReg()})
+ .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
.addImm(1) /* Lane index */
.addUse(WidenedOp2->getOperand(0).getReg())
.addImm(0);
-
constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
return &*InsElt;
}
if (DstTy.getSizeInBits() != 128) {
assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
// This case can be done with TBL1.
- MachineInstr *Concat = emitVectorConcat(Src1Reg, Src2Reg, MIRBuilder);
+ MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
if (!Concat) {
LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
return false;
.clampNumElements(0, v4s32, v4s32)
.clampNumElements(0, v2s64, v2s64);
+ getActionDefinitionsBuilder(G_CONCAT_VECTORS)
+ .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
+
computeTables();
verify(*ST.getInstrInfo());
}
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer %s -global-isel-abort=1 -o - | FileCheck %s
+
+---
+name: legal_v4s32_v2s32
+body: |
+ bb.0:
+ liveins: $d0, $d1
+ ; CHECK-LABEL: name: legal_v4s32_v2s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[COPY1]](<2 x s32>)
+ ; CHECK: $q0 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+ ; CHECK: RET_ReallyLR
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s32>) = COPY $d1
+ %2:_(<4 x s32>) = G_CONCAT_VECTORS %0(<2 x s32>), %1(<2 x s32>)
+ $q0 = COPY %2(<4 x s32>)
+ RET_ReallyLR
+...
+---
+name: legal_v8s16_v4s16
+body: |
+ bb.0:
+ liveins: $d0, $d1
+ ; CHECK-LABEL: name: legal_v8s16_v4s16
+ ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>)
+ ; CHECK: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
+ ; CHECK: RET_ReallyLR
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(<4 x s16>) = COPY $d1
+ %2:_(<8 x s16>) = G_CONCAT_VECTORS %0(<4 x s16>), %1(<4 x s16>)
+ $q0 = COPY %2(<8 x s16>)
+ RET_ReallyLR
+...
# DEBUG: .. type index coverage check SKIPPED: no rules defined
#
# DEBUG-NEXT: G_CONCAT_VECTORS (opcode {{[0-9]+}}): 2 type indices
-# DEBUG: .. type index coverage check SKIPPED: no rules defined
+# DEBUG: .. the first uncovered type index: 2, OK
#
# DEBUG-NEXT: G_PTRTOINT (opcode {{[0-9]+}}): 2 type indices
# DEBUG: .. the first uncovered type index: 2, OK
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+...
+---
+name: legal_v4s32_v2s32
+alignment: 2
+legalized: true
+regBankSelected: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+ - { id: 2, class: fpr }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: legal_v4s32_v2s32
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+ ; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.dsub
+ ; CHECK: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG]], 1, [[INSERT_SUBREG1]], 0
+ ; CHECK: $q0 = COPY [[INSvi64lane]]
+ ; CHECK: RET_ReallyLR
+ %0:fpr(<2 x s32>) = COPY $d0
+ %1:fpr(<2 x s32>) = COPY $d1
+ %2:fpr(<4 x s32>) = G_CONCAT_VECTORS %0(<2 x s32>), %1(<2 x s32>)
+ $q0 = COPY %2(<4 x s32>)
+ RET_ReallyLR
+
+...
+---
+name: legal_v8s16_v4s16
+alignment: 2
+legalized: true
+regBankSelected: true
+registers:
+ - { id: 0, class: fpr }
+ - { id: 1, class: fpr }
+ - { id: 2, class: fpr }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: legal_v8s16_v4s16
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+ ; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.dsub
+ ; CHECK: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG]], 1, [[INSERT_SUBREG1]], 0
+ ; CHECK: $q0 = COPY [[INSvi64lane]]
+ ; CHECK: RET_ReallyLR
+ %0:fpr(<4 x s16>) = COPY $d0
+ %1:fpr(<4 x s16>) = COPY $d1
+ %2:fpr(<8 x s16>) = G_CONCAT_VECTORS %0(<4 x s16>), %1(<4 x s16>)
+ $q0 = COPY %2(<8 x s16>)
+ RET_ReallyLR
+
+...