SDValue SourceVec = V.getOperand(0);
auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
if (Source == Sources.end())
- Sources.push_back(ShuffleSourceInfo(SourceVec));
+ Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
// Update the minimum and maximum lane number seen.
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
// This stage of the search produces a source with the same element type as
// the original, but with a total width matching the BUILD_VECTOR output.
EVT EltVT = SrcVT.getVectorElementType();
- EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
- VT.getSizeInBits() / EltVT.getSizeInBits());
+ unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
+ EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
- if (Src.MaxElt - Src.MinElt >= NumElts) {
+ if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
// Span too large for a VEXT to cope
return SDValue();
}
- if (Src.MinElt >= NumElts) {
+ if (Src.MinElt >= NumSrcElts) {
// The extraction can just take the second half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getIntPtrConstant(NumElts));
- Src.WindowBase = -NumElts;
- } else if (Src.MaxElt < NumElts) {
+ DAG.getIntPtrConstant(NumSrcElts));
+ Src.WindowBase = -NumSrcElts;
+ } else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
Src.ShuffleVec, DAG.getIntPtrConstant(0));
Src.ShuffleVec, DAG.getIntPtrConstant(0));
SDValue VEXTSrc2 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getIntPtrConstant(NumElts));
+ DAG.getIntPtrConstant(NumSrcElts));
unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
--- /dev/null
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define <4 x i16> @f(<4 x i32> %vqdmlal_v3.i, <8 x i16> %x5) {
+entry:
+ ; Check that we don't just dup the input vector. The code emitted is ext, dup, ext, ext
+ ; but only match the last three instructions as the first two could be combined to
+ ; a dup2 at some stage.
+ ; CHECK: dup
+ ; CHECK: ext
+ ; CHECK: ext
+ %x4 = extractelement <4 x i32> %vqdmlal_v3.i, i32 2
+ %vgetq_lane = trunc i32 %x4 to i16
+ %vecinit.i = insertelement <4 x i16> undef, i16 %vgetq_lane, i32 0
+ %vecinit2.i = insertelement <4 x i16> %vecinit.i, i16 %vgetq_lane, i32 2
+ %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vgetq_lane, i32 3
+ %vgetq_lane261 = extractelement <8 x i16> %x5, i32 0
+ %vset_lane267 = insertelement <4 x i16> %vecinit3.i, i16 %vgetq_lane261, i32 1
+ ret <4 x i16> %vset_lane267
+}