--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
SDLoc DL(N);
EVT VT = InVec.getValueType();
- unsigned NumElts = VT.getVectorNumElements();
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);

// Insert into out-of-bounds element is undefined.
- if (IndexC && IndexC->getZExtValue() >= VT.getVectorNumElements())
+ if (IndexC && VT.isFixedLengthVector() &&
+     IndexC->getZExtValue() >= VT.getVectorNumElements())
  return DAG.getUNDEF(VT);

// Remove redundant insertions:
// (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
    InVec == InVal->getOperand(0) && EltNo == InVal->getOperand(1))
  return InVec;

if (!IndexC) {
  // If this is variable insert to undef vector, it might be better to splat:
  // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
  if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
-     SmallVector<SDValue, 8> Ops(NumElts, InVal);
-     return DAG.getBuildVector(VT, DL, Ops);
+     if (VT.isScalableVector())
+       return DAG.getSplatVector(VT, DL, InVal);
+     else {
+       SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
+       return DAG.getBuildVector(VT, DL, Ops);
+     }
  }
  return SDValue();
}

+ if (VT.isScalableVector())
+   return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+
// We must know which element is being inserted for folds below here.
unsigned Elt = IndexC->getZExtValue();
if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
  return Shuf;
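Taken together, the DAGCombiner changes above do two things for scalable vectors: an insert at an index past the known minimum element count is no longer folded to undef (the lane may exist at runtime), and the splat-on-undef fold emits a single SPLAT_VECTOR node instead of a BUILD_VECTOR, whose operand count would be unknowable. A minimal IR sketch of both cases (function names illustrative, not from the patch; the second only fires on targets whose shouldSplatInsEltVarIndex hook returns true):

; Lane 4 is beyond nxv2i64's minimum of 2 elements but may be valid at
; runtime, so this insert must not be folded to undef.
define <vscale x 2 x i64> @insert_beyond_min_vl(<vscale x 2 x i64> %v) {
  %r = insertelement <vscale x 2 x i64> %v, i64 30, i32 4
  ret <vscale x 2 x i64> %r
}

; Variable insert into undef: combined to a splat of %val across all lanes.
define <vscale x 4 x i32> @splat_via_insert(i32 %val, i32 %idx) {
  %r = insertelement <vscale x 4 x i32> undef, i32 %val, i32 %idx
  ret <vscale x 4 x i32> %r
}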
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
// 16-element contiguous store
defm : st1<ST1B, ST1B_IMM, nxv16i8, AArch64st1, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
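+
+ // Insert scalar into undef[0]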
+ def : Pat<(nxv16i8 (vector_insert (nxv16i8 (undef)), (i32 FPR32:$src), 0)),
+ (INSERT_SUBREG (nxv16i8 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
+ def : Pat<(nxv8i16 (vector_insert (nxv8i16 (undef)), (i32 FPR32:$src), 0)),
+ (INSERT_SUBREG (nxv8i16 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
+ def : Pat<(nxv4i32 (vector_insert (nxv4i32 (undef)), (i32 FPR32:$src), 0)),
+ (INSERT_SUBREG (nxv4i32 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
+ def : Pat<(nxv2i64 (vector_insert (nxv2i64 (undef)), (i64 FPR64:$src), 0)),
+ (INSERT_SUBREG (nxv2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+
+ // Insert scalar into vector[0]
+ def : Pat<(nxv16i8 (vector_insert (nxv16i8 ZPR:$vec), (i32 GPR32:$src), 0)),
+ (CPY_ZPmR_B ZPR:$vec, (PTRUE_B 1), GPR32:$src)>;
+ def : Pat<(nxv8i16 (vector_insert (nxv8i16 ZPR:$vec), (i32 GPR32:$src), 0)),
+ (CPY_ZPmR_H ZPR:$vec, (PTRUE_H 1), GPR32:$src)>;
+ def : Pat<(nxv4i32 (vector_insert (nxv4i32 ZPR:$vec), (i32 GPR32:$src), 0)),
+ (CPY_ZPmR_S ZPR:$vec, (PTRUE_S 1), GPR32:$src)>;
+ def : Pat<(nxv2i64 (vector_insert (nxv2i64 ZPR:$vec), (i64 GPR64:$src), 0)),
+ (CPY_ZPmR_D ZPR:$vec, (PTRUE_D 1), GPR64:$src)>;
+
+ def : Pat<(nxv8f16 (vector_insert (nxv8f16 ZPR:$vec), (f16 FPR16:$src), 0)),
+ (SEL_ZPZZ_H (PTRUE_H 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), ZPR:$vec)>;
+ def : Pat<(nxv4f32 (vector_insert (nxv4f32 ZPR:$vec), (f32 FPR32:$src), 0)),
+ (SEL_ZPZZ_S (PTRUE_S 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), ZPR:$vec)>;
+ def : Pat<(nxv2f64 (vector_insert (nxv2f64 ZPR:$vec), (f64 FPR64:$src), 0)),
+ (SEL_ZPZZ_D (PTRUE_D 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), ZPR:$vec)>;
+
+ // Insert scalar into vector with scalar index
+ def : Pat<(nxv16i8 (vector_insert (nxv16i8 ZPR:$vec), GPR32:$src, GPR64:$index)),
+ (CPY_ZPmR_B ZPR:$vec,
+ (CMPEQ_PPzZZ_B (PTRUE_B 31),
+ (INDEX_II_B 0, 1),
+ (DUP_ZR_B (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+ GPR32:$src)>;
+ def : Pat<(nxv8i16 (vector_insert (nxv8i16 ZPR:$vec), GPR32:$src, GPR64:$index)),
+ (CPY_ZPmR_H ZPR:$vec,
+ (CMPEQ_PPzZZ_H (PTRUE_H 31),
+ (INDEX_II_H 0, 1),
+ (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+ GPR32:$src)>;
+ def : Pat<(nxv4i32 (vector_insert (nxv4i32 ZPR:$vec), GPR32:$src, GPR64:$index)),
+ (CPY_ZPmR_S ZPR:$vec,
+ (CMPEQ_PPzZZ_S (PTRUE_S 31),
+ (INDEX_II_S 0, 1),
+ (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+ GPR32:$src)>;
+ def : Pat<(nxv2i64 (vector_insert (nxv2i64 ZPR:$vec), GPR64:$src, GPR64:$index)),
+ (CPY_ZPmR_D ZPR:$vec,
+ (CMPEQ_PPzZZ_D (PTRUE_D 31),
+ (INDEX_II_D 0, 1),
+ (DUP_ZR_D GPR64:$index)),
+ GPR64:$src)>;
+
+ // Insert FP scalar into vector with scalar index
+ def : Pat<(nxv8f16 (vector_insert (nxv8f16 ZPR:$vec), (f16 FPR16:$src), GPR64:$index)),
+ (CPY_ZPmV_H ZPR:$vec,
+ (CMPEQ_PPzZZ_H (PTRUE_H 31),
+ (INDEX_II_H 0, 1),
+ (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+ $src)>;
+ def : Pat<(nxv4f32 (vector_insert (nxv4f32 ZPR:$vec), (f32 FPR32:$src), GPR64:$index)),
+ (CPY_ZPmV_S ZPR:$vec,
+ (CMPEQ_PPzZZ_S (PTRUE_S 31),
+ (INDEX_II_S 0, 1),
+ (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+ $src)>;
+ def : Pat<(nxv2f64 (vector_insert (nxv2f64 ZPR:$vec), (f64 FPR64:$src), GPR64:$index)),
+ (CPY_ZPmV_D ZPR:$vec,
+ (CMPEQ_PPzZZ_D (PTRUE_D 31),
+ (INDEX_II_D 0, 1),
+ (DUP_ZR_D $index)),
+ $src)>;
}
let Predicates = [HasSVE, HasMatMulInt8] in {
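All four variable-index groups above share one recipe: INDEX materializes the lane numbers 0, 1, 2, ..., DUP splats the requested index across a vector, CMPEQ of the two yields a predicate with exactly the target lane set, and a predicated CPY moves the scalar in. A sketch for the nxv4i32 case, with the expected sequence paraphrased from the CHECK lines in the test below (register assignments illustrative):

define <vscale x 4 x i32> @insert_var_4xi32(<vscale x 4 x i32> %v, i32 %val, i32 %idx) {
  ; index z1.s, #0, #1             (lane numbers)
  ; mov   z2.s, w8                 (DUP_ZR_S splat of the index)
  ; cmpeq p0.s, p1/z, z1.s, z2.s   (single-lane predicate)
  ; mov   z0.s, p0/m, w0           (CPY_ZPmR_S predicated copy)
  %r = insertelement <vscale x 4 x i32> %v, i32 %val, i32 %idx
  ret <vscale x 4 x i32> %r
}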
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_lane0_16xi8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: test_lane0_16xi8
+; CHECK: mov [[REG:.*]], #30
+; CHECK: mov z0.b, p{{[0-7]}}/m, [[REG]]
+ %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 0
+ ret <vscale x 16 x i8> %b
+}
+
+define <vscale x 8 x i16> @test_lane0_8xi16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: test_lane0_8xi16
+; CHECK: mov [[REG:.*]], #30
+; CHECK: mov z0.h, p{{[0-7]}}/m, [[REG]]
+ %b = insertelement <vscale x 8 x i16> %a, i16 30, i32 0
+ ret <vscale x 8 x i16> %b
+}
+
+define <vscale x 4 x i32> @test_lane0_4xi32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: test_lane0_4xi32
+; CHECK: mov [[REG:.*]], #30
+; CHECK: mov z0.s, p{{[0-7]}}/m, [[REG]]
+ %b = insertelement <vscale x 4 x i32> %a, i32 30, i32 0
+ ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 2 x i64> @test_lane0_2xi64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: test_lane0_2xi64
+; CHECK: mov w[[REG:.*]], #30
+; CHECK: mov z0.d, p{{[0-7]}}/m, x[[REG]]
+ %b = insertelement <vscale x 2 x i64> %a, i64 30, i32 0
+ ret <vscale x 2 x i64> %b
+}
+
+define <vscale x 2 x double> @test_lane0_2xf64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: test_lane0_2xf64
+; CHECK: fmov d[[REG:[0-9]+]], #1.00000000
+; CHECK: mov z0.d, p{{[0-7]}}/m, z[[REG]].d
+ %b = insertelement <vscale x 2 x double> %a, double 1.0, i32 0
+ ret <vscale x 2 x double> %b
+}
+
+define <vscale x 4 x float> @test_lane0_4xf32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: test_lane0_4xf32
+; CHECK: fmov s[[REG:[0-9]+]], #1.00000000
+; CHECK: mov z0.s, p{{[0-7]}}/m, z[[REG]].s
+ %b = insertelement <vscale x 4 x float> %a, float 1.0, i32 0
+ ret <vscale x 4 x float> %b
+}
+
+define <vscale x 8 x half> @test_lane0_8xf16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: test_lane0_8xf16
+; CHECK: fmov h[[REG:[0-9]+]], #1.00000000
+; CHECK: mov z0.h, p{{[0-7]}}/m, z[[REG]].h
+ %b = insertelement <vscale x 8 x half> %a, half 1.0, i32 0
+ ret <vscale x 8 x half> %b
+}
+
+; Lane index beyond the minimum vector length: the lane may still exist at
+; runtime, so the insert must not be folded to undef.
+define <vscale x 2 x i64> @test_lane4_2xi64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: test_lane4_2xi64
+; CHECK: mov w[[IDXREG:.*]], #4
+; CHECK: index z[[CMPVEC:[0-9]+]].d, #0, #1
+; CHECK: mov z[[IDXVEC:[0-9]+]].d, x[[IDXREG]]
+; CHECK: cmpeq p[[PRED:[0-9]+]].d, p{{[0-7]}}/z, z[[CMPVEC]].d, z[[IDXVEC]].d
+; CHECK: mov w[[VALREG:.*]], #30
+; CHECK: mov z0.d, p[[PRED]]/m, x[[VALREG]]
+ %b = insertelement <vscale x 2 x i64> %a, i64 30, i32 4
+ ret <vscale x 2 x i64> %b
+}
+
+; Lane index beyond the minimum vector length: the lane may still exist at
+; runtime, so the insert must not be folded to undef.
+define <vscale x 8 x half> @test_lane9_8xf16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: test_lane9_8xf16
+; CHECK: mov w[[IDXREG:.*]], #9
+; CHECK: index z[[CMPVEC:[0-9]+]].h, #0, #1
+; CHECK: mov z[[IDXVEC:[0-9]+]].h, w[[IDXREG]]
+; CHECK: cmpeq p[[PRED:[0-9]+]].h, p{{[0-7]}}/z, z[[CMPVEC]].h, z[[IDXVEC]].h
+; CHECK: fmov h[[VALREG:[0-9]+]], #1.00000000
+; CHECK: mov z0.h, p[[PRED]]/m, h[[VALREG]]
+ %b = insertelement <vscale x 8 x half> %a, half 1.0, i32 9
+ ret <vscale x 8 x half> %b
+}
+
+define <vscale x 16 x i8> @test_lane1_16xi8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: test_lane1_16xi8
+; CHECK: mov w[[IDXREG:.*]], #1
+; CHECK: index z[[CMPVEC:[0-9]+]].b, #0, #1
+; CHECK: mov z[[IDXVEC:[0-9]+]].b, w[[IDXREG]]
+; CHECK: cmpeq p[[PRED:[0-9]+]].b, p{{[0-7]}}/z, z[[CMPVEC]].b, z[[IDXVEC]].b
+; CHECK: mov w[[VALREG:.*]], #30
+; CHECK: mov z0.b, p[[PRED]]/m, w[[VALREG]]
+ %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 1
+ ret <vscale x 16 x i8> %b
+}
+
+define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
+; CHECK-LABEL: test_lanex_16xi8
+; CHECK: index z[[CMPVEC:[0-9]+]].b, #0, #1
+; CHECK: mov z[[IDXVEC:[0-9]+]].b, w{{[0-9]+}}
+; CHECK: cmpeq p[[PRED:[0-9]+]].b, p{{[0-7]}}/z, z[[CMPVEC]].b, z[[IDXVEC]].b
+; CHECK: mov w[[VALREG:.*]], #30
+; CHECK: mov z0.b, p[[PRED]]/m, w[[VALREG]]
+ %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 %x
+ ret <vscale x 16 x i8> %b
+}
+
+; Redundant lane insert
+define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: extract_insert_4xi32
+; CHECK-NOT: mov z0.s
+; CHECK: ret
+ %b = extractelement <vscale x 4 x i32> %a, i32 2
+ %c = insertelement <vscale x 4 x i32> %a, i32 %b, i32 2
+ ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {
+; CHECK-LABEL: test_lane6_undef_8xi16
+; CHECK: mov w[[IDXREG:.*]], #6
+; CHECK: index z[[CMPVEC:.*]].h, #0, #1
+; CHECK: mov z[[IDXVEC:[0-9]+]].h, w[[IDXREG]]
+; CHECK: cmpeq p[[PRED:.*]].h, p{{.*}}/z, z[[CMPVEC]].h, z[[IDXVEC]].h
+; CHECK: mov z0.h, p[[PRED]]/m, w0
+ %b = insertelement <vscale x 8 x i16> undef, i16 %a, i32 6
+ ret <vscale x 8 x i16> %b
+}
+
+define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
+; CHECK-LABEL: test_lane0_undef_16xi8
+; CHECK: fmov s0, w0
+ %b = insertelement <vscale x 16 x i8> undef, i8 %a, i32 0
+ ret <vscale x 16 x i8> %b
+}