[CodeGen] Add support for inserting elements into scalable vectors

author David Sherwood <david.sherwood@arm.com>

Fri, 24 Apr 2020 06:55:53 +0000 (07:55 +0100)

committer David Sherwood <david.sherwood@arm.com>

Thu, 30 Apr 2020 10:14:04 +0000 (11:14 +0100)
author David Sherwood <david.sherwood@arm.com>
Fri, 24 Apr 2020 06:55:53 +0000 (07:55 +0100)
committer David Sherwood <david.sherwood@arm.com>
Thu, 30 Apr 2020 10:14:04 +0000 (11:14 +0100)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 1a8242a..9042f51 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17134,11 +17134,11 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
    SDLoc DL(N);
  
    EVT VT = InVec.getValueType();
-  unsigned NumElts = VT.getVectorNumElements();
    auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
  
    // Insert into out-of-bounds element is undefined.
-  if (IndexC && IndexC->getZExtValue() >= VT.getVectorNumElements())
+  if (IndexC && VT.isFixedLengthVector() &&
+      IndexC->getZExtValue() >= VT.getVectorNumElements())
      return DAG.getUNDEF(VT);
  
    // Remove redundant insertions:
@@ -17151,12 +17151,21 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
      // If this is variable insert to undef vector, it might be better to splat:
      // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
      if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
-      SmallVector<SDValue, 8> Ops(NumElts, InVal);
-      return DAG.getBuildVector(VT, DL, Ops);
+      if (VT.isScalableVector())
+        return DAG.getSplatVector(VT, DL, InVal);
+      else {
+        SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
+        return DAG.getBuildVector(VT, DL, Ops);
+      }
      }
      return SDValue();
    }
  
+  if (VT.isScalableVector())
+    return SDValue();
+
+  unsigned NumElts = VT.getVectorNumElements();
+
    // We must know which element is being inserted for folds below here.
    unsigned Elt = IndexC->getZExtValue();
    if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

index 4d89bee..7929682 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5615,8 +5615,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
      llvm_unreachable("should use getVectorShuffle constructor!");
    case ISD::INSERT_VECTOR_ELT: {
      ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);
-    // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF
-    if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+    // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except
+    // for scalable vectors where we will generate appropriate code to
+    // deal with out-of-bounds cases correctly.
+    if (N3C && N1.getValueType().isFixedLengthVector() &&
+        N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
        return getUNDEF(VT);
  
      // Undefined index can be assumed out-of-bounds, so that's UNDEF too.
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

index 7864cc1..ed79063 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1767,6 +1767,77 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
    // 16-element contiguous store
    defm : st1<ST1B, ST1B_IMM,   nxv16i8, AArch64st1, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
  
+  def : Pat<(nxv16i8 (vector_insert (nxv16i8 (undef)), (i32 FPR32:$src), 0)),
+            (INSERT_SUBREG (nxv16i8 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
+  def : Pat<(nxv8i16 (vector_insert (nxv8i16 (undef)), (i32 FPR32:$src), 0)),
+            (INSERT_SUBREG (nxv8i16 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
+  def : Pat<(nxv4i32 (vector_insert (nxv4i32 (undef)), (i32 FPR32:$src), 0)),
+            (INSERT_SUBREG (nxv4i32 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
+  def : Pat<(nxv2i64 (vector_insert (nxv2i64 (undef)), (i64 FPR64:$src), 0)),
+            (INSERT_SUBREG (nxv2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+
+  // Insert scalar into vector[0]
+  def : Pat<(nxv16i8 (vector_insert (nxv16i8 ZPR:$vec), (i32 GPR32:$src), 0)),
+            (CPY_ZPmR_B ZPR:$vec, (PTRUE_B 1), GPR32:$src)>;
+  def : Pat<(nxv8i16 (vector_insert (nxv8i16 ZPR:$vec), (i32 GPR32:$src), 0)),
+            (CPY_ZPmR_H ZPR:$vec, (PTRUE_H 1), GPR32:$src)>;
+  def : Pat<(nxv4i32 (vector_insert (nxv4i32 ZPR:$vec), (i32 GPR32:$src), 0)),
+            (CPY_ZPmR_S ZPR:$vec, (PTRUE_S 1), GPR32:$src)>;
+  def : Pat<(nxv2i64 (vector_insert (nxv2i64 ZPR:$vec), (i64 GPR64:$src), 0)),
+            (CPY_ZPmR_D ZPR:$vec, (PTRUE_D 1), GPR64:$src)>;
+
+  def : Pat<(nxv8f16 (vector_insert (nxv8f16 ZPR:$vec), (f16 FPR16:$src), 0)),
+            (SEL_ZPZZ_H (PTRUE_H 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), ZPR:$vec)>;
+  def : Pat<(nxv4f32 (vector_insert (nxv4f32 ZPR:$vec), (f32 FPR32:$src), 0)),
+            (SEL_ZPZZ_S (PTRUE_S 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), ZPR:$vec)>;
+  def : Pat<(nxv2f64 (vector_insert (nxv2f64 ZPR:$vec), (f64 FPR64:$src), 0)),
+            (SEL_ZPZZ_D (PTRUE_D 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), ZPR:$vec)>;
+
+  // Insert scalar into vector with scalar index
+  def : Pat<(nxv16i8 (vector_insert (nxv16i8 ZPR:$vec), GPR32:$src, GPR64:$index)),
+            (CPY_ZPmR_B ZPR:$vec,
+                        (CMPEQ_PPzZZ_B (PTRUE_B 31),
+                                       (INDEX_II_B 0, 1),
+                                       (DUP_ZR_B (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+                        GPR32:$src)>;
+  def : Pat<(nxv8i16 (vector_insert (nxv8i16 ZPR:$vec), GPR32:$src, GPR64:$index)),
+            (CPY_ZPmR_H ZPR:$vec,
+                        (CMPEQ_PPzZZ_H (PTRUE_H 31),
+                                       (INDEX_II_H 0, 1),
+                                       (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+                        GPR32:$src)>;
+  def : Pat<(nxv4i32 (vector_insert (nxv4i32 ZPR:$vec), GPR32:$src, GPR64:$index)),
+            (CPY_ZPmR_S ZPR:$vec,
+                        (CMPEQ_PPzZZ_S (PTRUE_S 31),
+                                       (INDEX_II_S 0, 1),
+                                       (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+                        GPR32:$src)>;
+  def : Pat<(nxv2i64 (vector_insert (nxv2i64 ZPR:$vec), GPR64:$src, GPR64:$index)),
+            (CPY_ZPmR_D ZPR:$vec,
+                        (CMPEQ_PPzZZ_D (PTRUE_D 31),
+                                       (INDEX_II_D 0, 1),
+                                       (DUP_ZR_D GPR64:$index)),
+                        GPR64:$src)>;
+
+  // Insert FP scalar into vector with scalar index
+  def : Pat<(nxv8f16 (vector_insert (nxv8f16 ZPR:$vec), (f16 FPR16:$src), GPR64:$index)),
+            (CPY_ZPmV_H ZPR:$vec,
+                        (CMPEQ_PPzZZ_H (PTRUE_H 31),
+                                       (INDEX_II_H 0, 1),
+                                       (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+                        $src)>;
+  def : Pat<(nxv4f32 (vector_insert (nxv4f32 ZPR:$vec), (f32 FPR32:$src), GPR64:$index)),
+            (CPY_ZPmV_S ZPR:$vec,
+                        (CMPEQ_PPzZZ_S (PTRUE_S 31),
+                                       (INDEX_II_S 0, 1),
+                                       (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+                        $src)>;
+  def : Pat<(nxv2f64 (vector_insert (nxv2f64 ZPR:$vec), (f64 FPR64:$src), GPR64:$index)),
+            (CPY_ZPmV_D ZPR:$vec,
+                        (CMPEQ_PPzZZ_D (PTRUE_D 31),
+                                       (INDEX_II_D 0, 1),
+                                       (DUP_ZR_D $index)),
+                        $src)>;
  }
  
  let Predicates = [HasSVE, HasMatMulInt8] in {
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll

new file mode 100644 (file)

index 0000000..90acf8c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
@@ -0,0 +1,135 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_lane0_16xi8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: test_lane0_16xi8
+; CHECK:       mov [[REG:.*]], #30
+; CHECK:       mov z0.b, p{{[0-7]}}/m, [[REG]]
+  %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 0
+  ret <vscale x 16 x i8> %b
+}
+
+define <vscale x 8 x i16> @test_lane0_8xi16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: test_lane0_8xi16
+; CHECK:       mov [[REG:.*]], #30
+; CHECK:       mov z0.h, p{{[0-7]}}/m, [[REG]]
+  %b = insertelement <vscale x 8 x i16> %a, i16 30, i32 0
+  ret <vscale x 8 x i16> %b
+}
+
+define <vscale x 4 x i32> @test_lane0_4xi32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: test_lane0_4xi32
+; CHECK:       mov [[REG:.*]], #30
+; CHECK:       mov z0.s, p{{[0-7]}}/m, [[REG]]
+  %b = insertelement <vscale x 4 x i32> %a, i32 30, i32 0
+  ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 2 x i64> @test_lane0_2xi64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: test_lane0_2xi64
+; CHECK:       mov w[[REG:.*]], #30
+; CHECK:       mov z0.d, p{{[0-7]}}/m, x[[REG]]
+  %b = insertelement <vscale x 2 x i64> %a, i64 30, i32 0
+  ret <vscale x 2 x i64> %b
+}
+
+define <vscale x 2 x double> @test_lane0_2xf64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: test_lane0_2xf64
+; CHECK:       fmov d[[REG:[0-9]+]], #1.00000000
+; CHECK:       mov z0.d, p{{[0-7]}}/m, z[[REG]].d
+  %b = insertelement <vscale x 2 x double> %a, double 1.0, i32 0
+  ret <vscale x 2 x double> %b
+}
+
+define <vscale x 4 x float> @test_lane0_4xf32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: test_lane0_4xf32
+; CHECK:       fmov s[[REG:[0-9]+]], #1.00000000
+; CHECK:       mov z0.s, p{{[0-7]}}/m, z[[REG]].s
+  %b = insertelement <vscale x 4 x float> %a, float 1.0, i32 0
+  ret <vscale x 4 x float> %b
+}
+
+define <vscale x 8 x half> @test_lane0_8xf16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: test_lane0_8xf16
+; CHECK:       fmov h[[REG:[0-9]+]], #1.00000000
+; CHECK:       mov z0.h, p{{[0-7]}}/m, z[[REG]].h
+  %b = insertelement <vscale x 8 x half> %a, half 1.0, i32 0
+  ret <vscale x 8 x half> %b
+}
+
+; Lane 4 is only in bounds when vscale >= 3; must be lowered, not folded to undef
+define <vscale x 2 x i64> @test_lane4_2xi64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: test_lane4_2xi64
+; CHECK:       mov w[[IDXREG:.*]], #4
+; CHECK:       index z[[CMPVEC:[0-9]+]].d, #0, #1
+; CHECK:       mov z[[IDXVEC:[0-9]+]].d, x[[IDXREG]]
+; CHECK:       cmpeq p[[PRED:[0-9]+]].d, p{{[0-7]}}/z, z[[CMPVEC]].d, z[[IDXVEC]].d
+; CHECK:       mov w[[VALREG:.*]], #30
+; CHECK:       mov z0.d, p[[PRED]]/m, x[[VALREG]]
+  %b = insertelement <vscale x 2 x i64> %a, i64 30, i32 4
+  ret <vscale x 2 x i64> %b
+}
+
+; Lane 9 is only in bounds when vscale >= 2; must be lowered, not folded to undef
+define <vscale x 8 x half> @test_lane9_8xf16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: test_lane9_8xf16
+; CHECK:       mov w[[IDXREG:.*]], #9
+; CHECK:       index z[[CMPVEC:[0-9]+]].h, #0, #1
+; CHECK:       mov z[[IDXVEC:[0-9]+]].h, w[[IDXREG]]
+; CHECK:       cmpeq p[[PRED:[0-9]+]].h, p{{[0-7]}}/z, z[[CMPVEC]].h, z[[IDXVEC]].h
+; CHECK:       fmov h[[VALREG:[0-9]+]], #1.00000000
+; CHECK:       mov z0.h, p[[PRED]]/m, h[[VALREG]]
+  %b = insertelement <vscale x 8 x half> %a, half 1.0, i32 9
+  ret <vscale x 8 x half> %b
+}
+
+define <vscale x 16 x i8> @test_lane1_16xi8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: test_lane1_16xi8
+; CHECK:       mov w[[IDXREG:.*]], #1
+; CHECK:       index z[[CMPVEC:[0-9]+]].b, #0, #1
+; CHECK:       mov z[[IDXVEC:[0-9]+]].b, w[[IDXREG]]
+; CHECK:       cmpeq p[[PRED:[0-9]+]].b, p{{[0-7]}}/z, z[[CMPVEC]].b, z[[IDXVEC]].b
+; CHECK:       mov w[[VALREG:.*]], #30
+; CHECK:       mov z0.b, p[[PRED]]/m, w[[VALREG]]
+  %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 1
+  ret <vscale x 16 x i8> %b
+}
+
+define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
+; CHECK-LABEL: test_lanex_16xi8
+; CHECK:       index z[[CMPVEC:[0-9]+]].b, #0, #1
+; CHECK:       mov z[[IDXVEC:[0-9]+]].b, w{{[0-9]+}}
+; CHECK:       cmpeq p[[PRED:[0-9]+]].b, p{{[0-7]}}/z, z[[CMPVEC]].b, z[[IDXVEC]].b
+; CHECK:       mov w[[VALREG:.*]], #30
+; CHECK:       mov z0.b, p[[PRED]]/m, w[[VALREG]]
+  %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 %x
+  ret <vscale x 16 x i8> %b
+}
+
+
+; Redundant lane insert
+define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: extract_insert_4xi32
+; CHECK-NOT:   mov w{{.*}}, #30
+; CHECK-NOT:   mov z0.{{[sd]}}
+  %b = extractelement <vscale x 4 x i32> %a, i32 2
+  %c = insertelement <vscale x 4 x i32> %a, i32 %b, i32 2
+  ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {
+; CHECK-LABEL: test_lane6_undef_8xi16
+; CHECK:       mov w[[IDXREG:.*]], #6
+; CHECK:       index z[[CMPVEC:.*]].h, #0, #1
+; CHECK:       mov z[[IDXVEC:[0-9]+]].h, w[[IDXREG]]
+; CHECK:       cmpeq p[[PRED:.*]].h, p{{.*}}/z, z[[CMPVEC]].h, z[[IDXVEC]].h
+; CHECK:       mov z0.h, p[[PRED]]/m, w0
+  %b = insertelement <vscale x 8 x i16> undef, i16 %a, i32 6
+  ret <vscale x 8 x i16> %b
+}
+
+define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
+; CHECK-LABEL: test_lane0_undef_16xi8
+; CHECK:       fmov s0, w0
+  %b = insertelement <vscale x 16 x i8> undef, i8 %a, i32 0
+  ret <vscale x 16 x i8> %b
+}
author	David Sherwood <david.sherwood@arm.com>
	Fri, 24 Apr 2020 06:55:53 +0000 (07:55 +0100)
committer	David Sherwood <david.sherwood@arm.com>
	Thu, 30 Apr 2020 10:14:04 +0000 (11:14 +0100)
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp		patch \| blob \| history
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td		patch \| blob \| history
llvm/test/CodeGen/AArch64/sve-insert-element.ll	[new file with mode: 0644]	patch \| blob