[CodeGen] Fix issues with subvector intrinsic index types

author Fraser Cormack <fraser@codeplay.com>

Thu, 25 Feb 2021 09:50:20 +0000 (09:50 +0000)

committer Fraser Cormack <fraser@codeplay.com>

Mon, 1 Mar 2021 10:28:21 +0000 (10:28 +0000)
author Fraser Cormack <fraser@codeplay.com>
Thu, 25 Feb 2021 09:50:20 +0000 (09:50 +0000)
committer Fraser Cormack <fraser@codeplay.com>
Mon, 1 Mar 2021 10:28:21 +0000 (10:28 +0000)
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h

index dbe6f9e..d344836 100644 (file)
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -520,7 +520,9 @@ enum NodeType {
    /// The elements of VECTOR1 starting at IDX are overwritten with VECTOR2.
    /// Elements IDX through (IDX + num_elements(T) - 1) must be valid VECTOR1
    /// indices. If this condition cannot be determined statically but is false at
-  /// runtime, then the result vector is undefined.
+  /// runtime, then the result vector is undefined. The IDX parameter must be a
+  /// vector index constant type, which for most targets will be an integer
+  /// pointer type.
    ///
    /// This operation supports inserting a fixed-width vector into a scalable
    /// vector, but not the other way around.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

index a571ee3..0c592b2 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5608,9 +5608,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
                  N1VT.getVectorMinNumElements()) &&
             "Extract subvector overflow!");
      assert(N2C->getAPIntValue().getBitWidth() ==
-               TLI->getVectorIdxTy(getDataLayout())
-                   .getSizeInBits()
-                   .getFixedSize() &&
+               TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() &&
             "Constant index for EXTRACT_SUBVECTOR has an invalid size");
  
      // Trivial extraction.
@@ -5830,6 +5828,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
               cast<ConstantSDNode>(N3)->getZExtValue()) <=
                  VT.getVectorMinNumElements()) &&
             "Insert subvector overflow!");
+    assert(cast<ConstantSDNode>(N3)->getAPIntValue().getBitWidth() ==
+               TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() &&
+           "Constant index for INSERT_SUBVECTOR has an invalid size");
  
      // Trivial insertion.
      if (VT == N2VT)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index b9c5ca2..39730d0 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7009,6 +7009,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
      SDValue Vec = getValue(I.getOperand(0));
      SDValue SubVec = getValue(I.getOperand(1));
      SDValue Index = getValue(I.getOperand(2));
+
+    // The intrinsic's index type is i64, but the SDNode requires an index type
+    // suitable for the target. Convert the index as required.
+    MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+    if (Index.getValueType() != VectorIdxTy)
+      Index = DAG.getVectorIdxConstant(
+          cast<ConstantSDNode>(Index)->getZExtValue(), DL);
+
      EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
      setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec,
                               Index));
@@ -7021,6 +7029,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
      SDValue Index = getValue(I.getOperand(1));
      EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
  
+    // The intrinsic's index type is i64, but the SDNode requires an index type
+    // suitable for the target. Convert the index as required.
+    MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+    if (Index.getValueType() != VectorIdxTy)
+      Index = DAG.getVectorIdxConstant(
+          cast<ConstantSDNode>(Index)->getZExtValue(), DL);
+
      setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
      return;
    }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

index f3ca688..2e8752f 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8023,7 +8023,7 @@ static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
    SDLoc DL(V64Reg);
  
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
-                     V64Reg, DAG.getConstant(0, DL, MVT::i32));
+                     V64Reg, DAG.getConstant(0, DL, MVT::i64));
  }
  
  /// getExtFactor - Determine the adjustment factor for the position when
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td

index 938bd66..08fdeec 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -10517,7 +10517,7 @@ multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
                                                   (v2i32 (AArch64duplane32
                                                            (v4i32 V128:$Rm),
                                                            VectorIndexS:$idx)))),
-                                      (i32 0))),
+                                      (i64 0))),
                                 (i64 0))))),
              (EXTRACT_SUBREG
                  (v2i32 (!cast<Instruction>(NAME # v2i32_indexed)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td

index c1e1c4a..05d3e88 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5607,7 +5607,7 @@ def : Pat<(v4i32 (opNode V128:$Rn)),
  
  // If none did, fallback to the explicit patterns, consuming the vector_extract.
  def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
-            (i32 0)), (i64 0))),
+            (i64 0)), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
              bsub), ssub)>;
@@ -5616,7 +5616,7 @@ def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
              (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
              bsub), ssub)>;
  def : Pat<(i32 (vector_extract (insert_subvector undef,
-            (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))),
+            (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
              hsub), ssub)>;
@@ -5637,7 +5637,7 @@ multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
  // If there is a sign extension after this intrinsic, consume it as smov already
  // performed it
  def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
-            (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)),
+            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)),
            (i32 (SMOVvi8to32
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
@@ -5649,7 +5649,7 @@ def : Pat<(i32 (sext_inreg (i32 (vector_extract
               (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
              (i64 0)))>;
  def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
-            (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)),
+            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)),
            (i32 (SMOVvi16to32
             (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
@@ -5668,7 +5668,7 @@ multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
  // If there is a masking operation keeping only what has been actually
  // generated, consume it.
  def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
-            (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)),
+            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
@@ -5680,7 +5680,7 @@ def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
              (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            ssub))>;
  def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
-            (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)),
+            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
@@ -6003,7 +6003,7 @@ multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
                             (v2f32 (AArch64duplane32
                                        (v4f32 (insert_subvector undef,
                                                   (v2f32 (fneg V64:$Rm)),
-                                                 (i32 0))),
+                                                 (i64 0))),
                                        VectorIndexS:$idx)))),
              (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                                 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
@@ -6024,7 +6024,7 @@ multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
                             (v4f32 (AArch64duplane32
                                        (v4f32 (insert_subvector undef,
                                                   (v2f32 (fneg V64:$Rm)),
-                                                 (i32 0))),
+                                                 (i64 0))),
                                        VectorIndexS:$idx)))),
              (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                                 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
@@ -6055,7 +6055,7 @@ multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
    def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                           (vector_extract (v4f32 (insert_subvector undef,
                                                      (v2f32 (fneg V64:$Rm)),
-                                                    (i32 0))),
+                                                    (i64 0))),
                                           VectorIndexS:$idx))),
              (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                  (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll

index 99d55b3..53b014b 100644 (file)
--- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -97,12 +97,12 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
  ; CHECK-LABEL: test_v9i8:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    mov w8, #-1
-; CHECK-NEXT:    mov v0.b[9], w8
-; CHECK-NEXT:    mov v0.b[10], w8
-; CHECK-NEXT:    mov v0.b[11], w8
-; CHECK-NEXT:    mov v0.b[12], w8
-; CHECK-NEXT:    mov v0.b[13], w8
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    mov v1.b[9], w8
+; CHECK-NEXT:    mov v1.b[10], w8
+; CHECK-NEXT:    mov v1.b[11], w8
+; CHECK-NEXT:    mov v1.b[13], w8
+; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
  ; CHECK-NEXT:    and v1.8b, v0.8b, v1.8b
  ; CHECK-NEXT:    umov w8, v1.b[1]
  ; CHECK-NEXT:    umov w9, v1.b[0]
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll

index a28fc4c..f297afc 100644 (file)
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -1,4 +1,5 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv32 -mattr=+m,+d,+experimental-zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s
  ; RUN: llc -mtriple riscv64 -mattr=+m,+d,+experimental-zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s
  
  define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec) {
author	Fraser Cormack <fraser@codeplay.com>
	Thu, 25 Feb 2021 09:50:20 +0000 (09:50 +0000)
committer	Fraser Cormack <fraser@codeplay.com>
	Mon, 1 Mar 2021 10:28:21 +0000 (10:28 +0000)
llvm/include/llvm/CodeGen/ISDOpcodes.h		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp		patch \| blob \| history
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/AArch64/AArch64InstrFormats.td		patch \| blob \| history
llvm/lib/Target/AArch64/AArch64InstrInfo.td		patch \| blob \| history
llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll		patch \| blob \| history
llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll		patch \| blob \| history