if (GA->getOpcode() == ISD::GlobalAddress &&
TLI->isOffsetFoldingLegal(GA))
return GA;
+ if ((N.getOpcode() == ISD::SPLAT_VECTOR) &&
+ isa<ConstantSDNode>(N.getOperand(0)))
+ return N.getNode();
return nullptr;
}
for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
if (isTypeLegal(VT)) {
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
// If SVE is available then i64 vector multiplications can also be made legal.
bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
- if (useSVEForFixedLengthVectorVT(VT, OverrideNEON))
+ if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
// Multiplications are only custom-lowered for 128-bit vectors so that
defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_p>;
defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_p>;
// MUL-by-immediate now selects on the predicated AArch64mul_p node rather than
// generic ISD 'mul', matching the AArch64umax_p/AArch64umin_p immediate forms
// above. NOTE(review): this assumes MUL is lowered to AArch64mul_p before
// isel — confirm the accompanying C++ lowering hunk (LowerToPredicatedOp for
// scalable vectors) lands in the same change, or these patterns stop matching.
- defm MUL_ZI : sve_int_arith_imm2<"mul", mul>;
+ defm MUL_ZI : sve_int_arith_imm2<"mul", AArch64mul_p>;
defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", "MUL_ZPZZ", int_aarch64_sve_mul, DestructiveBinaryComm>;
defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", "SMULH_ZPZZ", int_aarch64_sve_smulh, DestructiveBinaryComm>;
defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", "UMULH_ZPZZ", int_aarch64_sve_umulh, DestructiveBinaryComm>;
// Pseudo expansions of AArch64mul_p with an all-active predicate; these make
// the explicit (mul ptrue ...) Pats deleted below redundant.
defm MUL_ZPZZ : sve_int_bin_pred_bhsd<AArch64mul_p>;
- // Add unpredicated alternative for the mul instruction.
- def : Pat<(mul nxv16i8:$Op1, nxv16i8:$Op2),
- (MUL_ZPmZ_B (PTRUE_B 31), $Op1, $Op2)>;
- def : Pat<(mul nxv8i16:$Op1, nxv8i16:$Op2),
- (MUL_ZPmZ_H (PTRUE_H 31), $Op1, $Op2)>;
- def : Pat<(mul nxv4i32:$Op1, nxv4i32:$Op2),
- (MUL_ZPmZ_S (PTRUE_S 31), $Op1, $Op2)>;
- def : Pat<(mul nxv2i64:$Op1, nxv2i64:$Op2),
- (MUL_ZPmZ_D (PTRUE_D 31), $Op1, $Op2)>;
-
defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", "SDIV_ZPZZ", int_aarch64_sve_sdiv, DestructiveBinaryCommWithRev, "SDIVR_ZPmZ">;
defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", "UDIV_ZPZZ", int_aarch64_sve_udiv, DestructiveBinaryCommWithRev, "UDIVR_ZPmZ">;
defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", "SDIVR_ZPZZ", int_aarch64_sve_sdivr, DestructiveBinaryCommWithRev, "SDIV_ZPmZ", /*isReverseInstr*/ 1>;
defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh", int_aarch64_sve_sqrdmulh>;
// SVE2 integer multiply vectors (unpredicated)
// MUL_ZZZ keeps no unpredicated 'mul' pattern (op = null_frag); instead the new
// trailing op_pred argument lets sve2_int_mul emit all-active AArch64mul_p
// patterns, so SVE2 targets use the unpredicated encoding for predicated muls
// whose governing predicate is ptrue.
- defm MUL_ZZZ : sve2_int_mul<0b000, "mul", mul>;
+ defm MUL_ZZZ : sve2_int_mul<0b000, "mul", null_frag, AArch64mul_p>;
defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", null_frag>;
defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", null_frag>;
// NOTE(review): the PMUL_ZZZ hunk below looks whitespace-only (deleted and
// re-added text appear identical) — if so, drop it to keep the diff minimal.
- defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>;
+ defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>;
// Add patterns for unpredicated version of smulh and umulh.
def : Pat<(nxv16i8 (int_aarch64_sve_smulh (nxv16i1 (AArch64ptrue 31)), nxv16i8:$Op1, nxv16i8:$Op2)),
: Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))))),
(inst $Op1, i32:$imm, i32:$shift)>;
-class SVE_1_Op_Imm_Arith_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
- ValueType it, ComplexPattern cpx, Instruction inst>
- : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))),
- (inst $Op1, i32:$imm)>;
-
class SVE_1_Op_Imm_Shift_Pred_Pat<ValueType vt, ValueType pt, SDPatternOperator op,
ZPRRegOp zprty, Operand ImmTy, Instruction inst>
: Pat<(vt (op (pt (AArch64ptrue 31)), (vt zprty:$Op1), (vt (AArch64dup (ImmTy:$imm))))),
let Inst{4-0} = Zd;
}
// Multiclass for SVE2 unpredicated integer multiplies. 'op' is the
// unpredicated SDPatternOperator (null_frag when the plain node should not
// match); the new optional 'op_pred' selects a *predicated* node whose
// governing predicate is all-active onto the same unpredicated instruction.
// Defaulting op_pred to null_frag keeps every existing instantiation
// (sqrdmulh, smulh, umulh) source-compatible.
-multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op,
+                        SDPatternOperator op_pred = null_frag> {
  def _B : sve2_int_mul<0b00, opc, asm, ZPR8>;
  def _H : sve2_int_mul<0b01, opc, asm, ZPR16>;
  def _S : sve2_int_mul<0b10, opc, asm, ZPR32>;
// NOTE(review): the 'def _D' instruction and the nxv16i8 SVE_2_Op_Pat are not
// visible here — presumably elided as unchanged diff context, since both
// NAME # _D and NAME # _B are referenced below; confirm against the full file.
  def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
  def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
  def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+  // All-active predicated forms: (op_pred (ptrue), x, y) -> unpredicated inst.
+  def : SVE_2_Op_Pred_All_Active<nxv16i8, op_pred, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pred_All_Active<nxv8i16, op_pred, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pred_All_Active<nxv4i32, op_pred, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pred_All_Active<nxv2i64, op_pred, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve2_int_mul_single<bits<3> opc, string asm, SDPatternOperator op> {
def _S : sve_int_arith_imm<0b10, 0b110000, asm, ZPR32, simm8>;
def _D : sve_int_arith_imm<0b11, 0b110000, asm, ZPR64, simm8>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv16i8, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv8i16, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv4i32, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv2i64, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h
; CHECK-NEXT: uzp1 z2.b, z3.b, z2.b
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mul z2.b, p0/m, z2.b, z1.b
-; CHECK-NEXT: sub z0.b, z0.b, z2.b
+; CHECK-NEXT: mul z1.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%div = srem <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %div
; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z4.s
; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h
-; CHECK-NEXT: sub z0.h, z0.h, z2.h
+; CHECK-NEXT: mul z1.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%div = srem <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %div
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: sub z0.s, z0.s, z2.s
+; CHECK-NEXT: mul z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%div = srem <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %div
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: sub z0.d, z0.d, z2.d
+; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%div = srem <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %div
; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h
; CHECK-NEXT: uzp1 z2.b, z3.b, z2.b
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mul z2.b, p0/m, z2.b, z1.b
-; CHECK-NEXT: sub z0.b, z0.b, z2.b
+; CHECK-NEXT: mul z1.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%div = urem <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %div
; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z4.s
; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h
-; CHECK-NEXT: sub z0.h, z0.h, z2.h
+; CHECK-NEXT: mul z1.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%div = urem <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %div
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: sub z0.s, z0.s, z2.s
+; CHECK-NEXT: mul z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%div = urem <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %div
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: sub z0.d, z0.d, z2.d
+; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%div = urem <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %div