if (GA->getOpcode() == ISD::GlobalAddress &&
TLI->isOffsetFoldingLegal(GA))
return GA;
+ if ((N.getOpcode() == ISD::SPLAT_VECTOR) &&
+ isa<ConstantSDNode>(N.getOperand(0)))
+ return N.getNode();
return nullptr;
}
for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
if (isTypeLegal(VT)) {
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
// If SVE is available then i64 vector multiplications can also be made legal.
bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
- if (useSVEForFixedLengthVectorVT(VT, OverrideNEON))
+ if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
// Multiplications are only custom-lowered for 128-bit vectors so that
defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_p>;
defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_p>;
// MUL-by-immediate now selects on the predicated AArch64mul_p node rather than
// generic ISD 'mul', matching the AArch64umax_p/AArch64umin_p immediate forms
// above. NOTE(review): this assumes MUL is lowered to AArch64mul_p before
// isel — confirm the accompanying C++ lowering hunk (LowerToPredicatedOp for
// scalable vectors) lands in the same change, or these patterns stop matching.
- defm MUL_ZI : sve_int_arith_imm2<"mul", mul>;
+ defm MUL_ZI : sve_int_arith_imm2<"mul", AArch64mul_p>;
defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", "MUL_ZPZZ", int_aarch64_sve_mul, DestructiveBinaryComm>;
defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", "SMULH_ZPZZ", int_aarch64_sve_smulh, DestructiveBinaryComm>;
defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", "UMULH_ZPZZ", int_aarch64_sve_umulh, DestructiveBinaryComm>;
// Pseudo expansions of AArch64mul_p with an all-active predicate; these make
// the explicit (mul ptrue ...) Pats deleted below redundant.
defm MUL_ZPZZ : sve_int_bin_pred_bhsd<AArch64mul_p>;
- // Add unpredicated alternative for the mul instruction.
- def : Pat<(mul nxv16i8:$Op1, nxv16i8:$Op2),
- (MUL_ZPmZ_B (PTRUE_B 31), $Op1, $Op2)>;
- def : Pat<(mul nxv8i16:$Op1, nxv8i16:$Op2),
- (MUL_ZPmZ_H (PTRUE_H 31), $Op1, $Op2)>;
- def : Pat<(mul nxv4i32:$Op1, nxv4i32:$Op2),
- (MUL_ZPmZ_S (PTRUE_S 31), $Op1, $Op2)>;
- def : Pat<(mul nxv2i64:$Op1, nxv2i64:$Op2),
- (MUL_ZPmZ_D (PTRUE_D 31), $Op1, $Op2)>;
-
defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", "SDIV_ZPZZ", int_aarch64_sve_sdiv, DestructiveBinaryCommWithRev, "SDIVR_ZPmZ">;
defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", "UDIV_ZPZZ", int_aarch64_sve_udiv, DestructiveBinaryCommWithRev, "UDIVR_ZPmZ">;
defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", "SDIVR_ZPZZ", int_aarch64_sve_sdivr, DestructiveBinaryCommWithRev, "SDIV_ZPmZ", /*isReverseInstr*/ 1>;
defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh", int_aarch64_sve_sqrdmulh>;
// SVE2 integer multiply vectors (unpredicated)
// MUL_ZZZ keeps no unpredicated 'mul' pattern (op = null_frag); instead the new
// trailing op_pred argument lets sve2_int_mul emit all-active AArch64mul_p
// patterns, so SVE2 targets use the unpredicated encoding for predicated muls
// whose governing predicate is ptrue.
- defm MUL_ZZZ : sve2_int_mul<0b000, "mul", mul>;
+ defm MUL_ZZZ : sve2_int_mul<0b000, "mul", null_frag, AArch64mul_p>;
defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", null_frag>;
defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", null_frag>;
// NOTE(review): the PMUL_ZZZ hunk below looks whitespace-only (deleted and
// re-added text appear identical) — if so, drop it to keep the diff minimal.
- defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>;
+ defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>;
// Add patterns for unpredicated version of smulh and umulh.
def : Pat<(nxv16i8 (int_aarch64_sve_smulh (nxv16i1 (AArch64ptrue 31)), nxv16i8:$Op1, nxv16i8:$Op2)),
: Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))))),
(inst $Op1, i32:$imm, i32:$shift)>;
-class SVE_1_Op_Imm_Arith_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
- ValueType it, ComplexPattern cpx, Instruction inst>
- : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))),
- (inst $Op1, i32:$imm)>;
-
class SVE_1_Op_Imm_Shift_Pred_Pat<ValueType vt, ValueType pt, SDPatternOperator op,
ZPRRegOp zprty, Operand ImmTy, Instruction inst>
: Pat<(vt (op (pt (AArch64ptrue 31)), (vt zprty:$Op1), (vt (AArch64dup (ImmTy:$imm))))),
let Inst{4-0} = Zd;
}
// Multiclass for SVE2 unpredicated integer multiplies. 'op' is the
// unpredicated SDPatternOperator (null_frag when the plain node should not
// match); the new optional 'op_pred' selects a *predicated* node whose
// governing predicate is all-active onto the same unpredicated instruction.
// Defaulting op_pred to null_frag keeps every existing instantiation
// (sqrdmulh, smulh, umulh) source-compatible.
-multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op,
+                        SDPatternOperator op_pred = null_frag> {
  def _B : sve2_int_mul<0b00, opc, asm, ZPR8>;
  def _H : sve2_int_mul<0b01, opc, asm, ZPR16>;
  def _S : sve2_int_mul<0b10, opc, asm, ZPR32>;
// NOTE(review): the 'def _D' instruction and the nxv16i8 SVE_2_Op_Pat are not
// visible here — presumably elided as unchanged diff context, since both
// NAME # _D and NAME # _B are referenced below; confirm against the full file.
  def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
  def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
  def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+  // All-active predicated forms: (op_pred (ptrue), x, y) -> unpredicated inst.
+  def : SVE_2_Op_Pred_All_Active<nxv16i8, op_pred, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pred_All_Active<nxv8i16, op_pred, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pred_All_Active<nxv4i32, op_pred, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pred_All_Active<nxv2i64, op_pred, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve2_int_mul_single<bits<3> opc, string asm, SDPatternOperator op> {
def _S : sve_int_arith_imm<0b10, 0b110000, asm, ZPR32, simm8>;
def _D : sve_int_arith_imm<0b11, 0b110000, asm, ZPR64, simm8>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv16i8, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv8i16, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv4i32, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_Pat<nxv2i64, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h
; CHECK-NEXT: uzp1 z2.b, z3.b, z2.b
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mul z2.b, p0/m, z2.b, z1.b
-; CHECK-NEXT: sub z0.b, z0.b, z2.b
+; CHECK-NEXT: mul z1.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%div = srem <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %div
; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z4.s
; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h
-; CHECK-NEXT: sub z0.h, z0.h, z2.h
+; CHECK-NEXT: mul z1.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%div = srem <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %div
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: sub z0.s, z0.s, z2.s
+; CHECK-NEXT: mul z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%div = srem <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %div
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: sub z0.d, z0.d, z2.d
+; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%div = srem <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %div
; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h
; CHECK-NEXT: uzp1 z2.b, z3.b, z2.b
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mul z2.b, p0/m, z2.b, z1.b
-; CHECK-NEXT: sub z0.b, z0.b, z2.b
+; CHECK-NEXT: mul z1.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%div = urem <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %div
; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z4.s
; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h
-; CHECK-NEXT: sub z0.h, z0.h, z2.h
+; CHECK-NEXT: mul z1.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%div = urem <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %div
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: sub z0.s, z0.s, z2.s
+; CHECK-NEXT: mul z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%div = urem <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %div
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: sub z0.d, z0.d, z2.d
+; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%div = urem <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %div