return hasNoVMLxHazardUse(N);
}]>;
+// An 'fadd' node which can be contracted into an fma
+def fadd_contract : PatFrag<(ops node:$lhs, node:$rhs),
+                            (fadd node:$lhs, node:$rhs), [{
+  return N->getFlags().hasAllowContract();
+}]>;
+
def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>;
def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>;
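For context, hasAllowContract() inspects the node's fast-math flags, so fadd_contract only matches an 'fadd' that carries the 'contract' flag in the IR. A minimal sketch of an eligible node (the function name is illustrative):

; Sketch: the 'contract' flag is what makes this fadd eligible for the
; vcmul+fadd -> vcmla fold below; a plain fadd is not matched.
define <4 x float> @contract_ok(<4 x float> %x, <4 x float> %y) {
  %s = fadd contract <4 x float> %x, %y
  ret <4 x float> %s
}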
(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
imm:$rot))>;
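+ // Fold a contractable 'fadd' of an accumulator with a VCMUL result
+ // into a single accumulating VCMLA.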
+ def : Pat<(VTI.Vec (fadd_contract MQPR:$Qd_src,
+                        (int_arm_mve_vcmulq imm:$rot,
+                           (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))),
+           (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
+                          (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                          imm:$rot))>;
+
def : Pat<(VTI.Vec (int_arm_mve_vcmlaq_predicated
                       imm:$rot, (VTI.Vec MQPR:$Qd_src),
                       (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
                       (VTI.Pred VCCR:$mask))),
          (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qn),
                         (VTI.Vec MQPR:$Qm), imm:$rot,
                         ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
-
}
}
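At the IR level, the fold enabled by the new pattern looks like the following sketch, modelled on the tests below (the function name is illustrative); without the 'contract' flag the separate vcmul/vadd pair is kept:

; Sketch: a vcmulq intrinsic feeding a contractable fadd with the
; accumulator %a becomes a single accumulating vcmla.
declare <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32, <4 x float>, <4 x float>)

define arm_aapcs_vfpcc <4 x float> @fold_to_vcmla(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %b, <4 x float> %c)
  %res = fadd contract <4 x float> %a, %d
  ret <4 x float> %res
}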
define <4 x float> @mul_addequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: mul_addequal:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: add.w r12, sp, #16
; CHECK-NEXT: vmov d0, r0, r1
; CHECK-NEXT: mov r0, sp
-; CHECK-NEXT: add r1, sp, #16
+; CHECK-NEXT: vldrw.u32 q2, [r12]
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vmov d1, r2, r3
-; CHECK-NEXT: vldrw.u32 q2, [r1]
-; CHECK-NEXT: vcmul.f32 q3, q0, q1, #0
-; CHECK-NEXT: vadd.f32 q2, q3, q2
+; CHECK-NEXT: vcmla.f32 q2, q0, q1, #0
; CHECK-NEXT: vcmla.f32 q2, q0, q1, #90
; CHECK-NEXT: vmov r0, r1, d4
; CHECK-NEXT: vmov r2, r3, d5
define arm_aapcs_vfpcc <4 x float> @muladd_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: muladd_f32x4:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmul.f32 q3, q1, q2, #0
-; CHECK-NEXT: vadd.f32 q0, q3, q0
+; CHECK-NEXT: vcmla.f32 q0, q1, q2, #0
; CHECK-NEXT: bx lr
entry:
%d = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %b, <4 x float> %c)
define arm_aapcs_vfpcc <4 x float> @muladd_c_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: muladd_c_f32x4:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmul.f32 q3, q1, q2, #90
-; CHECK-NEXT: vadd.f32 q0, q0, q3
+; CHECK-NEXT: vcmla.f32 q0, q1, q2, #90
; CHECK-NEXT: bx lr
entry:
%d = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 1, <4 x float> %b, <4 x float> %c)
define arm_aapcs_vfpcc <8 x half> @muladd_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: muladd_f16x4:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmul.f16 q1, q1, q2, #180
-; CHECK-NEXT: vadd.f16 q0, q1, q0
+; CHECK-NEXT: vcmla.f16 q0, q1, q2, #180
; CHECK-NEXT: bx lr
entry:
%d = tail call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 2, <8 x half> %b, <8 x half> %c)
define arm_aapcs_vfpcc <8 x half> @muladd_c_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: muladd_c_f16x4:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmul.f16 q1, q1, q2, #270
-; CHECK-NEXT: vadd.f16 q0, q0, q1
+; CHECK-NEXT: vcmla.f16 q0, q1, q2, #270
; CHECK-NEXT: bx lr
entry:
%d = tail call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 3, <8 x half> %b, <8 x half> %c)