// We are constructing a chain of intrinsics similar to:
// return FMA.MultiplyAddScalar(
- // Vector128.CreateScalar(x),
- // Vector128.CreateScalar(y),
- // Vector128.CreateScalar(z)
+ // Vector128.CreateScalarUnsafe(x),
+ // Vector128.CreateScalarUnsafe(y),
+ // Vector128.CreateScalarUnsafe(z)
// ).ToScalar();
GenTree* op3 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, impPopStack().val,
                                        NI_Vector128_CreateScalarUnsafe, callType, 16);
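// (op2 and op1 are built the same way; `res` is the FMA.MultiplyAddScalar node that combines them)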
retNode = gtNewSimdHWIntrinsicNode(callType, res, NI_Vector128_ToScalar, callType, 16);
}
-#endif // TARGET_XARCH
+#elif defined(TARGET_ARM64)
+ if (compExactlyDependsOn(InstructionSet_AdvSimd))
+ {
+ assert(varTypeIsFloating(callType));
+
+ // We are constructing a chain of intrinsics similar to:
+ // return AdvSimd.FusedMultiplyAddScalar(
+ // Vector64.Create{ScalarUnsafe}(z),
+ // Vector64.Create{ScalarUnsafe}(y),
+ // Vector64.Create{ScalarUnsafe}(x)
+ // ).ToScalar();
+
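+ // For TYP_DOUBLE, Vector64<double> holds a single element, so Vector64.Create already behaves as a
+ // scalar create; for TYP_FLOAT, CreateScalarUnsafe is used instead.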
+ NamedIntrinsic createVector64 =
+ (callType == TYP_DOUBLE) ? NI_Vector64_Create : NI_Vector64_CreateScalarUnsafe;
+
+ constexpr unsigned int simdSize = 8;
+
+ GenTree* op3 =
+ gtNewSimdHWIntrinsicNode(TYP_SIMD8, impPopStack().val, createVector64, callType, simdSize);
+ GenTree* op2 =
+ gtNewSimdHWIntrinsicNode(TYP_SIMD8, impPopStack().val, createVector64, callType, simdSize);
+ GenTree* op1 =
+ gtNewSimdHWIntrinsicNode(TYP_SIMD8, impPopStack().val, createVector64, callType, simdSize);
+
+ // Note that AdvSimd.FusedMultiplyAddScalar(op1,op2,op3) corresponds to op1 + op2 * op3
+ // while Math{F}.FusedMultiplyAdd(op1,op2,op3) corresponds to op1 * op2 + op3
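+ // Hence the operands are passed in reverse order (op3, op2, op1) below, so the node computes
+ // op3 + op2 * op1, i.e. x * y + z.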
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op3, op2, op1, NI_AdvSimd_FusedMultiplyAddScalar,
+ callType, simdSize);
+
+ retNode = gtNewSimdHWIntrinsicNode(callType, retNode, NI_Vector64_ToScalar, callType, simdSize);
+ }
+#endif
break;
}
#endif // FEATURE_HW_INTRINSICS
void LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp);
void LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicDot(GenTreeHWIntrinsic* node);
- void LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node);
-
#if defined(TARGET_XARCH)
+ void LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node);
#elif defined(TARGET_ARM64)
bool IsValidConstForMovImm(GenTreeHWIntrinsic* node);
+ void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node);
#endif // !TARGET_XARCH && !TARGET_ARM64
union VectorConstant {
#endif // FEATURE_SIMD
#ifdef FEATURE_HW_INTRINSICS
+
+//----------------------------------------------------------------------------------------------
+// LowerHWIntrinsicFusedMultiplyAddScalar: Lowers AdvSimd_FusedMultiplyAddScalar intrinsics
+// when some of the operands are negated by "containing" such negation.
+//
+// Arguments:
+// node - The original hardware intrinsic node
+//
+// | op1 | op2 | op3 |
+// | + | + | + | AdvSimd_FusedMultiplyAddScalar
+// | + | + | - | AdvSimd_FusedMultiplySubtractScalar
+// | + | - | + | AdvSimd_FusedMultiplySubtractScalar
+// | + | - | - | AdvSimd_FusedMultiplyAddScalar
+// | - | + | + | AdvSimd_FusedMultiplySubtractNegatedScalar
+// | - | + | - | AdvSimd_FusedMultiplyAddNegatedScalar
+// | - | - | + | AdvSimd_FusedMultiplyAddNegatedScalar
+// | - | - | - | AdvSimd_FusedMultiplySubtractNegatedScalar
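+//
+// For example, FusedMultiplyAddScalar(a, b, -c) computes a + b * (-c) = a - b * c, which is exactly
+// FusedMultiplySubtractScalar(a, b, c); the GT_NEG under op3 can therefore be removed and the
+// intrinsic id changed per the | + | + | - | row above.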
+//
+void Lowering::LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node)
+{
+ assert(node->gtHWIntrinsicId == NI_AdvSimd_FusedMultiplyAddScalar);
+
+ const HWIntrinsic intrin(node);
+
+ GenTree* op1 = intrin.op1;
+ GenTree* op2 = intrin.op2;
+ GenTree* op3 = intrin.op3;
+
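+ // lowerOperand strips a GT_NEG that sits directly under the Vector64 create/duplicate node feeding
+ // the intrinsic: the negation node is removed from the IR and whether it was present is returned.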
+ auto lowerOperand = [this](GenTree* op) {
+ bool wasNegated = false;
+
+ if (op->OperIsHWIntrinsic() &&
+ ((op->AsHWIntrinsic()->gtHWIntrinsicId == NI_AdvSimd_Arm64_DuplicateToVector64) ||
+ (op->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector64_CreateScalarUnsafe)))
+ {
+ GenTreeHWIntrinsic* createVector64 = op->AsHWIntrinsic();
+ GenTree* valueOp = createVector64->gtGetOp1();
+
+ if (valueOp->OperIs(GT_NEG))
+ {
+ createVector64->gtOp1 = valueOp->gtGetOp1();
+ BlockRange().Remove(valueOp);
+ wasNegated = true;
+ }
+ }
+
+ return wasNegated;
+ };
+
+ const bool op1WasNegated = lowerOperand(op1);
+ const bool op2WasNegated = lowerOperand(op2);
+ const bool op3WasNegated = lowerOperand(op3);
+
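+ // Select the replacement intrinsic according to the table in the function header.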
+ if (op1WasNegated)
+ {
+ if (op2WasNegated != op3WasNegated)
+ {
+ node->gtHWIntrinsicId = NI_AdvSimd_FusedMultiplyAddNegatedScalar;
+ }
+ else
+ {
+ node->gtHWIntrinsicId = NI_AdvSimd_FusedMultiplySubtractNegatedScalar;
+ }
+ }
+ else if (op2WasNegated != op3WasNegated)
+ {
+ node->gtHWIntrinsicId = NI_AdvSimd_FusedMultiplySubtractScalar;
+ }
+}
+
//----------------------------------------------------------------------------------------------
// Lowering::LowerHWIntrinsic: Perform containment analysis for a hardware intrinsic node.
//
return;
}
+ case NI_AdvSimd_FusedMultiplyAddScalar:
+ LowerHWIntrinsicFusedMultiplyAddScalar(node);
+ break;
+
default:
break;
}
if (intrin.op1 != nullptr)
{
- // If we have an RMW intrinsic, we want to preference op1Reg to the target if
- // op1 is not contained.
- if (isRMW)
+ bool simdRegToSimdRegMove = false;
+
+ if ((intrin.id == NI_Vector64_CreateScalarUnsafe) || (intrin.id == NI_Vector128_CreateScalarUnsafe))
+ {
+ simdRegToSimdRegMove = varTypeIsFloating(intrin.op1);
+ }
+ else if (intrin.id == NI_AdvSimd_Arm64_DuplicateToVector64)
+ {
+ simdRegToSimdRegMove = (intrin.op1->TypeGet() == TYP_DOUBLE);
+ }
+ else if ((intrin.id == NI_Vector64_ToScalar) || (intrin.id == NI_Vector128_ToScalar))
+ {
+ simdRegToSimdRegMove = varTypeIsFloating(intrinsicTree);
+ }
+
+ // If we have an RMW intrinsic or an intrinsic with simple move semantics between two SIMD registers,
+ // we want to preference op1Reg to the target if op1 is not contained.
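+ // For the cases above the intrinsic is effectively a move between two floating-point/SIMD registers,
+ // so preferencing op1's register makes it likely that the move can be elided at codegen time.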
+ if (isRMW || simdRegToSimdRegMove)
{
tgtPrefOp1 = !intrin.op1->isContained();
}