[MethodImpl(MethodImplOptions.AggressiveInlining)]
public override bool Equals([NotNullWhen(true)] object? obj) => (obj is Vector128<T> other) && Equals(other);
+ // Account for floating-point equality around NaN
+ // This is in a separate method so it can be optimized by the mono interpreter/jiterpreter
+ [Intrinsic]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ internal static bool EqualsFloatingPoint (Vector128<T> lhs, Vector128<T> rhs)
+ {
+ Vector128<T> result = Vector128.Equals(lhs, rhs) | ~(Vector128.Equals(lhs, lhs) | Vector128.Equals(rhs, rhs));
+ return result.AsInt32() == Vector128<int>.AllBitsSet;
+ }
+
/// <summary>Determines whether the specified <see cref="Vector128{T}" /> is equal to the current instance.</summary>
/// <param name="other">The <see cref="Vector128{T}" /> to compare with the current instance.</param>
/// <returns><c>true</c> if <paramref name="other" /> is equal to the current instance; otherwise, <c>false</c>.</returns>
{
if ((typeof(T) == typeof(double)) || (typeof(T) == typeof(float)))
{
- Vector128<T> result = Vector128.Equals(this, other) | ~(Vector128.Equals(this, this) | Vector128.Equals(other, other));
- return result.AsInt32() == Vector128<int>.AllBitsSet;
+ return EqualsFloatingPoint(this, other);
}
else
{
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY, interp_v128_op_bitwise_equality, -1)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_INEQUALITY, interp_v128_op_bitwise_inequality, -1)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_FLOAT_EQUALITY, interp_v128_r4_float_equality, -1)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R8_FLOAT_EQUALITY, interp_v128_r8_float_equality, -1)
+
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_EXCLUSIVE_OR, interp_v128_op_exclusive_or, 81)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_MULTIPLY, interp_v128_i1_op_multiply, -1)
typedef gint8 v128_i1 __attribute__ ((vector_size (SIZEOF_V128)));
typedef guint8 v128_u1 __attribute__ ((vector_size (SIZEOF_V128)));
typedef float v128_r4 __attribute__ ((vector_size (SIZEOF_V128)));
+typedef double v128_r8 __attribute__ ((vector_size (SIZEOF_V128)));
// get_AllBitsSet
static void
*(gint32*)res = 1;
}
-// op_Addition
+// Vector128<float>EqualsFloatingPoint
+static void
+interp_v128_r4_float_equality (gpointer res, gpointer v1, gpointer v2)
+{
+ v128_r4 v1_cast = *(v128_r4*)v1;
+ v128_r4 v2_cast = *(v128_r4*)v2;
+ v128_r4 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast));
+ memset (&v1_cast, 0xff, SIZEOF_V128);
+
+ *(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V128) == 0;
+}
+
+static void
+interp_v128_r8_float_equality (gpointer res, gpointer v1, gpointer v2)
+{
+ v128_r8 v1_cast = *(v128_r8*)v1;
+ v128_r8 v2_cast = *(v128_r8*)v2;
+ v128_r8 result = (v1_cast == v2_cast) | ~((v1_cast == v1_cast) | (v2_cast == v2_cast));
+ memset (&v1_cast, 0xff, SIZEOF_V128);
+
+ *(gint32*)res = memcmp (&v1_cast, &result, SIZEOF_V128) == 0;
+}
+
+// op_Multiply
static void
interp_v128_i1_op_multiply (gpointer res, gpointer v1, gpointer v2)
{
*(v128_r4*)res = *(v128_r4*)v1 * *(v128_r4*)v2;
}
+// op_Division
static void
interp_v128_r4_op_division (gpointer res, gpointer v1, gpointer v2)
{
SIMD_METHOD(CreateScalarUnsafe)
SIMD_METHOD(Equals)
+SIMD_METHOD(EqualsFloatingPoint)
SIMD_METHOD(ExtractMostSignificantBits)
SIMD_METHOD(GreaterThan)
SIMD_METHOD(LessThan)
};
static guint16 sri_vector128_t_methods [] = {
+ SN_EqualsFloatingPoint,
SN_get_AllBitsSet,
SN_get_Count,
SN_get_One,
*simd_intrins = INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY;
}
break;
+ case SN_EqualsFloatingPoint:
+ *simd_opcode = MINT_SIMD_INTRINS_P_PP;
+ if (atype == MONO_TYPE_R4)
+ *simd_intrins = INTERP_SIMD_INTRINSIC_V128_R4_FLOAT_EQUALITY;
+ else if (atype == MONO_TYPE_R8)
+ *simd_intrins = INTERP_SIMD_INTRINSIC_V128_R8_FLOAT_EQUALITY;
+ break;
case SN_op_ExclusiveOr:
*simd_opcode = MINT_SIMD_INTRINS_P_PP;
*simd_intrins = INTERP_SIMD_INTRINSIC_V128_EXCLUSIVE_OR;
new MinDouble(),
new MaxDouble(),
new Normalize(),
+ new EqualsInt32(),
+ new EqualsFloat(),
};
}
result = vector / (float)Math.Sqrt(Vector128.Dot(vector, vector));
}
}
+
+ class EqualsInt32 : VectorMeasurement
+ {
+ Vector128<Int32> vector1, vector2;
+ bool result;
+
+ public override string Name => "Equals Int32";
+
+ public EqualsInt32()
+ {
+ vector1 = Vector128.Create(1, 2, 3, 4);
+ vector2 = Vector128.Create(4, 3, 2, 1);
+ }
+
+ public override void RunStep() {
+ result = vector1.Equals(vector2);
+ }
+ }
+
+ class EqualsFloat : VectorMeasurement
+ {
+ Vector128<float> vector1, vector2;
+ bool result;
+
+ public override string Name => "Equals Float";
+
+ public EqualsFloat()
+ {
+ vector1 = Vector128.Create(1f, 2f, 3f, 4f);
+ vector2 = Vector128.Create(4f, 3f, 2f, 1f);
+ }
+
+ public override void RunStep() {
+ result = vector1.Equals(vector2);
+ }
+ }
}
}
return this.current.appendU8(value);
}
- appendSimd(value: WasmSimdOpcode) {
+ appendSimd(value: WasmSimdOpcode, allowLoad?: boolean) {
this.current.appendU8(WasmOpcode.PREFIX_simd);
// Yes that's right. We're using LEB128 to encode 8-bit opcodes. Why? I don't know
+ mono_assert(((value | 0) !== 0) || ((value === WasmSimdOpcode.v128_load) && (allowLoad === true)), "Expected non-v128_load simd opcode or allowLoad==true");
return this.current.appendULeb(value);
}
}
appendULeb(value: number) {
+ mono_assert(typeof (value) === "number", () => `appendULeb expected number but got ${value}`);
mono_assert(value >= 0, "cannot pass negative value to appendULeb");
if (value < 0x7F) {
if (this.size + 1 >= this.capacity)
}
appendLeb(value: number) {
+ mono_assert(typeof (value) === "number", () => `appendLeb expected number but got ${value}`);
if (this.size + 8 >= this.capacity)
throw new Error("Buffer full");
while (count >= sizeofV128) {
builder.local(destLocal);
builder.local(srcLocal);
- builder.appendSimd(WasmSimdOpcode.v128_load);
+ builder.appendSimd(WasmSimdOpcode.v128_load, true);
builder.appendMemarg(srcOffset, 0);
builder.appendSimd(WasmSimdOpcode.v128_store);
builder.appendMemarg(destOffset, 0);
if (simdOpcode !== undefined) {
// This looks wrong but I assure you it's correct.
builder.appendULeb(simdOpcode);
+ } else if (opcodeOrPrefix === WasmOpcode.PREFIX_simd) {
+ throw new Error("PREFIX_simd ldloc without a simdOpcode");
}
const alignment = computeMemoryAlignment(offset, opcodeOrPrefix, simdOpcode);
builder.appendMemarg(offset, alignment);
// Indirect load, so v1 is T** and res is Vector128*
builder.local("pLocals");
append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load);
- builder.appendSimd(simple);
+ builder.appendSimd(simple, true);
builder.appendMemarg(0, 0);
append_simd_store(builder, ip);
} else {
builder.appendU8(WasmOpcode.i32_eqz);
append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
return true;
+ case SimdIntrinsic3.V128_R4_FLOAT_EQUALITY:
+ case SimdIntrinsic3.V128_R8_FLOAT_EQUALITY: {
+ /*
+ Vector128<T> result = Vector128.Equals(lhs, rhs) | ~(Vector128.Equals(lhs, lhs) | Vector128.Equals(rhs, rhs));
+ return result.AsInt32() == Vector128<int>.AllBitsSet;
+ */
+ const isR8 = index === SimdIntrinsic3.V128_R8_FLOAT_EQUALITY,
+ eqOpcode = isR8 ? WasmSimdOpcode.f64x2_eq : WasmSimdOpcode.f32x4_eq;
+ builder.local("pLocals");
+ append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+ builder.local("math_lhs128", WasmOpcode.tee_local);
+ append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+ builder.local("math_rhs128", WasmOpcode.tee_local);
+ builder.appendSimd(eqOpcode);
+ builder.local("math_lhs128");
+ builder.local("math_lhs128");
+ builder.appendSimd(eqOpcode);
+ builder.local("math_rhs128");
+ builder.local("math_rhs128");
+ builder.appendSimd(eqOpcode);
+ builder.appendSimd(WasmSimdOpcode.v128_or);
+ builder.appendSimd(WasmSimdOpcode.v128_not);
+ builder.appendSimd(WasmSimdOpcode.v128_or);
+ builder.appendSimd(isR8 ? WasmSimdOpcode.i64x2_all_true : WasmSimdOpcode.i32x4_all_true);
+ append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
+ return true;
+ }
case SimdIntrinsic3.V128_I1_SHUFFLE: {
// Detect a constant indices vector and turn it into a const. This allows
// v8 to use a more optimized implementation of the swizzle opcode
"temp_f64": WasmValtype.f64,
"backbranched": WasmValtype.i32,
};
- if (builder.options.enableSimd)
+ if (builder.options.enableSimd) {
traceLocals["v128_zero"] = WasmValtype.v128;
+ traceLocals["math_lhs128"] = WasmValtype.v128;
+ traceLocals["math_rhs128"] = WasmValtype.v128;
+ }
let keep = true,
traceValue = 0;