add_compile_options(-Wno-strict-prototypes)
add_compile_options(-Wno-unused-but-set-variable)
add_compile_options(-Wno-single-bit-bitfield-constant-conversion)
+ add_compile_options(-msimd128)
set(DISABLE_EXECUTABLES 1)
# FIXME: Is there a cmake option for this ?
set(DISABLE_SHARED_LIBS 1)
#define PROFILE_INTERP 0
-#if !HOST_BROWSER && __GNUC__
+#if __GNUC__
#define INTERP_ENABLE_SIMD
#endif
gpointer
mono_jiterp_frame_data_allocator_alloc (FrameDataAllocator *stack, InterpFrame *frame, int size);
+gpointer
+mono_jiterp_get_simd_intrinsic (int arity, int index);
+
+int
+mono_jiterp_get_simd_opcode (int arity, int index);
+
#endif
static inline int
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_ADD, interp_v128_i1_op_addition)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_ADD, interp_v128_i2_op_addition)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_ADD, interp_v128_i4_op_addition)
-
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_SUB, interp_v128_i1_op_subtraction)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_SUB, interp_v128_i2_op_subtraction)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_SUB, interp_v128_i4_op_subtraction)
-
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_AND, interp_v128_op_bitwise_and)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_OR, interp_v128_op_bitwise_or)
-
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY, interp_v128_op_bitwise_equality)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_INEQUALITY, interp_v128_op_bitwise_inequality)
-
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_EXCLUSIVE_OR, interp_v128_op_exclusive_or)
-
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_MULTIPLY, interp_v128_i1_op_multiply)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_MULTIPLY, interp_v128_i2_op_multiply)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_MULTIPLY, interp_v128_i4_op_multiply)
-
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_NEGATION, interp_v128_i1_op_negation)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_NEGATION, interp_v128_i2_op_negation)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_NEGATION, interp_v128_i4_op_negation)
-
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_LEFT_SHIFT, interp_v128_i1_op_left_shift)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_LEFT_SHIFT, interp_v128_i2_op_left_shift)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_LEFT_SHIFT, interp_v128_i4_op_left_shift)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_LEFT_SHIFT, interp_v128_i8_op_left_shift)
-
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_RIGHT_SHIFT, interp_v128_i1_op_right_shift)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_RIGHT_SHIFT, interp_v128_i2_op_right_shift)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_RIGHT_SHIFT, interp_v128_i4_op_right_shift)
-
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_URIGHT_SHIFT, interp_v128_i1_op_uright_shift)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_URIGHT_SHIFT, interp_v128_i2_op_uright_shift)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_URIGHT_SHIFT, interp_v128_i4_op_uright_shift)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_URIGHT_SHIFT, interp_v128_i8_op_uright_shift)
-
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_ONES_COMPLEMENT, interp_v128_op_ones_complement)
-
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_U2_WIDEN_LOWER, interp_v128_u2_widen_lower)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_U2_WIDEN_UPPER, interp_v128_u2_widen_upper)
-
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_U1_NARROW, interp_v128_u1_narrow)
-
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_U1_GREATER_THAN, interp_v128_u1_greater_than)
-
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_LESS_THAN, interp_v128_i1_less_than)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_U1_LESS_THAN, interp_v128_u1_less_than)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_LESS_THAN, interp_v128_i2_less_than)
-
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_EQUALS, interp_v128_i1_equals)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_EQUALS, interp_v128_i2_equals)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_EQUALS, interp_v128_i4_equals)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_EQUALS, interp_v128_i8_equals)
-
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_CREATE_SCALAR, interp_v128_i1_create_scalar)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_CREATE_SCALAR, interp_v128_i2_create_scalar)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_CREATE_SCALAR, interp_v128_i4_create_scalar)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_CREATE_SCALAR, interp_v128_i8_create_scalar)
-
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_EXTRACT_MSB, interp_v128_i1_extract_msb)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_EXTRACT_MSB, interp_v128_i2_extract_msb)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_EXTRACT_MSB, interp_v128_i4_extract_msb)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_EXTRACT_MSB, interp_v128_i8_extract_msb)
-
-INTERP_SIMD_INTRINSIC_P_PPP (INTERP_SIMD_INTRINSIC_V128_CONDITIONAL_SELECT, interp_v128_conditional_select)
-
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_CREATE, interp_v128_i1_create)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_CREATE, interp_v128_i2_create)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_CREATE, interp_v128_i4_create)
-INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_CREATE, interp_v128_i8_create)
-
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_AND_NOT, interp_v128_and_not)
-
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_U2_LESS_THAN_EQUAL, interp_v128_u2_less_than_equal)
-
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_SHUFFLE, interp_v128_i1_shuffle)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_SHUFFLE, interp_v128_i2_shuffle)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_SHUFFLE, interp_v128_i4_shuffle)
-INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_SHUFFLE, interp_v128_i8_shuffle)
+// FIXME: SIMD causes compile errors on WASI
+#ifdef HOST_BROWSER
+#ifndef INTERP_WASM_SIMD_INTRINSIC_V_P
+#define INTERP_WASM_SIMD_INTRINSIC_V_P(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(id, _mono_interp_simd_ ## id, wasm_opcode)
+#endif
+#ifndef INTERP_WASM_SIMD_INTRINSIC_V_V
+#define INTERP_WASM_SIMD_INTRINSIC_V_V(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(id, _mono_interp_simd_ ## id, wasm_opcode)
+#endif
+#ifndef INTERP_WASM_SIMD_INTRINSIC_I_V
+#define INTERP_WASM_SIMD_INTRINSIC_I_V(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_P(id, _mono_interp_simd_ ## id, wasm_opcode)
+#endif
+#ifndef INTERP_WASM_SIMD_INTRINSIC_V_VV
+#define INTERP_WASM_SIMD_INTRINSIC_V_VV(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PP(id, _mono_interp_simd_ ## id, wasm_opcode)
+#endif
+#ifndef INTERP_WASM_SIMD_INTRINSIC_V_VI
+#define INTERP_WASM_SIMD_INTRINSIC_V_VI(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PP(id, _mono_interp_simd_ ## id, wasm_opcode)
+#endif
+#ifndef INTERP_WASM_SIMD_INTRINSIC_V_VVV
+#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(id, c_intrinsic, wasm_opcode) INTERP_SIMD_INTRINSIC_P_PPP(id, _mono_interp_simd_ ## id, wasm_opcode)
+#endif
+#else // HOST_BROWSER
+#define INTERP_WASM_SIMD_INTRINSIC_V_P(id, c_intrinsic, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_V(id, c_intrinsic, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_I_V(id, c_intrinsic, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_VV(id, c_intrinsic, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_VI(id, c_intrinsic, wasm_opcode)
+#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(id, c_intrinsic, wasm_opcode)
+#endif // HOST_BROWSER
+
+// The third argument is the wasm opcode that corresponds to this simd intrinsic, if any.
+// Specify 0 if there is no exact 1:1 mapping (the opcode can still be implemented manually in the jiterpreter.)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_ADD, interp_v128_i1_op_addition, 110)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_ADD, interp_v128_i2_op_addition, 142)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_ADD, interp_v128_i4_op_addition, 174)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_ADD, interp_v128_r4_op_addition, 228)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_SUB, interp_v128_i1_op_subtraction, 113)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_SUB, interp_v128_i2_op_subtraction, 145)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_SUB, interp_v128_i4_op_subtraction, 177)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_SUB, interp_v128_r4_op_subtraction, 229)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_AND, interp_v128_op_bitwise_and, 78)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_OR, interp_v128_op_bitwise_or, 80)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY, interp_v128_op_bitwise_equality, 0)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_INEQUALITY, interp_v128_op_bitwise_inequality, 0)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_EXCLUSIVE_OR, interp_v128_op_exclusive_or, 81)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_MULTIPLY, interp_v128_i1_op_multiply, 0)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_MULTIPLY, interp_v128_i2_op_multiply, 149)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_MULTIPLY, interp_v128_i4_op_multiply, 181)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_MULTIPLY, interp_v128_r4_op_multiply, 230)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_R4_DIVISION, interp_v128_r4_op_division, 231)
+
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_NEGATION, interp_v128_i1_op_negation, 97)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_NEGATION, interp_v128_i2_op_negation, 129)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_NEGATION, interp_v128_i4_op_negation, 161)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_LEFT_SHIFT, interp_v128_i1_op_left_shift, 107)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_LEFT_SHIFT, interp_v128_i2_op_left_shift, 139)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_LEFT_SHIFT, interp_v128_i4_op_left_shift, 171)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_LEFT_SHIFT, interp_v128_i8_op_left_shift, 203)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_RIGHT_SHIFT, interp_v128_i1_op_right_shift, 108)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_RIGHT_SHIFT, interp_v128_i2_op_right_shift, 140)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_RIGHT_SHIFT, interp_v128_i4_op_right_shift, 172)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_URIGHT_SHIFT, interp_v128_i1_op_uright_shift, 109)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_URIGHT_SHIFT, interp_v128_i2_op_uright_shift, 141)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_URIGHT_SHIFT, interp_v128_i4_op_uright_shift, 173)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_URIGHT_SHIFT, interp_v128_i8_op_uright_shift, 205)
+
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_ONES_COMPLEMENT, interp_v128_op_ones_complement, 77)
+
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_U2_WIDEN_LOWER, interp_v128_u2_widen_lower, 137)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_U2_WIDEN_UPPER, interp_v128_u2_widen_upper, 138)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_U1_NARROW, interp_v128_u1_narrow, 102)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_U1_GREATER_THAN, interp_v128_u1_greater_than, 40)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_LESS_THAN, interp_v128_i1_less_than, 37)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_U1_LESS_THAN, interp_v128_u1_less_than, 38)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_LESS_THAN, interp_v128_i2_less_than, 47)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_EQUALS, interp_v128_i1_equals, 35)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_EQUALS, interp_v128_i2_equals, 45)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_EQUALS, interp_v128_i4_equals, 55)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_EQUALS, interp_v128_i8_equals, 214)
+
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_CREATE_SCALAR, interp_v128_i1_create_scalar, 0)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_CREATE_SCALAR, interp_v128_i2_create_scalar, 0)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_CREATE_SCALAR, interp_v128_i4_create_scalar, 0)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_CREATE_SCALAR, interp_v128_i8_create_scalar, 0)
+
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_EXTRACT_MSB, interp_v128_i1_extract_msb, 0)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_EXTRACT_MSB, interp_v128_i2_extract_msb, 0)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_EXTRACT_MSB, interp_v128_i4_extract_msb, 0)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_EXTRACT_MSB, interp_v128_i8_extract_msb, 0)
+
+// wasm opcode is 0 because it has a different calling convention
+INTERP_SIMD_INTRINSIC_P_PPP (INTERP_SIMD_INTRINSIC_V128_CONDITIONAL_SELECT, interp_v128_conditional_select, 0)
+
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_CREATE, interp_v128_i1_create, 0)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_CREATE, interp_v128_i2_create, 0)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_CREATE, interp_v128_i4_create, 0)
+INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_CREATE, interp_v128_i8_create, 0)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_AND_NOT, interp_v128_and_not, 79)
+
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_U2_LESS_THAN_EQUAL, interp_v128_u2_less_than_equal, 52)
+
+// wasm only has a swizzle opcode for i8x16, none of the others
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_SHUFFLE, interp_v128_i1_shuffle, 14)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_SHUFFLE, interp_v128_i2_shuffle, 0)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_SHUFFLE, interp_v128_i4_shuffle, 0)
+INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_SHUFFLE, interp_v128_i8_shuffle, 0)
+
+// Wasm PackedSimd (see PackedSimd.cs)
+// We automatically generate C wrappers around clang's wasm simd intrinsics for each of these intrinsics
+// The 2nd argument is the name of the clang intrinsic and the 3rd argument is the wasm opcode.
+
+INTERP_WASM_SIMD_INTRINSIC_V_P (INTERP_SIMD_INTRINSIC_WASM_I8X16_SPLAT, wasm_v128_load8_splat, 0x07)
+INTERP_WASM_SIMD_INTRINSIC_V_P (INTERP_SIMD_INTRINSIC_WASM_I16X8_SPLAT, wasm_v128_load16_splat, 0x08)
+INTERP_WASM_SIMD_INTRINSIC_V_P (INTERP_SIMD_INTRINSIC_WASM_I32X4_SPLAT, wasm_v128_load32_splat, 0x09)
+INTERP_WASM_SIMD_INTRINSIC_V_P (INTERP_SIMD_INTRINSIC_WASM_I64X2_SPLAT, wasm_v128_load64_splat, 0x0a)
+// FIXME: ExtractLane and ReplaceLane
+// FIXME: Shuffle
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_SWIZZLE, wasm_i8x16_swizzle, 0x0e)
+// FIXME: f32/f64 versions of add/subtract/multiply/negate are missing
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_ADD, wasm_i8x16_add, 0x6e)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_ADD, wasm_i16x8_add, 0x8e)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_ADD, wasm_i32x4_add, 0xae)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_ADD, wasm_i64x2_add, 0xce)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_SUBTRACT, wasm_i8x16_sub, 0x71)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_SUBTRACT, wasm_i16x8_sub, 0x91)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_SUBTRACT, wasm_i32x4_sub, 0xb1)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_SUBTRACT, wasm_i64x2_sub, 0xd1)
+// There is no i8x16 mul opcode
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_MULTIPLY, _interp_wasm_simd_assert_not_reached, 0x0)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_MULTIPLY, wasm_i16x8_mul, 0x95)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_MULTIPLY, wasm_i32x4_mul, 0xb5)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_MULTIPLY, wasm_i64x2_mul, 0xd5)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_DOT_I16X8, wasm_i32x4_dot_i16x8, 0xba)
+INTERP_WASM_SIMD_INTRINSIC_V_V (INTERP_SIMD_INTRINSIC_WASM_I8X16_NEGATE, wasm_i8x16_neg, 0x61)
+INTERP_WASM_SIMD_INTRINSIC_V_V (INTERP_SIMD_INTRINSIC_WASM_I16X8_NEGATE, wasm_i16x8_neg, 0x81)
+INTERP_WASM_SIMD_INTRINSIC_V_V (INTERP_SIMD_INTRINSIC_WASM_I32X4_NEGATE, wasm_i32x4_neg, 0xa1)
+INTERP_WASM_SIMD_INTRINSIC_V_V (INTERP_SIMD_INTRINSIC_WASM_I64X2_NEGATE, wasm_i64x2_neg, 0xc1)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTLEFT, wasm_i8x16_shl, 0x6b)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I16X8_SHIFTLEFT, wasm_i16x8_shl, 0x8b)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I32X4_SHIFTLEFT, wasm_i32x4_shl, 0xab)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I64X2_SHIFTLEFT, wasm_i64x2_shl, 0xcb)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTRIGHTARITHMETIC, wasm_i8x16_shr, 0x6c)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I16X8_SHIFTRIGHTARITHMETIC, wasm_i16x8_shr, 0x8c)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I32X4_SHIFTRIGHTARITHMETIC, wasm_i32x4_shr, 0xac)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I64X2_SHIFTRIGHTARITHMETIC, wasm_i64x2_shr, 0xcc)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTRIGHTLOGICAL, wasm_u8x16_shr, 0x6d)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I16X8_SHIFTRIGHTLOGICAL, wasm_u16x8_shr, 0x8d)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I32X4_SHIFTRIGHTLOGICAL, wasm_u32x4_shr, 0xad)
+INTERP_WASM_SIMD_INTRINSIC_V_VI (INTERP_SIMD_INTRINSIC_WASM_I64X2_SHIFTRIGHTLOGICAL, wasm_u64x2_shr, 0xcd)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_AND, wasm_v128_and, 0x4e)
+// FIXME: NOT, OR, XOR
+INTERP_WASM_SIMD_INTRINSIC_I_V (INTERP_SIMD_INTRINSIC_WASM_I8X16_BITMASK, wasm_i8x16_bitmask, 0x64)
+INTERP_WASM_SIMD_INTRINSIC_I_V (INTERP_SIMD_INTRINSIC_WASM_I16X8_BITMASK, wasm_i16x8_bitmask, 0x84)
+INTERP_WASM_SIMD_INTRINSIC_I_V (INTERP_SIMD_INTRINSIC_WASM_I32X4_BITMASK, wasm_i32x4_bitmask, 0xa4)
+INTERP_WASM_SIMD_INTRINSIC_I_V (INTERP_SIMD_INTRINSIC_WASM_I64X2_BITMASK, wasm_i64x2_bitmask, 0xc4)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPAREEQUAL, wasm_i8x16_eq, 0x23)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_COMPAREEQUAL, wasm_i16x8_eq, 0x2d)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_COMPAREEQUAL, wasm_i32x4_eq, 0x37)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_COMPAREEQUAL, wasm_i64x2_eq, 0xd6)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F32X4_COMPAREEQUAL, wasm_f32x4_eq, 0x41)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F64X2_COMPAREEQUAL, wasm_f64x2_eq, 0x47)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPARENOTEQUAL, wasm_i8x16_ne, 0x24)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_COMPARENOTEQUAL, wasm_i16x8_ne, 0x2e)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_COMPARENOTEQUAL, wasm_i32x4_ne, 0x38)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_COMPARENOTEQUAL, wasm_i64x2_ne, 0xd7)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F32X4_COMPARENOTEQUAL, wasm_f32x4_ne, 0x42)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F64X2_COMPARENOTEQUAL, wasm_f64x2_ne, 0x48)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_NARROW_I16X8_S, wasm_i8x16_narrow_i16x8, 0x65)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_NARROW_I32X4_S, wasm_i16x8_narrow_i32x4, 0x85)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_NARROW_I16X8_U, wasm_u8x16_narrow_i16x8, 0x66)
+INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_NARROW_I32X4_U, wasm_u16x8_narrow_i32x4, 0x86)
#include "interp-internals.h"
#include "interp-simd.h"
+#if HOST_BROWSER
+#include <wasm_simd128.h>
+#endif
+
#ifdef INTERP_ENABLE_SIMD
typedef gint64 v128_i8 __attribute__ ((vector_size (SIZEOF_V128)));
typedef guint16 v128_u2 __attribute__ ((vector_size (SIZEOF_V128)));
typedef gint8 v128_i1 __attribute__ ((vector_size (SIZEOF_V128)));
typedef guint8 v128_u1 __attribute__ ((vector_size (SIZEOF_V128)));
+typedef float v128_r4 __attribute__ ((vector_size (SIZEOF_V128)));
// get_AllBitsSet
static void
*(v128_i4*)res = *(v128_i4*)v1 + *(v128_i4*)v2;
}
+static void
+interp_v128_r4_op_addition (gpointer res, gpointer v1, gpointer v2)
+{
+ *(v128_r4*)res = *(v128_r4*)v1 + *(v128_r4*)v2;
+}
+
// op_Subtraction
static void
interp_v128_i1_op_subtraction (gpointer res, gpointer v1, gpointer v2)
*(v128_i4*)res = *(v128_i4*)v1 - *(v128_i4*)v2;
}
+static void
+interp_v128_r4_op_subtraction (gpointer res, gpointer v1, gpointer v2)
+{
+ *(v128_r4*)res = *(v128_r4*)v1 - *(v128_r4*)v2;
+}
+
// op_BitwiseAnd
static void
interp_v128_op_bitwise_and (gpointer res, gpointer v1, gpointer v2)
*(v128_i4*)res = *(v128_i4*)v1 * *(v128_i4*)v2;
}
+static void
+interp_v128_r4_op_multiply (gpointer res, gpointer v1, gpointer v2)
+{
+ *(v128_r4*)res = *(v128_r4*)v1 * *(v128_r4*)v2;
+}
+
+static void
+interp_v128_r4_op_division (gpointer res, gpointer v1, gpointer v2)
+{
+ *(v128_r4*)res = *(v128_r4*)v1 / *(v128_r4*)v2;
+}
+
// op_UnaryNegation
static void
interp_v128_i1_op_negation (gpointer res, gpointer v1)
V128_SHUFFLE (gint64, guint64);
}
-#define INTERP_SIMD_INTRINSIC_P_P(a,b)
-#define INTERP_SIMD_INTRINSIC_P_PP(a,b)
-#define INTERP_SIMD_INTRINSIC_P_PPP(a,b)
+#define INTERP_SIMD_INTRINSIC_P_P(a,b,c)
+#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c)
+#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c)
+
+// For the wasm packed simd intrinsics we want to automatically generate the C implementations from
+// their corresponding clang intrinsics. See also:
+// https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/wasm_simd128.h
+// In this context V means Vector128 and P means void* pointer.
+#ifdef HOST_BROWSER
+
+static v128_t
+_interp_wasm_simd_assert_not_reached (v128_t lhs, v128_t rhs) {
+ g_assert_not_reached ();
+}
+
+#define INTERP_WASM_SIMD_INTRINSIC_V_P(id, c_intrinsic, wasm_opcode) \
+static void \
+_mono_interp_simd_ ## id (gpointer res, gpointer v1) { \
+ *((v128_t *)res) = c_intrinsic (v1); \
+}
+
+#define INTERP_WASM_SIMD_INTRINSIC_V_V(id, c_intrinsic, wasm_opcode) \
+static void \
+_mono_interp_simd_ ## id (gpointer res, gpointer v1) { \
+ *((v128_t *)res) = c_intrinsic (*((v128_t *)v1)); \
+}
+
+#define INTERP_WASM_SIMD_INTRINSIC_I_V(id, c_intrinsic, wasm_opcode) \
+static void \
+_mono_interp_simd_ ## id (gpointer res, gpointer v1) { \
+ *((int32_t *)res) = c_intrinsic (*((v128_t *)v1)); \
+}
+
+#define INTERP_WASM_SIMD_INTRINSIC_V_VV(id, c_intrinsic, wasm_opcode) \
+static void \
+_mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2) { \
+ *((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((v128_t *)v2)); \
+}
+
+#define INTERP_WASM_SIMD_INTRINSIC_V_VI(id, c_intrinsic, wasm_opcode) \
+static void \
+_mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2) { \
+ *((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((int *)v2)); \
+}
+
+#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(id, c_intrinsic, wasm_opcode) \
+static void \
+_mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2, gpointer v3) { \
+ *((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((v128_t *)v2), *((v128_t *)v3)); \
+}
+
+#include "interp-simd-intrins.def"
+
+#undef INTERP_WASM_SIMD_INTRINSIC_V_P
+#undef INTERP_WASM_SIMD_INTRINSIC_V_V
+#undef INTERP_WASM_SIMD_INTRINSIC_I_V
+#undef INTERP_WASM_SIMD_INTRINSIC_V_VV
+#undef INTERP_WASM_SIMD_INTRINSIC_V_VI
+#undef INTERP_WASM_SIMD_INTRINSIC_V_VVV
+
+// Now generate the wasm opcode tables for the intrinsics
+
+#undef INTERP_SIMD_INTRINSIC_P_P
+#define INTERP_SIMD_INTRINSIC_P_P(a,b,c) c,
+
+int interp_simd_p_p_wasm_opcode_table [] = {
+#include "interp-simd-intrins.def"
+};
+
+#undef INTERP_SIMD_INTRINSIC_P_P
+#define INTERP_SIMD_INTRINSIC_P_P(a,b,c)
+
+#undef INTERP_SIMD_INTRINSIC_P_PP
+#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c) c,
+
+int interp_simd_p_pp_wasm_opcode_table [] = {
+#include "interp-simd-intrins.def"
+};
+
+#undef INTERP_SIMD_INTRINSIC_P_PP
+#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c)
+
+#undef INTERP_SIMD_INTRINSIC_P_PPP
+#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c) c,
+
+int interp_simd_p_ppp_wasm_opcode_table [] = {
+#include "interp-simd-intrins.def"
+};
+
+#undef INTERP_SIMD_INTRINSIC_P_PPP
+#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c)
+
+#endif // HOST_BROWSER
#undef INTERP_SIMD_INTRINSIC_P_P
-#define INTERP_SIMD_INTRINSIC_P_P(a,b) b,
+#define INTERP_SIMD_INTRINSIC_P_P(a,b,c) b,
PP_SIMD_Method interp_simd_p_p_table [] = {
#include "interp-simd-intrins.def"
};
#undef INTERP_SIMD_INTRINSIC_P_P
-#define INTERP_SIMD_INTRINSIC_P_P(a,b)
+#define INTERP_SIMD_INTRINSIC_P_P(a,b,c)
#undef INTERP_SIMD_INTRINSIC_P_PP
-#define INTERP_SIMD_INTRINSIC_P_PP(a,b) b,
+#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c) b,
PPP_SIMD_Method interp_simd_p_pp_table [] = {
#include "interp-simd-intrins.def"
};
#undef INTERP_SIMD_INTRINSIC_P_PP
-#define INTERP_SIMD_INTRINSIC_P_PP(a,b)
+#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c)
#undef INTERP_SIMD_INTRINSIC_P_PPP
-#define INTERP_SIMD_INTRINSIC_P_PPP(a,b) b,
+#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c) b,
PPPP_SIMD_Method interp_simd_p_ppp_table [] = {
#include "interp-simd-intrins.def"
};
#undef INTERP_SIMD_INTRINSIC_P_PPP
-#define INTERP_SIMD_INTRINSIC_P_PPP(a,b)
+#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c)
#endif // INTERP_ENABLE_SIMD
extern PPP_SIMD_Method interp_simd_p_pp_table [];
extern PPPP_SIMD_Method interp_simd_p_ppp_table [];
+#if HOST_BROWSER
+extern int interp_simd_p_p_wasm_opcode_table [];
+extern int interp_simd_p_pp_wasm_opcode_table [];
+extern int interp_simd_p_ppp_wasm_opcode_table [];
+#endif
+
#endif /* __MONO_MINI_INTERP_SIMD_H__ */
*dest = mono_interp_enum_hasflag (sp1, sp2, klass);
}
+EMSCRIPTEN_KEEPALIVE gpointer
+mono_jiterp_get_simd_intrinsic (int arity, int index)
+{
+#ifdef INTERP_ENABLE_SIMD
+ switch (arity) {
+ case 1:
+ return interp_simd_p_p_table [index];
+ case 2:
+ return interp_simd_p_pp_table [index];
+ case 3:
+ return interp_simd_p_ppp_table [index];
+ default:
+ g_assert_not_reached();
+ }
+#else
+ g_assert_not_reached();
+#endif
+}
+
+EMSCRIPTEN_KEEPALIVE int
+mono_jiterp_get_simd_opcode (int arity, int index)
+{
+#ifdef INTERP_ENABLE_SIMD
+ switch (arity) {
+ case 1:
+ return interp_simd_p_p_wasm_opcode_table [index];
+ case 2:
+ return interp_simd_p_pp_wasm_opcode_table [index];
+ case 3:
+ return interp_simd_p_ppp_wasm_opcode_table [index];
+ default:
+ g_assert_not_reached();
+ }
+#else
+ g_assert_not_reached();
+#endif
+}
+
#endif
/* SIMD opcodes, grouped by signature */
-#define INTERP_SIMD_INTRINSIC_P_P(a,b)
-#define INTERP_SIMD_INTRINSIC_P_PP(a,b)
-#define INTERP_SIMD_INTRINSIC_P_PPP(a,b)
+#define INTERP_SIMD_INTRINSIC_P_P(a,b,c)
+#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c)
+#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c)
#undef INTERP_SIMD_INTRINSIC_P_P
-#define INTERP_SIMD_INTRINSIC_P_P(a,b) a,
+#define INTERP_SIMD_INTRINSIC_P_P(a,b,c) a,
typedef enum {
#include "interp-simd-intrins.def"
} MintSIMDOpsPP;
#undef INTERP_SIMD_INTRINSIC_P_P
-#define INTERP_SIMD_INTRINSIC_P_P(a,b)
+#define INTERP_SIMD_INTRINSIC_P_P(a,b,c)
#undef INTERP_SIMD_INTRINSIC_P_PP
-#define INTERP_SIMD_INTRINSIC_P_PP(a,b) a,
+#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c) a,
typedef enum {
#include "interp-simd-intrins.def"
INTERP_SIMD_INTRINSIC_P_PP_LAST
} MintSIMDOpsPPP;
#undef INTERP_SIMD_INTRINSIC_P_PP
-#define INTERP_SIMD_INTRINSIC_P_PP(a,b)
+#define INTERP_SIMD_INTRINSIC_P_PP(a,b,c)
#undef INTERP_SIMD_INTRINSIC_P_PPP
-#define INTERP_SIMD_INTRINSIC_P_PPP(a,b) a,
+#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c) a,
typedef enum {
#include "interp-simd-intrins.def"
INTERP_SIMD_INTRINSIC_P_PPP_LAST
} MintSIMDOpsPPPP;
#undef INTERP_SIMD_INTRINSIC_P_PPP
-#define INTERP_SIMD_INTRINSIC_P_PPP(a,b)
+#define INTERP_SIMD_INTRINSIC_P_PPP(a,b,c)
#if NO_UNALIGNED_ACCESS
# if G_BYTE_ORDER == G_LITTLE_ENDIAN
SIMD_METHOD(get_Count)
SIMD_METHOD(get_AllBitsSet)
SIMD_METHOD(get_IsHardwareAccelerated)
+SIMD_METHOD(get_IsSupported)
SIMD_METHOD(get_Item)
SIMD_METHOD(get_One)
SIMD_METHOD(get_Zero)
SIMD_METHOD(op_Addition)
SIMD_METHOD(op_BitwiseAnd)
SIMD_METHOD(op_BitwiseOr)
+SIMD_METHOD(op_Division)
SIMD_METHOD(op_Equality)
SIMD_METHOD(op_ExclusiveOr)
SIMD_METHOD(op_Explicit)
SIMD_METHOD(Create)
SIMD_METHOD(CreateScalar)
SIMD_METHOD(CreateScalarUnsafe)
+
SIMD_METHOD(Equals)
SIMD_METHOD(ExtractMostSignificantBits)
SIMD_METHOD(GreaterThan)
SIMD_METHOD(Shuffle)
SIMD_METHOD(WidenLower)
SIMD_METHOD(WidenUpper)
+
+// PackedSimd
+SIMD_METHOD(Splat)
+SIMD_METHOD(ExtractLane)
+SIMD_METHOD(ReplaceLane)
+SIMD_METHOD(Swizzle)
+SIMD_METHOD(Add)
+SIMD_METHOD(Subtract)
+SIMD_METHOD(Multiply)
+SIMD_METHOD(Dot)
+SIMD_METHOD(Negate)
+SIMD_METHOD(And)
+SIMD_METHOD(Bitmask)
+SIMD_METHOD(CompareEqual)
+SIMD_METHOD(CompareNotEqual)
+SIMD_METHOD(ConvertNarrowingSignedSaturate)
+SIMD_METHOD(ConvertNarrowingUnsignedSaturate)
* SIMD Intrinsics support for interpreter
*/
+#include "config.h"
+#include <glib.h>
#include <mono/utils/bsearch.h>
// We use the same approach as jit/aot for identifying simd methods.
SN_Shuffle,
SN_WidenLower,
SN_WidenUpper,
- SN_get_IsHardwareAccelerated
+ SN_get_IsHardwareAccelerated,
};
static guint16 sri_vector128_t_methods [] = {
SN_op_Addition,
SN_op_BitwiseAnd,
SN_op_BitwiseOr,
+ SN_op_Division,
SN_op_Equality,
SN_op_ExclusiveOr,
SN_op_Inequality,
SN_op_UnsignedRightShift
};
+static guint16 sri_packedsimd_methods [] = {
+ SN_ConvertNarrowingSignedSaturate,
+ SN_ConvertNarrowingUnsignedSaturate,
+ SN_Swizzle,
+ SN_get_IsHardwareAccelerated,
+ SN_get_IsSupported,
+};
+
+#if HOST_BROWSER
+
+/*
+ * maps from INTERP_SIMD_INTRINSIC_WASM_I8X16_xxx to the correct one for the return type,
+ * assuming that they are laid out sequentially like this:
+ * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPAREEQUAL, wasm_i8x16_eq, 0x0)
+ * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I16X8_COMPAREEQUAL, wasm_i16x8_eq, 0x0)
+ * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I32X4_COMPAREEQUAL, wasm_i32x4_eq, 0x0)
+ * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_I64X2_COMPAREEQUAL, wasm_i64x2_eq, 0x0)
+ * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F32X4_COMPAREEQUAL, wasm_f32x4_eq, 0x0)
+ * INTERP_WASM_SIMD_INTRINSIC_V_VV (INTERP_SIMD_INTRINSIC_WASM_F64X2_COMPAREEQUAL, wasm_f64x2_eq, 0x0)
+ * It is your responsibility to ensure that it's actually laid out this way!
+ */
+
+static int sri_packedsimd_offset_from_atype [] = {
+ -1, // MONO_TYPE_END = 0x00,
+ -1, // MONO_TYPE_VOID = 0x01,
+ // FIXME: Should this be 2, for I4?
+ 0, // MONO_TYPE_BOOLEAN = 0x02,
+ 1, // MONO_TYPE_CHAR = 0x03,
+ 0, // MONO_TYPE_I1 = 0x04,
+ 0, // MONO_TYPE_U1 = 0x05,
+ 1, // MONO_TYPE_I2 = 0x06,
+ 1, // MONO_TYPE_U2 = 0x07,
+ 2, // MONO_TYPE_I4 = 0x08,
+ 2, // MONO_TYPE_U4 = 0x09,
+ 3, // MONO_TYPE_I8 = 0x0a,
+ 3, // MONO_TYPE_U8 = 0x0b,
+ 4, // MONO_TYPE_R4 = 0x0c,
+ 5, // MONO_TYPE_R8 = 0x0d,
+ -1, // MONO_TYPE_STRING = 0x0e,
+ 2, // MONO_TYPE_PTR = 0x0f,
+ -1, // MONO_TYPE_BYREF = 0x10,
+ -1, // MONO_TYPE_VALUETYPE = 0x11,
+ -1, // MONO_TYPE_CLASS = 0x12,
+ -1, // MONO_TYPE_VAR = 0x13,
+ -1, // MONO_TYPE_ARRAY = 0x14,
+ -1, // MONO_TYPE_GENERICINST= 0x15,
+	-1, // MONO_TYPE_TYPEDBYREF = 0x16,
+	-1, // unused               = 0x17, (placeholder so the array stays indexable by MONO_TYPE_* value)
+	2, // MONO_TYPE_I          = 0x18,
+	2, // MONO_TYPE_U          = 0x19,
+};
+
+static const int sri_packedsimd_offset_from_atype_length = sizeof(sri_packedsimd_offset_from_atype) / sizeof(sri_packedsimd_offset_from_atype[0]);
+#endif // HOST_BROWSER
+
static gboolean
emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature)
{
}
break;
case SN_op_LeftShift:
- g_assert (scalar_arg == 1);
+ if (scalar_arg != 1)
+ return FALSE;
simd_opcode = MINT_SIMD_INTRINS_P_PP;
if (arg_size == 1) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I1_LEFT_SHIFT;
else if (arg_size == 2) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I2_LEFT_SHIFT;
else if (arg_size == 4) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I4_LEFT_SHIFT;
else if (arg_size == 8) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I8_LEFT_SHIFT;
break;
+ case SN_op_Division:
+ if (scalar_arg != -1)
+ return FALSE;
+ simd_opcode = MINT_SIMD_INTRINS_P_PP;
+ if (atype == MONO_TYPE_R4) simd_intrins = INTERP_SIMD_INTRINSIC_V128_R4_DIVISION;
+ break;
case SN_op_Multiply:
- g_assert (scalar_arg == -1);
+ if (scalar_arg != -1)
+ return FALSE;
simd_opcode = MINT_SIMD_INTRINS_P_PP;
if (atype == MONO_TYPE_I1 || atype == MONO_TYPE_U1) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I1_MULTIPLY;
else if (atype == MONO_TYPE_I2 || atype == MONO_TYPE_U2) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I2_MULTIPLY;
else if (atype == MONO_TYPE_I4 || atype == MONO_TYPE_U4) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I4_MULTIPLY;
+ else if (atype == MONO_TYPE_R4) simd_intrins = INTERP_SIMD_INTRINSIC_V128_R4_MULTIPLY;
break;
case SN_op_OnesComplement:
simd_opcode = MINT_SIMD_INTRINS_P_P;
simd_intrins = INTERP_SIMD_INTRINSIC_V128_ONES_COMPLEMENT;
break;
case SN_op_RightShift:
- g_assert (scalar_arg == 1);
+ if (scalar_arg != 1)
+ return FALSE;
simd_opcode = MINT_SIMD_INTRINS_P_PP;
if (atype == MONO_TYPE_I1) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I1_RIGHT_SHIFT;
else if (atype == MONO_TYPE_I2) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I2_RIGHT_SHIFT;
else if (atype == MONO_TYPE_I4 || atype == MONO_TYPE_U4) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I4_NEGATION;
break;
case SN_op_UnsignedRightShift:
- g_assert (scalar_arg == 1);
+ if (scalar_arg != 1)
+ return FALSE;
simd_opcode = MINT_SIMD_INTRINS_P_PP;
if (arg_size == 1) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I1_URIGHT_SHIFT;
else if (arg_size == 2) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I2_URIGHT_SHIFT;
return TRUE;
}
+#if HOST_BROWSER
+// Maps an element type (MonoTypeEnum) to the packedsimd intrinsic for that lane
+// width/kind by adding a table-driven offset to 'base_intrins'. This relies on the
+// intrinsics being declared sequentially per element type (i8x16, i16x8, i32x4,
+// i64x2[, f32x4, f64x2]) as described above sri_packedsimd_offset_from_atype.
+// Returns -1 when 'atype' has no mapping, or when it maps to a float variant while
+// 'allow_float' is FALSE; callers treat -1 as "intrinsic not supported".
+static int
+map_packedsimd_intrins_based_on_atype (MonoTypeEnum atype, int base_intrins, gboolean allow_float)
+{
+ // Only offsets 0-3 (integer lanes) are valid unless the caller allows floats (0-5).
+ int max_offset = allow_float ? 5 : 3;
+ // Bounds-check against the lookup table before indexing it.
+ if ((atype < 0) || (atype >= sri_packedsimd_offset_from_atype_length))
+ return -1;
+ int offset = sri_packedsimd_offset_from_atype [atype];
+ if ((offset < 0) || (offset > max_offset))
+ return -1;
+ return base_intrins + offset;
+}
+#endif
+
+// Transforms a call to a System.Runtime.Intrinsics.Wasm.PackedSimd method into an
+// interpreter SIMD instruction (MINT_SIMD_INTRINS_P_P / _P_PP) plus an intrinsic id
+// stored in data [0]. Returns FALSE when the method is unknown or the element type
+// is unsupported, in which case the caller compiles the call normally.
+static gboolean
+emit_sri_packedsimd (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature)
+{
+ int id = lookup_intrins (sri_packedsimd_methods, sizeof (sri_packedsimd_methods), cmethod);
+ if (id == -1)
+ return FALSE;
+
+ MonoClass *vector_klass = mono_class_from_mono_type_internal (csignature->ret);
+ int vector_size = -1;
+
+ // The capability getters compile down to a constant: 1 on the browser (where the
+ // wasm SIMD opcodes are emitted below), 0 everywhere else.
+ if ((id == SN_get_IsSupported) || (id == SN_get_IsHardwareAccelerated)) {
+#if HOST_BROWSER
+ interp_add_ins (td, MINT_LDC_I4_1);
+#else
+ interp_add_ins (td, MINT_LDC_I4_0);
+#endif
+ goto opcode_added;
+ }
+
+#if HOST_BROWSER
+ gint16 simd_opcode = -1;
+ gint16 simd_intrins = -1;
+ // For methods whose return type is not a vector (e.g. Bitmask returns an int),
+ // locate the Vector128<T> class via the first parameter instead.
+ if (!m_class_is_simd_type (vector_klass))
+ vector_klass = mono_class_from_mono_type_internal (csignature->params [0]);
+ if (!m_class_is_simd_type (vector_klass))
+ return FALSE;
+
+ vector_size = mono_class_value_size (vector_klass, NULL);
+ g_assert (vector_size == SIZEOF_V128);
+
+ // Element type T of the Vector128<T> generic instantiation.
+ MonoType *arg_type = mono_class_get_context (vector_klass)->class_inst->type_argv [0];
+ if (!mono_type_is_primitive (arg_type))
+ return FALSE;
+ MonoTypeEnum atype = arg_type->type;
+ if (atype == MONO_TYPE_BOOLEAN)
+ return FALSE;
+
+ // Index of the last non-vector (scalar) argument, if any (e.g. a shift amount).
+ int scalar_arg = -1;
+ for (int i = 0; i < csignature->param_count; i++) {
+ if (csignature->params [i]->type != MONO_TYPE_GENERICINST)
+ scalar_arg = i;
+ }
+
+ switch (id) {
+ case SN_Splat: {
+ simd_opcode = MINT_SIMD_INTRINS_P_P;
+ simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_SPLAT, FALSE);
+ break;
+ }
+ case SN_Swizzle: {
+ simd_opcode = MINT_SIMD_INTRINS_P_PP;
+ simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I8X16_SWIZZLE;
+ break;
+ }
+ case SN_Add: {
+ simd_opcode = MINT_SIMD_INTRINS_P_PP;
+ simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_ADD, FALSE);
+ break;
+ }
+ case SN_Subtract: {
+ simd_opcode = MINT_SIMD_INTRINS_P_PP;
+ simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_SUBTRACT, FALSE);
+ break;
+ }
+ case SN_Multiply: {
+ simd_opcode = MINT_SIMD_INTRINS_P_PP;
+ simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_MULTIPLY, FALSE);
+ break;
+ }
+ case SN_Dot: {
+ simd_opcode = MINT_SIMD_INTRINS_P_PP;
+ // Dot has exactly one shape (i32x4 from two i16x8 inputs), so no atype mapping.
+ simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I32X4_DOT_I16X8;
+ break;
+ }
+ case SN_Negate: {
+ simd_opcode = MINT_SIMD_INTRINS_P_P;
+ simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_NEGATE, FALSE);
+ break;
+ }
+ case SN_ShiftLeft: {
+ simd_opcode = MINT_SIMD_INTRINS_P_PP;
+ simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTLEFT, FALSE);
+ break;
+ }
+ case SN_ShiftRightArithmetic: {
+ simd_opcode = MINT_SIMD_INTRINS_P_PP;
+ simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTRIGHTARITHMETIC, FALSE);
+ break;
+ }
+ case SN_ShiftRightLogical: {
+ simd_opcode = MINT_SIMD_INTRINS_P_PP;
+ simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_SHIFTRIGHTLOGICAL, FALSE);
+ break;
+ }
+ case SN_And: {
+ simd_opcode = MINT_SIMD_INTRINS_P_PP;
+ // Bitwise ops are lane-width agnostic, so a single intrinsic covers all types.
+ simd_intrins = INTERP_SIMD_INTRINSIC_WASM_AND;
+ break;
+ }
+ case SN_Bitmask: {
+ simd_opcode = MINT_SIMD_INTRINS_P_P;
+ simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_BITMASK, FALSE);
+ break;
+ }
+ case SN_CompareEqual: {
+ simd_opcode = MINT_SIMD_INTRINS_P_PP;
+ simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPAREEQUAL, TRUE);
+ break;
+ }
+ case SN_CompareNotEqual: {
+ simd_opcode = MINT_SIMD_INTRINS_P_PP;
+ simd_intrins = map_packedsimd_intrins_based_on_atype (atype, INTERP_SIMD_INTRINSIC_WASM_I8X16_COMPARENOTEQUAL, TRUE);
+ break;
+ }
+ case SN_ConvertNarrowingSignedSaturate: {
+ simd_opcode = MINT_SIMD_INTRINS_P_PP;
+ // atype here is the *result* element type of the narrowing conversion.
+ if (atype == MONO_TYPE_I1)
+ simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I8X16_NARROW_I16X8_S;
+ else if (atype == MONO_TYPE_I2)
+ simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I16X8_NARROW_I32X4_S;
+ break;
+ }
+ case SN_ConvertNarrowingUnsignedSaturate: {
+ simd_opcode = MINT_SIMD_INTRINS_P_PP;
+ if (atype == MONO_TYPE_U1)
+ simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I8X16_NARROW_I16X8_U;
+ else if (atype == MONO_TYPE_U2)
+ simd_intrins = INTERP_SIMD_INTRINSIC_WASM_I16X8_NARROW_I32X4_U;
+ break;
+ }
+ default:
+ return FALSE;
+ }
+
+ // A case above may have left simd_intrins at -1 for an unsupported element type.
+ if (simd_opcode == -1 || simd_intrins == -1) {
+ return FALSE;
+ }
+
+ interp_add_ins (td, simd_opcode);
+ td->last_ins->data [0] = simd_intrins;
+#else // HOST_BROWSER
+ return FALSE;
+#endif // HOST_BROWSER
+
+opcode_added:
+ // Pop the arguments and wire them up as source registers of the new instruction.
+ td->sp -= csignature->param_count;
+ for (int i = 0; i < csignature->param_count; i++)
+ td->last_ins->sregs [i] = td->sp [i].local;
+
+ g_assert (csignature->ret->type != MONO_TYPE_VOID);
+ int ret_mt = mint_type (csignature->ret);
+ if (ret_mt == MINT_TYPE_VT) {
+ // For these intrinsics, if we return a VT then it is a V128
+ push_type_vt (td, vector_klass, vector_size);
+ } else {
+ push_simple_type (td, stack_type [ret_mt]);
+ }
+ interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
+ td->ip += 5;
+ return TRUE;
+}
+
static gboolean
interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature)
{
class_ns = m_class_get_name_space (cmethod->klass);
class_name = m_class_get_name (cmethod->klass);
- if (!strcmp (class_ns, "System.Runtime.Intrinsics")) {
+ if (mono_opt_interp_simd_v128 && !strcmp (class_ns, "System.Runtime.Intrinsics")) {
if (!strcmp (class_name, "Vector128"))
return emit_sri_vector128 (td, cmethod, csignature);
else if (!strcmp (class_name, "Vector128`1"))
return emit_sri_vector128_t (td, cmethod, csignature);
+ } else if (mono_opt_interp_simd_packedsimd && !strcmp (class_ns, "System.Runtime.Intrinsics.Wasm")) {
+ if (!strcmp (class_name, "PackedSimd")) {
+ gboolean res = emit_sri_packedsimd (td, cmethod, csignature);
+#if HOST_BROWSER
+ if (!res)
+ g_print ("MONO interpreter: Unsupported method: System.Runtime.Intrinsics.Wasm.PackedSimd.%s\n", cmethod->name);
+ g_assert (res);
+#endif
+ return res;
+ }
}
return FALSE;
}
DEFINE_BOOL(wasm_exceptions, "wasm-exceptions", FALSE, "Enable codegen for WASM exceptions")
DEFINE_BOOL(wasm_gc_safepoints, "wasm-gc-safepoints", FALSE, "Use GC safepoints on WASM")
DEFINE_BOOL(aot_lazy_assembly_load, "aot-lazy-assembly-load", FALSE, "Load assemblies referenced by AOT images lazily")
+#if HOST_BROWSER
+DEFINE_BOOL(interp_simd_v128, "interp-simd-v128", FALSE, "Enable interpreter Vector128 support")
+#else
+DEFINE_BOOL(interp_simd_v128, "interp-simd-v128", TRUE, "Enable interpreter Vector128 support")
+#endif
+DEFINE_BOOL(interp_simd_packedsimd, "interp-simd-packedsimd", FALSE, "Enable interpreter WASM PackedSimd support")
#if HOST_BROWSER
DEFINE_BOOL(jiterpreter_eliminate_null_checks, "jiterpreter-eliminate-null-checks", TRUE, "Attempt to eliminate redundant null checks in traces")
// enables performing backward branches without exiting traces
DEFINE_BOOL(jiterpreter_backward_branches_enabled, "jiterpreter-backward-branches-enabled", TRUE, "Enable performing backward branches without exiting traces")
+// Attempt to use WASM v128 opcodes to implement SIMD interpreter opcodes
+DEFINE_BOOL(jiterpreter_enable_simd, "jiterpreter-simd-enabled", TRUE, "Attempt to use WebAssembly SIMD support")
// When compiling a jit_call wrapper, bypass sharedvt wrappers if possible by inlining their
// logic into the compiled wrapper and calling the target AOTed function with native call convention
DEFINE_BOOL(jiterpreter_direct_jit_call, "jiterpreter-direct-jit-calls", TRUE, "Bypass gsharedvt wrappers when compiling JIT call wrappers")
{
measurements = new Measurement[] {
new Create(),
+ new PackConstant(),
+ new Pack(),
new Add(),
new Multiply(),
new DotInt(),
public override void RunStep() => vector = Vector128.Create(0x123456);
}
+ // Measures Vector128.Create with compile-time-constant arguments, which the
+ // compiler can fold into a single v128 constant load.
+ class PackConstant : VectorMeasurement
+ {
+ // Written but never read; NOTE(review): presumably retained so the store is
+ // not dead-code-eliminated out of the measured loop - confirm.
+ Vector128<int> vector;
+
+ public override string Name => "Pack Vector128 (Constant)";
+
+ public override void RunStep() => vector = Vector128.Create(1, 2, 3, 4);
+ }
+
+ // Measures Vector128.Create with non-constant (field) arguments, forcing an
+ // actual pack of four scalars at run time, unlike PackConstant.
+ class Pack : VectorMeasurement
+ {
+ // Written but never read; NOTE(review): presumably retained so the store is
+ // not dead-code-eliminated out of the measured loop - confirm.
+ Vector128<int> vector;
+ int a = 1, b = 2, c = 3, d = 4;
+
+ public override string Name => "Pack Vector128";
+
+ public override void RunStep() => vector = Vector128.Create(a, b, c, d);
+ }
+
class Add : VectorMeasurement
{
Vector128<int> vector1, vector2, vector3;
RUNTIME_OUTPUT_DIRECTORY "${NATIVE_BIN_DIR}")
set(ignoreMeWasmOptFlags "${CONFIGURATION_WASM_OPT_FLAGS}")
+# Reference these command-line cache variables so CMake does not warn that they
+# were set but unused when this subproject does not consume them (same pattern as
+# the ignoreMe... line above).
+set(ignoreMeWasmOptAdditionalFlags "${WASM_OPT_ADDITIONAL_FLAGS}")
+set(ignoreMeEmsdkPath "${EMSDK_PATH}")
if(CMAKE_BUILD_TYPE STREQUAL "Release")
add_custom_command(TARGET dotnet
[true, "mono_jiterp_boost_back_branch_target", "void", ["number"]],
[true, "mono_jiterp_is_imethod_var_address_taken", "number", ["number", "number"]],
[true, "mono_jiterp_get_opcode_value_table_entry", "number", ["number"]],
+ [true, "mono_jiterp_get_simd_intrinsic", "number", ["number", "number"]],
+ [true, "mono_jiterp_get_simd_opcode", "number", ["number", "number"]],
...legacy_interop_cwraps
];
mono_jiterp_boost_back_branch_target(destination: number): void;
mono_jiterp_is_imethod_var_address_taken(imethod: VoidPtr, offsetBytes: number): number;
mono_jiterp_get_opcode_value_table_entry(opcode: number): number;
+ mono_jiterp_get_simd_intrinsic(arity: number, index: number): VoidPtr;
+ mono_jiterp_get_simd_opcode(arity: number, index: number): number;
}
const wrapped_c_functions: t_Cwraps = <any>{};
import os
import re
-if len (sys.argv) != 3:
- print ("Usage: genmintops.py <src/mintops.def> <dest/mintops.ts>")
+if len (sys.argv) != 4:
+ print ("Usage: genmintops.py <src/mintops.def> <src/interp-simd-intrins.def> <dest/mintops.ts>")
exit (1)
src_header_path = sys.argv [1]
-output_ts_path = sys.argv [2]
+simd_header_path = sys.argv [2]
+output_ts_path = sys.argv [3]
src = open(src_header_path, 'r')
+simd_src = open(simd_header_path, 'r')
tab = " "
header_lines = src.read().splitlines()
+# strip preprocessing directives
+simd_header_lines = (l for l in simd_src.read().splitlines() if not l.startswith("#"))
# strip preprocessing directives and add indentation for tslint/eslint
header = "\n".join((tab + l) for l in header_lines if not l.startswith("#"))
src.close()
+simd_src.close()
opdef_regex = r'\s(IR)?OPDEF\((\w+),\s*(.+?),\s*(MintOp\w+)\)'
enum_values = re.sub(
opdef_regex, lambda m : f"[MintOpcode.{m.group(2)}]: [{m.group(3)}, MintOpArgType.{m.group(4)}],", header
)
+simd_values_1 = []
+simd_values_2 = []
+simd_values_3 = []
+simd_disp = {
+ "INTERP_SIMD_INTRINSIC_P_P": simd_values_1,
+ "INTERP_SIMD_INTRINSIC_P_PP": simd_values_2,
+ "INTERP_SIMD_INTRINSIC_P_PPP": simd_values_3,
+ "INTERP_WASM_SIMD_INTRINSIC_V_P": simd_values_1,
+ "INTERP_WASM_SIMD_INTRINSIC_V_V": simd_values_1,
+ "INTERP_WASM_SIMD_INTRINSIC_I_V": simd_values_1,
+ "INTERP_WASM_SIMD_INTRINSIC_V_VV": simd_values_2,
+ "INTERP_WASM_SIMD_INTRINSIC_V_VI": simd_values_2,
+ "INTERP_WASM_SIMD_INTRINSIC_V_VVV": simd_values_3,
+}
+
+for line in simd_header_lines:
+ idx1 = line.index("(") if "(" in line else None
+ idx2 = line.index(",") if "," in line else None
+ if (idx1 and idx2):
+ key = line[0:idx1].strip()
+ simd_disp[key].append(line[(idx1 + 1):idx2].strip().replace("INTERP_SIMD_INTRINSIC_", ""))
+
+splitter = ",\n "
+splitter2 = ",\n "
+
generated = f"""
// Generated by genmintops.py from mintops.def.
// Do not manually edit this file.
-import {{ OpcodeInfoTable, MintOpArgType }} from "./jiterpreter-opcodes";
+import {{ OpcodeInfoTable, MintOpArgType, SimdInfoTable }} from "./jiterpreter-opcodes";
export const enum MintOpcode {{
{enum_values}
export const OpcodeInfo : OpcodeInfoTable = {{
{metadata_table}
}};
+
+export const enum SimdIntrinsic2 {{
+ {splitter.join(simd_values_1)}
+}}
+
+export const enum SimdIntrinsic3 {{
+ {splitter.join(simd_values_2)}
+}}
+
+export const enum SimdIntrinsic4 {{
+ {splitter.join(simd_values_3)}
+}}
+
+export const SimdInfo : SimdInfoTable = {{
+ 2: [
+ {splitter2.join(repr(x) for x in simd_values_1)}
+ ],
+ 3: [
+ {splitter2.join(repr(x) for x in simd_values_2)}
+ ],
+ 4: [
+ {splitter2.join(repr(x) for x in simd_values_3)}
+ ],
+}};
"""
os.makedirs(os.path.dirname(output_ts_path), exist_ok=True)
// Emit function imports
for (let i = 0; i < trampImports.length; i++) {
mono_assert(trampImports[i], () => `trace #${i} missing`);
- builder.defineImportedFunction("i", trampImports[i][0], trampImports[i][1], true, false, trampImports[i][2]);
+ builder.defineImportedFunction("i", trampImports[i][0], trampImports[i][1], true, trampImports[i][2]);
}
builder._generateImportSection();
console.log(`jit queue generated ${buffer.length} byte(s) of wasm`);
counters.bytesGenerated += buffer.length;
const traceModule = new WebAssembly.Module(buffer);
+ const wasmImports = builder.getWasmImports();
- const traceInstance = new WebAssembly.Instance(traceModule, {
- i: builder.getImportedFunctionTable(),
- c: <any>builder.getConstants(),
- m: { h: (<any>Module).asm.memory },
- });
+ const traceInstance = new WebAssembly.Instance(traceModule, wasmImports);
// Now that we've jitted the trampolines, go through and fix up the function pointers
// to point to the new jitted trampolines instead of the default implementations
import {
WasmValtype, WasmBuilder, addWasmFunctionPointer as addWasmFunctionPointer,
_now, elapsedTimes, counters, getWasmFunctionTable, applyOptions,
- recordFailure, getOptions
+ recordFailure, getOptions, bytesFromHex
} from "./jiterpreter-support";
import cwraps from "./cwraps";
}
// this is cached replacements for Module.getWasmTableEntry();
-// we could add <EmccExportedLibraryFunction Include="$getWasmTableEntry" /> and <EmccExportedRuntimeMethod Include="getWasmTableEntry" />
+// we could add <EmccExportedLibraryFunction Include="$getWasmTableEntry" /> and <EmccExportedRuntimeMethod Include="getWasmTableEntry" />
// if we need to export the original
function getWasmTableEntry(index: number) {
let result = fnCache[index];
// Probe whether the current environment can handle wasm exceptions
try {
// Load and compile the wasm version of do_jit_call_indirect. This serves as a way to probe for wasm EH
- const bytes = new Uint8Array(doJitCall16.length / 2);
- for (let i = 0; i < doJitCall16.length; i += 2)
- bytes[i / 2] = parseInt(doJitCall16.substring(i, i + 2), 16);
+ const bytes = bytesFromHex(doJitCall16);
counters.bytesGenerated += bytes.length;
doJitCallModule = new WebAssembly.Module(bytes);
// Emit function imports
for (let i = 0; i < trampImports.length; i++)
- builder.defineImportedFunction("i", trampImports[i][0], trampImports[i][1], true, false, trampImports[i][2]);
+ builder.defineImportedFunction("i", trampImports[i][0], trampImports[i][1], true, trampImports[i][2]);
builder._generateImportSection();
// Function section
console.log(`do_jit_call queue flush generated ${buffer.length} byte(s) of wasm`);
counters.bytesGenerated += buffer.length;
const traceModule = new WebAssembly.Module(buffer);
+ const wasmImports = builder.getWasmImports();
- const traceInstance = new WebAssembly.Instance(traceModule, {
- i: builder.getImportedFunctionTable(),
- c: <any>builder.getConstants(),
- m: { h: (<any>Module).asm.memory }
- });
+ const traceInstance = new WebAssembly.Instance(traceModule, wasmImports);
for (let i = 0; i < jitQueue.length; i++) {
const info = jitQueue[i];
[key: number]: [name: string, length_u16: number, dregs: number, sregs: number, optype: MintOpArgType];
}
+export type SimdInfoSubtable = Array<string>
+
+export type SimdInfoTable = {
+ [argument_count: number] : SimdInfoSubtable
+}
+
// Keep this enum in sync with the opcode table in the wasm spec (existing opcode values
// are stable, so syncing should only ever mean adding new entries).
// Note that the prefix opcodes themselves (0xfc/0xfd/0xfe) are not listed here, since
// emitting them correctly requires special handling (see appendSimd).
i64_extend_32_s,
PREFIX_sat = 0xfc,
+ PREFIX_simd = 0xfd,
PREFIX_atomic = 0xfe
}
+
+export const enum WasmSimdOpcode {
+ v128_load = 0x00,
+ v128_load8x8_s = 0x01,
+ v128_load8x8_u = 0x02,
+ v128_load16x4_s = 0x03,
+ v128_load16x4_u = 0x04,
+ v128_load32x2_s = 0x05,
+ v128_load32x2_u = 0x06,
+ v128_load8_splat = 0x07,
+ v128_load16_splat = 0x08,
+ v128_load32_splat = 0x09,
+ v128_load64_splat = 0x0a,
+ v128_store = 0x0b,
+ v128_const = 0x0c,
+ i8x16_shuffle = 0x0d,
+ i8x16_swizzle = 0x0e,
+ i8x16_splat = 0x0f,
+ i16x8_splat = 0x10,
+ i32x4_splat = 0x11,
+ i64x2_splat = 0x12,
+ f32x4_splat = 0x13,
+ f64x2_splat = 0x14,
+ i8x16_extract_lane_s = 0x15,
+ i8x16_extract_lane_u = 0x16,
+ i8x16_replace_lane = 0x17,
+ i16x8_extract_lane_s = 0x18,
+ i16x8_extract_lane_u = 0x19,
+ i16x8_replace_lane = 0x1a,
+ i32x4_extract_lane = 0x1b,
+ i32x4_replace_lane = 0x1c,
+ i64x2_extract_lane = 0x1d,
+ i64x2_replace_lane = 0x1e,
+ f32x4_extract_lane = 0x1f,
+ f32x4_replace_lane = 0x20,
+ f64x2_extract_lane = 0x21,
+ f64x2_replace_lane = 0x22,
+ i8x16_eq = 0x23,
+ i8x16_ne = 0x24,
+ i8x16_lt_s = 0x25,
+ i8x16_lt_u = 0x26,
+ i8x16_gt_s = 0x27,
+ i8x16_gt_u = 0x28,
+ i8x16_le_s = 0x29,
+ i8x16_le_u = 0x2a,
+ i8x16_ge_s = 0x2b,
+ i8x16_ge_u = 0x2c,
+ i16x8_eq = 0x2d,
+ i16x8_ne = 0x2e,
+ i16x8_lt_s = 0x2f,
+ i16x8_lt_u = 0x30,
+ i16x8_gt_s = 0x31,
+ i16x8_gt_u = 0x32,
+ i16x8_le_s = 0x33,
+ i16x8_le_u = 0x34,
+ i16x8_ge_s = 0x35,
+ i16x8_ge_u = 0x36,
+ i32x4_eq = 0x37,
+ i32x4_ne = 0x38,
+ i32x4_lt_s = 0x39,
+ i32x4_lt_u = 0x3a,
+ i32x4_gt_s = 0x3b,
+ i32x4_gt_u = 0x3c,
+ i32x4_le_s = 0x3d,
+ i32x4_le_u = 0x3e,
+ i32x4_ge_s = 0x3f,
+ i32x4_ge_u = 0x40,
+ f32x4_eq = 0x41,
+ f32x4_ne = 0x42,
+ f32x4_lt = 0x43,
+ f32x4_gt = 0x44,
+ f32x4_le = 0x45,
+ f32x4_ge = 0x46,
+ f64x2_eq = 0x47,
+ f64x2_ne = 0x48,
+ f64x2_lt = 0x49,
+ f64x2_gt = 0x4a,
+ f64x2_le = 0x4b,
+ f64x2_ge = 0x4c,
+ v128_not = 0x4d,
+ v128_and = 0x4e,
+ v128_andnot = 0x4f,
+ v128_or = 0x50,
+ v128_xor = 0x51,
+ v128_bitselect = 0x52,
+ i8x16_abs = 0x60,
+ i8x16_neg = 0x61,
+ i8x16_all_true = 0x63,
+ i8x16_bitmask = 0x64,
+ i8x16_narrow_i16x8_s = 0x65,
+ i8x16_narrow_i16x8_u = 0x66,
+ i8x16_shl = 0x6b,
+ i8x16_shr_s = 0x6c,
+ i8x16_shr_u = 0x6d,
+ i8x16_add = 0x6e,
+ i8x16_add_sat_s = 0x6f,
+ i8x16_add_sat_u = 0x70,
+ i8x16_sub = 0x71,
+ i8x16_sub_sat_s = 0x72,
+ i8x16_sub_sat_u = 0x73,
+ i8x16_min_s = 0x76,
+ i8x16_min_u = 0x77,
+ i8x16_max_s = 0x78,
+ i8x16_max_u = 0x79,
+ i8x16_avgr_u = 0x7b,
+ i16x8_abs = 0x80,
+ i16x8_neg = 0x81,
+ i16x8_all_true = 0x83,
+ i16x8_bitmask = 0x84,
+ i16x8_narrow_i32x4_s = 0x85,
+ i16x8_narrow_i32x4_u = 0x86,
+ i16x8_extend_low_i8x16_s = 0x87,
+ i16x8_extend_high_i8x16_s = 0x88,
+ i16x8_extend_low_i8x16_u = 0x89,
+ i16x8_extend_high_i8x16_u = 0x8a,
+ i16x8_shl = 0x8b,
+ i16x8_shr_s = 0x8c,
+ i16x8_shr_u = 0x8d,
+ i16x8_add = 0x8e,
+ i16x8_add_sat_s = 0x8f,
+ i16x8_add_sat_u = 0x90,
+ i16x8_sub = 0x91,
+ i16x8_sub_sat_s = 0x92,
+ i16x8_sub_sat_u = 0x93,
+ i16x8_mul = 0x95,
+ i16x8_min_s = 0x96,
+ i16x8_min_u = 0x97,
+ i16x8_max_s = 0x98,
+ i16x8_max_u = 0x99,
+ i16x8_avgr_u = 0x9b,
+ i32x4_abs = 0xa0,
+ i32x4_neg = 0xa1,
+ i32x4_all_true = 0xa3,
+ i32x4_bitmask = 0xa4,
+ i32x4_extend_low_i16x8_s = 0xa7,
+ i32x4_extend_high_i16x8_s = 0xa8,
+ i32x4_extend_low_i16x8_u = 0xa9,
+ i32x4_extend_high_i16x8_u = 0xaa,
+ i32x4_shl = 0xab,
+ i32x4_shr_s = 0xac,
+ i32x4_shr_u = 0xad,
+ i32x4_add = 0xae,
+ i32x4_sub = 0xb1,
+ i32x4_mul = 0xb5,
+ i32x4_min_s = 0xb6,
+ i32x4_min_u = 0xb7,
+ i32x4_max_s = 0xb8,
+ i32x4_max_u = 0xb9,
+ i32x4_dot_i16x8_s = 0xba,
+ i64x2_abs = 0xc0,
+ i64x2_neg = 0xc1,
+ i64x2_bitmask = 0xc4,
+ i64x2_extend_low_i32x4_s = 0xc7,
+ i64x2_extend_high_i32x4_s = 0xc8,
+ i64x2_extend_low_i32x4_u = 0xc9,
+ i64x2_extend_high_i32x4_u = 0xca,
+ i64x2_shl = 0xcb,
+ i64x2_shr_s = 0xcc,
+ i64x2_shr_u = 0xcd,
+ i64x2_add = 0xce,
+ i64x2_sub = 0xd1,
+ i64x2_mul = 0xd5,
+ f32x4_ceil = 0x67,
+ f32x4_floor = 0x68,
+ f32x4_trunc = 0x69,
+ f32x4_nearest = 0x6a,
+ f64x2_ceil = 0x74,
+ f64x2_floor = 0x75,
+ f64x2_trunc = 0x7a,
+ f64x2_nearest = 0x94,
+ f32x4_abs = 0xe0,
+ f32x4_neg = 0xe1,
+ f32x4_sqrt = 0xe3,
+ f32x4_add = 0xe4,
+ f32x4_sub = 0xe5,
+ f32x4_mul = 0xe6,
+ f32x4_div = 0xe7,
+ f32x4_min = 0xe8,
+ f32x4_max = 0xe9,
+ f32x4_pmin = 0xea,
+ f32x4_pmax = 0xeb,
+ f64x2_abs = 0xec,
+ f64x2_neg = 0xed,
+ f64x2_sqrt = 0xef,
+ f64x2_add = 0xf0,
+ f64x2_sub = 0xf1,
+ f64x2_mul = 0xf2,
+ f64x2_div = 0xf3,
+ f64x2_min = 0xf4,
+ f64x2_max = 0xf5,
+ f64x2_pmin = 0xf6,
+ f64x2_pmax = 0xf7,
+ i32x4_trunc_sat_f32x4_s = 0xf8,
+ i32x4_trunc_sat_f32x4_u = 0xf9,
+ f32x4_convert_i32x4_s = 0xfa,
+ f32x4_convert_i32x4_u = 0xfb,
+ v128_load32_zero = 0x5c,
+ v128_load64_zero = 0x5d,
+ i16x8_extmul_low_i8x16_s = 0x9c,
+ i16x8_extmul_high_i8x16_s = 0x9d,
+ i16x8_extmul_low_i8x16_u = 0x9e,
+ i16x8_extmul_high_i8x16_u = 0x9f,
+ i32x4_extmul_low_i16x8_s = 0xbc,
+ i32x4_extmul_high_i16x8_s = 0xbd,
+ i32x4_extmul_low_i16x8_u = 0xbe,
+ i32x4_extmul_high_i16x8_u = 0xbf,
+ i64x2_extmul_low_i32x4_s = 0xdc,
+ i64x2_extmul_high_i32x4_s = 0xdd,
+ i64x2_extmul_low_i32x4_u = 0xde,
+ i64x2_extmul_high_i32x4_u = 0xdf,
+ i16x8_q15mulr_sat_s = 0x82,
+ v128_any_true = 0x53,
+ v128_load8_lane = 0x54,
+ v128_load16_lane = 0x55,
+ v128_load32_lane = 0x56,
+ v128_load64_lane = 0x57,
+ v128_store8_lane = 0x58,
+ v128_store16_lane = 0x59,
+ v128_store32_lane = 0x5a,
+ v128_store64_lane = 0x5b,
+ i64x2_eq = 0xd6,
+ i64x2_ne = 0xd7,
+ i64x2_lt_s = 0xd8,
+ i64x2_gt_s = 0xd9,
+ i64x2_le_s = 0xda,
+ i64x2_ge_s = 0xdb,
+ i64x2_all_true = 0xc3,
+ f64x2_convert_low_i32x4_s = 0xfe,
+ f64x2_convert_low_i32x4_u = 0xff,
+ i32x4_trunc_sat_f64x2_s_zero = 0xfc,
+ i32x4_trunc_sat_f64x2_u_zero = 0xfd,
+ f32x4_demote_f64x2_zero = 0x5e,
+ f64x2_promote_low_f32x4 = 0x5f,
+ i8x16_popcnt = 0x62,
+ i16x8_extadd_pairwise_i8x16_s = 0x7c,
+ i16x8_extadd_pairwise_i8x16_u = 0x7d,
+ i32x4_extadd_pairwise_i16x8_s = 0x7e,
+ i32x4_extadd_pairwise_i16x8_u = 0x7f,
+}
import { mono_assert } from "./types";
import { NativePointer, ManagedPointer, VoidPtr } from "./types/emscripten";
import { Module, runtimeHelpers } from "./globals";
-import { WasmOpcode } from "./jiterpreter-opcodes";
+import { WasmOpcode, WasmSimdOpcode } from "./jiterpreter-opcodes";
import { MintOpcode } from "./mintops";
import cwraps from "./cwraps";
typeIndex: number;
module: string;
name: string;
- assumeUsed: boolean;
func: Function;
}
nextConstantSlot = 0;
compressImportNames = false;
+ lockImports = false;
constructor(constantSlotCount: number) {
this.stack = [new BlobBuilder()];
this.stackSize = 1;
this.inSection = false;
this.inFunction = false;
+ this.lockImports = false;
this.locals.clear();
this.functionTypeCount = this.permanentFunctionTypeCount;
this.functionTypesByIndex = Object.create(this.permanentFunctionTypesByIndex);
this.nextImportIndex = 0;
- this.importedFunctionCount = this.permanentImportedFunctionCount;
+ this.importedFunctionCount = 0;
this.importedFunctions = Object.create(this.permanentImportedFunctions);
for (const k in this.importedFunctions) {
const f = this.importedFunctions[k];
- if (!f.assumeUsed)
- f.index = undefined;
+ f.index = undefined;
}
this.functions.length = 0;
return current.getArrayView(false).slice(0, current.size);
}
+ // Builds the WebAssembly.Imports object used to instantiate a module produced by
+ // this builder: constants ("c"), the shared memory ("m.h"), the wasm function
+ // table ("f.f"), and one entry per imported function that was actually referenced
+ // (keyed by the same, possibly compressed, name emitted in the import section).
+ getWasmImports () : WebAssembly.Imports {
+ const result : any = {
+ c: <any>this.getConstants(),
+ m: { h: (<any>Module).asm.memory },
+ f: { f: getWasmFunctionTable() },
+ };
+
+ const importsToEmit = this.getImportsToEmit();
+
+ for (let i = 0; i < importsToEmit.length; i++) {
+ const ifi = importsToEmit[i];
+ if (typeof (ifi.func) !== "function")
+ throw new Error(`Import '${ifi.name}' not found or not a function`);
+
+ // Must match the name written by _generateImportSection for this import.
+ const mangledName = this.getCompressedName(ifi);
+ let subTable = result[ifi.module];
+ if (!subTable) {
+ subTable = result[ifi.module] = {};
+ }
+ subTable[mangledName] = ifi.func;
+ }
+
+ return result;
+ }
+
// HACK: Approximate amount of space we need to generate the full module at present
// FIXME: This does not take into account any other functions already generated if they weren't
// emitted into the module immediately
- get bytesGeneratedSoFar() {
+ get bytesGeneratedSoFar () {
+ const importSize = this.compressImportNames
+ // mod (2 bytes) name (2-3 bytes) type (1 byte) typeidx (1-2 bytes)
+ ? 8
+ // we keep the uncompressed import names somewhat short, generally, so +12 bytes is about right
+ : 20;
+
return this.stack[0].size +
// HACK: A random constant for section headers and padding
32 +
- // mod (2 bytes) name (2-3 bytes) type (1 byte) typeidx (1-2 bytes)
- (this.importedFunctionCount * 8) +
+ (this.importedFunctionCount * importSize) +
// type index for each function
(this.functions.length * 2) +
// export entry for each export
return this.current.appendU8(value);
}
- appendU32(value: number) {
+ // Emits a SIMD instruction: the 0xFD prefix byte followed by the opcode.
+ appendSimd (value: WasmSimdOpcode) {
+ this.current.appendU8(WasmOpcode.PREFIX_simd);
+ // Per the wasm binary format, opcodes following the 0xFD prefix are encoded as
+ // u32 LEB128 (even though all currently defined SIMD opcodes fit in 1-2 bytes).
+ return this.current.appendULeb(value);
+ }
+
+ appendU32 (value: number) {
return this.current.appendU32(value);
}
return imports;
}
- getCompressedName(ifi: ImportedFunctionInfo) {
- if (!this.compressImportNames || typeof (ifi.index) !== "number")
+ getCompressedName (ifi: ImportedFunctionInfo) {
+ if (!this.compressImportNames || typeof(ifi.index) !== "number")
return ifi.name;
let result = compressedNameCache[ifi.index!];
return result;
}
- _generateImportSection() {
- const importsToEmit = [];
+ getImportsToEmit () {
+ const result = [];
for (const k in this.importedFunctions) {
- const f = this.importedFunctions[k];
- if (f.index !== undefined)
- importsToEmit.push(f);
+ const v = this.importedFunctions[k];
+ if (typeof (v.index) !== "number")
+ continue;
+ result.push(v);
}
- importsToEmit.sort((lhs, rhs) => lhs.index! - rhs.index!);
+ result.sort((lhs, rhs) => lhs.index! - rhs.index!);
+ // console.log("result=[" + result.map(f => `#${f.index} ${f.module}.${f.name}`) + "]");
+ return result;
+ }
+
+ _generateImportSection () {
+ const importsToEmit = this.getImportsToEmit();
+ this.lockImports = true;
// Import section
this.beginSection(2);
- this.appendULeb(1 + importsToEmit.length + this.constantSlots.length);
+ this.appendULeb(2 + importsToEmit.length + this.constantSlots.length);
- // console.log(`referenced ${importsToEmit.length}/${allImports.length} import(s)`);
+ // console.log(`referenced ${importsToEmit.length} import(s)`);
for (let i = 0; i < importsToEmit.length; i++) {
const ifi = importsToEmit[i];
- // console.log(` #${ifi.index} ${ifi.module}.${ifi.name} = ${ifi.friendlyName}`);
+ // console.log(` #${ifi.index} ${ifi.module}.${ifi.name} = ${ifi.func}`);
this.appendName(ifi.module);
this.appendName(this.getCompressedName(ifi));
this.appendU8(0x0); // function
this.appendU8(0x00);
// Minimum size is in 64k pages, not bytes
this.appendULeb(0x01);
+
+ this.appendName("f");
+ this.appendName("f");
+ // tabletype
+ this.appendU8(0x01);
+ // funcref
+ this.appendU8(0x70);
+ // limits = { min=0x01, max=infinity }
+ this.appendU8(0x00);
+ this.appendULeb(0x01);
}
defineImportedFunction(
module: string, name: string, functionTypeName: string,
- assumeUsed: boolean, permanent: boolean, func: Function | number
- ): ImportedFunctionInfo {
- if (permanent && (this.importedFunctionCount > this.permanentImportedFunctionCount))
- throw new Error("New permanent imports cannot be defined after non-permanent ones");
+ permanent: boolean, func: Function | number
+ ) : ImportedFunctionInfo {
+ if (this.lockImports)
+ throw new Error("Import section already generated");
+ if (permanent && (this.importedFunctionCount > 0))
+ throw new Error("New permanent imports cannot be defined after any indexes have been assigned");
const type = this.functionTypes[functionTypeName];
if (!type)
throw new Error("No function type named " + functionTypeName);
throw new Error("A permanent import must have a permanent function type");
const typeIndex = type[0];
const table = permanent ? this.permanentImportedFunctions : this.importedFunctions;
- const index = assumeUsed
- ? (
- permanent
- ? this.permanentImportedFunctionCount++
- : this.importedFunctionCount++
- )
- : undefined;
if (typeof (func) === "number")
func = getWasmFunctionTable().get(func);
if (typeof (func) !== "function")
throw new Error(`Value passed for imported function ${name} was not a function or valid function pointer`);
const result = table[name] = {
- index,
+ index: undefined,
typeIndex,
module,
name,
- assumeUsed,
func
};
return result;
this.endSection();
}
- callImport(name: string) {
+ call_indirect (functionTypeName: string, tableIndex: number) {
+ const type = this.functionTypes[functionTypeName];
+ if (!type)
+ throw new Error("No function type named " + functionTypeName);
+ const typeIndex = type[0];
+ this.appendU8(WasmOpcode.call_indirect);
+ this.appendULeb(typeIndex);
+ this.appendULeb(tableIndex);
+ }
+
+ callImport (name: string) {
const func = this.importedFunctions[name];
if (!func)
throw new Error("No imported function named " + name);
- if (func.index === undefined)
+ if (typeof (func.index) !== "number")
func.index = this.importedFunctionCount++;
this.appendU8(WasmOpcode.call);
this.appendULeb(func.index);
compilation: 0
};
+export const simdFallbackCounters : { [name: string] : number } = {
+};
+
export const counters = {
traceCandidates: 0,
tracesCompiled: 0,
nullChecksEliminated: 0,
backBranchesEmitted: 0,
backBranchesNotEmitted: 0,
+ simdFallback: simdFallbackCounters,
};
export const _now = (globalThis.performance && globalThis.performance.now)
return [name, name, fn];
}
+export function bytesFromHex (hex: string) : Uint8Array {
+ const bytes = new Uint8Array(hex.length / 2);
+ for (let i = 0; i < hex.length; i += 2)
+ bytes[i / 2] = parseInt(hex.substring(i, i + 2), 16);
+ return bytes;
+}
+
export type JiterpreterOptions = {
enableAll?: boolean;
enableTraces: boolean;
enableBackwardBranches: boolean;
enableCallResume: boolean;
enableWasmEh: boolean;
+ enableSimd: boolean;
// For locations where the jiterpreter heuristic says we will be unable to generate
// a trace, insert an entry point opcode anyway. This enables collecting accurate
// stats for options like estimateHeat, but raises overhead.
"enableBackwardBranches": "jiterpreter-backward-branch-entries-enabled",
"enableCallResume": "jiterpreter-call-resume-enabled",
"enableWasmEh": "jiterpreter-wasm-eh-enabled",
+ "enableSimd": "jiterpreter-simd-enabled",
"enableStats": "jiterpreter-stats-enabled",
"disableHeuristic": "jiterpreter-disable-heuristic",
"estimateHeat": "jiterpreter-estimate-heat",
// The .NET Foundation licenses this file to you under the MIT license.
import { mono_assert, MonoMethod } from "./types";
+import { Module } from "./globals";
import { NativePointer } from "./types/emscripten";
import {
getU16, getI16,
getU32_unaligned, getI32_unaligned, getF32_unaligned, getF64_unaligned,
} from "./memory";
-import { WasmOpcode } from "./jiterpreter-opcodes";
-import { MintOpcode, OpcodeInfo } from "./mintops";
+import { WasmOpcode, WasmSimdOpcode } from "./jiterpreter-opcodes";
+import {
+ MintOpcode, OpcodeInfo, SimdInfo,
+ SimdIntrinsic2, SimdIntrinsic3, SimdIntrinsic4
+} from "./mintops";
import cwraps from "./cwraps";
import {
MintOpcodePtr, WasmValtype, WasmBuilder,
append_memset_dest, append_bailout, append_exit,
append_memmove_dest_src, try_append_memset_fast,
- try_append_memmove_fast, counters,
+ try_append_memmove_fast, counters, bytesFromHex,
getMemberOffset, JiterpMember, BailoutReason,
getOpcodeTableValue
} from "./jiterpreter-support";
import {
- sizeOfDataItem,
+ sizeOfDataItem, sizeOfV128, sizeOfStackval,
disabledOpcodes, countCallTargets,
callTargetCounts, trapTraceErrors,
) : number {
const abort = <MintOpcodePtr><any>0;
let isFirstInstruction = true, isConditionallyExecuted = false,
- firstOpcodeInBlock = true;
+ firstOpcodeInBlock = true, containsSimd = false;
let result = 0,
prologueOpcodeCounter = 0,
conditionalOpcodeCounter = 0;
let opcode = getU16(ip);
const info = OpcodeInfo[opcode];
+ const isSimdIntrins = (opcode >= MintOpcode.MINT_SIMD_INTRINS_P_P) &&
+ (opcode <= MintOpcode.MINT_SIMD_INTRINS_P_PPP);
+ const simdIntrinsArgCount = isSimdIntrins
+ ? opcode - MintOpcode.MINT_SIMD_INTRINS_P_P + 2
+ : 0;
+ const simdIntrinsIndex = isSimdIntrins
+ ? getArgU16(ip, 1 + simdIntrinsArgCount)
+ : 0;
+
mono_assert(info, () => `invalid opcode ${opcode}`);
- const opname = info[0];
+ const opname = isSimdIntrins
+ ? SimdInfo[simdIntrinsArgCount][simdIntrinsIndex]
+ : info[0];
const _ip = ip;
const isBackBranchTarget = builder.options.noExitBackwardBranches &&
is_backward_branch_target(ip, startOfBody, backwardBranchTable),
append_exit(builder, ip, exitOpcodeCounter, BailoutReason.ComplexBranch);
} else
ip = abort;
+ } else if (
+ (opcode >= MintOpcode.MINT_SIMD_V128_LDC) &&
+ (opcode <= MintOpcode.MINT_SIMD_INTRINS_P_PPP)
+ ) {
+ if (!emit_simd(builder, ip, opcode, opname, simdIntrinsArgCount, simdIntrinsIndex))
+ ip = abort;
+ else
+ containsSimd = true;
} else if (opcodeValue === 0) {
// This means it was explicitly marked as no-value in the opcode value table
// so we can just skip over it. This is done for things like nops.
// console.log(`estimated size: ${builder.size + builder.cfg.overheadBytes + builder.bytesGeneratedSoFar}`);
+ // HACK: Traces containing simd will be *much* shorter than non-simd traces,
+ // which will cause both the heuristic and our length requirement outside
+ // to reject them. For now, just add a big constant to the length
+ if (containsSimd)
+ result += 10240;
return result;
}
builder.cfg.startBranchBlock(ip, isBackBranchTarget);
}
-function append_ldloc (builder: WasmBuilder, offset: number, opcode: WasmOpcode) {
+function append_ldloc (builder: WasmBuilder, offset: number, opcodeOrPrefix: WasmOpcode, simdOpcode?: WasmSimdOpcode) {
builder.local("pLocals");
- builder.appendU8(opcode);
+ builder.appendU8(opcodeOrPrefix);
+ if (simdOpcode !== undefined) {
+ // This looks wrong but I assure you it's correct.
+ builder.appendULeb(simdOpcode);
+ }
// stackval is 8 bytes, but pLocals might not be 8 byte aligned so we use 4
// wasm spec prohibits alignment higher than natural alignment, just to be annoying
- const alignment = (opcode > WasmOpcode.f64_load) ? 0 : 2;
+ const alignment = (simdOpcode !== undefined) || (opcodeOrPrefix > WasmOpcode.f64_load) ? 0 : 2;
builder.appendMemarg(offset, alignment);
}
// where the offset+alignment pair is referred to as a 'memarg' by the spec.
// The actual store operation is equivalent to `pBase[offset] = value` (alignment has no
// observable impact on behavior, other than causing compilation failures if out of range)
-function append_stloc_tail (builder: WasmBuilder, offset: number, opcode: WasmOpcode) {
- builder.appendU8(opcode);
+function append_stloc_tail (builder: WasmBuilder, offset: number, opcodeOrPrefix: WasmOpcode, simdOpcode?: WasmSimdOpcode) {
+ builder.appendU8(opcodeOrPrefix);
+ if (simdOpcode !== undefined) {
+ // This looks wrong but I assure you it's correct.
+ builder.appendULeb(simdOpcode);
+ }
// stackval is 8 bytes, but pLocals might not be 8 byte aligned so we use 4
// wasm spec prohibits alignment higher than natural alignment, just to be annoying
- const alignment = (opcode > WasmOpcode.f64_store) ? 0 : 2;
+ const alignment = (simdOpcode !== undefined) || (opcodeOrPrefix > WasmOpcode.f64_store) ? 0 : 2;
builder.appendMemarg(offset, alignment);
invalidate_local(offset);
}
case MintOpcode.MINT_STFLD_O: {
/*
* Writing a ref-type field has to call an import to perform the write barrier anyway,
- * and technically it should use a different kind of barrier from copy_pointer. So
+ * and technically it should use a different kind of barrier from copy_ptr. So
* we define a special import that is responsible for performing the whole stfld_o
* operation with as little trace-side overhead as possible
* Previously the pseudocode looked like:
* cknull_ptr = *(MonoObject *)&locals[objectOffset];
* if (!cknull_ptr) bailout;
- * copy_pointer(cknull_ptr + fieldOffset, *(MonoObject *)&locals[localOffset])
+ * copy_ptr(cknull_ptr + fieldOffset, *(MonoObject *)&locals[localOffset])
* The null check optimization also allows us to safely omit the bailout check
* if we know that the target object isn't null. Even if the target object were
* somehow null in this case (bad! shouldn't be possible!) it won't be a crash
// src
append_ldloca(builder, localOffset, 0);
// FIXME: Use mono_gc_wbarrier_set_field_internal
- builder.callImport("copy_pointer");
+ builder.callImport("copy_ptr");
return true;
case MintOpcode.MINT_LDSFLD_VT: {
const sizeBytes = getArgU16(ip, 4);
[MintOpcode.MINT_POPCNT_I4]: [WasmOpcode.i32_popcnt, WasmOpcode.i32_load, WasmOpcode.i32_store],
[MintOpcode.MINT_CLZ_I8]: [WasmOpcode.i64_clz, WasmOpcode.i64_load, WasmOpcode.i64_store],
[MintOpcode.MINT_CTZ_I8]: [WasmOpcode.i64_ctz, WasmOpcode.i64_load, WasmOpcode.i64_store],
- [MintOpcode.MINT_POPCNT_I8]: [WasmOpcode.i64_popcnt, WasmOpcode.i32_load, WasmOpcode.i32_store],
+ [MintOpcode.MINT_POPCNT_I8]: [WasmOpcode.i64_popcnt, WasmOpcode.i64_load, WasmOpcode.i64_store],
};
// HACK: Generating correct wasm for these is non-trivial so we hand them off to C.
builder.local("cknull_ptr");
// Load address of value so that copy_managed_pointer can grab it
append_ldloca(builder, valueVarIndex, 0);
- builder.callImport("copy_pointer");
+ builder.callImport("copy_ptr");
} else {
// Pre-load address for the store operation
builder.local("cknull_ptr");
return true;
}
+const vec128Test =
+ "0061736d0100000001040160000003020100070801047465737400000a090107004100fd111a0b";
+let wasmSimdSupported : boolean | undefined;
+
+function getIsWasmSimdSupported () : boolean {
+ if (wasmSimdSupported !== undefined)
+ return wasmSimdSupported;
+
+ // Probe whether the current environment can handle wasm v128 opcodes.
+ try {
+ // Load and compile a test module that uses i32x4.splat. See wasm-simd-feature-detect.wat/wasm
+ const bytes = bytesFromHex(vec128Test);
+ counters.bytesGenerated += bytes.length;
+ new WebAssembly.Module(bytes);
+ wasmSimdSupported = true;
+ } catch (exc) {
+ console.log("MONO_WASM: Disabling WASM SIMD support due to JIT failure", exc);
+ wasmSimdSupported = false;
+ }
+
+ return wasmSimdSupported;
+}
+
+function get_import_name (
+ builder: WasmBuilder, typeName: string,
+ functionPtr: number
+) : string {
+ const name = `${typeName}_${functionPtr.toString(16)}`;
+ if (typeof (builder.importedFunctions[name]) !== "object")
+ builder.defineImportedFunction("s", name, typeName, false, functionPtr);
+
+ return name;
+}
+
// Element width in bytes for each MINT_SIMD_V128_*_CREATE opcode.
const simdCreateSizes = {
    [MintOpcode.MINT_SIMD_V128_I1_CREATE]: 1,
    [MintOpcode.MINT_SIMD_V128_I2_CREATE]: 2,
    [MintOpcode.MINT_SIMD_V128_I4_CREATE]: 4,
    [MintOpcode.MINT_SIMD_V128_I8_CREATE]: 8,
};

// Scalar load opcode used to read each source element when packing a vector
// (sign-extending loads for the sub-i32 element widths).
const simdCreateLoadOps = {
    [MintOpcode.MINT_SIMD_V128_I1_CREATE]: WasmOpcode.i32_load8_s,
    [MintOpcode.MINT_SIMD_V128_I2_CREATE]: WasmOpcode.i32_load16_s,
    [MintOpcode.MINT_SIMD_V128_I4_CREATE]: WasmOpcode.i32_load,
    [MintOpcode.MINT_SIMD_V128_I8_CREATE]: WasmOpcode.i64_load,
};

// Scalar store opcode used to write each element into the destination vector.
const simdCreateStoreOps = {
    [MintOpcode.MINT_SIMD_V128_I1_CREATE]: WasmOpcode.i32_store8,
    [MintOpcode.MINT_SIMD_V128_I2_CREATE]: WasmOpcode.i32_store16,
    [MintOpcode.MINT_SIMD_V128_I4_CREATE]: WasmOpcode.i32_store,
    [MintOpcode.MINT_SIMD_V128_I8_CREATE]: WasmOpcode.i64_store,
};
+
+function emit_simd (
+ builder: WasmBuilder, ip: MintOpcodePtr,
+ opcode: MintOpcode, opname: string,
+ argCount: number, index: number
+) : boolean {
+ // First, if compiling an intrinsic attempt to emit the special vectorized implementation
+ // We only do this if SIMD is enabled since we'll be using the v128 opcodes.
+ if (builder.options.enableSimd && getIsWasmSimdSupported()) {
+ switch (argCount) {
+ case 2:
+ if (emit_simd_2(builder, ip, <SimdIntrinsic2>index))
+ return true;
+ break;
+ case 3:
+ if (emit_simd_3(builder, ip, <SimdIntrinsic3>index))
+ return true;
+ break;
+ case 4:
+ if (emit_simd_4(builder, ip, <SimdIntrinsic4>index))
+ return true;
+ break;
+ }
+ }
+
+ // Fall back to a mix of non-vectorized wasm and the interpreter's implementation of the opcodes
+ switch (opcode) {
+ case MintOpcode.MINT_SIMD_V128_LDC: {
+ if (builder.options.enableSimd && getIsWasmSimdSupported()) {
+ builder.local("pLocals");
+ builder.appendSimd(WasmSimdOpcode.v128_const);
+ const view = Module.HEAPU8.slice(<any>ip + 4, <any>ip + 4 + sizeOfV128);
+ builder.appendBytes(view);
+ append_simd_store(builder, ip);
+ } else {
+ // dest
+ append_ldloca(builder, getArgU16(ip, 1), sizeOfV128);
+ // src (ip + 2)
+ builder.ptr_const(<any>ip + 4);
+ append_memmove_dest_src(builder, sizeOfV128);
+ }
+ return true;
+ }
+ case MintOpcode.MINT_SIMD_V128_I1_CREATE:
+ case MintOpcode.MINT_SIMD_V128_I2_CREATE:
+ case MintOpcode.MINT_SIMD_V128_I4_CREATE:
+ case MintOpcode.MINT_SIMD_V128_I8_CREATE: {
+ // These opcodes pack a series of locals into a vector
+ const elementSize = simdCreateSizes[opcode],
+ numElements = sizeOfV128 / elementSize,
+ destOffset = getArgU16(ip, 1),
+ srcOffset = getArgU16(ip, 2),
+ loadOp = simdCreateLoadOps[opcode],
+ storeOp = simdCreateStoreOps[opcode];
+ for (let i = 0; i < numElements; i++) {
+ builder.local("pLocals");
+ // load element from stack slot
+ append_ldloc(builder, srcOffset + (i * sizeOfStackval), loadOp);
+ // then store to destination element
+ append_stloc_tail(builder, destOffset + (i * elementSize), storeOp);
+ }
+ return true;
+ }
+ case MintOpcode.MINT_SIMD_INTRINS_P_P: {
+ counters.simdFallback[opname] = (counters.simdFallback[opname] || 0) + 1;
+ // res
+ append_ldloca(builder, getArgU16(ip, 1), sizeOfV128);
+ // src
+ append_ldloca(builder, getArgU16(ip, 2), 0);
+ const importName = get_import_name(builder, "simd_p_p", <any>cwraps.mono_jiterp_get_simd_intrinsic(1, index));
+ builder.callImport(importName);
+ return true;
+ }
+ case MintOpcode.MINT_SIMD_INTRINS_P_PP: {
+ counters.simdFallback[opname] = (counters.simdFallback[opname] || 0) + 1;
+ // res
+ append_ldloca(builder, getArgU16(ip, 1), sizeOfV128);
+ // src
+ append_ldloca(builder, getArgU16(ip, 2), 0);
+ append_ldloca(builder, getArgU16(ip, 3), 0);
+ const importName = get_import_name(builder, "simd_p_pp", <any>cwraps.mono_jiterp_get_simd_intrinsic(2, index));
+ builder.callImport(importName);
+ return true;
+ }
+ case MintOpcode.MINT_SIMD_INTRINS_P_PPP: {
+ counters.simdFallback[opname] = (counters.simdFallback[opname] || 0) + 1;
+ // res
+ append_ldloca(builder, getArgU16(ip, 1), sizeOfV128);
+ // src
+ append_ldloca(builder, getArgU16(ip, 2), 0);
+ append_ldloca(builder, getArgU16(ip, 3), 0);
+ append_ldloca(builder, getArgU16(ip, 4), 0);
+ const importName = get_import_name(builder, "simd_p_ppp", <any>cwraps.mono_jiterp_get_simd_intrinsic(3, index));
+ builder.callImport(importName);
+ return true;
+ }
+ default:
+ console.log(`MONO_WASM: jiterpreter emit_simd failed for ${opname}`);
+ return false;
+ }
+}
+
+function append_simd_store (builder: WasmBuilder, ip: MintOpcodePtr) {
+ append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_store);
+}
+
+function append_simd_2_load (builder: WasmBuilder, ip: MintOpcodePtr, loadOp?: WasmSimdOpcode) {
+ builder.local("pLocals");
+ // This || is harmless since v128_load is 0
+ append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, loadOp || WasmSimdOpcode.v128_load);
+}
+
+function append_simd_3_load (builder: WasmBuilder, ip: MintOpcodePtr) {
+ builder.local("pLocals");
+ append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+ // FIXME: Can rhs be a scalar? We handle shifts separately already
+ append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+}
+
+function append_simd_4_load (builder: WasmBuilder, ip: MintOpcodePtr) {
+ builder.local("pLocals");
+ append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+ append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+ append_ldloc(builder, getArgU16(ip, 4), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+}
+
// Binary intrinsics whose right-hand operand is a scalar i32 shift amount
// rather than a v128, so emit_simd_3 must load it with i32_load.
// NOTE(review): there is no V128_I8_RIGHT_SHIFT entry alongside the other
// signed right shifts — presumably no such intrinsic exists in
// interp-simd-intrins.def; confirm rather than assuming an omission.
const simdShiftTable = new Set<SimdIntrinsic3>([
    SimdIntrinsic3.V128_I1_LEFT_SHIFT,
    SimdIntrinsic3.V128_I2_LEFT_SHIFT,
    SimdIntrinsic3.V128_I4_LEFT_SHIFT,
    SimdIntrinsic3.V128_I8_LEFT_SHIFT,

    SimdIntrinsic3.V128_I1_RIGHT_SHIFT,
    SimdIntrinsic3.V128_I2_RIGHT_SHIFT,
    SimdIntrinsic3.V128_I4_RIGHT_SHIFT,

    SimdIntrinsic3.V128_I1_URIGHT_SHIFT,
    SimdIntrinsic3.V128_I2_URIGHT_SHIFT,
    SimdIntrinsic3.V128_I4_URIGHT_SHIFT,
    SimdIntrinsic3.V128_I8_URIGHT_SHIFT,
]);
+
+function append_stloc_simd_zero (builder: WasmBuilder, offset: number) {
+ builder.local("pLocals");
+ builder.appendSimd(WasmSimdOpcode.v128_const);
+ builder.appendBytes(new Uint8Array(sizeOfV128));
+ append_stloc_tail(builder, offset, WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_store);
+}
+
+function emit_simd_2 (builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic2) : boolean {
+ const simple = <WasmSimdOpcode>cwraps.mono_jiterp_get_simd_opcode(1, index);
+ if (simple) {
+ append_simd_2_load(builder, ip);
+ builder.appendSimd(simple);
+ append_simd_store(builder, ip);
+ return true;
+ }
+
+ switch (index) {
+ case SimdIntrinsic2.V128_I1_CREATE_SCALAR:
+ // Zero then write scalar component
+ builder.local("pLocals");
+ append_stloc_simd_zero(builder, getArgU16(ip, 1));
+ append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load8_s);
+ append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store8);
+ return true;
+ case SimdIntrinsic2.V128_I2_CREATE_SCALAR:
+ // Zero then write scalar component
+ builder.local("pLocals");
+ append_stloc_simd_zero(builder, getArgU16(ip, 1));
+ append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load16_s);
+ append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store16);
+ return true;
+ case SimdIntrinsic2.V128_I4_CREATE_SCALAR:
+ // Zero then write scalar component
+ builder.local("pLocals");
+ append_stloc_simd_zero(builder, getArgU16(ip, 1));
+ append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load);
+ append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
+ return true;
+ case SimdIntrinsic2.V128_I8_CREATE_SCALAR:
+ // Zero then write scalar component
+ builder.local("pLocals");
+ append_stloc_simd_zero(builder, getArgU16(ip, 1));
+ append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i64_load);
+ append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i64_store);
+ return true;
+
+ case SimdIntrinsic2.V128_I1_CREATE:
+ append_simd_2_load(builder, ip, WasmSimdOpcode.v128_load8_splat);
+ append_simd_store(builder, ip);
+ return true;
+ case SimdIntrinsic2.V128_I2_CREATE:
+ append_simd_2_load(builder, ip, WasmSimdOpcode.v128_load16_splat);
+ append_simd_store(builder, ip);
+ return true;
+ case SimdIntrinsic2.V128_I4_CREATE:
+ append_simd_2_load(builder, ip, WasmSimdOpcode.v128_load32_splat);
+ append_simd_store(builder, ip);
+ return true;
+ case SimdIntrinsic2.V128_I8_CREATE:
+ append_simd_2_load(builder, ip, WasmSimdOpcode.v128_load64_splat);
+ append_simd_store(builder, ip);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+function emit_simd_3 (builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic3) : boolean {
+ const simple = <WasmSimdOpcode>cwraps.mono_jiterp_get_simd_opcode(2, index);
+ if (simple) {
+ const isShift = simdShiftTable.has(index);
+ if (isShift) {
+ builder.local("pLocals");
+ append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+ append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.i32_load);
+ builder.appendSimd(simple);
+ append_simd_store(builder, ip);
+ } else {
+ append_simd_3_load(builder, ip);
+ builder.appendSimd(simple);
+ append_simd_store(builder, ip);
+ }
+ return true;
+ }
+
+ switch (index) {
+ case SimdIntrinsic3.V128_BITWISE_EQUALITY:
+ case SimdIntrinsic3.V128_BITWISE_INEQUALITY:
+ append_simd_3_load(builder, ip);
+ // FIXME: i64x2_ne and i64x2_any_true?
+ builder.appendSimd(WasmSimdOpcode.i64x2_eq);
+ builder.appendSimd(WasmSimdOpcode.i64x2_all_true);
+ if (index === SimdIntrinsic3.V128_BITWISE_INEQUALITY)
+ builder.appendU8(WasmOpcode.i32_eqz);
+ append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
+ return true;
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+function emit_simd_4 (builder: WasmBuilder, ip: MintOpcodePtr, index: SimdIntrinsic4) : boolean {
+ const simple = <WasmSimdOpcode>cwraps.mono_jiterp_get_simd_opcode(3, index);
+ if (simple) {
+ append_simd_4_load(builder, ip);
+ builder.appendSimd(simple);
+ append_simd_store(builder, ip);
+ return true;
+ }
+
+ switch (index) {
+ case SimdIntrinsic4.V128_CONDITIONAL_SELECT:
+ builder.local("pLocals");
+ // Wasm spec: result = iorπ(iandπ(π1, π3), iandπ(π2, inotπ(π3)))
+ // Our opcode: *arg0 = (*arg2 & *arg1) | (*arg3 & ~*arg1)
+ append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+ append_ldloc(builder, getArgU16(ip, 4), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+ append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.PREFIX_simd, WasmSimdOpcode.v128_load);
+ builder.appendSimd(WasmSimdOpcode.v128_bitselect);
+ append_simd_store(builder, ip);
+ return true;
+ default:
+ return false;
+ }
+}
+
function append_safepoint (builder: WasmBuilder, ip: MintOpcodePtr) {
// Check whether a safepoint is required
builder.ptr_const(cwraps.mono_jiterp_get_polling_required_address());
import cwraps from "./cwraps";
import {
MintOpcodePtr, WasmValtype, WasmBuilder, addWasmFunctionPointer,
- _now, elapsedTimes, counters, getRawCwrap, importDef,
+ _now, elapsedTimes,
+ counters, getRawCwrap, importDef,
JiterpreterOptions, getOptions, recordFailure,
JiterpMember, getMemberOffset,
BailoutReasonNames, BailoutReason
export const
sizeOfDataItem = 4,
sizeOfObjectHeader = 8,
+ sizeOfV128 = 16,
+ sizeOfStackval = 8,
// While stats are enabled, dump concise stats every N traces so that it's clear a long-running
// task isn't frozen if it's jitting lots of traces
autoDumpInterval = 500;
traceImports = [
importDef("bailout", recordBailout),
- importDef("copy_pointer", getRawCwrap("mono_wasm_copy_managed_pointer")),
+ importDef("copy_ptr", getRawCwrap("mono_wasm_copy_managed_pointer")),
importDef("entry", getRawCwrap("mono_jiterp_increase_entry_count")),
importDef("value_copy", getRawCwrap("mono_jiterp_value_copy")),
importDef("gettype", getRawCwrap("mono_jiterp_gettype_ref")),
WasmValtype.i32, true
);
builder.defineType(
- "copy_pointer",
- {
+ "copy_ptr", {
"dest": WasmValtype.i32,
"src": WasmValtype.i32
},
},
WasmValtype.i32, true
);
+ builder.defineType(
+ "simd_p_p", {
+ "arg0": WasmValtype.i32,
+ "arg1": WasmValtype.i32,
+ }, WasmValtype.void, true
+ );
+ builder.defineType(
+ "simd_p_pp", {
+ "arg0": WasmValtype.i32,
+ "arg1": WasmValtype.i32,
+ "arg2": WasmValtype.i32,
+ }, WasmValtype.void, true
+ );
+ builder.defineType(
+ "simd_p_ppp", {
+ "arg0": WasmValtype.i32,
+ "arg1": WasmValtype.i32,
+ "arg2": WasmValtype.i32,
+ "arg3": WasmValtype.i32,
+ }, WasmValtype.void, true
+ );
const traceImports = getTraceImports();
// Pre-define function imports as persistent
for (let i = 0; i < traceImports.length; i++) {
mono_assert(traceImports[i], () => `trace #${i} missing`);
- builder.defineImportedFunction("i", traceImports[i][0], traceImports[i][1], false, true, traceImports[i][2]);
+ builder.defineImportedFunction("i", traceImports[i][0], traceImports[i][1], true, traceImports[i][2]);
}
}
if (trace > 0)
console.log(`${(<any>(builder.base)).toString(16)} ${methodFullName || traceName} generated ${buffer.length} byte(s) of wasm`);
counters.bytesGenerated += buffer.length;
+
if (buffer.length >= maxModuleSize) {
console.warn(`MONO_WASM: Jiterpreter generated too much code (${buffer.length} bytes) for trace ${traceName}. Please report this issue.`);
return 0;
}
- const traceModule = new WebAssembly.Module(buffer);
- const traceInstance = new WebAssembly.Instance(traceModule, {
- i: builder.getImportedFunctionTable(),
- c: <any>builder.getConstants(),
- m: { h: (<any>Module).asm.memory },
- });
+ const traceModule = new WebAssembly.Module(buffer);
+ const wasmImports = builder.getWasmImports();
+ const traceInstance = new WebAssembly.Instance(traceModule, wasmImports);
// Get the exported trace function
const fn = traceInstance.exports[traceName];
console.log(builder.traceBuf[i]);
}
- console.log(`// MONO_WASM: ${methodFullName || methodName}:${traceOffset.toString(16)} generated, blob follows //`);
+ console.log(`// MONO_WASM: ${methodFullName || traceName} generated, blob follows //`);
let s = "", j = 0;
try {
// We may have thrown an uncaught exception while inside a block,
console.log(`// ${keys[i]}: ${abortCounts[keys[i]]} abort(s)`);
}
- if ((typeof (globalThis.setTimeout) === "function") && (b !== undefined))
+ for (const k in counters.simdFallback)
+ console.log(`// simd ${k}: ${counters.simdFallback[k]} fallback insn(s)`);
+
+ if ((typeof(globalThis.setTimeout) === "function") && (b !== undefined))
setTimeout(
() => jiterpreter_dump_stats(b),
15000
--- /dev/null
+(module
+ (func $test (export "test")
+ (i32x4.splat (i32.const 0))
+ drop
+ )
+)
<PropertyGroup>
<ICULibDir Condition="'$(MonoWasmThreads)' != 'true'">$([MSBuild]::NormalizeDirectory('$(PkgMicrosoft_NETCore_Runtime_ICU_Transport)', 'runtimes', 'browser-wasm', 'native', 'lib'))</ICULibDir>
<ICULibDir Condition="'$(MonoWasmThreads)' == 'true'">$([MSBuild]::NormalizeDirectory('$(PkgMicrosoft_NETCore_Runtime_ICU_Transport)', 'runtimes', 'browser-wasm-threads', 'native', 'lib'))</ICULibDir>
- <WasmEnableSIMD Condition="'$(WasmEnableSIMD)' == ''">false</WasmEnableSIMD>
+ <WasmEnableSIMD Condition="'$(WasmEnableSIMD)' == ''">true</WasmEnableSIMD>
<WasmEnableLegacyJsInterop Condition="'$(WasmEnableLegacyJsInterop)' == ''">true</WasmEnableLegacyJsInterop>
<FilterSystemTimeZones Condition="'$(FilterSystemTimeZones)' == ''">false</FilterSystemTimeZones>
<EmccCmd>emcc</EmccCmd>
<ItemGroup>
<_EmccLinkFlags Include="-s INITIAL_MEMORY=$(EmccInitialHeapSize)" />
<_EmccLinkFlags Include="-s STACK_SIZE=$(EmccStackSize)" />
- <_EmccCommonFlags Condition="'$(WasmEnableSIMD)' == 'true'" Include="-msimd128" />
+ <_EmccCommonFlags Include="-msimd128" />
<_EmccCommonFlags Condition="'$(MonoWasmThreads)' == 'true'" Include="-s USE_PTHREADS=1" />
<_EmccLinkFlags Condition="'$(MonoWasmThreads)' == 'true'" Include="-Wno-pthreads-mem-growth" />
<_EmccLinkFlags Condition="'$(MonoWasmThreads)' == 'true'" Include="-s PTHREAD_POOL_SIZE=0" />
</Target>
<Target Name="GenerateMintopsTS"
- Inputs="$(MonoProjectRoot)wasm\runtime\genmintops.py;$(MonoProjectRoot)mono\mini\interp\mintops.def"
+ Inputs="$(MonoProjectRoot)wasm\runtime\genmintops.py;$(MonoProjectRoot)mono\mini\interp\mintops.def;$(MonoProjectRoot)mono\mini\interp\interp-simd-intrins.def"
Outputs="$(NativeGeneratedFilesDir)mintops.ts">
- <Exec Command="$(PythonCmd) $(MonoProjectRoot)wasm/runtime/genmintops.py $(MonoProjectRoot)mono/mini/interp/mintops.def $(NativeGeneratedFilesDir)mintops.ts" />
+ <Exec Command="$(PythonCmd) $(MonoProjectRoot)wasm/runtime/genmintops.py $(MonoProjectRoot)mono/mini/interp/mintops.def $(MonoProjectRoot)mono/mini/interp/interp-simd-intrins.def $(NativeGeneratedFilesDir)mintops.ts" />
</Target>
<Target Name="BuildWithRollup"