}
break;
}
+ /* SIMD that is not table-generated */
+ /* TODO: once https://github.com/dotnet/runtime/issues/83252 is done,
+ * move the following two to the codegen table in simd-arm64.h
+ */
+ case OP_ONES_COMPLEMENT:
+ arm_neon_not (code, get_vector_size_macro (ins), dreg, sreg1);
+ break;
+ case OP_NEGATION:
+ /* inst_c1 carries the managed element type: FNEG for floats, NEG for ints. */
+ if (is_type_float_macro (ins->inst_c1)) {
+ arm_neon_fneg (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1);
+ } else {
+ arm_neon_neg (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1);
+ }
+ break;
+ case OP_XBINOP:
+ /* Integer min/max have no single NEON encoding for all widths here, so
+ * they go through emit_* helpers; inst_c0 selects the sub-operation.
+ * NOTE(review): other XBINOP sub-ops are presumably table-generated — confirm. */
+ switch (ins->inst_c0) {
+ case OP_IMAX:
+ code = emit_smax_i8 (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
+ break;
+ case OP_IMAX_UN:
+ code = emit_umax_i8 (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
+ break;
+ case OP_IMIN:
+ code = emit_smin_i8 (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
+ break;
+ case OP_IMIN_UN:
+ code = emit_umin_i8 (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
+ break;
+ default:
+ g_assert_not_reached ();
+ }
+ break;
+ case OP_XZERO:
+ /* v EOR v == 0 for any prior contents of dreg. */
+ arm_neon_eor_16b (code, dreg, dreg, dreg);
+ break;
+ case OP_XONES:
+ /* Zero the register, then bitwise NOT yields all-ones (0xFF..FF). */
+ arm_neon_eor_16b (code, dreg, dreg, dreg);
+ arm_neon_not_16b (code, dreg, dreg);
+ break;
+ case OP_XEXTRACT:
+ code = emit_xextract (code, VREG_FULL, ins->inst_c0, dreg, sreg1);
+ break;
case OP_STOREX_MEMBASE:
code = emit_strfpq (code, sreg1, dreg, ins->inst_offset);
break;
if (cfg->compile_aot && cfg->code_exec_only) {
mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_X128_GOT, ins->inst_p0);
arm_ldrx_lit (code, ARMREG_IP0, 0);
- arm_ldrfpq (code, ins->dreg, ARMREG_IP0, 0);
+ arm_ldrfpq (code, dreg, ARMREG_IP0, 0);
} else {
mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_X128, ins->inst_p0);
- arm_neon_ldrq_lit (code, ins->dreg, 0);
+ arm_neon_ldrq_lit (code, dreg, 0);
}
break;
}
case OP_EXPAND_I4:
case OP_EXPAND_I8: {
const int t = get_type_size_macro (ins->inst_c1);
- arm_neon_dup_g (code, VREG_FULL, t, ins->dreg, ins->sreg1);
+ arm_neon_dup_g (code, VREG_FULL, t, dreg, sreg1);
break;
}
case OP_EXPAND_R4:
case OP_EXPAND_R8: {
const int t = get_type_size_macro (ins->inst_c1);
- arm_neon_fdup_e (code, VREG_FULL, t, ins->dreg, ins->sreg1, 0);
+ arm_neon_fdup_e (code, VREG_FULL, t, dreg, sreg1, 0);
break;
}
case OP_EXTRACT_I1:
const int t = get_type_size_macro (ins->inst_c1);
// smov is not defined for i64
if (is_type_unsigned_macro (ins->inst_c1) || t == TYPE_I64) {
- arm_neon_umov (code, t, ins->dreg, ins->sreg1, ins->inst_c0);
+ arm_neon_umov (code, t, dreg, sreg1, ins->inst_c0);
} else {
- arm_neon_smov (code, t, ins->dreg, ins->sreg1, ins->inst_c0);
+ arm_neon_smov (code, t, dreg, sreg1, ins->inst_c0);
}
break;
}
// Technically, this broadcasts element #inst_c0 to all dest XREG elements; whereas it should
// set the FREG to the said element. Since FREG and XREG pool is the same on arm64 and the rest
// of the F/XREG is ignored in FREG mode, this operation remains valid.
- arm_neon_fdup_e (code, VREG_FULL, t, ins->dreg, ins->sreg1, ins->inst_c0);
+ arm_neon_fdup_e (code, VREG_FULL, t, dreg, sreg1, ins->inst_c0);
}
break;
+ case OP_INSERT_I1:
+ case OP_INSERT_I2:
+ case OP_INSERT_I4:
+ case OP_INSERT_I8: {
+ /* Insert general-purpose register sreg1 into lane inst_c0 of vector dreg;
+ * lane width is derived from the managed element type in inst_c1. */
+ const int t = get_type_size_macro (ins->inst_c1);
+ arm_neon_ins_g(code, t, dreg, sreg1, ins->inst_c0);
+ break;
+ }
+ case OP_INSERT_R4:
+ case OP_INSERT_R8: {
+ /* Insert FP register sreg1 (lane 0) into lane inst_c0 of vector dreg.
+ * Only R4/R8 are legal element types for these opcodes. */
+ int t = 0;
+ switch (ins->inst_c1) {
+ case MONO_TYPE_R4:
+ t = SIZE_4;
+ break;
+ case MONO_TYPE_R8:
+ t = SIZE_8;
+ break;
+ default:
+ /* FIX: previously fell through with t == 0, silently emitting an
+ * INS with the wrong lane width for an unexpected inst_c1. Match
+ * the OP_XBINOP convention and assert instead. */
+ g_assert_not_reached ();
+ }
+ arm_neon_ins_e(code, t, dreg, sreg1, ins->inst_c0, 0);
+ break;
+ }
case OP_ARM64_XADDV: {
switch (ins->inst_c0) {
case INTRINS_AARCH64_ADV_SIMD_FADDV:
if (ins->inst_c1 == MONO_TYPE_R8) {
- arm_neon_faddp (code, VREG_FULL, TYPE_F64, ins->dreg, ins->sreg1, ins->sreg1);
+ arm_neon_faddp (code, VREG_FULL, TYPE_F64, dreg, sreg1, sreg1);
} else if (ins->inst_c1 == MONO_TYPE_R4) {
+ /* R4: two pairwise adds collapse 4 lanes into lane 0. */
- arm_neon_faddp (code, VREG_FULL, TYPE_F32, ins->dreg, ins->sreg1, ins->sreg1);
- arm_neon_faddp (code, VREG_FULL, TYPE_F32, ins->dreg, ins->dreg, ins->dreg);
+ arm_neon_faddp (code, VREG_FULL, TYPE_F32, dreg, sreg1, sreg1);
+ arm_neon_faddp (code, VREG_FULL, TYPE_F32, dreg, dreg, dreg);
} else {
g_assert_not_reached ();
}
+ /* FIX: missing break — without it the FADDV arm falls through into the
+ * integer UADDV/SADDV handling, emitting a spurious addp for R8 and
+ * asserting for R4. */
+ break;
case INTRINS_AARCH64_ADV_SIMD_UADDV:
case INTRINS_AARCH64_ADV_SIMD_SADDV:
if (get_type_size_macro (ins->inst_c1) == TYPE_I64)
- arm_neon_addp (code, VREG_FULL, TYPE_I64, ins->dreg, ins->sreg1, ins->sreg1);
+ arm_neon_addp (code, VREG_FULL, TYPE_I64, dreg, sreg1, sreg1);
else
g_assert_not_reached (); // remaining int types are handled through the codegen table
break;
}
break;
}
+ case OP_CREATE_SCALAR_INT: {
+ const int t = get_type_size_macro (ins->inst_c1);
+ /* Zero the whole vector first so lanes 1..N are well-defined, then
+ * insert the scalar into lane 0. */
+ arm_neon_eor_16b (code, dreg, dreg, dreg);
+ arm_neon_ins_g(code, t, dreg, sreg1, 0);
+ break;
+ }
+ case OP_CREATE_SCALAR_FLOAT: {
+ int t = 0;
+ switch (ins->inst_c1) {
+ case MONO_TYPE_R4:
+ t = SIZE_4;
+ break;
+ case MONO_TYPE_R8:
+ t = SIZE_8;
+ break;
+ default:
+ /* FIX: previously fell through with t == 0 for an unexpected
+ * element type; assert like the other opcode handlers do. */
+ g_assert_not_reached ();
+ }
+ // Use a temp register for zero op, as sreg1 and dreg share the same register here
+ arm_neon_eor_16b (code, NEON_TMP_REG, NEON_TMP_REG, NEON_TMP_REG);
+ arm_neon_ins_e(code, t, NEON_TMP_REG, sreg1, 0, 0);
+ arm_neon_mov (code, dreg, NEON_TMP_REG);
+ break;
+ }
+ case OP_CREATE_SCALAR_UNSAFE_INT: {
+ /* "Unsafe" variant: only lane 0 is written; upper lanes keep whatever
+ * dreg held before (callers must not rely on them). */
+ const int t = get_type_size_macro (ins->inst_c1);
+ arm_neon_ins_g(code, t, dreg, sreg1, 0);
+ break;
+ }
+ case OP_CREATE_SCALAR_UNSAFE_FLOAT: {
+ /* When dreg == sreg1, lane 0 already holds the value — emit nothing. */
+ if (dreg != sreg1) {
+ int t = 0;
+ switch (ins->inst_c1) {
+ case MONO_TYPE_R4:
+ t = SIZE_4;
+ break;
+ case MONO_TYPE_R8:
+ t = SIZE_8;
+ break;
+ default:
+ /* FIX: same silent t == 0 fallthrough as above. */
+ g_assert_not_reached ();
+ }
+ arm_neon_ins_e(code, t, dreg, sreg1, 0, 0);
+ }
+ break;
+ }
+ // Enable this when adding support for Narrow and enable support for Create at the same time
+ // case OP_XCONCAT:
+ // arm_neon_ext_16b(code, dreg, sreg1, sreg2, 8);
+ // break;
/* BRANCH */
case OP_BR:
arm_cbnzx (code, sreg1, 0);
break;
- /* SIMD that is not table-generated */
- /* TODO: once https://github.com/dotnet/runtime/issues/83252 is done,
- * move the following two to the codegen table in simd-arm64.h
- */
- case OP_ONES_COMPLEMENT:
- arm_neon_not (code, get_vector_size_macro (ins), dreg, sreg1);
- break;
- case OP_NEGATION:
- if (is_type_float_macro (ins->inst_c1)) {
- arm_neon_fneg (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1);
- } else {
- arm_neon_neg (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1);
- }
- break;
- case OP_XBINOP:
- switch (ins->inst_c0) {
- case OP_IMAX:
- code = emit_smax_i8 (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
- break;
- case OP_IMAX_UN:
- code = emit_umax_i8 (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
- break;
- case OP_IMIN:
- code = emit_smin_i8 (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
- break;
- case OP_IMIN_UN:
- code = emit_umin_i8 (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
- break;
- default:
- g_assert_not_reached ();
- }
- break;
- case OP_XZERO:
- arm_neon_eor_16b (code, dreg, dreg, dreg);
- break;
- case OP_XONES:
- arm_neon_eor_16b (code, dreg, dreg, dreg);
- arm_neon_not_16b (code, dreg, dreg);
- break;
- case OP_XEXTRACT:
- code = emit_xextract (code, VREG_FULL, ins->inst_c0, dreg, sreg1);
- break;
-
/* ALU */
case OP_IADD:
arm_addw (code, dreg, sreg1, sreg2);
}
static int
-type_to_insert_op (MonoType *type)
+type_to_insert_op (MonoTypeEnum type)
{
- switch (type->type) {
+ switch (type) {
case MONO_TYPE_I1:
case MONO_TYPE_U1:
return OP_INSERT_I1;
+ /* Builds a vector value element-by-element: starts from an all-zero vector
+ * and emits one OP_INSERT_* per call argument. inst_c0 = destination lane,
+ * inst_c1 = element type (consumed by the arm64 mini-JIT to pick the lane
+ * width — see the OP_INSERT_* handlers). Returns the last emitted ins. */
static MonoInst *
emit_vector_create_elementwise (
MonoCompile *cfg, MonoMethodSignature *fsig, MonoType *vtype,
- MonoType *etype, MonoInst **args)
+ MonoTypeEnum type, MonoInst **args)
{
- int op = type_to_insert_op (etype);
+ int op = type_to_insert_op (type);
MonoClass *vklass = mono_class_from_mono_type_internal (vtype);
MonoInst *ins = emit_xzero (cfg, vklass);
for (int i = 0; i < fsig->param_count; ++i) {
ins = emit_simd_ins (cfg, vklass, op, ins->dreg, args [i]->dreg);
ins->inst_c0 = i;
+ /* Record the element type so the back end can select the lane size. */
+ ins->inst_c1 = type;
}
return ins;
}
SN_AsUInt16,
SN_AsUInt32,
SN_AsUInt64,
- SN_AsVector128,
- SN_AsVector2,
- SN_AsVector256,
- SN_AsVector3,
- SN_AsVector4,
SN_BitwiseAnd,
SN_BitwiseOr,
SN_Ceiling,
SN_ToScalar,
SN_ToVector128,
SN_ToVector128Unsafe,
- SN_ToVector256,
- SN_ToVector256Unsafe,
SN_WidenLower,
SN_WidenUpper,
SN_WithElement,
if (!COMPILE_LLVM (cfg))
return NULL;
#endif
-// FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64
-#ifdef TARGET_ARM64
- if (!(cfg->compile_aot && cfg->full_aot && !cfg->interp))
- return NULL;
-#endif
int id = lookup_intrins (sri_vector_methods, sizeof (sri_vector_methods), cmethod);
if (id == -1) {
return NULL;
}
- if (!strcmp (m_class_get_name (cfg->method->klass), "Vector256") || !strcmp (m_class_get_name (cfg->method->klass), "Vector512"))
+ if (!strcmp (m_class_get_name (cmethod->klass), "Vector256") || !strcmp (m_class_get_name (cmethod->klass), "Vector512"))
return NULL;
// FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64
#ifdef TARGET_ARM64
if (!COMPILE_LLVM (cfg)) {
+ if (!(!strcmp (m_class_get_name (cmethod->klass), "Vector128") || !strcmp (m_class_get_name (cmethod->klass), "Vector")))
+ return NULL;
switch (id) {
- case SN_Add:
- case SN_Equals:
- case SN_GreaterThan:
- case SN_GreaterThanOrEqual:
- case SN_LessThan:
- case SN_LessThanOrEqual:
- case SN_Negate:
- case SN_OnesComplement:
- case SN_EqualsAny:
- case SN_GreaterThanAny:
- case SN_GreaterThanOrEqualAny:
- case SN_LessThanAny:
- case SN_LessThanOrEqualAny:
- case SN_EqualsAll:
- case SN_GreaterThanAll:
- case SN_GreaterThanOrEqualAll:
- case SN_LessThanAll:
- case SN_LessThanOrEqualAll:
- case SN_Subtract:
- case SN_BitwiseAnd:
- case SN_BitwiseOr:
- case SN_Xor:
- case SN_As:
- case SN_AsByte:
- case SN_AsDouble:
- case SN_AsInt16:
- case SN_AsInt32:
- case SN_AsInt64:
- case SN_AsSByte:
- case SN_AsSingle:
- case SN_AsUInt16:
- case SN_AsUInt32:
- case SN_AsUInt64:
- case SN_Max:
- case SN_Min:
- case SN_Sum:
- case SN_ToScalar:
- case SN_Floor:
- case SN_Ceiling:
- case SN_Divide:
- case SN_Multiply:
- case SN_Sqrt:
- case SN_Abs:
- break;
- default:
+ case SN_AndNot:
+ case SN_ConditionalSelect:
+ case SN_ConvertToDouble:
+ case SN_ConvertToInt32:
+ case SN_ConvertToInt64:
+ case SN_ConvertToSingle:
+ case SN_ConvertToUInt32:
+ case SN_ConvertToUInt64:
+ case SN_Create:
+ case SN_Dot:
+ case SN_ExtractMostSignificantBits:
+ case SN_GetElement:
+ case SN_GetLower:
+ case SN_GetUpper:
+ case SN_Narrow:
+ case SN_Shuffle:
+ case SN_ToVector128:
+ case SN_ToVector128Unsafe:
+ case SN_WidenLower:
+ case SN_WidenUpper:
+ case SN_WithElement:
return NULL;
+ default:
+ break;
}
- MonoClass *arg0_class = mono_class_from_mono_type_internal (fsig->params [0]);
- int class_size = mono_class_value_size (arg0_class, NULL);
- if (class_size != 16)
- return NULL;
}
#endif
MonoType *etype = get_vector_t_elem_type (fsig->ret);
if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype))
return NULL;
- if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype))
- return emit_simd_ins (cfg, klass, type_to_expand_op (etype->type), args [0]->dreg, -1);
- else if (is_create_from_half_vectors_overload (fsig))
+ if (fsig->param_count == 1 && mono_metadata_type_equal (fsig->params [0], etype)) {
+ MonoInst* ins = emit_simd_ins (cfg, klass, type_to_expand_op (etype->type), args [0]->dreg, -1);
+ ins->inst_c1 = arg0_type;
+ return ins;
+ } else if (is_create_from_half_vectors_overload (fsig))
return emit_simd_ins (cfg, klass, OP_XCONCAT, args [0]->dreg, args [1]->dreg);
else if (is_elementwise_create_overload (fsig, etype))
- return emit_vector_create_elementwise (cfg, fsig, fsig->ret, etype, args);
+ return emit_vector_create_elementwise (cfg, fsig, fsig->ret, arg0_type, args);
break;
}
case SN_CreateScalar: {
MonoType *etype = get_vector_t_elem_type (fsig->ret);
if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype))
return NULL;
- return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR, -1, arg0_type, fsig, args);
+ if (COMPILE_LLVM (cfg))
+ return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR, -1, arg0_type, fsig, args);
+ else {
+ if (type_enum_is_float (arg0_type)) {
+ return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR_FLOAT, -1, arg0_type, fsig, args);
+ } else {
+ return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR_INT, -1, arg0_type, fsig, args);
+ }
+ }
+
}
case SN_CreateScalarUnsafe: {
MonoType *etype = get_vector_t_elem_type (fsig->ret);
if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype))
return NULL;
- return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR_UNSAFE, -1, arg0_type, fsig, args);
+ if (COMPILE_LLVM (cfg))
+ return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR_UNSAFE, -1, arg0_type, fsig, args);
+ else {
+ if (type_enum_is_float (arg0_type)) {
+ return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR_UNSAFE_FLOAT, -1, arg0_type, fsig, args);
+ } else {
+ return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR_UNSAFE_INT, -1, arg0_type, fsig, args);
+ }
+ }
}
case SN_Dot: {
if (!is_element_type_primitive (fsig->params [0]))