feature = (MonoCPUFeatures) (MONO_CPU_X86_FULL_SSEAVX_COMBINED & ~feature);
#elif defined(TARGET_ARM64)
- // TODO: neon, sha1, sha2, asimd, etc...
+ if (!strcmp (attr + prefix, "base"))
+ feature = MONO_CPU_ARM64_BASE;
+ else if (!strcmp (attr + prefix, "crc"))
+ feature = MONO_CPU_ARM64_CRC;
#elif defined(TARGET_WASM)
if (!strcmp (attr + prefix, "simd"))
feature = MONO_CPU_WASM_SIMD;
INTRINS_OVR(WASM_ANYTRUE_V4, wasm_anytrue)
INTRINS_OVR(WASM_ANYTRUE_V2, wasm_anytrue)
#endif
+#if defined(TARGET_ARM64)
+INTRINS_OVR(BITREVERSE_I32, bitreverse)
+INTRINS_OVR(BITREVERSE_I64, bitreverse)
+INTRINS(AARCH64_CRC32B, aarch64_crc32b)
+INTRINS(AARCH64_CRC32H, aarch64_crc32h)
+INTRINS(AARCH64_CRC32W, aarch64_crc32w)
+INTRINS(AARCH64_CRC32X, aarch64_crc32x)
+INTRINS(AARCH64_CRC32CB, aarch64_crc32cb)
+INTRINS(AARCH64_CRC32CH, aarch64_crc32ch)
+INTRINS(AARCH64_CRC32CW, aarch64_crc32cw)
+INTRINS(AARCH64_CRC32CX, aarch64_crc32cx)
+#endif
#undef INTRINS
#undef INTRINS_OVR
return "unknown fp";
}
+/*
+ * mono_arch_xregname:
+ *
+ *   Return the printable name of the SIMD/FP register REG ("v0".."v31"),
+ * or "unknown" if REG is out of range.
+ */
+const char *
+mono_arch_xregname (int reg)
+{
+ static const char * rnames[] = {
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+ "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+ "v30", "v31"
+ };
+ if (reg >= 0 && reg < 32)
+ return rnames [reg];
+ return "unknown";
+}
+
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
#define MONO_MAX_IREGS 32
#define MONO_MAX_FREGS 32
+#define MONO_MAX_XREGS 32
+
+#if !defined(DISABLE_SIMD) && defined(ENABLE_NETCORE)
+#define MONO_ARCH_SIMD_INTRINSICS 1
+#endif
#define MONO_CONTEXT_SET_LLVM_EXC_REG(ctx, exc) do { (ctx)->regs [0] = (gsize)exc; } while (0)
/* v8..v15 */
#define MONO_ARCH_CALLEE_SAVED_FREGS 0xff00
+#define MONO_ARCH_CALLEE_SAVED_XREGS 0
+
+#define MONO_ARCH_CALLEE_XREGS MONO_ARCH_CALLEE_FREGS
+
#define MONO_ARCH_USE_FPSTACK FALSE
#define MONO_ARCH_INST_SREG2_MASK(ins) (0)
case OP_POPCNT64:
values [ins->dreg] = call_intrins (ctx, INTRINS_CTPOP_I64, &lhs, "");
break;
- case OP_LZCNT32:
- case OP_LZCNT64: {
- LLVMValueRef args [2];
- args [0] = lhs;
- args [1] = LLVMConstInt (LLVMInt1Type (), 1, FALSE);
- values [ins->dreg] = call_intrins (ctx, ins->opcode == OP_LZCNT32 ? INTRINS_CTLZ_I32 : INTRINS_CTLZ_I64, args, "");
- break;
- }
case OP_CTTZ32:
case OP_CTTZ64: {
LLVMValueRef args [2];
break;
}
#endif /* ENABLE_NETCORE */
-#endif /* SIMD */
+#endif /* defined(TARGET_X86) || defined(TARGET_AMD64) */
+
+// Shared between ARM64, X86 and AMD64
+#if defined(ENABLE_NETCORE) && (defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_AMD64))
+ case OP_LZCNT32:
+ case OP_LZCNT64: {
+ LLVMValueRef args [2];
+ args [0] = lhs;
+ args [1] = LLVMConstInt (LLVMInt1Type (), 1, FALSE);
+ values [ins->dreg] = LLVMBuildCall (builder, get_intrins (ctx, ins->opcode == OP_LZCNT32 ? INTRINS_CTLZ_I32 : INTRINS_CTLZ_I64), args, 2, "");
+ break;
+ }
+#endif
+
+#if defined(ENABLE_NETCORE) && defined(TARGET_ARM64)
+ case OP_XOP_I4_I4:
+ case OP_XOP_I8_I8: {
+ IntrinsicId id = (IntrinsicId)0;
+ switch (ins->inst_c0) {
+ case SIMD_OP_ARM64_RBIT32: id = INTRINS_BITREVERSE_I32; break;
+ case SIMD_OP_ARM64_RBIT64: id = INTRINS_BITREVERSE_I64; break;
+ default: g_assert_not_reached (); break;
+ }
+ values [ins->dreg] = call_intrins (ctx, id, &lhs, "");
+ break;
+ }
+ case OP_XOP_I4_I4_I4:
+ case OP_XOP_I4_I4_I8: {
+ IntrinsicId id = (IntrinsicId)0;
+ gboolean zext_last = FALSE;
+ switch (ins->inst_c0) {
+ case SIMD_OP_ARM64_CRC32B: id = INTRINS_AARCH64_CRC32B; zext_last = TRUE; break;
+ case SIMD_OP_ARM64_CRC32H: id = INTRINS_AARCH64_CRC32H; zext_last = TRUE; break;
+ case SIMD_OP_ARM64_CRC32W: id = INTRINS_AARCH64_CRC32W; zext_last = TRUE; break;
+ case SIMD_OP_ARM64_CRC32X: id = INTRINS_AARCH64_CRC32X; break;
+ case SIMD_OP_ARM64_CRC32CB: id = INTRINS_AARCH64_CRC32CB; zext_last = TRUE; break;
+ case SIMD_OP_ARM64_CRC32CH: id = INTRINS_AARCH64_CRC32CH; zext_last = TRUE; break;
+ case SIMD_OP_ARM64_CRC32CW: id = INTRINS_AARCH64_CRC32CW; zext_last = TRUE; break;
+ case SIMD_OP_ARM64_CRC32CX: id = INTRINS_AARCH64_CRC32CX; break;
+ default: g_assert_not_reached (); break;
+ }
+ LLVMValueRef arg1 = rhs;
+ if (zext_last)
+ arg1 = LLVMBuildZExt (ctx->builder, arg1, LLVMInt32Type (), "");
+ LLVMValueRef args [] = { lhs, arg1 };
+ values [ins->dreg] = call_intrins (ctx, id, args, "");
+ break;
+ }
+ case OP_LSCNT32:
+ case OP_LSCNT64: {
+ // %shr = ashr i32 %x, 31
+ // %xor = xor i32 %shr, %x
+ // %mul = shl i32 %xor, 1
+ // %add = or i32 %mul, 1
+ // %0 = tail call i32 @llvm.ctlz.i32(i32 %add, i1 false)
+ LLVMValueRef shr = LLVMBuildAShr (builder, lhs, ins->opcode == OP_LSCNT32 ?
+ LLVMConstInt (LLVMInt32Type (), 31, FALSE) :
+ LLVMConstInt (LLVMInt64Type (), 63, FALSE), "");
+ LLVMValueRef one = ins->opcode == OP_LSCNT32 ?
+ LLVMConstInt (LLVMInt32Type (), 1, FALSE) :
+ LLVMConstInt (LLVMInt64Type (), 1, FALSE);
+ LLVMValueRef xor = LLVMBuildXor (builder, shr, lhs, "");
+ LLVMValueRef mul = LLVMBuildShl (builder, xor, one, "");
+ LLVMValueRef add = LLVMBuildOr (builder, mul, one, "");
+
+ LLVMValueRef args [2];
+ args [0] = add;
+ args [1] = LLVMConstInt (LLVMInt1Type (), 0, FALSE);
+ values [ins->dreg] = LLVMBuildCall (builder, get_intrins (ctx, ins->opcode == OP_LSCNT32 ? INTRINS_CTLZ_I32 : INTRINS_CTLZ_I64), args, 2, "");
+ break;
+ }
+#endif
case OP_DUMMY_USE:
break;
intrins = add_intrins1 (module, id, sse_i8_t);
break;
#endif
+#ifdef TARGET_ARM64
+ case INTRINS_BITREVERSE_I32:
+ intrins = add_intrins1 (module, id, LLVMInt32Type ());
+ break;
+ case INTRINS_BITREVERSE_I64:
+ intrins = add_intrins1 (module, id, LLVMInt64Type ());
+ break;
+#endif
default:
g_assert_not_reached ();
break;
{ "bmi", MONO_CPU_X86_BMI1 },
{ "bmi2", MONO_CPU_X86_BMI2 },
#endif
+#if defined(TARGET_ARM64)
+ { "crc", MONO_CPU_ARM64_CRC },
+#endif
};
if (!cpu_features)
cpu_features = MONO_CPU_INITED | (MonoCPUFeatures)mono_llvm_check_cpu_features (flags_map, G_N_ELEMENTS (flags_map));
+
return cpu_features;
}
/* SIMD opcodes. */
+#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_ARM64)
+
+MINI_OP(OP_EXTRACT_I4, "extract_i4", IREG, XREG, NONE)
+MINI_OP(OP_ICONV_TO_R4_RAW, "iconv_to_r4_raw", FREG, IREG, NONE)
+
+MINI_OP(OP_EXTRACT_I2, "extract_i2", IREG, XREG, NONE)
+MINI_OP(OP_EXTRACT_U2, "extract_u2", IREG, XREG, NONE)
+MINI_OP(OP_EXTRACT_I1, "extract_i1", IREG, XREG, NONE)
+MINI_OP(OP_EXTRACT_U1, "extract_u1", IREG, XREG, NONE)
+MINI_OP(OP_EXTRACT_R4, "extract_r4", FREG, XREG, NONE)
+MINI_OP(OP_EXTRACT_R8, "extract_r8", FREG, XREG, NONE)
+MINI_OP(OP_EXTRACT_I8, "extract_i8", LREG, XREG, NONE)
+
+/* Used by LLVM */
+MINI_OP(OP_INSERT_I1, "insert_i1", XREG, XREG, IREG)
+MINI_OP(OP_INSERT_I2, "insert_i2", XREG, XREG, IREG)
+MINI_OP(OP_INSERT_I4, "insert_i4", XREG, XREG, IREG)
+MINI_OP(OP_INSERT_I8, "insert_i8", XREG, XREG, LREG)
+MINI_OP(OP_INSERT_R4, "insert_r4", XREG, XREG, FREG)
+MINI_OP(OP_INSERT_R8, "insert_r8", XREG, XREG, FREG)
+
+MINI_OP(OP_EXTRACTX_U2, "extractx_u2", IREG, XREG, NONE)
+
+/*these slow ops are modeled around the availability of a fast 2 bytes insert op*/
+/*insertx_u1_slow takes old value and new value as source regs */
+MINI_OP(OP_INSERTX_U1_SLOW, "insertx_u1_slow", XREG, IREG, IREG)
+/*insertx_i4_slow takes target xreg and new value as source regs */
+MINI_OP(OP_INSERTX_I4_SLOW, "insertx_i4_slow", XREG, XREG, IREG)
+
+MINI_OP(OP_INSERTX_R4_SLOW, "insertx_r4_slow", XREG, XREG, FREG)
+MINI_OP(OP_INSERTX_R8_SLOW, "insertx_r8_slow", XREG, XREG, FREG)
+MINI_OP(OP_INSERTX_I8_SLOW, "insertx_i8_slow", XREG, XREG, LREG)
+
+MINI_OP(OP_FCONV_TO_R4_X, "fconv_to_r4_x", XREG, FREG, NONE)
+MINI_OP(OP_FCONV_TO_R8_X, "fconv_to_r8_x", XREG, FREG, NONE)
+MINI_OP(OP_XCONV_R8_TO_I4, "xconv_r8_to_i4", IREG, XREG, NONE)
+MINI_OP(OP_ICONV_TO_X, "iconv_to_x", XREG, IREG, NONE)
+
+MINI_OP(OP_EXPAND_I1, "expand_i1", XREG, IREG, NONE)
+MINI_OP(OP_EXPAND_I2, "expand_i2", XREG, IREG, NONE)
+MINI_OP(OP_EXPAND_I4, "expand_i4", XREG, IREG, NONE)
+MINI_OP(OP_EXPAND_R4, "expand_r4", XREG, FREG, NONE)
+MINI_OP(OP_EXPAND_I8, "expand_i8", XREG, IREG, NONE)
+MINI_OP(OP_EXPAND_R8, "expand_r8", XREG, FREG, NONE)
+
+#endif
+
#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_WASM)
MINI_OP(OP_ADDPS, "addps", XREG, XREG, XREG)
MINI_OP(OP_PSHLQ, "pshlq", XREG, XREG, NONE)
MINI_OP(OP_PSHLQ_REG, "pshlq_reg", XREG, XREG, XREG)
-MINI_OP(OP_EXTRACT_I4, "extract_i4", IREG, XREG, NONE)
-MINI_OP(OP_ICONV_TO_R4_RAW, "iconv_to_r4_raw", FREG, IREG, NONE)
-
-MINI_OP(OP_EXTRACT_I2, "extract_i2", IREG, XREG, NONE)
-MINI_OP(OP_EXTRACT_U2, "extract_u2", IREG, XREG, NONE)
-MINI_OP(OP_EXTRACT_I1, "extract_i1", IREG, XREG, NONE)
-MINI_OP(OP_EXTRACT_U1, "extract_u1", IREG, XREG, NONE)
-MINI_OP(OP_EXTRACT_R4, "extract_r4", FREG, XREG, NONE)
-MINI_OP(OP_EXTRACT_R8, "extract_r8", FREG, XREG, NONE)
-MINI_OP(OP_EXTRACT_I8, "extract_i8", LREG, XREG, NONE)
-
-/* Used by LLVM */
-MINI_OP(OP_INSERT_I1, "insert_i1", XREG, XREG, IREG)
-MINI_OP(OP_INSERT_I2, "insert_i2", XREG, XREG, IREG)
-MINI_OP(OP_INSERT_I4, "insert_i4", XREG, XREG, IREG)
-MINI_OP(OP_INSERT_I8, "insert_i8", XREG, XREG, LREG)
-MINI_OP(OP_INSERT_R4, "insert_r4", XREG, XREG, FREG)
-MINI_OP(OP_INSERT_R8, "insert_r8", XREG, XREG, FREG)
-
-MINI_OP(OP_EXTRACTX_U2, "extractx_u2", IREG, XREG, NONE)
-
-/*these slow ops are modeled around the availability of a fast 2 bytes insert op*/
-/*insertx_u1_slow takes old value and new value as source regs */
-MINI_OP(OP_INSERTX_U1_SLOW, "insertx_u1_slow", XREG, IREG, IREG)
-/*insertx_i4_slow takes target xreg and new value as source regs */
-MINI_OP(OP_INSERTX_I4_SLOW, "insertx_i4_slow", XREG, XREG, IREG)
-
-MINI_OP(OP_INSERTX_R4_SLOW, "insertx_r4_slow", XREG, XREG, FREG)
-MINI_OP(OP_INSERTX_R8_SLOW, "insertx_r8_slow", XREG, XREG, FREG)
-MINI_OP(OP_INSERTX_I8_SLOW, "insertx_i8_slow", XREG, XREG, LREG)
-
-MINI_OP(OP_FCONV_TO_R4_X, "fconv_to_r4_x", XREG, FREG, NONE)
-MINI_OP(OP_FCONV_TO_R8_X, "fconv_to_r8_x", XREG, FREG, NONE)
-MINI_OP(OP_XCONV_R8_TO_I4, "xconv_r8_to_i4", IREG, XREG, NONE)
-MINI_OP(OP_ICONV_TO_X, "iconv_to_x", XREG, IREG, NONE)
-
-MINI_OP(OP_EXPAND_I1, "expand_i1", XREG, IREG, NONE)
-MINI_OP(OP_EXPAND_I2, "expand_i2", XREG, IREG, NONE)
-MINI_OP(OP_EXPAND_I4, "expand_i4", XREG, IREG, NONE)
-MINI_OP(OP_EXPAND_R4, "expand_r4", XREG, FREG, NONE)
-MINI_OP(OP_EXPAND_I8, "expand_i8", XREG, IREG, NONE)
-MINI_OP(OP_EXPAND_R8, "expand_r8", XREG, FREG, NONE)
-
MINI_OP(OP_PREFETCH_MEMBASE, "prefetch_membase", NONE, IREG, NONE)
MINI_OP(OP_CVTDQ2PD, "cvtdq2pd", XREG, XREG, NONE)
MINI_OP(OP_XOP_X_X_X, "xop_x_x_x", XREG, XREG, XREG)
MINI_OP(OP_XOP_X_X_I4, "xop_x_x_i4", XREG, XREG, IREG)
MINI_OP(OP_XOP_X_X_I8, "xop_x_x_i8", XREG, XREG, LREG)
+MINI_OP(OP_XOP_I4_I8, "xop_i4_i8", IREG, LREG, NONE)
+MINI_OP(OP_XOP_I8_I8, "xop_i8_i8", LREG, LREG, NONE)
+MINI_OP(OP_XOP_I4_I4, "xop_i4_i4", IREG, IREG, NONE)
+MINI_OP(OP_XOP_I4_I4_I4, "xop_i4_i4_i4", IREG, IREG, IREG)
+MINI_OP(OP_XOP_I4_I4_I8, "xop_i4_i4_i8", IREG, IREG, LREG)
MINI_OP(OP_XCAST, "xcast", XREG, XREG, NONE)
/* Extract element of vector */
MINI_OP(OP_LZCNT64, "lzcnt64", LREG, LREG, NONE)
MINI_OP(OP_POPCNT32, "popcnt32", IREG, IREG, NONE)
MINI_OP(OP_POPCNT64, "popcnt64", LREG, LREG, NONE)
+
+#ifdef TARGET_ARM64
+MINI_OP(OP_LSCNT32, "lscnt32", IREG, IREG, NONE)
+MINI_OP(OP_LSCNT64, "lscnt64", LREG, LREG, NONE)
+#endif // TARGET_ARM64
}
#endif
+#if defined(TARGET_ARM64)
+ // All Arm64 devices have this set
+ features |= MONO_CPU_ARM64_BASE;
+#endif
+
// apply parameters passed via -mattr
return (features | mono_cpu_features_enabled) & ~mono_cpu_features_disabled;
}
#define MONO_IS_REAL_MOVE(ins) (((ins)->opcode == OP_MOVE) || ((ins)->opcode == OP_FMOVE) || ((ins)->opcode == OP_XMOVE) || ((ins)->opcode == OP_RMOVE))
#define MONO_IS_ZERO(ins) (((ins)->opcode == OP_VZERO) || ((ins)->opcode == OP_XZERO))
+#ifdef TARGET_ARM64
+// FIXME: enable for Arm64
+#define MONO_CLASS_IS_SIMD(cfg, klass) (0)
+#else
#define MONO_CLASS_IS_SIMD(cfg, klass) (((cfg)->opt & MONO_OPT_SIMD) && m_class_is_simd_type (klass))
+#endif
#else
#ifdef TARGET_WASM
MONO_CPU_WASM_SIMD = 1 << 1,
#endif
+#ifdef TARGET_ARM64
+ MONO_CPU_ARM64_BASE = 1 << 1,
+ MONO_CPU_ARM64_CRC = 1 << 2,
+#endif
} MonoCPUFeatures;
G_ENUM_FUNCTIONS (MonoCPUFeatures)
SIMD_OP_SSE_PSIGND,
SIMD_OP_SSE_PMADDUBSW,
SIMD_OP_SSE_PMULHRSW,
- SIMD_OP_SSE_LDDQU
+ SIMD_OP_SSE_LDDQU,
+ SIMD_OP_ARM64_CRC32B,
+ SIMD_OP_ARM64_CRC32H,
+ SIMD_OP_ARM64_CRC32W,
+ SIMD_OP_ARM64_CRC32X,
+ SIMD_OP_ARM64_CRC32CB,
+ SIMD_OP_ARM64_CRC32CH,
+ SIMD_OP_ARM64_CRC32CW,
+ SIMD_OP_ARM64_CRC32CX,
+ SIMD_OP_ARM64_RBIT32,
+ SIMD_OP_ARM64_RBIT64
} SimdOp;
const char *mono_arch_xregname (int reg);
return (SimdIntrinsic *)mono_binary_search (cmethod->name, intrinsics, size / sizeof (SimdIntrinsic), sizeof (SimdIntrinsic), &simd_intrinsic_info_compare_by_name);
}
-static int
-type_to_expand_op (MonoType *type)
-{
- switch (type->type) {
- case MONO_TYPE_I1:
- case MONO_TYPE_U1:
- return OP_EXPAND_I1;
- case MONO_TYPE_I2:
- case MONO_TYPE_U2:
- return OP_EXPAND_I2;
- case MONO_TYPE_I4:
- case MONO_TYPE_U4:
- return OP_EXPAND_I4;
- case MONO_TYPE_I8:
- case MONO_TYPE_U8:
- return OP_EXPAND_I8;
- case MONO_TYPE_R4:
- return OP_EXPAND_R4;
- case MONO_TYPE_R8:
- return OP_EXPAND_R8;
- default:
- g_assert_not_reached ();
- }
-}
-
/*
* Return a simd vreg for the simd value represented by SRC.
* SRC is the 'this' argument to methods.
return etype;
}
+#ifdef TARGET_AMD64
+
+/*
+ * type_to_expand_op:
+ *
+ *   Map a scalar element type to the OP_EXPAND_* opcode that broadcasts a
+ * scalar value into every lane of a SIMD register. Signed and unsigned
+ * integer variants of the same width share one opcode. Asserts on any
+ * type with no expand opcode.
+ */
+static int
+type_to_expand_op (MonoType *type)
+{
+ switch (type->type) {
+ case MONO_TYPE_I1:
+ case MONO_TYPE_U1:
+ return OP_EXPAND_I1;
+ case MONO_TYPE_I2:
+ case MONO_TYPE_U2:
+ return OP_EXPAND_I2;
+ case MONO_TYPE_I4:
+ case MONO_TYPE_U4:
+ return OP_EXPAND_I4;
+ case MONO_TYPE_I8:
+ case MONO_TYPE_U8:
+ return OP_EXPAND_I8;
+ case MONO_TYPE_R4:
+ return OP_EXPAND_R4;
+ case MONO_TYPE_R8:
+ return OP_EXPAND_R8;
+ default:
+ g_assert_not_reached ();
+ }
+}
+
static guint16 vector_methods [] = {
SN_ConvertToDouble,
SN_ConvertToInt32,
return NULL;
}
+#endif // TARGET_AMD64
+
+#ifdef TARGET_ARM64
+
+// System.Runtime.Intrinsics.Arm.ArmBase methods handled by the JIT.
+// NOTE: entries must stay sorted by method name — they are looked up
+// with a binary search (see lookup_intrins_info / mono_binary_search).
+static SimdIntrinsic armbase_methods [] = {
+ {SN_LeadingSignCount},
+ {SN_LeadingZeroCount},
+ {SN_ReverseElementBits},
+ {SN_get_IsSupported}
+};
+
+// System.Runtime.Intrinsics.Arm.Crc32 methods handled by the JIT.
+// NOTE: entries must stay sorted by method name — they are looked up
+// with a binary search (see lookup_intrins_info / mono_binary_search).
+static SimdIntrinsic crc32_methods [] = {
+ {SN_ComputeCrc32},
+ {SN_ComputeCrc32C},
+ {SN_get_IsSupported}
+};
+
+/*
+ * emit_arm64_intrinsics:
+ *
+ *   Emit IR for the System.Runtime.Intrinsics.Arm.* hardware intrinsics
+ * classes handled so far (ArmBase, Crc32). Returns the emitted
+ * instruction, or NULL when the method is not recognized so the caller
+ * can fall back to generic handling.
+ */
+static MonoInst*
+emit_arm64_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
+{
+ // Arm64 intrinsics are LLVM-only
+ if (!COMPILE_LLVM (cfg))
+ return NULL;
+
+ MonoInst *ins;
+ gboolean supported, is_64bit;
+ MonoClass *klass = cmethod->klass;
+ MonoTypeEnum arg0_type = fsig->param_count > 0 ? get_underlying_type (fsig->params [0]) : MONO_TYPE_VOID;
+ gboolean arg0_i32 = (arg0_type == MONO_TYPE_I4) || (arg0_type == MONO_TYPE_U4);
+ SimdIntrinsic *info;
+
+ // NOTE(review): is_64bit is presumably set by is_hw_intrinsics_class when
+ // the method lives on the nested Arm64 class — confirm against its definition.
+ if (is_hw_intrinsics_class (klass, "ArmBase", &is_64bit)) {
+ info = lookup_intrins_info (armbase_methods, sizeof (armbase_methods), cmethod);
+ if (!info)
+ return NULL;
+
+ // IsSupported is resolved from the CPU features detected at JIT time.
+ supported = (mini_get_cpu_features (cfg) & MONO_CPU_ARM64_BASE) != 0;
+
+ switch (info->id) {
+ case SN_get_IsSupported:
+ EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
+ ins->type = STACK_I4;
+ return ins;
+ case SN_LeadingZeroCount:
+ // 32- vs 64-bit opcode is chosen from the first argument's type.
+ return emit_simd_ins_for_sig (cfg, klass, arg0_i32 ? OP_LZCNT32 : OP_LZCNT64, 0, arg0_type, fsig, args);
+ case SN_LeadingSignCount:
+ return emit_simd_ins_for_sig (cfg, klass, arg0_i32 ? OP_LSCNT32 : OP_LSCNT64, 0, arg0_type, fsig, args);
+ case SN_ReverseElementBits:
+ return emit_simd_ins_for_sig (cfg, klass,
+ (is_64bit ? OP_XOP_I8_I8 : OP_XOP_I4_I4),
+ (is_64bit ? SIMD_OP_ARM64_RBIT64 : SIMD_OP_ARM64_RBIT32),
+ arg0_type, fsig, args);
+ default:
+ g_assert_not_reached (); // if a new API is added we need to either implement it or change IsSupported to false
+ }
+ }
+
+ if (is_hw_intrinsics_class (klass, "Crc32", &is_64bit)) {
+ info = lookup_intrins_info (crc32_methods, sizeof (crc32_methods), cmethod);
+ if (!info)
+ return NULL;
+
+ supported = (mini_get_cpu_features (cfg) & MONO_CPU_ARM64_CRC) != 0;
+
+ switch (info->id) {
+ case SN_get_IsSupported:
+ EMIT_NEW_ICONST (cfg, ins, supported ? 1 : 0);
+ ins->type = STACK_I4;
+ return ins;
+ case SN_ComputeCrc32:
+ case SN_ComputeCrc32C: {
+ SimdOp op = (SimdOp)0;
+ gboolean is_c = info->id == SN_ComputeCrc32C;
+ // The CRC variant is selected by the width of the data argument
+ // (second parameter); is_c picks the Castagnoli polynomial forms.
+ switch (get_underlying_type (fsig->params [1])) {
+ case MONO_TYPE_U1: op = is_c ? SIMD_OP_ARM64_CRC32CB : SIMD_OP_ARM64_CRC32B; break;
+ case MONO_TYPE_U2: op = is_c ? SIMD_OP_ARM64_CRC32CH : SIMD_OP_ARM64_CRC32H; break;
+ case MONO_TYPE_U4: op = is_c ? SIMD_OP_ARM64_CRC32CW : SIMD_OP_ARM64_CRC32W; break;
+ case MONO_TYPE_U8: op = is_c ? SIMD_OP_ARM64_CRC32CX : SIMD_OP_ARM64_CRC32X; break;
+ default: g_assert_not_reached (); break;
+ }
+ return emit_simd_ins_for_sig (cfg, klass, is_64bit ? OP_XOP_I4_I4_I8 : OP_XOP_I4_I4_I4, op, arg0_type, fsig, args);
+ }
+ default:
+ g_assert_not_reached (); // if a new API is added we need to either implement it or change IsSupported to false
+ }
+ }
+ return NULL;
+}
+#endif // TARGET_ARM64
#ifdef TARGET_AMD64
return NULL;
}
-#endif
static guint16 vector_128_methods [] = {
SN_AsByte,
return NULL;
}
+#endif // !TARGET_ARM64
+
MonoInst*
mono_emit_simd_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
if (m_class_get_nested_in (cmethod->klass))
class_ns = m_class_get_name_space (m_class_get_nested_in (cmethod->klass));
+#ifdef TARGET_ARM64
+ if (!strcmp (class_ns, "System.Runtime.Intrinsics.Arm")) {
+ MonoInst *ins = emit_arm64_intrinsics (cfg, cmethod, fsig, args);
+ return ins;
+ }
+#endif // TARGET_ARM64
+
#ifdef TARGET_AMD64 // TODO: test and enable for x86 too
if (!strcmp (class_ns, "System.Runtime.Intrinsics.X86")) {
- MonoInst *ins = emit_x86_intrinsics (cfg ,cmethod, fsig, args);
+ MonoInst *ins = emit_x86_intrinsics (cfg, cmethod, fsig, args);
return ins;
}
-#endif
+
+ // FIXME: implement Vector64<T>, Vector128<T> and Vector<T> for Arm64
if (!strcmp (class_ns, "System.Runtime.Intrinsics")) {
if (!strcmp (class_name, "Vector128`1"))
if (!strcmp (class_name, "Vector`1"))
return emit_sys_numerics_vector_t (cfg, cmethod, fsig, args);
}
+#endif // TARGET_AMD64
return NULL;
}
METHOD(TestZ)
// Sse42
METHOD(Crc32)
+// ArmBase
+METHOD(LeadingSignCount)
+METHOD(ReverseElementBits)
+// Crc32
+METHOD(ComputeCrc32)
+METHOD(ComputeCrc32C)