#define arm_format_ldrfp_imm(p, size, opc, rt, rn, pimm, scale) arm_emit ((p), ((size) << 30) | (0xf << 26) | (0x1 << 24) | ((opc) << 22) | (arm_encode_pimm12 ((pimm), (scale)) << 10) | ((rn) << 5) | ((rt) << 0))
/* Load double */
-#define arm_ldrfpx(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_X, 0x1, dt, xn, simm, 8)
+#define arm_ldrfpx(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_X, 0x1, (dt), (xn), (simm), 8)
/* Load single */
-#define arm_ldrfpw(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_W, 0x1, dt, xn, simm, 4)
+#define arm_ldrfpw(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_W, 0x1, (dt), (xn), (simm), 4)
/* Load 128 bit */
-#define arm_ldrfpq(p, qt, xn, simm) arm_format_ldrfp_imm ((p), 0, 0x3, qt, xn, simm, 16)
+#define arm_ldrfpq(p, qt, xn, simm) arm_format_ldrfp_imm ((p), 0x0, 0x3, (qt), (xn), (simm), 16)
+
+/*
+ * LDR (literal, SIMD&FP): PC-relative load of the literal at TARGET into RD.
+ * OPC is placed in bits 30..31 and selects the variant; the 19 bit
+ * displacement computed by arm_get_disp19 () goes into bits 5..23.
+ */
+#define arm_format_ldrfp_lit(p, opc, rd, target) arm_emit ((p), 0b00011100000000000000000000000000 | ((opc) << 30) | (arm_get_disp19 ((p), (target)) << 5) | (rd))
+/* Load single */
+#define arm_neon_ldrs_lit(p, rd, target) arm_format_ldrfp_lit ((p), 0b00, (rd), (target))
+/* Load double */
+#define arm_neon_ldrd_lit(p, rd, target) arm_format_ldrfp_lit ((p), 0b01, (rd), (target))
+/* Load 128 bit */
+#define arm_neon_ldrq_lit(p, rd, target) arm_format_ldrfp_lit ((p), 0b10, (rd), (target))
+/*
+ * Re-point the literal load already emitted at P to TARGET: the displacement
+ * field (bits 5..23) is replaced, all other instruction bits are preserved.
+ * P is parenthesized before the cast so pointer expressions such as
+ * `code + 4` are cast as a whole. Note that P is evaluated more than once.
+ */
+#define arm_neon_ldrq_lit_fixup(p, target) (*((guint32*)(p)) = (*((guint32*)(p)) & 0xff00001f) | (arm_get_disp19 ((p), (target)) << 5))
/* Arithmetic (immediate) */
static G_GNUC_UNUSED inline guint32
#define TYPE_F32 0
#define TYPE_F64 1
+/*
+ * NEON :: move SIMD register.
+ * Expressed as arm_neon_orr with RN as both source operands, using the
+ * VREG_FULL register width.
+ */
+#define arm_neon_mov(p, rd, rn) arm_neon_orr ((p), VREG_FULL, (rd), (rn), (rn))
+
/* NEON :: AES */
#define arm_neon_aes_opcode(p, size, opcode, rd, rn) arm_neon_opcode_2reg ((p), VREG_FULL, 0b00001110001010000000100000000000 | (size) << 22 | (opcode) << 12, (rd), (rn))
#define arm_neon_aese(p, rd, rn) arm_neon_aes_opcode ((p), 0b00, 0b00100, (rd), (rn))
label: len:0
store_membase_imm: dest:b len:20
store_membase_reg: dest:b src1:i len:20
+storex_membase: dest:b src1:x len:12
storei1_membase_imm: dest:b len:20
storei1_membase_reg: dest:b src1:i len:12
storei2_membase_imm: dest:b len:20
storei2_memindex: dest:b src1:i src2:i len:4
storei4_memindex: dest:b src1:i src2:i len:4
load_membase: dest:i src1:b len:20
+loadx_membase: dest:x src1:b len:12
loadi1_membase: dest:i src1:b len:32
loadu1_membase: dest:i src1:b len:32
loadi2_membase: dest:i src1:b len:32
atomic_store_u8: dest:b src1:i len:20
atomic_store_r4: dest:b src1:f len:28
atomic_store_r8: dest:b src1:f len:24
+xbinop: dest:x src1:x src2:x len:4
+xzero: dest:x len:4
+xmove: dest:x src1:x len:4
+xconst: dest:x len:10
generic_class_init: src1:a len:44 clob:c
gc_safe_point: src1:i len:12 clob:c
return code;
}
+/*
+ * emit_strfpq:
+ *
+ *   Emit an arm_strfpq of RT to base register RN at byte offset IMM.
+ * When arm_is_pimm12_scaled (imm, 16) rejects IMM, the offset is first
+ * loaded into IP0 and added to RN, and the store uses IP0 with a zero
+ * offset instead.
+ * Returns the updated code pointer.
+ */
+static WARN_UNUSED_RESULT guint8*
+emit_strfpq (guint8 *code, int rt, int rn, int imm)
+{
+	if (!arm_is_pimm12_scaled (imm, 16)) {
+		/* IP0 is used as scratch for the address, so it cannot be the base */
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0);
+		arm_strfpq (code, rt, ARMREG_IP0, 0);
+		return code;
+	}
+
+	/* Offset fits the immediate form directly */
+	arm_strfpq (code, rt, rn, imm);
+	return code;
+}
+
static WARN_UNUSED_RESULT guint8*
emit_strx (guint8 *code, int rt, int rn, int imm)
{
return code;
}
+/*
+ * emit_ldrfpq:
+ *
+ *   Emit a 128 bit load (arm_ldrfpq) into RT from base register RN at byte
+ * offset IMM. When arm_is_pimm12_scaled (imm, 16) rejects IMM, the offset is
+ * first loaded into IP0 and added to RN, and the load uses IP0 with a zero
+ * offset instead.
+ * Returns the updated code pointer.
+ */
+static WARN_UNUSED_RESULT guint8*
+emit_ldrfpq (guint8 *code, int rt, int rn, int imm)
+{
+	if (!arm_is_pimm12_scaled (imm, 16)) {
+		/* IP0 is used as scratch for the address, so it cannot be the base */
+		g_assert (rn != ARMREG_IP0);
+		code = emit_imm (code, ARMREG_IP0, imm);
+		arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0);
+		arm_ldrfpq (code, rt, ARMREG_IP0, 0);
+		return code;
+	}
+
+	/* Offset fits the immediate form directly */
+	arm_ldrfpq (code, rt, rn, imm);
+	return code;
+}
+
guint8*
mono_arm_emit_ldrx (guint8 *code, int rt, int rn, int imm)
{
cfg->ret->dreg = cinfo->ret.reg;
break;
case ArgVtypeInIRegs:
- case ArgHFA:
+ case ArgHFA: {
/* Allocate a local to hold the result, the epilog will copy it to the correct place */
+ MonoType *ret_type = mini_get_underlying_type (sig->ret);
+ MonoClass *klass = mono_class_from_mono_type_internal (ret_type);
+ if (MONO_CLASS_IS_SIMD (cfg, klass)) {
+ int align_simd = mono_type_size (m_class_get_byval_arg (klass), NULL);
+ offset = ALIGN_TO (offset, align_simd);
+ }
+
cfg->ret->opcode = OP_REGOFFSET;
cfg->ret->inst_basereg = cfg->frame_reg;
cfg->ret->inst_offset = offset;
else
offset += 16;
break;
+ }
case ArgVtypeByRef:
/* This variable will be initialized in the prolog from R8 */
cfg->vret_addr->opcode = OP_REGOFFSET;
ins->opcode = OP_REGOFFSET;
ins->inst_basereg = cfg->frame_reg;
ins->inst_offset = offset + offsets [i];
- //printf ("allocated local %d to ", i); mono_print_tree_nl (ins);
+ //printf ("allocated local %d to ", i); mono_print_ins (ins);
}
}
offset += locals_stack_size;
return code;
}
+/*
+ * get_vector_size_macro:
+ *
+ *   Map the value size of INS->klass to a vector width constant:
+ * 16 bytes -> VREG_FULL, 8 bytes -> VREG_LOW. Any other size asserts.
+ */
+static int
+get_vector_size_macro (MonoInst *ins)
+{
+	int vec_bytes = mono_class_value_size (ins->klass, NULL);
+
+	if (vec_bytes == 16)
+		return VREG_FULL;
+	if (vec_bytes == 8)
+		return VREG_LOW;
+
+	g_assert_not_reached ();
+}
+
+/*
+ * get_type_size_macro:
+ *
+ *   Map an element type to the matching TYPE_* element size constant.
+ * Signed and unsigned integers of the same width share a constant; native
+ * integers follow TARGET_SIZEOF_VOID_P (64 bit when it is 8, 32 bit
+ * otherwise). Any other type asserts.
+ */
+static int
+get_type_size_macro (MonoTypeEnum type)
+{
+	switch (type) {
+	case MONO_TYPE_R4:
+		return TYPE_F32;
+	case MONO_TYPE_R8:
+		return TYPE_F64;
+	case MONO_TYPE_I1:
+	case MONO_TYPE_U1:
+		return TYPE_I8;
+	case MONO_TYPE_I2:
+	case MONO_TYPE_U2:
+		return TYPE_I16;
+#if TARGET_SIZEOF_VOID_P != 8
+	case MONO_TYPE_I:
+	case MONO_TYPE_U:
+#endif
+	case MONO_TYPE_I4:
+	case MONO_TYPE_U4:
+		return TYPE_I32;
+#if TARGET_SIZEOF_VOID_P == 8
+	case MONO_TYPE_I:
+	case MONO_TYPE_U:
+#endif
+	case MONO_TYPE_I8:
+	case MONO_TYPE_U8:
+		return TYPE_I64;
+	default:
+		g_assert_not_reached ();
+	}
+}
+
void
mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
{
}
break;
}
+ case OP_STOREX_MEMBASE:
+ code = emit_strfpq (code, sreg1, dreg, ins->inst_offset);
+ break;
+ case OP_LOADX_MEMBASE:
+ code = emit_ldrfpq (code, dreg, sreg1, ins->inst_offset);
+ break;
+ case OP_XZERO:
+ arm_neon_eor_16b (code, dreg, dreg, dreg);
+ break;
+ case OP_XMOVE:
+ arm_neon_mov (code, dreg, sreg1);
+ break;
+ case OP_XCONST: {
+ if (cfg->compile_aot && cfg->code_exec_only) {
+ mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_X128_GOT, ins->inst_p0);
+ arm_ldrx_lit (code, ARMREG_IP0, 0);
+ arm_ldrfpq (code, ins->dreg, ARMREG_IP0, 0);
+ } else {
+ mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_X128, ins->inst_p0);
+ arm_neon_ldrq_lit (code, ins->dreg, 0);
+ }
+ break;
+ }
/* BRANCH */
case OP_BR:
mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_CBZ);
arm_cbnzx (code, sreg1, 0);
break;
+ case OP_XBINOP:
+ switch (ins->inst_c0) {
+ case OP_IADD:
+ arm_neon_add (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
+ break;
+ case OP_FADD:
+ arm_neon_fadd (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
+ break;
+ default:
+ g_assert_not_reached ();
+ }
+ break;
/* ALU */
case OP_IADD:
arm_addw (code, dreg, sreg1, sreg2);
size += 32;
exc_throw_found [i] = TRUE;
}
+ } else if (ji->type == MONO_PATCH_INFO_X128) {
+ size += 16 + 15; /* sizeof (Vector128<T>) + alignment */
}
}
set_code_cursor (cfg, code);
}
+ /* Handle relocations with PC-relative addressing */
+ for (ji = cfg->patch_info; ji; ji = ji->next) {
+ gboolean remove = FALSE;
+
+ if (ji->type == MONO_PATCH_INFO_X128) {
+ guint8 *pos;
+
+ code = (guint8*)ALIGN_TO (code, 16);
+ pos = cfg->native_code + ji->ip.i;
+ arm_neon_ldrq_lit_fixup (pos, code);
+ memcpy (code, ji->data.target, 16);
+ code += 16;
+
+ remove = TRUE;
+ }
+
+ if (remove) {
+ if (ji == cfg->patch_info)
+ cfg->patch_info = ji->next;
+ else {
+ MonoJumpInfo *tmp;
+
+ for (tmp = cfg->patch_info; tmp->next != ji; tmp = tmp->next)
+ ;
+ tmp->next = ji->next;
+ }
+ }
+ set_code_cursor (cfg, code);
+ }
+
set_code_cursor (cfg, code);
}
#if !defined(DISABLE_SIMD)
#define MONO_ARCH_SIMD_INTRINSICS 1
+#define MONO_ARCH_NEED_SIMD_BANK 1
+#define MONO_ARCH_USE_SHARED_FP_SIMD_BANK 1
#endif
#define MONO_CONTEXT_SET_LLVM_EXC_REG(ctx, exc) do { (ctx)->regs [0] = (gsize)exc; } while (0)
/* v8..v15 */
#define MONO_ARCH_CALLEE_SAVED_FREGS 0xff00
-#define MONO_ARCH_CALLEE_SAVED_XREGS 0
+#define MONO_ARCH_CALLEE_SAVED_XREGS MONO_ARCH_CALLEE_SAVED_FREGS
#define MONO_ARCH_CALLEE_XREGS MONO_ARCH_CALLEE_FREGS
* Align the size too so the code generated for passing vtypes in
* registers doesn't overwrite random locals.
*/
- size = (size + (align - 1)) & ~(align -1);
+ size = ALIGN_TO (size, align);
}
if (backward) {
- offset += size;
- offset += align - 1;
- offset &= ~(align - 1);
+ offset = ALIGN_TO (offset + size, align);
slot = offset;
}
else {
- offset += align - 1;
- offset &= ~(align - 1);
+ offset = ALIGN_TO (offset, align);
slot = offset;
offset += size;
}
#define MONO_IS_REAL_MOVE(ins) (((ins)->opcode == OP_MOVE) || ((ins)->opcode == OP_FMOVE) || ((ins)->opcode == OP_XMOVE) || ((ins)->opcode == OP_RMOVE))
#define MONO_IS_ZERO(ins) (((ins)->opcode == OP_VZERO) || ((ins)->opcode == OP_XZERO))
-#ifdef TARGET_ARM64
-/*
- * SIMD is only supported on arm64 when using the LLVM backend. When not using
- * the LLVM backend, treat SIMD datatypes as regular value types.
- */
-#define MONO_CLASS_IS_SIMD(cfg, klass) (((cfg)->opt & MONO_OPT_SIMD) && COMPILE_LLVM (cfg) && m_class_is_simd_type (klass))
-#else
#define MONO_CLASS_IS_SIMD(cfg, klass) (((cfg)->opt & MONO_OPT_SIMD) && m_class_is_simd_type (klass) && (COMPILE_LLVM (cfg) || mono_type_size (m_class_get_byval_arg (klass), NULL) == 16))
-#endif
#else
static MonoInst*
emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
+#if defined(TARGET_AMD64) || defined(TARGET_WASM)
if (!COMPILE_LLVM (cfg))
return NULL;
-
+#endif
+// FIXME: This limitation could be removed once everything here is supported by the mini JIT on arm64
#ifdef TARGET_ARM64
if (!(cfg->compile_aot && cfg->full_aot && !cfg->interp))
return NULL;
if (!strcmp (m_class_get_name (cfg->method->klass), "Vector256"))
return NULL; // TODO: Fix Vector256.WithUpper/WithLower
-
+
+// FIXME: This limitation could be removed once everything here is supported by the mini JIT on arm64
+#ifdef TARGET_ARM64
+ if (!COMPILE_LLVM (cfg)) {
+ if (id != SN_Add)
+ return NULL;
+ MonoClass *arg0_class = mono_class_from_mono_type_internal (fsig->params [0]);
+ int class_size = mono_class_value_size (arg0_class, NULL);
+ if (class_size != 16)
+ return NULL;
+ }
+#endif
+
MonoClass *klass = cmethod->klass;
MonoTypeEnum arg0_type = fsig->param_count > 0 ? get_underlying_type (fsig->params [0]) : MONO_TYPE_VOID;
#else
return NULL;
#endif
-}
+ }
case SN_Add:
case SN_BitwiseAnd:
case SN_BitwiseOr:
SN_get_AllBitsSet,
SN_get_Count,
SN_get_IsSupported,
+ SN_get_One,
SN_get_Zero,
SN_op_Addition,
SN_op_BitwiseAnd,
break;
}
+#if defined(TARGET_AMD64) || defined(TARGET_WASM)
if (!COMPILE_LLVM (cfg))
return NULL;
+#endif
+
+// FIXME: This limitation could be removed once everything here is supported by the mini JIT on arm64
+#ifdef TARGET_ARM64
+ if (!COMPILE_LLVM (cfg)) {
+ if (size != 16)
+ return NULL;
+ if (!(id == SN_get_One || id == SN_get_Zero))
+ return NULL;
+ }
+#endif
switch (id) {
case SN_get_Count: {
case SN_get_AllBitsSet: {
return emit_xones (cfg, klass);
}
+ case SN_get_One: {
+ if (size != 16)
+ return NULL;
+ switch (etype->type) {
+ case MONO_TYPE_I1:
+ case MONO_TYPE_U1: {
+ guint8 value[16];
+
+ for (int i = 0; i < len; ++i) {
+ value [i] = 1;
+ }
+
+ return emit_xconst_v128 (cfg, klass, value);
+ }
+ case MONO_TYPE_I2:
+ case MONO_TYPE_U2: {
+ guint16 value[8];
+
+ for (int i = 0; i < len; ++i) {
+ value [i] = 1;
+ }
+
+ return emit_xconst_v128 (cfg, klass, (guint8*)value);
+ }
+#if TARGET_SIZEOF_VOID_P == 4
+ case MONO_TYPE_I:
+ case MONO_TYPE_U:
+#endif
+ case MONO_TYPE_I4:
+ case MONO_TYPE_U4: {
+ guint32 value[4];
+
+ for (int i = 0; i < len; ++i) {
+ value [i] = 1;
+ }
+
+ return emit_xconst_v128 (cfg, klass, (guint8*)value);
+ }
+#if TARGET_SIZEOF_VOID_P == 8
+ case MONO_TYPE_I:
+ case MONO_TYPE_U:
+#endif
+ case MONO_TYPE_I8:
+ case MONO_TYPE_U8: {
+ guint64 value[2];
+
+ for (int i = 0; i < len; ++i) {
+ value [i] = 1;
+ }
+
+ return emit_xconst_v128 (cfg, klass, (guint8*)value);
+ }
+ case MONO_TYPE_R4: {
+ float value[4];
+
+ for (int i = 0; i < len; ++i) {
+ value [i] = 1.0f;
+ }
+
+ return emit_xconst_v128 (cfg, klass, (guint8*)value);
+ }
+ case MONO_TYPE_R8: {
+ double value[2];
+
+ for (int i = 0; i < len; ++i) {
+ value [i] = 1.0;
+ }
+
+ return emit_xconst_v128 (cfg, klass, (guint8*)value);
+ }
+ default:
+ g_assert_not_reached ();
+ }
+ }
case SN_op_Addition:
case SN_op_BitwiseAnd:
case SN_op_BitwiseOr: