// NEON "three same" bitwise ops. `width` picks 64-/128-bit vectors; the literal
// arguments are the fixed U-bit, size and opcode fields of the encoding group.
#define arm_neon_and(p, width, rd, rn, rm) arm_neon_3svec_opcode ((p), (width), 0b0, 0b00, 0b00011, (rd), (rn), (rm))
#define arm_neon_orr(p, width, rd, rn, rm) arm_neon_3svec_opcode ((p), (width), 0b0, 0b10, 0b00011, (rd), (rn), (rm))
#define arm_neon_eor(p, width, rd, rn, rm) arm_neon_3svec_opcode ((p), (width), 0b1, 0b00, 0b00011, (rd), (rn), (rm))
// BIF (bitwise insert if false): copy bits of rn into rd where rm's bits are 0.
// Width-parameterized form; supersedes the fixed-width bif_8b/bif_16b variants below.
+#define arm_neon_bif(p, width, rd, rn, rm) arm_neon_3svec_opcode ((p), (width), 0b1, 0b11, 0b00011, (rd), (rn), (rm))
// Specific macros:
#define arm_neon_shadd_8b(p, rd, rn, rm) arm_neon_3svec_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b00000, (rd), (rn), (rm))
// BIT (bitwise insert if true): copy bits of rn into rd where rm's bits are 1.
#define arm_neon_bit_8b(p, rd, rn, rm) arm_neon_3svec_opcode ((p), VREG_LOW, 0b1, 0b10, 0b00011, (rd), (rn), (rm))
#define arm_neon_bit_16b(p, rd, rn, rm) arm_neon_3svec_opcode ((p), VREG_FULL, 0b1, 0b10, 0b00011, (rd), (rn), (rm))
-#define arm_neon_bif_8b(p, rd, rn, rm) arm_neon_3svec_opcode ((p), VREG_LOW, 0b1, 0b11, 0b00011, (rd), (rn), (rm))
-#define arm_neon_bif_16b(p, rd, rn, rm) arm_neon_3svec_opcode ((p), VREG_FULL, 0b1, 0b11, 0b00011, (rd), (rn), (rm))
-
-
/* NEON :: modified immediate */
// Splits `imm` into the abc (bits 7:5, placed at 18:16) and defgh (bits 4:0,
// placed at 9:5) fields of the modified-immediate encoding.
#define arm_neon_mimm_opcode(p, q, op, cmode, o2, imm, rd) arm_neon_opcode_1reg ((p), (q), 0b00001111000000000000010000000000 | (op) << 29 | (cmode) << 12 | (o2) << 11 | (imm & 0b11100000) << 11 | (imm & 0b11111) << 5, (rd))
storei2_memindex: dest:b src1:i src2:i len:4
storei4_memindex: dest:b src1:i src2:i len:4
load_membase: dest:i src1:b len:20
-loadx_membase: dest:x src1:b len:12
+loadx_membase: dest:x src1:b len:16
loadi1_membase: dest:i src1:b len:32
loadu1_membase: dest:i src1:b len:32
loadi2_membase: dest:i src1:b len:32
atomic_store_u8: dest:b src1:i len:20
atomic_store_r4: dest:b src1:f len:28
atomic_store_r8: dest:b src1:f len:24
-xbinop: dest:x src1:x src2:x len:4
+xbinop: dest:x src1:x src2:x len:8 clob:1
xzero: dest:x len:4
xmove: dest:x src1:x len:4
xconst: dest:x len:10
// Operand-format argument lists for the SIMD_OP table expansion:
// width, element type, destination, then the two sources (optionally swapped).
#define OPFMT_WTDSS _w, _t, dreg, sreg1, sreg2
#define OPFMT_WTDSS_REV _w, _t, dreg, sreg2, sreg1
// Table entry for unsupported width/type combinations: must never be reached.
#define _UNDEF(...) g_assert_not_reached ()
// Table entry for combinations that have no single instruction: fall through
// to the hand-written emission path (the manual_instruction_emit label).
+#define _SKIP(...) goto manual_instruction_emit
#define SIMD_OP_CODE(reg_w, op, c) ((reg_w << 31) | (op) << 16 | (c))
#define VREG_64 VREG_LOW
#define VREG_128 VREG_FULL
#define FP_TEMP_REG ARMREG_D16
#define FP_TEMP_REG2 ARMREG_D17
// Scratch vector register clobbered by emulated NEON sequences (e.g. the
// min/max helpers) — aliases FP_TEMP_REG (d16), not an allocatable register.
+#define NEON_TMP_REG FP_TEMP_REG
#define THUNK_SIZE (4 * 4)
return code;
}
+static WARN_UNUSED_RESULT guint8*
+emit_smax_i8 (guint8 *code, int width, int type, int rd, int rn, int rm)
+{
+ g_assert (rd == rn);
+ if (rn != rm) {
+ arm_neon_cmgt (code, width, type, NEON_TMP_REG, rn, rm);
+ arm_neon_bif (code, width, rd, rm, NEON_TMP_REG);
+ }
+ return code;
+}
+
+static WARN_UNUSED_RESULT guint8*
+emit_umax_i8 (guint8 *code, int width, int type, int rd, int rn, int rm)
+{
+ g_assert (rd == rn);
+ if (rn != rm) {
+ arm_neon_cmhi (code, width, type, NEON_TMP_REG, rn, rm);
+ arm_neon_bif (code, width, rd, rm, NEON_TMP_REG);
+ }
+ return code;
+}
+
+static WARN_UNUSED_RESULT guint8*
+emit_smin_i8 (guint8 *code, int width, int type, int rd, int rn, int rm)
+{
+ g_assert (rd == rn);
+ if (rn != rm) {
+ arm_neon_cmgt (code, width, type, NEON_TMP_REG, rm, rn);
+ arm_neon_bif (code, width, rd, rm, NEON_TMP_REG);
+ }
+ return code;
+}
+
+static WARN_UNUSED_RESULT guint8*
+emit_umin_i8 (guint8 *code, int width, int type, int rd, int rn, int rm)
+{
+ g_assert (rd == rn);
+ if (rn != rm) {
+ arm_neon_cmhi (code, width, type, NEON_TMP_REG, rm, rn);
+ arm_neon_bif (code, width, rd, rm, NEON_TMP_REG);
+ }
+ return code;
+}
+
guint8*
mono_arm_emit_ldrx (guint8 *code, int rt, int rn, int imm)
{
goto after_instruction_emit;
}
-
+
+ manual_instruction_emit:
switch (ins->opcode) {
case OP_ICONST:
code = emit_imm (code, dreg, ins->inst_c0);
/* SIMD that is not table-generated */
/* TODO: once https://github.com/dotnet/runtime/issues/83252 is done,
- * move these to the codegen table in simd-arm64.h
+ * move the following two to the codegen table in simd-arm64.h
*/
case OP_ONES_COMPLEMENT:
arm_neon_not (code, get_vector_size_macro (ins), dreg, sreg1);
arm_neon_neg (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1);
}
break;
+ case OP_XBINOP:
+ switch (ins->inst_c0) {
+ case OP_IMAX:
+ code = emit_smax_i8 (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
+ break;
+ case OP_IMAX_UN:
+ code = emit_umax_i8 (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
+ break;
+ case OP_IMIN:
+ code = emit_smin_i8 (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
+ break;
+ case OP_IMIN_UN:
+ code = emit_umin_i8 (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
+ break;
+ default:
+ g_assert_not_reached ();
+ }
+ break;
case OP_XZERO:
arm_neon_eor_16b (code, dreg, dreg, dreg);
break;
break;
}
case OP_IMAX:
- case OP_IMIN: {
+ case OP_IMIN:
+ case OP_IMAX_UN:
+ case OP_IMIN_UN: {
gboolean is_unsigned = ins->inst_c1 == MONO_TYPE_U1 || ins->inst_c1 == MONO_TYPE_U2 || ins->inst_c1 == MONO_TYPE_U4 || ins->inst_c1 == MONO_TYPE_U8;
LLVMIntPredicate op;
switch (ins->inst_c0) {
case OP_IMIN:
op = is_unsigned ? LLVMIntULT : LLVMIntSLT;
break;
+ case OP_IMAX_UN:
+ op = LLVMIntUGT;
+ break;
+ case OP_IMIN_UN:
+ op = LLVMIntULT;
+ break;
default:
g_assert_not_reached ();
}
case OP_IMIN:
iid = is_unsigned ? INTRINS_AARCH64_ADV_SIMD_UMIN : INTRINS_AARCH64_ADV_SIMD_SMIN;
break;
+ case OP_IMAX_UN:
+ iid = INTRINS_AARCH64_ADV_SIMD_UMAX;
+ break;
+ case OP_IMIN_UN:
+ iid = INTRINS_AARCH64_ADV_SIMD_UMIN;
+ break;
default:
g_assert_not_reached ();
}
SIMD_OP (128, OP_XBINOP, OP_FADD, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fadd, arm_neon_fadd)
SIMD_OP (128, OP_XBINOP, OP_ISUB, WTDSS, arm_neon_sub, arm_neon_sub, arm_neon_sub, arm_neon_sub, _UNDEF, _UNDEF)
SIMD_OP (128, OP_XBINOP, OP_FSUB, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fsub, arm_neon_fsub)
// Integer min/max: direct NEON instructions exist for the narrower lane widths
// (presumably the first four columns are the integer element sizes with I8 in
// the fourth slot — confirm against the SIMD_OP expansion). NEON has no
// 64-bit-lane SMAX/UMAX/SMIN/UMIN, so that column is _SKIP, which jumps to the
// manual emission path.
+SIMD_OP (128, OP_XBINOP, OP_IMAX, WTDSS, arm_neon_smax, arm_neon_smax, arm_neon_smax, _SKIP, _UNDEF, _UNDEF)
+SIMD_OP (128, OP_XBINOP, OP_IMAX_UN, WTDSS, arm_neon_umax, arm_neon_umax, arm_neon_umax, _SKIP, _UNDEF, _UNDEF)
+SIMD_OP (128, OP_XBINOP, OP_FMAX, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fmax, arm_neon_fmax)
+SIMD_OP (128, OP_XBINOP, OP_IMIN, WTDSS, arm_neon_smin, arm_neon_smin, arm_neon_smin, _SKIP, _UNDEF, _UNDEF)
+SIMD_OP (128, OP_XBINOP, OP_IMIN_UN, WTDSS, arm_neon_umin, arm_neon_umin, arm_neon_umin, _SKIP, _UNDEF, _UNDEF)
+SIMD_OP (128, OP_XBINOP, OP_FMIN, WTDSS, _UNDEF, _UNDEF, _UNDEF, _UNDEF, arm_neon_fmin, arm_neon_fmin)
// Bitwise ops are lane-size agnostic: every column uses the same emitter.
SIMD_OP (128, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_AND, WDSS, arm_neon_and, arm_neon_and, arm_neon_and, arm_neon_and, arm_neon_and, arm_neon_and)
SIMD_OP (128, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_OR, WDSS, arm_neon_orr, arm_neon_orr, arm_neon_orr, arm_neon_orr, arm_neon_orr, arm_neon_orr)
SIMD_OP (128, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_XOR, WDSS, arm_neon_eor, arm_neon_eor, arm_neon_eor, arm_neon_eor, arm_neon_eor, arm_neon_eor)
case SN_op_Division:
return NULL;
case SN_Max:
- instc0 = OP_IMAX;
+ instc0 = type_enum_is_unsigned (arg_type) ? OP_IMAX_UN : OP_IMAX;
break;
case SN_Min:
- instc0 = OP_IMIN;
+ instc0 = type_enum_is_unsigned (arg_type) ? OP_IMIN_UN : OP_IMIN;
break;
case SN_Multiply:
case SN_op_Multiply:
case SN_BitwiseAnd:
case SN_BitwiseOr:
case SN_Xor:
+ case SN_Max:
+ case SN_Min:
break;
default:
return NULL;