- Enable full float32 support (#56111)
authorNeale Ferguson <neale@sinenomine.net>
Thu, 22 Jul 2021 09:02:56 +0000 (19:02 +1000)
committerGitHub <noreply@github.com>
Thu, 22 Jul 2021 09:02:56 +0000 (11:02 +0200)
-- Add instructions for FP operations: abs[f]/ceil[f]/floor[f]/round[f]/sqrt[f]/trunc[f]
-- Enable these instructions in mono_arch_emit_inst_for_method()
-- Handle return of float32 results
-- Correct rounding mode for OP_LCONV_TO_R_UN processing
- Enable MONO_OPT_LINEAR optimizations
-- Correct prolog processing of structure returned variable
- Rework OP_LOCALLOC for cases where alloc size > 4k
- Add OP_POPCNTxx support
- Minor typo (missing tab)

src/mono/mono/arch/s390x/s390x-codegen.h
src/mono/mono/mini/cpu-s390x.md
src/mono/mono/mini/mini-s390x.c
src/mono/mono/mini/mini-s390x.h

index e779f86..ec9562a 100644 (file)
@@ -896,11 +896,18 @@ typedef struct {
 
 #define S390_RIE_4(c,opc,g1,i2,m3) do                          \
 {                                                              \
-       s390_emit16(c, ((opc & 0xff00) | (g1) << 4)           \
+       s390_emit16(c, ((opc & 0xff00) | (g1) << 4));           \
        s390_emit16(c, (i2));                                   \
        s390_emit16(c, ((m3) << 12 | (opc & 0xff)));            \
 } while (0)
 
+#define S390_RIE_6(c,opc,g1,g2,i3,i4,i5) do                    \
+{                                                              \
+       s390_emit16(c, ((opc & 0xff00) | ((g1) << 4) | g2));    \
+       s390_emit16(c, ((i3) << 8) | i4);                       \
+       s390_emit16(c, ((i5) << 8 | (opc & 0xff)));             \
+} while (0)
+
 #define S390_RIL_1(c,opc,g1,m2) do                                     \
 {                                                                      \
        s390_emit16(c, ((opc >> 4) << 8 | (g1) << 4 | (opc & 0xf)));    \
@@ -918,17 +925,24 @@ typedef struct {
        s390_emit16(c, ((opc, & 0xff00) | (r1) << 4) | (r2));   \
        s390_emit16(c, ((b) << 12) | (d));                      \
        s390_emit16(c, ((i) << 4) | ((opc) & 0xff));            \
-}
+} while (0)
 
 #define S390_RRS(c,opc,r1,r2,m3,b,d) do                                \
 {                                                              \
        s390_emit16(c, ((opc, & 0xff00) | (r1) << 4) | (r2));   \
        s390_emit16(c, ((b) << 12) | (d));                      \
        s390_emit16(c, ((m3) << 12) | ((opc) & 0xff));          \
-}
+} while (0)
 
 #define S390_SI(c,opc,s1,p1,m2)                s390_emit32(c, (opc << 24 | (m2) << 16 | (s1) << 12 | ((p1) & 0xfff)));
 
+#define S390_SIL(c, opc, b, d, i) do                           \
+{                                                              \
+       s390_emit16(c, opc);                                    \
+       s390_emit16(c, ((b) << 12) | ((d) & 0x0fff));           \
+       s390_emit16(c, ((i) & 0xffff));                         \
+} while (0)
+
 #define S390_SIY(c,opc,s1,p1,m2) do                            \
 {                                                              \
        s390_emit16(c, ((opc & 0xff00) | m2));                  \
@@ -1242,7 +1256,7 @@ typedef struct {
 #define s390_clgij(c, r, i, b)         S390_RIE_3(c, 0xec7d, r, i, m, d)
 #define s390_clgr(c, r1, r2)           S390_RRE(c, 0xb921, r1, r2)
 #define s390_clgdbr(c, r1, m3, r2, m4) S390_RRF_4(c, 0xb3ad, r1, m3, r2, m4)
-#define s390_clgebr(c, r1, m3, r2, m4) S390_RRF_4(c, 0xb39c, r1, m3, r2, m4)
+#define s390_clgebr(c, r1, m3, r2, m4) S390_RRF_4(c, 0xb3ac, r1, m3, r2, m4)
 #define s390_clgrj(c, r1, r2, m, v)    S390_RIE_2(c, 0xec65, r1, r2, m, v)
 #define s390_clgrb(c, r1, r2, m3, b, d)        S390_RRS(c, 0xece5, r1, r2, m3, b, d)
 #define s390_cli(c, b, d, v)           S390_SI(c, 0x95, b, d, v)
@@ -1257,6 +1271,7 @@ typedef struct {
 #define s390_crl(c, r, v)              S390_RIL_1(c, 0xc6d, r, v)
 #define s390_crt(c, r1, r2, m3)                S390_RRF_2(c, 0xb972, r1, r2, m3);
 #define s390_cgrt(c, r1, r2, m3)       S390_RRF_2(c, 0xb960, r1, r2, m3);
+#define s390_cpsdr(c, r1, r2, r3)      S390_RRF_2(c, 0xb372, r1, r2, r3);
 #define s390_cs(c, r1, r2, b, d)       S390_RX(c, 0xba, r1, r2, b, d)
 #define s390_csg(c, r1, r2, b, d)      S390_RSY_1(c, 0xeb30, r1, r2, b, d)
 #define s390_csst(c, d1, b1, d2, b2, r)        S390_SSF(c, 0xc82, b1, d1, b2, d2, r)
@@ -1272,6 +1287,9 @@ typedef struct {
 #define s390_dsgfr(c, r1, r2)          S390_RRE(c, 0xb91d, r1, r2)
 #define s390_dsgr(c, r1, r2)           S390_RRE(c, 0xb90d, r1, r2)
 #define s390_ear(c, r1, r2)            S390_RRE(c, 0xb24f, r1, r2)
+#define s390_fidbra(c, r1, m3, r2, m4) S390_RRF_4(c, 0xb35f, r1, m3, r2, m4)
+#define s390_fiebra(c, r1, m3, r2, m4) S390_RRF_4(c, 0xb357, r1, m3, r2, m4)
+#define s390_flogr(c, r1, r2)          S390_RRE(c, 0xb983, r1, r2)
 #define s390_ic(c, r, x, b, d)         S390_RX(c, 0x43, r, x, b, d)
 #define s390_icm(c, r, m, b, d)                S390_RX(c, 0xbf, r, m, b, d)
 #define s390_icmy(c, r, x, b, d)       S390_RXY(c, 0xeb81, r, x, b, d)
@@ -1388,6 +1406,19 @@ typedef struct {
 #define s390_lnebr(c, r1, r2)          S390_RRE(c, 0xb301, r1, r2)
 #define s390_lngr(c, r1, r2)           S390_RRE(c, 0xb901, r1, r2)
 #define s390_lnr(c, r1, r2)            S390_RR(c, 0x11, r1, r2)
+#define s390_loc(c, r, m, b, d)                S390_RSY_2(c, 0xebf2, r, m, b, d)
+#define s390_locg(c, r, m, b, d)       S390_RSY_2(c, 0xebe2, r, m, b, d)
+#define s390_locr(c, r1, m, r2)                S390_RRF_2(c, 0xb9f2, r1, m, r2)
+#define s390_locgr(c, r1, m, r2)       S390_RRF_2(c, 0xb9e2, r1, m, r2)
+#define s390_locfh(c, r, n, b, d)      S390_RRF_2(c, 0xebe0, r, m, b, d)
+#define s390_locfhr(c, r1, m, r2)      S390_RRF_2(c, 0xb9e0, r1, m, r2)
+#define s390_lpdbr(c, r1, r2)          S390_RRE(c, 0xb310, r1, r2)
+#define s390_lpebr(c, r1, r2)          S390_RRE(c, 0xb300, r1, r2)
+#define s390_lpgr(c, r1, r2)           S390_RRE(c, 0xb900, r1, r2)
+#define s390_lpr(c, r1, r2)            S390_RR(c, 0x10, r1, r2)
+#define s390_lr(c, r1, r2)             S390_RR(c, 0x18, r1, r2)
+#define s390_lrl(c, r1, d)             S390_RIL_1(c, 0xc4d, r1, d)
+#define s390_lt(c, r, x, b, d)         S390_RXY(c, 0xe312, r, x, b, d)
 #define s390_lpdbr(c, r1, r2)          S390_RRE(c, 0xb310, r1, r2)
 #define s390_lpgr(c, r1, r2)           S390_RRE(c, 0xb900, r1, r2)
 #define s390_lpr(c, r1, r2)            S390_RR(c, 0x10, r1, r2)
@@ -1419,6 +1450,7 @@ typedef struct {
 #define s390_msr(c, r1, r2)            S390_RRE(c, 0xb252, r1, r2)
 #define s390_msrkc(c, r1, r2, r3)      S390_RRF_1(c, 0xb9fd, r1, r2, r3)
 #define s390_mvc(c, l, b1, d1, b2, d2) S390_SS_1(c, 0xd2, l, b1, d1, b2, d2)
+#define s390_mvghi(c, b1, d1, i2)      S390_SIL(c, 0xe548, b1, d1, i2) 
 #define s390_mvcl(c, r1, r2)           S390_RR(c, 0x0e, r1, r2)
 #define s390_mvcle(c, r1, r3, d2, b2)  S390_RS_1(c, 0xa8, r1, r3, d2, b2)
 #define s390_mvi(c, b, d, v)           S390_SI(c, 0x92, b, d, v)
@@ -1439,7 +1471,7 @@ typedef struct {
 #define s390_mem(c)                    S390_RR(c, 0x07, 0xe, 0)
 #define s390_nr(c, r1, r2)             S390_RR(c, 0x14, r1, r2)
 #define s390_nrk(c, r1, r2)            S390_RRF_1(c, 0xb9f4, r1, r2)
-#define s390_ny(c, r, x, b, d)         S390_RRY(c, 0xe354, r1, r2)
+#define s390_ny(c, r, x, b, d)         S390_RXY(c, 0xe354, r, x, b, d)
 #define s390_o(c, r, x, b, d)          S390_RX(c, 0x56, r, x, b, d)
 #define s390_oihf(c, r, v)             S390_RIL_1(c, 0xc0c, r, v)
 #define s390_oihh(c, r, v)             S390_RI(c, 0xa58, r, v)
@@ -1452,6 +1484,9 @@ typedef struct {
 #define s390_ogr(c, r1, r2)            S390_RRE(c, 0xb981, r1, r2)
 #define s390_ogrk(c, r1, r2, r3)       S390_RRF_1(c, 0xb9e6, r1, r2, r3)
 #define s390_or(c, r1, r2)             S390_RR(c, 0x16, r1, r2)
+#define s390_oy(c, r, x, b, d)         S390_RXY(c, 0xe356, r, x, b, d)
+#define s390_popcnt(c, r1, m, r2)      S390_RRF_2(c, 0xb9e1, r1, m, r2)
+#define s390_risbg(c,r1,r2,i3,i4,i5)   S390_RIE_6(c, 0xec55, r1, r2, i3, i4, i5)
 #define s390_s(c, r, x, b, d)          S390_RX(c, 0x5b, r, x, b, d)
 #define s390_sdb(c, r, x, b, d)                S390_RXE(c, 0xed1b, r, x, b, d)
 #define s390_sdbr(c, r1, r2)           S390_RRE(c, 0xb31b, r1, r2)
index 2af9ffc..2c896b9 100644 (file)
@@ -147,11 +147,13 @@ r4_conv_to_i4: dest:i src1:f len:16
 r4_conv_to_u4: dest:i src1:f len:32
 r4_conv_to_i8: dest:i src1:f len:32
 r4_conv_to_r8: dest:f src1:f len:17
+r4_conv_to_u8: dest:i src1:f len:17
 r4_conv_to_r4: dest:f src1:f len:17
 r4_add: dest:f src1:f src2:f clob:1 len:5
 r4_sub: dest:f src1:f src2:f clob:1 len:5
 r4_mul: dest:f src1:f src2:f clob:1 len:5
 r4_div: dest:f src1:f src2:f clob:1 len:5
+r4_rem: dest:f src1:f src2:f clob:1 len:12
 r4_neg: dest:f src1:f clob:1 len:23
 r4_ceq: dest:i src1:f src2:f len:35
 r4_cgt: dest:i src1:f src2:f len:35
@@ -187,11 +189,13 @@ loadu1_membase: dest:i src1:b len:30
 loadu2_membase: dest:i src1:b len:30
 loadu4_mem: dest:i len:8
 loadu4_membase: dest:i src1:b len:30
-localloc: dest:i src1:i len:110
+localloc: dest:i src1:i len:180
 memory_barrier: len:10
 move: dest:i src1:i len:4
 mul_imm: dest:i src1:i len:24
 nop: len:4
+popcnt32: dest:i src1:i len:38
+popcnt64: dest:i src1:i len:34
 relaxed_nop: len:4
 arglist: src1:i len:28
 bigmul: len:2 dest:i src1:a src2:i
@@ -218,7 +222,18 @@ zext_i4: dest:i src1:i len:4
 shl_imm: dest:i src1:i len:10
 shr_imm: dest:i src1:i len:10
 shr_un_imm: dest:i src1:i len:10
+abs: dest:f src1:f len:4
+absf: dest:f src1:f len:4
+ceil: dest:f src1:f len:4
+ceilf: dest:f src1:f len:4
+floor: dest:f src1:f len:4
+floorf: dest:f src1:f len:4
+round: dest:f src1:f len:4
 sqrt: dest:f src1:f len:4
+sqrtf: dest:f src1:f len:4
+trunc: dest:f src1:f len:4
+truncf: dest:f src1:f len:4
+fcopysign: dest:f src1:f src2:f len:4
 start_handler: len:26
 store_membase_imm: dest:b len:46
 store_membase_reg: dest:b src1:i len:26
index bc4e9b2..4f0a400 100644 (file)
@@ -406,6 +406,9 @@ static const char *typeParm[] = { "General", "Base", "FPR8", "FPR4", "StructByVa
 
 /*====================== End of Global Variables ===================*/
 
+static GENERATE_TRY_GET_CLASS_WITH_CACHE (math, "System", "Math")
+static GENERATE_TRY_GET_CLASS_WITH_CACHE (mathf, "System", "MathF")
+
 /**
  *  
  * @brief Return general register name
@@ -834,7 +837,7 @@ mono_arch_cpu_optimizations (guint32 *exclude_mask)
        /*
          * No s390-specific optimizations yet
         */
-       *exclude_mask = MONO_OPT_LINEARS;
+       *exclude_mask = 0;
        return opts;
 }
 
@@ -1141,8 +1144,6 @@ enum_retvalue:
                        cinfo->struct_ret = 1;
                        cinfo->ret.size   = size;
                        cinfo->ret.vtsize = size;
-                       // cinfo->ret.reg = s390_r2;
-                       // sz->code_size += 4;
            }
                        break;
                case MONO_TYPE_VOID:
@@ -1361,23 +1362,14 @@ enum_retvalue:
                add_general (&gr, sz, &cinfo->sigCookie);
        }
 
-       /*----------------------------------------------------------*/
-       /* If we are passing a structure back then if it won't be   */
-       /* in a register(s) then we make room at the end of the     */
-       /* parameters that may have been placed on the stack        */
-       /*----------------------------------------------------------*/
+       /*
+        * If we are passing a structure back then we make room at
+        * the end of the parameters that may have been placed on 
+        * the stack
+        */
        if (cinfo->struct_ret) {
                cinfo->ret.offset = sz->stack_size;
-               switch (cinfo->ret.size) {
-               case 0:
-               case 1:
-               case 2:
-               case 4:
-               case 8:
-                       break;
-               default:
-                       sz->stack_size   += S390_ALIGN(cinfo->ret.size, align);
-               }
+               sz->stack_size   += S390_ALIGN(cinfo->ret.size, align);
        }
 
        cinfo->lastgr   = gr;
@@ -1436,7 +1428,9 @@ mono_arch_allocate_vars (MonoCompile *cfg)
 
        sig   = mono_method_signature_internal (cfg->method);
        
-       cinfo = get_call_info (cfg->mempool, sig);
+       if (!cfg->arch.cinfo)
+               cfg->arch.cinfo = get_call_info (cfg->mempool, sig);
+       cinfo = cfg->arch.cinfo;
 
        /*--------------------------------------------------------------*/
        /* local vars are at a positive offset from the stack pointer   */
@@ -1663,8 +1657,13 @@ void
 mono_arch_create_vars (MonoCompile *cfg)
 {
        MonoMethodSignature *sig = mono_method_signature_internal (cfg->method);
+       CallInfo *cinfo;
 
-       if (MONO_TYPE_ISSTRUCT (sig->ret)) {
+       if (!cfg->arch.cinfo)
+               cfg->arch.cinfo = get_call_info (cfg->mempool, sig);
+       cinfo = cfg->arch.cinfo;
+
+       if (cinfo->struct_ret) {
                cfg->vret_addr = mono_compile_create_var (cfg, mono_get_int_type (), OP_ARG);
                if (G_UNLIKELY (cfg->verbose_level > 1)) {
                        printf ("vret_addr = ");
@@ -3446,6 +3445,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        EMIT_COND_SYSTEM_EXCEPTION (S390_CC_LT, "OverflowException");
                        s390_lgfr (code, ins->dreg, ins->sreg1);
                        break;
+               case OP_RCONV_TO_R4:
+                       if (ins->dreg != ins->sreg1)
+                               s390_ler (code, ins->dreg, ins->sreg1);
+                       break;
+               case OP_RCONV_TO_R8:
+                       s390_ldebr (code, ins->dreg, ins->sreg1);
+                       break;
                case OP_FMOVE:
                        if (ins->dreg != ins->sreg1) {
                                s390_ldr   (code, ins->dreg, ins->sreg1);
@@ -3476,6 +3482,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_S390_SETF4RET:
                        if (!cfg->r4fp)
                                s390_ledbr (code, ins->dreg, ins->sreg1);
+                       else
+                               s390_ldr (code, ins->dreg, ins->sreg1);
                        break;
                case OP_TLS_GET: {
                        if (s390_is_imm16 (ins->inst_offset)) {
@@ -3513,7 +3521,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_TAILCALL :
                case OP_TAILCALL_REG :
                case OP_TAILCALL_MEMBASE : {
-                       MonoCallInst *call = (MonoCallInst *) ins;
+                       call = (MonoCallInst *) ins;
 
                        /*
                         * Restore SP to caller's SP
@@ -3592,55 +3600,72 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                        break;
                case OP_FCALL: {
-                       MonoCallInst *call = (MonoCallInst *) ins;
+                       call = (MonoCallInst *) ins;
                        const MonoJumpInfoTarget patch = mono_call_to_patch (call);
                        code = emit_call (cfg, code, patch.type, patch.target);
                        if (!cfg->r4fp && call->signature->ret->type == MONO_TYPE_R4)
                                s390_ldebr (code, s390_f0, s390_f0);
                }
                        break;
+               case OP_RCALL: {
+                       call = (MonoCallInst *) ins;
+                       const MonoJumpInfoTarget patch = mono_call_to_patch (call);
+                       code = emit_call (cfg, code, patch.type, patch.target);
+                       if (ins->dreg != s390_f0)
+                               s390_ldr (code, ins->dreg, s390_f0);
+                       break;
+               }
                case OP_LCALL:
                case OP_VCALL:
                case OP_VCALL2:
                case OP_VOIDCALL:
-               case OP_RCALL:
                case OP_CALL: {
-                       MonoCallInst *call = (MonoCallInst *) ins;
+                       call = (MonoCallInst *) ins;
                        const MonoJumpInfoTarget patch = mono_call_to_patch (call);
                        code = emit_call (cfg, code, patch.type, patch.target);
                }
                        break;
-               case OP_FCALL_REG: {
+               case OP_FCALL_REG:
                        call = (MonoCallInst*)ins;
                        s390_lgr  (code, s390_r1, ins->sreg1);
                        s390_basr (code, s390_r14, s390_r1);
                        if (!cfg->r4fp && call->signature->ret->type == MONO_TYPE_R4)
                                s390_ldebr (code, s390_f0, s390_f0);
-               }
                        break;
+               case OP_RCALL_REG:
+                       call = (MonoCallInst*)ins;
+                       s390_lgr  (code, s390_r1, ins->sreg1);
+                       s390_basr (code, s390_r14, s390_r1);
+                       if (ins->dreg != s390_f0)
+                               s390_ldr (code, ins->dreg, s390_f0);
+                        break;
                case OP_LCALL_REG:
                case OP_VCALL_REG:
                case OP_VCALL2_REG:
                case OP_VOIDCALL_REG:
-               case OP_RCALL_REG:
                case OP_CALL_REG: {
                        s390_lgr  (code, s390_r1, ins->sreg1);
                        s390_basr (code, s390_r14, s390_r1);
                }
                        break;
-               case OP_FCALL_MEMBASE: {
+               case OP_FCALL_MEMBASE:
                        call = (MonoCallInst*)ins;
                        s390_lg   (code, s390_r1, 0, ins->sreg1, ins->inst_offset);
                        s390_basr (code, s390_r14, s390_r1);
                        if (!cfg->r4fp && call->signature->ret->type == MONO_TYPE_R4)
                                s390_ldebr (code, s390_f0, s390_f0);
-               }
+                       break;
+               case OP_RCALL_MEMBASE:
+                       call = (MonoCallInst*)ins;
+                       s390_lg   (code, s390_r1, 0, ins->sreg1, ins->inst_offset);
+                       s390_basr (code, s390_r14, s390_r1);
+                       if (ins->dreg != s390_f0)
+                               s390_ldr (code, ins->dreg, s390_f0);
                        break;
                case OP_LCALL_MEMBASE:
                case OP_VCALL_MEMBASE:
                case OP_VCALL2_MEMBASE:
                case OP_VOIDCALL_MEMBASE:
-               case OP_RCALL_MEMBASE:
                case OP_CALL_MEMBASE: {
                        s390_lg   (code, s390_r1, 0, ins->sreg1, ins->inst_offset);
                        s390_basr (code, s390_r14, s390_r1);
@@ -3656,53 +3681,99 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 
                        area_offset = S390_ALIGN(area_offset, S390_STACK_ALIGNMENT);
 
-                        /*
-                         * Get alloc size and round to doubleword
-                         */  
-                       s390_lgr  (code, s390_r1, ins->sreg1);
-                       s390_aghi (code, s390_r1, 14);
+                       /* Get current backchain pointer */
+                       s390_lg  (code, s390_r13, 0, STK_BASE, 0);
+
+                       /*
+                        * Round object size to doubleword 
+                        */
+                       s390_lgr (code, s390_r1, ins->sreg1);
+                       s390_aghi (code, s390_r1, 7);
                        s390_srlg (code, s390_r1, s390_r1, 0, 3);
                        s390_sllg (code, s390_r1, s390_r1, 0, 3);
 
-                        /*
-                         * If we need to initialize then hold on to the length
-                         */ 
-                       if (ins->flags & MONO_INST_INIT) 
-                                s390_lgr  (code, s390_r0, s390_r1);
+                       if (mono_hwcap_s390x_has_gie) {
+                               if (ins->flags & MONO_INST_INIT)
+                                       s390_lgr  (code, s390_r0, s390_r1);     
+
+                               s390_lgr (code, s390_r0, s390_r1);
+                               s390_risbg (code, s390_r2, s390_r1, 0, 0xb3, 0);
+                               s390_sgrk (code, s390_r2, STK_BASE, s390_r2);
+
+                               s390_cgr (code, STK_BASE, s390_r2);                     /* L0: */
+                               s390_je (code, 9);                                      /* je L1 */
+                               s390_aghi (code, STK_BASE, -4096);
+                               s390_mvghi (code, s390_r15, 0, 0);
+                               s390_j (code, -9);                                      /* j L0 */
+
+                               s390_risbg (code, s390_r2, s390_r1, 0x34, 0xbf, 0);     /* L1: */
+                               s390_ltgr (code, s390_r2, s390_r2);
+                               s390_jz (code, 13);                                     /* jz L2: */
+
+                               s390_sgr (code, STK_BASE, s390_r2);
+                               s390_risbg (code, s390_r1, s390_r1, 0x34, 0xbf, 0);
+                               s390_lay (code, s390_r1, s390_r1, STK_BASE, -8);
+                               s390_mvghi (code, s390_r1, 0, 0);
+                                                                                       /* L2: */
+                       } else {
+                               s390_lgr (code, s390_r2, s390_r1);
+                               s390_nill (code, s390_r2, 0xf000);
+                               s390_lgr (code, s390_r0, STK_BASE);
+                               s390_sgr (code, s390_r0, s390_r2);
+                               s390_lgr (code, s390_r2, s390_r0);
+
+                               s390_cgr (code, STK_BASE, s390_r2);                     /* L0: */
+                               s390_je (code, 11);                                     /* je L1 */
+                               s390_aghi (code, STK_BASE, -4096);
+                               s390_lghi (code, s390_r0, 0);
+                               s390_stg (code, s390_r0, 0, STK_BASE, 4088);
+                               s390_j (code, -11);                                     /* j L0 */
+                                
+                               s390_lghi (code, s390_r2, 4095);                        /* L1: */
+                               s390_ngr (code, s390_r2, s390_r1);
+                               s390_ltgr (code, s390_r2, s390_r2);
+                               s390_jz (code, 7);                                      /* jz L2 */
+
+                               s390_sgr (code, STK_BASE, s390_r2);
+                               s390_stg (code, s390_r2, s390_r1, STK_BASE, -8);
+                                                                                       /* L2: */
+                               if (ins->flags & MONO_INST_INIT)
+                                       s390_lgr  (code, s390_r0, s390_r1);     
+                       }
 
-                        /*
-                         * Adjust the stack pointer and save the backchain
-                         */ 
-                       s390_lg   (code, s390_r13, 0, STK_BASE, 0);
-                       s390_sgr  (code, STK_BASE, s390_r1);
-                       s390_stg  (code, s390_r13, 0, STK_BASE, 0);
+                       /* 
+                        * Compute address of localloc'd object 
+                        */
+                       s390_lgr (code, s390_r1, STK_BASE);
+                       if (s390_is_imm16(area_offset))
+                               s390_aghi (code, s390_r1, area_offset);
+                       else
+                               s390_agfi (code, s390_r1, area_offset);
+                       s390_aghi (code, s390_r1, 7);
+                       s390_srlg (code, s390_r1, s390_r1, 0, 3);
+                       s390_sllg (code, s390_r1, s390_r1, 0, 3);
+                       s390_lgr (code, ins->dreg, s390_r1);
 
-                        /*
-                         * Skip the stack save requirements and point to localloc area 
-                         * and ensure it's correctly aligned
-                         */
-                       s390_la   (code, ins->dreg, 0, STK_BASE, area_offset);
-                       s390_aghi (code, ins->dreg, 7);
-                       s390_srlg (code, ins->dreg, ins->dreg, 0, 3);
-                       s390_sllg (code, ins->dreg, ins->dreg, 0, 3);
+                       /* Save backchain pointer */
+                       s390_stg (code, s390_r13, 0, STK_BASE, 0);
 
                         /*
                          * If we need to zero the area then clear from localloc start
                          * using the length we saved earlier
-                         */ 
-                       if (ins->flags & MONO_INST_INIT) {
-                               s390_lgr  (code, s390_r1, s390_r0);
-                               s390_lgr  (code, s390_r0, ins->dreg);
-                               s390_lgr  (code, s390_r14, s390_r12);
-                               s390_lghi (code, s390_r13, 0);
-                               s390_mvcle(code, s390_r0, s390_r12, 0, 0);
-                               s390_jo   (code, -2);
-                               s390_lgr  (code, s390_r12, s390_r14);
-                       }
-
-                        /*
-                         * If we have an LMF then we have to adjust its BP 
                          */
+                        if (ins->flags & MONO_INST_INIT) {
+                                s390_lgr  (code, s390_r1, s390_r0);
+                                s390_lgr  (code, s390_r0, ins->dreg);
+                                s390_lgr  (code, s390_r14, s390_r12);
+                                s390_lghi (code, s390_r13, 0);
+                                s390_mvcle(code, s390_r0, s390_r12, 0, 0);
+                                s390_jo   (code, -2);
+                                s390_lgr  (code, s390_r12, s390_r14);
+                        }
+
+                       /*
+                       * If we have an LMF then we have to adjust its BP
+                       */
                        if (cfg->method->save_lmf) {
                                int lmfOffset = cfg->stack_usage - sizeof(MonoLMF);
 
@@ -3714,7 +3785,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                        S390_SET  (code, s390_r13, lmfOffset);
                                }
                                s390_stg (code, s390_r15, s390_r13, cfg->frame_reg,
-                                          MONO_STRUCT_OFFSET(MonoLMF, ebp));
+                                         MONO_STRUCT_OFFSET(MonoLMF, ebp));
                        }
                }
                        break;
@@ -4085,9 +4156,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        } else {
                                S390_SET (code, s390_r13, ins->inst_p0);
                                s390_le (code, ins->dreg, 0, s390_r13, 0);
-                               if (!cfg->r4fp) {
+                               if (!cfg->r4fp)
                                        s390_ldebr (code, ins->dreg, ins->dreg);
-                               }
+                               else
+                                       s390_le (code, ins->dreg, 0, s390_r13, 0);
                        }
                }
                        break;
@@ -4135,7 +4207,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_LCONV_TO_R_UN: {
                        if (mono_hwcap_s390x_has_fpe) {
-                               s390_cdlgbr (code, ins->dreg, 5, ins->sreg1, 0);
+                               s390_cdlgbr (code, ins->dreg, 6, ins->sreg1, 0);
                        } else {
                                short int *jump;
                                s390_lgdr  (code, s390_r0, s390_r15);
@@ -4214,7 +4286,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_FCONV_TO_U4:
                case OP_FCONV_TO_U:
                        if (mono_hwcap_s390x_has_fpe) {
-                               s390_clfdbr (code, ins->dreg, 5, ins->sreg1, 0);
+                               s390_clgdbr (code, ins->dreg, 5, ins->sreg1, 0);
                        } else {
                                code = emit_double_to_int (cfg, code, ins->dreg, ins->sreg1, 4, FALSE);
                        }
@@ -4265,11 +4337,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;
                case OP_RCONV_TO_I4:
                case OP_RCONV_TO_I:
-                       s390_cfebr (code, ins->dreg, 5, ins->sreg1);
+                       s390_cgebr (code, ins->dreg, 5, ins->sreg1);
                        break;
                case OP_RCONV_TO_U4:
                        if (mono_hwcap_s390x_has_fpe) {
-                               s390_clfebr (code, ins->dreg, 5, ins->sreg1, 0);
+                               s390_clgebr (code, ins->dreg, 5, ins->sreg1, 0);
                        } else {
                                code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, FALSE);
                        }
@@ -4308,14 +4380,42 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        PTRSLOT(code, o[4]);
                }
                        break;
-               case OP_ABS: {
+               case OP_ABS:
                        s390_lpdbr (code, ins->dreg, ins->sreg1);
-               }
                        break;
-               case OP_SQRT: {
+                case OP_ABSF:
+                        s390_lpebr (code, ins->dreg, ins->sreg1);
+                        break;
+                case OP_CEIL:
+                        s390_fidbra (code, ins->dreg, 6, ins->sreg1, 4);
+                        break;
+                case OP_CEILF:
+                        s390_fiebra (code, ins->dreg, 6, ins->sreg1, 4);
+                        break;
+                case OP_FLOOR:
+                        s390_fidbra (code, ins->dreg, 7, ins->sreg1, 4);
+                        break;
+                case OP_FLOORF:
+                        s390_fiebra (code, ins->dreg, 7, ins->sreg1, 4);
+                        break;
+                case OP_FCOPYSIGN:
+                        s390_cpsdr (code, ins->dreg, ins->sreg2, ins->sreg1);
+                        break;
+                case OP_ROUND:
+                        s390_fidbra (code, ins->dreg, 4, ins->sreg1, 4);
+                        break;
+               case OP_SQRT:
                        s390_sqdbr (code, ins->dreg, ins->sreg1);
-               }
                        break;
+                case OP_SQRTF:
+                        s390_sqebr (code, ins->dreg, ins->sreg1);
+                        break;
+                case OP_TRUNC:
+                        s390_fidbra (code, ins->dreg, 5, ins->sreg1, 4);
+                        break;
+                case OP_TRUNCF:
+                        s390_fiebra (code, ins->dreg, 5, ins->sreg1, 4);
+                        break;
                case OP_FADD: {
                        CHECK_SRCDST_COM_F;
                        s390_adbr (code, ins->dreg, src2);
@@ -4433,56 +4533,80 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                }
                        break;
                case OP_RCEQ: {
-                       s390_cdbr  (code, ins->sreg1, ins->sreg2);
+                        if (cfg->r4fp)
+                                s390_cebr  (code, ins->sreg1, ins->sreg2);
+                        else
+                                s390_cdbr  (code, ins->sreg1, ins->sreg2);
                        s390_lghi  (code, ins->dreg, 1);
                        s390_je    (code, 4);
                        s390_lghi  (code, ins->dreg, 0);
                }
                        break;
                case OP_RCLT: {
-                       s390_cdbr  (code, ins->sreg1, ins->sreg2);
+                        if (cfg->r4fp)
+                                s390_cebr  (code, ins->sreg1, ins->sreg2);
+                        else
+                                s390_cdbr  (code, ins->sreg1, ins->sreg2);
                        s390_lghi  (code, ins->dreg, 1);
                        s390_jl    (code, 4);
                        s390_lghi  (code, ins->dreg, 0);
                }
                        break;
                case OP_RCLT_UN: {
-                       s390_cdbr  (code, ins->sreg1, ins->sreg2);
+                        if (cfg->r4fp)
+                                s390_cebr  (code, ins->sreg1, ins->sreg2);
+                        else
+                                s390_cdbr  (code, ins->sreg1, ins->sreg2);
                        s390_lghi  (code, ins->dreg, 1);
                        s390_jlo   (code, 4);
                        s390_lghi  (code, ins->dreg, 0);
                }
                        break;
                case OP_RCGT: {
-                       s390_cdbr  (code, ins->sreg1, ins->sreg2);
+                        if (cfg->r4fp)
+                                s390_cebr  (code, ins->sreg1, ins->sreg2);
+                        else
+                                s390_cdbr  (code, ins->sreg1, ins->sreg2);
                        s390_lghi  (code, ins->dreg, 1);
                        s390_jh    (code, 4);
                        s390_lghi  (code, ins->dreg, 0);
                }
                        break;
                case OP_RCGT_UN: {
-                       s390_cdbr  (code, ins->sreg1, ins->sreg2);
+                        if (cfg->r4fp)
+                                s390_cebr  (code, ins->sreg1, ins->sreg2);
+                        else
+                                s390_cdbr  (code, ins->sreg1, ins->sreg2);
                        s390_lghi  (code, ins->dreg, 1);
                        s390_jho   (code, 4);
                        s390_lghi  (code, ins->dreg, 0);
                }
                        break;
                case OP_RCNEQ: {
-                       s390_cdbr  (code, ins->sreg1, ins->sreg2);
+                        if (cfg->r4fp)
+                                s390_cebr  (code, ins->sreg1, ins->sreg2);
+                        else
+                                s390_cdbr  (code, ins->sreg1, ins->sreg2);
                        s390_lghi  (code, ins->dreg, 1);
                        s390_jne   (code, 4);
                        s390_lghi  (code, ins->dreg, 0);
                }
                        break;
                case OP_RCGE: {
-                       s390_cdbr  (code, ins->sreg1, ins->sreg2);
+                        if (cfg->r4fp)
+                                s390_cebr  (code, ins->sreg1, ins->sreg2);
+                        else
+                                s390_cdbr  (code, ins->sreg1, ins->sreg2);
                        s390_lghi  (code, ins->dreg, 1);
                        s390_jhe   (code, 4);
                        s390_lghi  (code, ins->dreg, 0);
                }
                        break;
                case OP_RCLE: {
-                       s390_cebr  (code, ins->sreg1, ins->sreg2);
+                        if (cfg->r4fp)
+                                s390_cebr  (code, ins->sreg1, ins->sreg2);
+                        else
+                                s390_cdbr  (code, ins->sreg1, ins->sreg2);
                        s390_lghi  (code, ins->dreg, 1);
                        s390_jle   (code, 4);
                        s390_lghi  (code, ins->dreg, 0);
@@ -4655,6 +4779,32 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                case OP_MEMORY_BARRIER:
                        s390_mem (code);
                        break;
+                case OP_POPCNT32:
+                        s390_llgfr (code, s390_r1, ins->sreg1);
+                        if (mono_hwcap_s390x_has_mie3) {
+                                s390_popcnt (code, ins->dreg, 0x80, s390_r1);
+                        } else {
+                                s390_popcnt (code, s390_r0, 0, s390_r1);
+                                s390_ahhlr  (code, s390_r0, s390_r0, s390_r0);
+                                s390_sllg   (code, s390_r1, s390_r0, 0, 16);
+                                s390_algr   (code, s390_r0, s390_r1);
+                                s390_sllg   (code, s390_r1, s390_r0, 0, 8);
+                                s390_algr   (code, s390_r0, s390_r1);
+                                s390_srlg   (code, ins->dreg, s390_r0, 0, 56);
+                        }
+                        break;
+                case OP_POPCNT64:
+                        if (mono_hwcap_s390x_has_mie3) {
+                                s390_popcnt (code, ins->dreg, 0x80, ins->sreg1);
+                        } else {
+                                s390_ahhlr  (code, s390_r0, s390_r0, s390_r0);
+                                s390_sllg   (code, s390_r1, s390_r0, 0, 16);
+                                s390_algr   (code, s390_r0, s390_r1);
+                                s390_sllg   (code, s390_r1, s390_r0, 0, 8);
+                                s390_algr   (code, s390_r0, s390_r1);
+                                s390_srlg   (code, ins->dreg, s390_r0, 0, 56);
+                        }
+                        break;
                case OP_LIVERANGE_START: {
                        if (cfg->verbose_level > 1)
                                printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
@@ -5131,17 +5281,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        }
                        break;
                case OP_EXTRACT_I1:
+               case OP_EXTRACT_U1:
                        amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
                        if (ins->inst_c0)
                                amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8);
-                       amd64_widen_reg (code, ins->dreg, ins->dreg, ins->inst_c1 == MONO_TYPE_I1, FALSE);
+                       amd64_widen_reg (code, ins->dreg, ins->dreg, ins->inst_c1 == OP_EXTRACT_I1, FALSE);
                        break;
                case OP_EXTRACT_I2:
+               case OP_EXTRACT_U2:
                        /*amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4);
                        if (ins->inst_c0)
                                amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, 16, 4);*/
                        s390x_pextrw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);
-                       amd64_widen_reg_size (code, ins->dreg, ins->dreg, ins->inst_c1 == MONO_TYPE_I2, TRUE, 4);
+                       amd64_widen_reg_size (code, ins->dreg, ins->dreg, ins->inst_c1 == OP_EXTRACT_I2, TRUE, 4);
                        break;
                case OP_EXTRACT_R8:
                        if (ins->inst_c0)
@@ -5554,15 +5706,19 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                           cfg->rgctx_var->inst_offset);
        }
 
-#if 0
+#if 1
+char *methodName = getenv("MONO_TRACE_METHOD");
+if (methodName != NULL) {
 printf("ns: %s k: %s m: %s\n",method->klass->name_space,method->klass->name,method->name);fflush(stdout);
 // Tests:set_ip
-if ((strcmp(method->klass->name_space,"") == 0) && 
-    (strcmp(method->klass->name,"Tests") == 0) &&
-    (strcmp(method->name, "set_ip") == 0)) {
-    // (strcmp("CancellationToken,TaskCreationOptions,TaskContinuationOptions,TaskScheduler",mono_signature_get_desc(method->signature, FALSE)) != 0))  {
- printf("SIGNATURE: %s\n",mono_signature_get_desc(method->signature, FALSE)); fflush(stdout);
- s390_j (code, 0);
+//if ((strcmp(method->klass->name_space,"") == 0) && 
+//    (strcmp(method->klass->name,"Tests") == 0) &&
+//    (strcmp(method->name, "set_ip") == 0)) {
+// (strcmp("CancellationToken,TaskCreationOptions,TaskContinuationOptions,TaskScheduler",mono_signature_get_desc(method->signature, FALSE)) != 0))  {
+   if ((strcmp(method->name, methodName) == 0)) {
+       printf("SIGNATURE: %s\n",mono_signature_get_desc(method->signature, FALSE)); fflush(stdout);
+       s390_j (code, 0);
+   }
 }
 #endif
 
@@ -5583,13 +5739,16 @@ if ((strcmp(method->klass->name_space,"") == 0) &&
        sig = mono_method_signature_internal (method);
        pos = 0;
 
-       cinfo = get_call_info (cfg->mempool, sig);
+       cinfo = cfg->arch.cinfo;
 
        if (cinfo->struct_ret) {
                ArgInfo *ainfo     = &cinfo->ret;
                inst               = cfg->vret_addr;
                inst->backend.size = ainfo->vtsize;
-               s390_stg (code, ainfo->reg, 0, inst->inst_basereg, inst->inst_offset);
+               if (inst->opcode == OP_REGVAR)
+                       s390_lgr (code, inst->dreg, ainfo->reg);
+               else
+                       s390_stg (code, ainfo->reg, 0, inst->inst_basereg, inst->inst_offset);
        }
 
        /**
@@ -6067,6 +6226,98 @@ mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMetho
 {
        MonoInst *ins = NULL;
 
+       int opcode = 0;
+       MonoStackType stack_type = STACK_R8;
+
+       if (cmethod->klass == mono_class_try_get_math_class ()) {
+               // unary double
+               if (fsig->param_count == 1 && fsig->params [0]->type == MONO_TYPE_R8) {
+                       if (strcmp (cmethod->name, "Abs") == 0) {
+                               opcode = OP_ABS;
+                       } else if (strcmp (cmethod->name, "Ceiling") == 0) {
+                               opcode = OP_CEIL;
+                       } else if (strcmp (cmethod->name, "Floor") == 0) {
+                               opcode = OP_FLOOR;
+                       } else if (strcmp (cmethod->name, "Round") == 0) {
+                               opcode = OP_ROUND;
+                       } else if (strcmp (cmethod->name, "Sqrt") == 0) {
+                               opcode = OP_SQRT;
+                       } else if (strcmp (cmethod->name, "Truncate") == 0) {
+                               opcode = OP_TRUNC;
+                       }
+               }
+               // unary float (overloaded)
+               else if (fsig->param_count == 1 && fsig->params [0]->type == MONO_TYPE_R4) {
+                       if (strcmp (cmethod->name, "Abs") == 0) {
+                               if (cfg->r4fp) {
+                                       opcode = OP_ABSF;
+                                       stack_type = STACK_R4;
+                               } else {
+                                       opcode = OP_ABS;
+                               }
+                       }
+               }
+               // binary double
+               else if (fsig->param_count == 2 && fsig->params [0]->type == MONO_TYPE_R8 && fsig->params [1]->type == MONO_TYPE_R8) {
+                       if (strcmp (cmethod->name, "CopySign") == 0) {
+                               opcode = OP_FCOPYSIGN;
+                       }
+               }
+       } else if (cmethod->klass == mono_class_try_get_mathf_class ()) {
+               if (fsig->param_count == 1) {
+                       stack_type = STACK_R4;
+                       if (strcmp (cmethod->name, "Abs") == 0) {
+                               if (cfg->r4fp) {
+                                       opcode = OP_ABSF;
+                                       stack_type = STACK_R4;
+                               } else {
+                                       opcode = OP_ABS;
+                               }
+                       } else if (strcmp (cmethod->name, "Ceiling") == 0) {
+                               if (cfg->r4fp) {
+                                       opcode = OP_CEILF;
+                                       stack_type = STACK_R4;
+                               } else {
+                                       opcode = OP_CEIL;
+                               }
+                       } else if (strcmp (cmethod->name, "Floor") == 0) {
+                               if (cfg->r4fp) {
+                                       opcode = OP_FLOORF;
+                                       stack_type = STACK_R4;
+                               } else {
+                                       opcode = OP_FLOOR;
+                               }
+                       } else if (strcmp (cmethod->name, "Sqrt") == 0) {
+                               if (cfg->r4fp) {
+                                       opcode = OP_SQRTF;
+                                       stack_type = STACK_R4;
+                               } else {
+                                       opcode = OP_SQRT;
+                               }
+                       } else if (strcmp (cmethod->name, "Truncate") == 0) {
+                               if (cfg->r4fp) {
+                                       opcode = OP_TRUNCF;
+                                       stack_type = STACK_R4;
+                               } else {
+                                       opcode = OP_TRUNC;
+                               }
+                               opcode = OP_TRUNCF;
+                       }
+               }
+       }
+
+       if (opcode) {
+               MONO_INST_NEW (cfg, ins, opcode);
+               ins->type = stack_type;
+               ins->dreg = mono_alloc_freg (cfg);
+               ins->sreg1 = args [0]->dreg;
+               if (fsig->param_count > 1) {
+                       ins->sreg2 = args [1]->dreg;
+               }
+               g_assert (fsig->param_count <= 2);
+               MONO_ADD_INS (cfg->cbb, ins);
+       }
+
        return ins;
 }
 
index d7d3566..9a1aee5 100644 (file)
@@ -38,6 +38,7 @@ typedef struct MonoCompileArch {
        int         fpSize;             /** Size of floating point save area */
        MonoInst    *ss_tramp_var;      /** Single-step variable */
        MonoInst    *bp_tramp_var;      /** Breakpoint variable */
+       CallInfo    *cinfo;             /** Caller information */
        guint8      *thunks;            /** Thunking area */
        int         thunks_size;        /** Size of thunking area */
 } MonoCompileArch;
@@ -57,7 +58,6 @@ struct SeqPointInfo {
 };
 
 #define MONO_ARCH_SIGSEGV_ON_ALTSTACK                  1
-#define MONO_ARCH_EMULATE_LCONV_TO_R8_UN               1
 #define MONO_ARCH_NO_EMULATE_LONG_MUL_OPTS             1
 #define MONO_ARCH_NO_EMULATE_LONG_SHIFT_OPS            1
 #define MONO_ARCH_HAVE_IS_INT_OVERFLOW                 1
@@ -83,6 +83,7 @@ struct SeqPointInfo {
 #define MONO_ARCH_HAVE_SDB_TRAMPOLINES                 1
 #define MONO_ARCH_HAVE_SETUP_RESUME_FROM_SIGNAL_HANDLER_CTX    1
 #define MONO_ARCH_HAVE_UNWIND_BACKTRACE                1
+#define MONO_ARCH_FLOAT32_SUPPORTED                    1
 
 #define S390_STACK_ALIGNMENT            8
 #define S390_FIRST_ARG_REG             s390_r2