From: syeon.hwang Date: Thu, 12 Jul 2012 11:03:35 +0000 (+0900) Subject: [Title] Apply SSE2 instruction patch from QEMU 1.1 X-Git-Tag: Tizen_Studio_1.3_Release_p2.3.1~1528^2~121 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=68716c40571eca5b1691c2b17c03495ab514849c;p=sdk%2Femulator%2Fqemu.git [Title] Apply SSE2 instruction patch from QEMU 1.1 [Type] [Module] [Priority] [CQ#] [Redmine#] [Problem] [Cause] [Solution] [TestCase] --- diff --git a/target-i386/TODO b/target-i386/TODO index c8ada075d0..a8d69cf87f 100644 --- a/target-i386/TODO +++ b/target-i386/TODO @@ -15,7 +15,6 @@ Correctness issues: - DRx register support - CR0.AC emulation - SSE alignment checks -- fix SSE min/max with nans Optimizations/Features: diff --git a/target-i386/helper.h b/target-i386/helper.h index 6b518ad89f..761954e925 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -197,6 +197,7 @@ DEF_HELPER_2(lzcnt, tl, tl, int) /* MMX/SSE */ +DEF_HELPER_1(ldmxcsr, void, i32) DEF_HELPER_0(enter_mmx, void) DEF_HELPER_0(emms, void) DEF_HELPER_2(movq, void, ptr, ptr) diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h index 47dde78f89..0d33ca1985 100644 --- a/target-i386/ops_sse.h +++ b/target-i386/ops_sse.h @@ -584,10 +584,15 @@ void helper_ ## name ## sd (Reg *d, Reg *s)\ #define FPU_SUB(size, a, b) float ## size ## _sub(a, b, &env->sse_status) #define FPU_MUL(size, a, b) float ## size ## _mul(a, b, &env->sse_status) #define FPU_DIV(size, a, b) float ## size ## _div(a, b, &env->sse_status) -#define FPU_MIN(size, a, b) (a) < (b) ? (a) : (b) -#define FPU_MAX(size, a, b) (a) > (b) ? (a) : (b) #define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status) +/* Note that the choice of comparison op here is important to get the + * special cases right: for min and max Intel specifies that (-0,0), + * (NaN, anything) and (anything, NaN) return the second argument. + */ +#define FPU_MIN(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? 
(a) : (b) +#define FPU_MAX(size, a, b) float ## size ## _lt(b, a, &env->sse_status) ? (a) : (b) + SSE_HELPER_S(add, FPU_ADD) SSE_HELPER_S(sub, FPU_SUB) SSE_HELPER_S(mul, FPU_MUL) @@ -1648,10 +1653,10 @@ void glue(helper_roundps, SUFFIX) (Reg *d, Reg *s, uint32_t mode) break; } - d->L(0) = float64_round_to_int(s->L(0), &env->sse_status); - d->L(1) = float64_round_to_int(s->L(1), &env->sse_status); - d->L(2) = float64_round_to_int(s->L(2), &env->sse_status); - d->L(3) = float64_round_to_int(s->L(3), &env->sse_status); + d->XMM_S(0) = float32_round_to_int(s->XMM_S(0), &env->sse_status); + d->XMM_S(1) = float32_round_to_int(s->XMM_S(1), &env->sse_status); + d->XMM_S(2) = float32_round_to_int(s->XMM_S(2), &env->sse_status); + d->XMM_S(3) = float32_round_to_int(s->XMM_S(3), &env->sse_status); #if 0 /* TODO */ if (mode & (1 << 3)) @@ -1684,8 +1689,8 @@ void glue(helper_roundpd, SUFFIX) (Reg *d, Reg *s, uint32_t mode) break; } - d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status); - d->Q(1) = float64_round_to_int(s->Q(1), &env->sse_status); + d->XMM_D(0) = float64_round_to_int(s->XMM_D(0), &env->sse_status); + d->XMM_D(1) = float64_round_to_int(s->XMM_D(1), &env->sse_status); #if 0 /* TODO */ if (mode & (1 << 3)) @@ -1718,7 +1723,7 @@ void glue(helper_roundss, SUFFIX) (Reg *d, Reg *s, uint32_t mode) break; } - d->L(0) = float64_round_to_int(s->L(0), &env->sse_status); + d->XMM_S(0) = float32_round_to_int(s->XMM_S(0), &env->sse_status); #if 0 /* TODO */ if (mode & (1 << 3)) @@ -1751,7 +1756,7 @@ void glue(helper_roundsd, SUFFIX) (Reg *d, Reg *s, uint32_t mode) break; } - d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status); + d->XMM_D(0) = float64_round_to_int(s->XMM_D(0), &env->sse_status); #if 0 /* TODO */ if (mode & (1 << 3)) @@ -1770,44 +1775,44 @@ SSE_HELPER_I(helper_pblendw, W, 8, FBLENDP) void glue(helper_dpps, SUFFIX) (Reg *d, Reg *s, uint32_t mask) { - float32 iresult = 0 /*float32_zero*/; + float32 iresult = float32_zero; if (mask & (1 << 4)) iresult = 
float32_add(iresult, - float32_mul(d->L(0), s->L(0), &env->sse_status), + float32_mul(d->XMM_S(0), s->XMM_S(0), &env->sse_status), &env->sse_status); if (mask & (1 << 5)) iresult = float32_add(iresult, - float32_mul(d->L(1), s->L(1), &env->sse_status), + float32_mul(d->XMM_S(1), s->XMM_S(1), &env->sse_status), &env->sse_status); if (mask & (1 << 6)) iresult = float32_add(iresult, - float32_mul(d->L(2), s->L(2), &env->sse_status), + float32_mul(d->XMM_S(2), s->XMM_S(2), &env->sse_status), &env->sse_status); if (mask & (1 << 7)) iresult = float32_add(iresult, - float32_mul(d->L(3), s->L(3), &env->sse_status), + float32_mul(d->XMM_S(3), s->XMM_S(3), &env->sse_status), &env->sse_status); - d->L(0) = (mask & (1 << 0)) ? iresult : 0 /*float32_zero*/; - d->L(1) = (mask & (1 << 1)) ? iresult : 0 /*float32_zero*/; - d->L(2) = (mask & (1 << 2)) ? iresult : 0 /*float32_zero*/; - d->L(3) = (mask & (1 << 3)) ? iresult : 0 /*float32_zero*/; + d->XMM_S(0) = (mask & (1 << 0)) ? iresult : float32_zero; + d->XMM_S(1) = (mask & (1 << 1)) ? iresult : float32_zero; + d->XMM_S(2) = (mask & (1 << 2)) ? iresult : float32_zero; + d->XMM_S(3) = (mask & (1 << 3)) ? iresult : float32_zero; } void glue(helper_dppd, SUFFIX) (Reg *d, Reg *s, uint32_t mask) { - float64 iresult = 0 /*float64_zero*/; + float64 iresult = float64_zero; if (mask & (1 << 4)) iresult = float64_add(iresult, - float64_mul(d->Q(0), s->Q(0), &env->sse_status), + float64_mul(d->XMM_D(0), s->XMM_D(0), &env->sse_status), &env->sse_status); if (mask & (1 << 5)) iresult = float64_add(iresult, - float64_mul(d->Q(1), s->Q(1), &env->sse_status), + float64_mul(d->XMM_D(1), s->XMM_D(1), &env->sse_status), &env->sse_status); - d->Q(0) = (mask & (1 << 0)) ? iresult : 0 /*float64_zero*/; - d->Q(1) = (mask & (1 << 1)) ? iresult : 0 /*float64_zero*/; + d->XMM_D(0) = (mask & (1 << 0)) ? iresult : float64_zero; + d->XMM_D(1) = (mask & (1 << 1)) ? 
iresult : float64_zero; } void glue(helper_mpsadbw, SUFFIX) (Reg *d, Reg *s, uint32_t offset) diff --git a/target-i386/translate.c b/target-i386/translate.c index 475614c31b..c97cf844fc 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -7545,7 +7545,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); if (op == 2) { gen_op_ld_T0_A0(OT_LONG + s->mem_index); - tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr)); + gen_helper_ldmxcsr(cpu_T[0]); } else { tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr)); gen_op_st_T0_A0(OT_LONG + s->mem_index); diff --git a/tizen/src/hw/op_helper.c b/tizen/src/hw/op_helper.c index c89e4a49db..2aea71bf85 100755 --- a/tizen/src/hw/op_helper.c +++ b/tizen/src/hw/op_helper.c @@ -52,11 +52,11 @@ static inline target_long lshift(target_long x, int n) } } -#define RC_MASK 0xc00 -#define RC_NEAR 0x000 -#define RC_DOWN 0x400 -#define RC_UP 0x800 -#define RC_CHOP 0xc00 +#define FPU_RC_MASK 0xc00 +#define FPU_RC_NEAR 0x000 +#define FPU_RC_DOWN 0x400 +#define FPU_RC_UP 0x800 +#define FPU_RC_CHOP 0xc00 #define MAXTAN 9223372036854775808.0 @@ -4024,18 +4024,18 @@ static void update_fp_status(void) int rnd_type; /* set rounding mode */ - switch(env->fpuc & RC_MASK) { + switch(env->fpuc & FPU_RC_MASK) { default: - case RC_NEAR: + case FPU_RC_NEAR: rnd_type = float_round_nearest_even; break; - case RC_DOWN: + case FPU_RC_DOWN: rnd_type = float_round_down; break; - case RC_UP: + case FPU_RC_UP: rnd_type = float_round_up; break; - case RC_CHOP: + case FPU_RC_CHOP: rnd_type = float_round_to_zero; break; } @@ -5629,6 +5629,50 @@ void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1) /* MMX/SSE */ /* XXX: optimize by storing fptt and fptags in the static cpu state */ + +#define SSE_DAZ 0x0040 +#define SSE_RC_MASK 0x6000 +#define SSE_RC_NEAR 0x0000 +#define SSE_RC_DOWN 0x2000 +#define SSE_RC_UP 0x4000 +#define SSE_RC_CHOP 0x6000 
+#define SSE_FZ 0x8000
+/* Mirror the MXCSR rounding, DAZ and FZ controls into env->sse_status. */
+static void update_sse_status(void)
+{
+    int rnd_type;
+
+    /* set rounding mode from the MXCSR RC field */
+    switch(env->mxcsr & SSE_RC_MASK) {
+    default:
+    case SSE_RC_NEAR:
+        rnd_type = float_round_nearest_even;
+        break;
+    case SSE_RC_DOWN:
+        rnd_type = float_round_down;
+        break;
+    case SSE_RC_UP:
+        rnd_type = float_round_up;
+        break;
+    case SSE_RC_CHOP:
+        rnd_type = float_round_to_zero;
+        break;
+    }
+    set_float_rounding_mode(rnd_type, &env->sse_status);
+
+    /* set denormals are zero */
+    set_flush_inputs_to_zero((env->mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
+
+    /* set flush to zero (SSE ops use sse_status, not fp_status) */
+    set_flush_to_zero((env->mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
+}
+/* Store a new MXCSR value and refresh the sse_status fields derived from it. */
+void helper_ldmxcsr(uint32_t val)
+{
+    env->mxcsr = val;
+    update_sse_status();
+}
+
 void helper_enter_mmx(void)
 {
     env->fpstt = 0;