#define FPU_SUB(size, a, b) float ## size ## _sub(a, b, &env->sse_status)
#define FPU_MUL(size, a, b) float ## size ## _mul(a, b, &env->sse_status)
#define FPU_DIV(size, a, b) float ## size ## _div(a, b, &env->sse_status)
-#define FPU_MIN(size, a, b) (a) < (b) ? (a) : (b)
-#define FPU_MAX(size, a, b) (a) > (b) ? (a) : (b)
#define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status)
+/* Note that the choice of comparison op here is important to get the
+ * special cases right: for min and max Intel specifies that (-0,0),
+ * (NaN, anything) and (anything, NaN) return the second argument.
+ */
+#define FPU_MIN(size, a, b) float ## size ## _lt(a, b, &env->sse_status) ? (a) : (b)
+#define FPU_MAX(size, a, b) float ## size ## _lt(b, a, &env->sse_status) ? (a) : (b)
+
SSE_HELPER_S(add, FPU_ADD)
SSE_HELPER_S(sub, FPU_SUB)
SSE_HELPER_S(mul, FPU_MUL)
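
As a rough standalone illustration of the selection rule the new FPU_MIN/FPU_MAX macros implement (host floats and a made-up minps_scalar name, not QEMU softfloat):

    #include <math.h>
    #include <stdio.h>

    /* Model of the MINPS selection rule: return the second operand
     * whenever "a < b" is not true, which covers (-0, 0), (NaN, x)
     * and (x, NaN) exactly as the comment above describes. */
    static float minps_scalar(float a, float b)
    {
        return (a < b) ? a : b;
    }

    int main(void)
    {
        printf("%g\n", minps_scalar(-0.0f, 0.0f)); /* 0   (second operand) */
        printf("%g\n", minps_scalar(NAN, 1.0f));   /* 1   (second operand) */
        printf("%g\n", minps_scalar(1.0f, NAN));   /* nan (second operand) */
        return 0;
    }
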
break;
}
- d->L(0) = float64_round_to_int(s->L(0), &env->sse_status);
- d->L(1) = float64_round_to_int(s->L(1), &env->sse_status);
- d->L(2) = float64_round_to_int(s->L(2), &env->sse_status);
- d->L(3) = float64_round_to_int(s->L(3), &env->sse_status);
+ d->XMM_S(0) = float32_round_to_int(s->XMM_S(0), &env->sse_status);
+ d->XMM_S(1) = float32_round_to_int(s->XMM_S(1), &env->sse_status);
+ d->XMM_S(2) = float32_round_to_int(s->XMM_S(2), &env->sse_status);
+ d->XMM_S(3) = float32_round_to_int(s->XMM_S(3), &env->sse_status);
#if 0 /* TODO */
if (mode & (1 << 3))
break;
}
- d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status);
- d->Q(1) = float64_round_to_int(s->Q(1), &env->sse_status);
+ d->XMM_D(0) = float64_round_to_int(s->XMM_D(0), &env->sse_status);
+ d->XMM_D(1) = float64_round_to_int(s->XMM_D(1), &env->sse_status);
#if 0 /* TODO */
if (mode & (1 << 3))
break;
}
- d->L(0) = float64_round_to_int(s->L(0), &env->sse_status);
+ d->XMM_S(0) = float32_round_to_int(s->XMM_S(0), &env->sse_status);
#if 0 /* TODO */
if (mode & (1 << 3))
break;
}
- d->Q(0) = float64_round_to_int(s->Q(0), &env->sse_status);
+ d->XMM_D(0) = float64_round_to_int(s->XMM_D(0), &env->sse_status);
#if 0 /* TODO */
if (mode & (1 << 3))
void glue(helper_dpps, SUFFIX) (Reg *d, Reg *s, uint32_t mask)
{
- float32 iresult = 0 /*float32_zero*/;
+ float32 iresult = float32_zero;
if (mask & (1 << 4))
iresult = float32_add(iresult,
- float32_mul(d->L(0), s->L(0), &env->sse_status),
+ float32_mul(d->XMM_S(0), s->XMM_S(0), &env->sse_status),
&env->sse_status);
if (mask & (1 << 5))
iresult = float32_add(iresult,
- float32_mul(d->L(1), s->L(1), &env->sse_status),
+ float32_mul(d->XMM_S(1), s->XMM_S(1), &env->sse_status),
&env->sse_status);
if (mask & (1 << 6))
iresult = float32_add(iresult,
- float32_mul(d->L(2), s->L(2), &env->sse_status),
+ float32_mul(d->XMM_S(2), s->XMM_S(2), &env->sse_status),
&env->sse_status);
if (mask & (1 << 7))
iresult = float32_add(iresult,
- float32_mul(d->L(3), s->L(3), &env->sse_status),
+ float32_mul(d->XMM_S(3), s->XMM_S(3), &env->sse_status),
&env->sse_status);
- d->L(0) = (mask & (1 << 0)) ? iresult : 0 /*float32_zero*/;
- d->L(1) = (mask & (1 << 1)) ? iresult : 0 /*float32_zero*/;
- d->L(2) = (mask & (1 << 2)) ? iresult : 0 /*float32_zero*/;
- d->L(3) = (mask & (1 << 3)) ? iresult : 0 /*float32_zero*/;
+ d->XMM_S(0) = (mask & (1 << 0)) ? iresult : float32_zero;
+ d->XMM_S(1) = (mask & (1 << 1)) ? iresult : float32_zero;
+ d->XMM_S(2) = (mask & (1 << 2)) ? iresult : float32_zero;
+ d->XMM_S(3) = (mask & (1 << 3)) ? iresult : float32_zero;
}
void glue(helper_dppd, SUFFIX) (Reg *d, Reg *s, uint32_t mask)
{
- float64 iresult = 0 /*float64_zero*/;
+ float64 iresult = float64_zero;
if (mask & (1 << 4))
iresult = float64_add(iresult,
- float64_mul(d->Q(0), s->Q(0), &env->sse_status),
+ float64_mul(d->XMM_D(0), s->XMM_D(0), &env->sse_status),
&env->sse_status);
if (mask & (1 << 5))
iresult = float64_add(iresult,
- float64_mul(d->Q(1), s->Q(1), &env->sse_status),
+ float64_mul(d->XMM_D(1), s->XMM_D(1), &env->sse_status),
&env->sse_status);
- d->Q(0) = (mask & (1 << 0)) ? iresult : 0 /*float64_zero*/;
- d->Q(1) = (mask & (1 << 1)) ? iresult : 0 /*float64_zero*/;
+ d->XMM_D(0) = (mask & (1 << 0)) ? iresult : float64_zero;
+ d->XMM_D(1) = (mask & (1 << 1)) ? iresult : float64_zero;
}
void glue(helper_mpsadbw, SUFFIX) (Reg *d, Reg *s, uint32_t offset)
}
}
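
A quick model of the DPPS immediate handled above, with plain host floats in place of QEMU softfloat (dpps_model is a made-up name): bits 4-7 of the mask pick which per-lane products enter the sum, bits 0-3 pick which destination lanes receive it.

    #include <stdio.h>

    static void dpps_model(float d[4], const float s[4], unsigned mask)
    {
        float sum = 0.0f;
        int i;

        /* mask bits 4..7: products that participate in the dot product */
        for (i = 0; i < 4; i++) {
            if (mask & (1 << (4 + i))) {
                sum += d[i] * s[i];
            }
        }
        /* mask bits 0..3: destination lanes that receive the sum */
        for (i = 0; i < 4; i++) {
            d[i] = (mask & (1 << i)) ? sum : 0.0f;
        }
    }

    int main(void)
    {
        float d[4] = { 1, 2, 3, 4 };
        float s[4] = { 10, 20, 30, 40 };

        dpps_model(d, s, 0xf1); /* sum every product, write lane 0 only */
        printf("%g %g %g %g\n", d[0], d[1], d[2], d[3]); /* 300 0 0 0 */
        return 0;
    }
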
-#define RC_MASK 0xc00
-#define RC_NEAR 0x000
-#define RC_DOWN 0x400
-#define RC_UP 0x800
-#define RC_CHOP 0xc00
+#define FPU_RC_MASK 0xc00
+#define FPU_RC_NEAR 0x000
+#define FPU_RC_DOWN 0x400
+#define FPU_RC_UP 0x800
+#define FPU_RC_CHOP 0xc00
#define MAXTAN 9223372036854775808.0
int rnd_type;
/* set rounding mode */
- switch(env->fpuc & RC_MASK) {
+ switch(env->fpuc & FPU_RC_MASK) {
default:
- case RC_NEAR:
+ case FPU_RC_NEAR:
rnd_type = float_round_nearest_even;
break;
- case RC_DOWN:
+ case FPU_RC_DOWN:
rnd_type = float_round_down;
break;
- case RC_UP:
+ case FPU_RC_UP:
rnd_type = float_round_up;
break;
- case RC_CHOP:
+ case FPU_RC_CHOP:
rnd_type = float_round_to_zero;
break;
}
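
For reference, the renamed FPU_RC_* values select bits 10-11 of the x87 control word; a minimal standalone sketch of the same mapping (fpuc_rounding is a made-up name, not a QEMU helper):

    #include <stdint.h>
    #include <stdio.h>

    #define FPU_RC_MASK 0xc00
    #define FPU_RC_NEAR 0x000
    #define FPU_RC_DOWN 0x400
    #define FPU_RC_UP   0x800
    #define FPU_RC_CHOP 0xc00

    enum rounding { ROUND_NEAREST, ROUND_DOWN, ROUND_UP, ROUND_TO_ZERO };

    /* Map the control word's RC field to a rounding mode, mirroring
     * the switch in the hunk above. */
    static enum rounding fpuc_rounding(uint16_t fpuc)
    {
        switch (fpuc & FPU_RC_MASK) {
        case FPU_RC_DOWN: return ROUND_DOWN;
        case FPU_RC_UP:   return ROUND_UP;
        case FPU_RC_CHOP: return ROUND_TO_ZERO;
        case FPU_RC_NEAR:
        default:          return ROUND_NEAREST;
        }
    }

    int main(void)
    {
        /* 0x037f is the x87 control word after FNINIT: round to nearest */
        printf("%d\n", fpuc_rounding(0x037f)); /* prints 0 (ROUND_NEAREST) */
        return 0;
    }
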
/* MMX/SSE */
/* XXX: optimize by storing fptt and fptags in the static cpu state */
+
+#define SSE_DAZ 0x0040
+#define SSE_RC_MASK 0x6000
+#define SSE_RC_NEAR 0x0000
+#define SSE_RC_DOWN 0x2000
+#define SSE_RC_UP 0x4000
+#define SSE_RC_CHOP 0x6000
+#define SSE_FZ 0x8000
+
+static void update_sse_status(void)
+{
+ int rnd_type;
+
+ /* set rounding mode */
+ switch(env->mxcsr & SSE_RC_MASK) {
+ default:
+ case SSE_RC_NEAR:
+ rnd_type = float_round_nearest_even;
+ break;
+ case SSE_RC_DOWN:
+ rnd_type = float_round_down;
+ break;
+ case SSE_RC_UP:
+ rnd_type = float_round_up;
+ break;
+ case SSE_RC_CHOP:
+ rnd_type = float_round_to_zero;
+ break;
+ }
+ set_float_rounding_mode(rnd_type, &env->sse_status);
+
+ /* set denormals are zero */
+ set_flush_inputs_to_zero((env->mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
+
+ /* set flush to zero */
+    set_flush_to_zero((env->mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
+}
+
+void helper_ldmxcsr(uint32_t val)
+{
+ env->mxcsr = val;
+ update_sse_status();
+}
+
void helper_enter_mmx(void)
{
env->fpstt = 0;
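
For completeness, the MXCSR bits consumed by the new update_sse_status() are DAZ at bit 6, RC at bits 13-14 and FZ at bit 15; a standalone decode sketch with a made-up decode_mxcsr name (host C, not QEMU softfloat):

    #include <stdint.h>
    #include <stdio.h>

    #define SSE_DAZ     0x0040
    #define SSE_RC_MASK 0x6000
    #define SSE_FZ      0x8000

    /* Report the rounding control, denormals-are-zero and flush-to-zero
     * settings encoded in an MXCSR value. */
    static void decode_mxcsr(uint32_t mxcsr)
    {
        static const char *rc[4] = { "nearest", "down", "up", "toward zero" };

        printf("RC=%s DAZ=%d FZ=%d\n",
               rc[(mxcsr & SSE_RC_MASK) >> 13],
               (mxcsr & SSE_DAZ) != 0,
               (mxcsr & SSE_FZ) != 0);
    }

    int main(void)
    {
        decode_mxcsr(0x1f80); /* reset default: round to nearest, DAZ/FZ clear */
        decode_mxcsr(0x9fc0); /* DAZ and FZ set, rounding still nearest */
        return 0;
    }
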