SSE3 support (Joachim Henke)
authorbellard <bellard@c046a42c-6fe2-441c-8c8c-71466251a162>
Sun, 23 Apr 2006 21:54:01 +0000 (21:54 +0000)
committerbellard <bellard@c046a42c-6fe2-441c-8c8c-71466251a162>
Sun, 23 Apr 2006 21:54:01 +0000 (21:54 +0000)
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@1839 c046a42c-6fe2-441c-8c8c-71466251a162

target-i386/cpu.h
target-i386/exec.h
target-i386/helper2.c
target-i386/op.c
target-i386/translate.c

index eca3993..2f23617 100644 (file)
 #define CPUID_SSE  (1 << 25)
 #define CPUID_SSE2 (1 << 26)
 
-#define CPUID_EXT_SS     (1 << 0)
+#define CPUID_EXT_SSE3     (1 << 0)
 #define CPUID_EXT_MONITOR  (1 << 3)
 #define CPUID_EXT_CX16     (1 << 13)
 
index 61ba3cd..8299af4 100644 (file)
@@ -260,6 +260,8 @@ static inline void stfl(target_ulong ptr, float v)
 /* use long double functions */
 #define floatx_to_int32 floatx80_to_int32
 #define floatx_to_int64 floatx80_to_int64
+#define floatx_to_int32_round_to_zero floatx80_to_int32_round_to_zero
+#define floatx_to_int64_round_to_zero floatx80_to_int64_round_to_zero
 #define floatx_abs floatx80_abs
 #define floatx_chs floatx80_chs
 #define floatx_round_to_int floatx80_round_to_int
@@ -278,6 +280,8 @@ static inline void stfl(target_ulong ptr, float v)
 #else
 #define floatx_to_int32 float64_to_int32
 #define floatx_to_int64 float64_to_int64
+#define floatx_to_int32_round_to_zero float64_to_int32_round_to_zero
+#define floatx_to_int64_round_to_zero float64_to_int64_round_to_zero
 #define floatx_abs float64_abs
 #define floatx_chs float64_chs
 #define floatx_round_to_int float64_round_to_int
index cb896cb..5195fa3 100644 (file)
@@ -108,7 +108,7 @@ CPUX86State *cpu_x86_init(void)
                                CPUID_CX8 | CPUID_PGE | CPUID_CMOV |
                                CPUID_PAT);
         env->pat = 0x0007040600070406ULL;
-        env->cpuid_ext_features = 0;
+        env->cpuid_ext_features = CPUID_EXT_SSE3;
         env->cpuid_features |= CPUID_FXSR | CPUID_MMX | CPUID_SSE | CPUID_SSE2 | CPUID_PAE | CPUID_SEP;
         env->cpuid_xlevel = 0;
         {
index 93cc3c4..20eeaa4 100644 (file)
@@ -1911,6 +1911,53 @@ void OPPROTO op_fistll_ST0_A0(void)
     FORCE_RET();
 }
 
+void OPPROTO op_fistt_ST0_A0(void)
+{
+#if defined(__sparc__) && !defined(__sparc_v9__)
+    register CPU86_LDouble d asm("o0");
+#else
+    CPU86_LDouble d;
+#endif
+    int val;
+
+    d = ST0;
+    val = floatx_to_int32_round_to_zero(d, &env->fp_status);
+    if (val != (int16_t)val)
+        val = -32768;
+    stw(A0, val);
+    FORCE_RET();
+}
+
+void OPPROTO op_fisttl_ST0_A0(void)
+{
+#if defined(__sparc__) && !defined(__sparc_v9__)
+    register CPU86_LDouble d asm("o0");
+#else
+    CPU86_LDouble d;
+#endif
+    int val;
+
+    d = ST0;
+    val = floatx_to_int32_round_to_zero(d, &env->fp_status);
+    stl(A0, val);
+    FORCE_RET();
+}
+
+void OPPROTO op_fisttll_ST0_A0(void)
+{
+#if defined(__sparc__) && !defined(__sparc_v9__)
+    register CPU86_LDouble d asm("o0");
+#else
+    CPU86_LDouble d;
+#endif
+    int64_t val;
+
+    d = ST0;
+    val = floatx_to_int64_round_to_zero(d, &env->fp_status);
+    stq(A0, val);
+    FORCE_RET();
+}
+
 void OPPROTO op_fbld_ST0_A0(void)
 {
     helper_fbld_ST0_A0();
index cca4560..6dcaf5d 100644 (file)
@@ -2334,7 +2334,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
     /* pure SSE operations */
     [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
     [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
-    [0x12] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
+    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
     [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
     [0x14] = { gen_op_punpckldq_xmm, gen_op_punpcklqdq_xmm },
     [0x15] = { gen_op_punpckhdq_xmm, gen_op_punpckhqdq_xmm },
@@ -2436,7 +2436,7 @@ static GenOpFunc2 *sse_op_table1[256][4] = {
     [0xed] = MMX_OP2(paddsw),
     [0xee] = MMX_OP2(pmaxsw),
     [0xef] = MMX_OP2(pxor),
-    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu (PNI) */
+    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
     [0xf1] = MMX_OP2(psllw),
     [0xf2] = MMX_OP2(pslld),
     [0xf3] = MMX_OP2(psllq),
@@ -2563,8 +2563,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x1e7: /* movntdq */
         case 0x02b: /* movntps */
         case 0x12b: /* movntps */
-        case 0x2f0: /* lddqu */
-            if (mod == 3) 
+        case 0x3f0: /* lddqu */
+            if (mod == 3)
                 goto illegal_op;
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
             gen_sto_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg]));
@@ -2642,6 +2642,34 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
                             offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
             }
             break;
+        case 0x212: /* movsldup */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldo_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg]));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
+                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(2)));
+            }
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
+            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
+            break;
+        case 0x312: /* movddup */
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            } else {
+                rm = (modrm & 7) | REX_B(s);
+                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
+                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+            }
+            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
+                        offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+            break;
         case 0x016: /* movhps */
         case 0x116: /* movhpd */
             if (mod != 3) {
@@ -4278,16 +4306,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             case 0x08: /* flds */
             case 0x0a: /* fsts */
             case 0x0b: /* fstps */
-            case 0x18: /* fildl */
-            case 0x1a: /* fistl */
-            case 0x1b: /* fistpl */
-            case 0x28: /* fldl */
-            case 0x2a: /* fstl */
-            case 0x2b: /* fstpl */
-            case 0x38: /* filds */
-            case 0x3a: /* fists */
-            case 0x3b: /* fistps */
-                
+            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
+            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
+            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
                 switch(op & 7) {
                 case 0:
                     switch(op >> 4) {
@@ -4306,6 +4327,20 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                         break;
                     }
                     break;
+                case 1:
+                    switch(op >> 4) {
+                    case 1:
+                        gen_op_fisttl_ST0_A0();
+                        break;
+                    case 2:
+                        gen_op_fisttll_ST0_A0();
+                        break;
+                    case 3:
+                    default:
+                        gen_op_fistt_ST0_A0();
+                    }
+                    gen_op_fpop();
+                    break;
                 default:
                     switch(op >> 4) {
                     case 0: