From 14a812635b444ae143726428765480ae536b5533 Mon Sep 17 00:00:00 2001 From: bellard Date: Wed, 21 May 2008 10:12:54 +0000 Subject: [PATCH] converted MUL/IMUL to TCG git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@4508 c046a42c-6fe2-441c-8c8c-71466251a162 --- target-i386/helper.c | 24 --------- target-i386/op.c | 98 --------------------------------- target-i386/translate.c | 140 ++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 129 insertions(+), 133 deletions(-) diff --git a/target-i386/helper.c b/target-i386/helper.c index d888eaf..5322491 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -1609,22 +1609,6 @@ void helper_rsm(void) #endif /* !CONFIG_USER_ONLY */ -#ifdef BUGGY_GCC_DIV64 -/* gcc 2.95.4 on PowerPC does not seem to like using __udivdi3, so we - call it from another function */ -uint32_t div32(uint64_t *q_ptr, uint64_t num, uint32_t den) -{ - *q_ptr = num / den; - return num % den; -} - -int32_t idiv32(int64_t *q_ptr, int64_t num, int32_t den) -{ - *q_ptr = num / den; - return num % den; -} -#endif - /* division, flags are undefined */ void helper_divb_AL(target_ulong t0) @@ -1707,12 +1691,8 @@ void helper_divl_EAX(target_ulong t0) if (den == 0) { raise_exception(EXCP00_DIVZ); } -#ifdef BUGGY_GCC_DIV64 - r = div32(&q, num, den); -#else q = (num / den); r = (num % den); -#endif if (q > 0xffffffff) raise_exception(EXCP00_DIVZ); EAX = (uint32_t)q; @@ -1729,12 +1709,8 @@ void helper_idivl_EAX(target_ulong t0) if (den == 0) { raise_exception(EXCP00_DIVZ); } -#ifdef BUGGY_GCC_DIV64 - r = idiv32(&q, num, den); -#else q = (num / den); r = (num % den); -#endif if (q != (int32_t)q) raise_exception(EXCP00_DIVZ); EAX = (uint32_t)q; diff --git a/target-i386/op.c b/target-i386/op.c index 7bd8949..e8f342c 100644 --- a/target-i386/op.c +++ b/target-i386/op.c @@ -123,104 +123,6 @@ #endif -/* multiply/divide */ - -/* XXX: add eflags optimizations */ -/* XXX: add non P4 style flags */ - -void OPPROTO op_mulb_AL_T0(void) -{ - unsigned int res; - res = (uint8_t)EAX * (uint8_t)T0; - EAX = (EAX & ~0xffff) | res; - CC_DST = res; - CC_SRC = (res & 0xff00); -} - -void OPPROTO op_imulb_AL_T0(void) -{ - int res; - res = (int8_t)EAX * (int8_t)T0; - EAX = (EAX & ~0xffff) | (res & 0xffff); - CC_DST = res; - CC_SRC = (res != (int8_t)res); -} - -void OPPROTO op_mulw_AX_T0(void) -{ - unsigned int res; - res = (uint16_t)EAX * (uint16_t)T0; - EAX = (EAX & ~0xffff) | (res & 0xffff); - EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff); - CC_DST = res; - CC_SRC = res >> 16; -} - -void OPPROTO op_imulw_AX_T0(void) -{ - int res; - res = (int16_t)EAX * (int16_t)T0; - EAX = (EAX & ~0xffff) | (res & 0xffff); - EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff); - CC_DST = res; - CC_SRC = (res != (int16_t)res); -} - -void OPPROTO op_mull_EAX_T0(void) -{ - uint64_t res; - res = (uint64_t)((uint32_t)EAX) * (uint64_t)((uint32_t)T0); - EAX = (uint32_t)res; - EDX = (uint32_t)(res >> 32); - CC_DST = (uint32_t)res; - CC_SRC = (uint32_t)(res >> 32); -} - -void OPPROTO op_imull_EAX_T0(void) -{ - int64_t res; - res = (int64_t)((int32_t)EAX) * (int64_t)((int32_t)T0); - EAX = (uint32_t)(res); - EDX = (uint32_t)(res >> 32); - CC_DST = res; - CC_SRC = (res != (int32_t)res); -} - -void OPPROTO op_imulw_T0_T1(void) -{ - int res; - res = (int16_t)T0 * (int16_t)T1; - T0 = res; - CC_DST = res; - CC_SRC = (res != (int16_t)res); -} - -void OPPROTO op_imull_T0_T1(void) -{ - int64_t res; - res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1); - T0 = res; - CC_DST = res; - CC_SRC = (res != (int32_t)res); -} - -#ifdef TARGET_X86_64 -void OPPROTO op_mulq_EAX_T0(void) -{ - helper_mulq_EAX_T0(T0); -} - -void OPPROTO op_imulq_EAX_T0(void) -{ - helper_imulq_EAX_T0(T0); -} - -void OPPROTO op_imulq_T0_T1(void) -{ - T0 = helper_imulq_T0_T1(T0, T1); -} -#endif - /* constant load & misc op */ /* XXX: consistent names */ diff --git a/target-i386/translate.c b/target-i386/translate.c index 0755987..c73fac9 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -3799,21 +3799,64 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) case 4: /* mul */ switch(ot) { case OT_BYTE: - gen_op_mulb_AL_T0(); + gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX); + tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]); + /* XXX: use 32 bit mul which could be faster */ + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00); s->cc_op = CC_OP_MULB; break; case OT_WORD: - gen_op_mulw_AX_T0(); + gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); + tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]); + /* XXX: use 32 bit mul which could be faster */ + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); + gen_op_mov_reg_T0(OT_WORD, R_EDX); + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); s->cc_op = CC_OP_MULW; break; default: case OT_LONG: - gen_op_mull_EAX_T0(); +#ifdef TARGET_X86_64 + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); + tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]); + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_LONG, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32); + gen_op_mov_reg_T0(OT_LONG, R_EDX); + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); +#else + { + TCGv t0, t1; + t0 = tcg_temp_new(TCG_TYPE_I64); + t1 = tcg_temp_new(TCG_TYPE_I64); + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); + tcg_gen_extu_i32_i64(t0, cpu_T[0]); + tcg_gen_extu_i32_i64(t1, cpu_T[1]); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_trunc_i64_i32(cpu_T[0], t0); + gen_op_mov_reg_T0(OT_LONG, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_shri_i64(t0, t0, 32); + tcg_gen_trunc_i64_i32(cpu_T[0], t0); + gen_op_mov_reg_T0(OT_LONG, R_EDX); + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); + } +#endif s->cc_op = CC_OP_MULL; break; #ifdef TARGET_X86_64 case OT_QUAD: - gen_op_mulq_EAX_T0(); + tcg_gen_helper_0_1(helper_mulq_EAX_T0, cpu_T[0]); s->cc_op = CC_OP_MULQ; break; #endif @@ -3822,21 +3865,68 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) case 5: /* imul */ switch(ot) { case OT_BYTE: - gen_op_imulb_AL_T0(); + gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX); + tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]); + /* XXX: use 32 bit mul which could be faster */ + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); s->cc_op = CC_OP_MULB; break; case OT_WORD: - gen_op_imulw_AX_T0(); + gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]); + /* XXX: use 32 bit mul which could be faster */ + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_WORD, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); + gen_op_mov_reg_T0(OT_WORD, R_EDX); s->cc_op = CC_OP_MULW; break; default: case OT_LONG: - gen_op_imull_EAX_T0(); +#ifdef TARGET_X86_64 + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]); + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(OT_LONG, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32); + gen_op_mov_reg_T0(OT_LONG, R_EDX); +#else + { + TCGv t0, t1; + t0 = tcg_temp_new(TCG_TYPE_I64); + t1 = tcg_temp_new(TCG_TYPE_I64); + gen_op_mov_TN_reg(OT_LONG, 1, R_EAX); + tcg_gen_ext_i32_i64(t0, cpu_T[0]); + tcg_gen_ext_i32_i64(t1, cpu_T[1]); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_trunc_i64_i32(cpu_T[0], t0); + gen_op_mov_reg_T0(OT_LONG, R_EAX); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31); + tcg_gen_shri_i64(t0, t0, 32); + tcg_gen_trunc_i64_i32(cpu_T[0], t0); + gen_op_mov_reg_T0(OT_LONG, R_EDX); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); + } +#endif s->cc_op = CC_OP_MULL; break; #ifdef TARGET_X86_64 case OT_QUAD: - gen_op_imulq_EAX_T0(); + tcg_gen_helper_0_1(helper_imulq_EAX_T0, cpu_T[0]); s->cc_op = CC_OP_MULQ; break; #endif @@ -4104,13 +4194,41 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) #ifdef TARGET_X86_64 if (ot == OT_QUAD) { - gen_op_imulq_T0_T1(); + tcg_gen_helper_1_2(helper_imulq_T0_T1, cpu_T[0], cpu_T[0], cpu_T[1]); } else #endif if (ot == OT_LONG) { - gen_op_imull_T0_T1(); +#ifdef TARGET_X86_64 + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]); + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); +#else + { + TCGv t0, t1; + t0 = tcg_temp_new(TCG_TYPE_I64); + t1 = tcg_temp_new(TCG_TYPE_I64); + tcg_gen_ext_i32_i64(t0, cpu_T[0]); + tcg_gen_ext_i32_i64(t1, cpu_T[1]); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_trunc_i64_i32(cpu_T[0], t0); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31); + tcg_gen_shri_i64(t0, t0, 32); + tcg_gen_trunc_i64_i32(cpu_T[1], t0); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0); + } +#endif } else { - gen_op_imulw_T0_T1(); + tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]); + tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]); + /* XXX: use 32 bit mul which could be faster */ + tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]); + tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); } gen_op_mov_reg_T0(ot, reg); s->cc_op = CC_OP_MULB + ot; -- 2.7.4