From bebe80fc78cc91c4225cfb98ef3a916b9c861c60 Mon Sep 17 00:00:00 2001 From: Bastian Koppelmann Date: Fri, 6 Feb 2015 14:48:33 +0000 Subject: [PATCH] target-tricore: Add instructions of RRR1 opcode format, which have 0xc3 as first opcode Add helpers helper_addsur_h/_ssov which adds one halfword and subtracts one halfword, rounds / and saturates each half word independently. Add microcode helper functions: * gen_maddsu_h/sus_h: multiply two halfwords left justified and add to the first one word and subtract from the second one word / and saturate each resulting word independetly. * gen_maddsum_h/sums_h: multiply two halfwords in q-format left justified and add to the first one word and subtract from the second one word / and saturate each resulting word independetly. * gen_maddsur32_h/32s_h: multiply two halfwords in q-format left justified and add to the first one word and subtract from the second one word, round both results / and saturate each resulting word independetly. Signed-off-by: Bastian Koppelmann Reviewed-by: Richard Henderson --- target-tricore/helper.h | 2 + target-tricore/op_helper.c | 84 ++++++++++++ target-tricore/translate.c | 332 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 418 insertions(+) diff --git a/target-tricore/helper.h b/target-tricore/helper.h index 8e9eea5..4c82346 100644 --- a/target-tricore/helper.h +++ b/target-tricore/helper.h @@ -22,6 +22,7 @@ DEF_HELPER_3(add_suov, i32, env, i32, i32) DEF_HELPER_3(add_h_ssov, i32, env, i32, i32) DEF_HELPER_3(add_h_suov, i32, env, i32, i32) DEF_HELPER_4(addr_h_ssov, i32, env, i64, i32, i32) +DEF_HELPER_4(addsur_h_ssov, i32, env, i64, i32, i32) DEF_HELPER_3(sub_ssov, i32, env, i32, i32) DEF_HELPER_3(sub_suov, i32, env, i32, i32) DEF_HELPER_3(sub_h_ssov, i32, env, i32, i32) @@ -50,6 +51,7 @@ DEF_HELPER_2(abs_h, i32, env, i32) DEF_HELPER_3(absdif_b, i32, env, i32, i32) DEF_HELPER_3(absdif_h, i32, env, i32, i32) DEF_HELPER_4(addr_h, i32, env, i64, i32, i32) +DEF_HELPER_4(addsur_h, i32, env, i64, i32, i32) DEF_HELPER_5(maddr_q, i32, env, i32, i32, i32, i32) DEF_HELPER_3(add_b, i32, env, i32, i32) DEF_HELPER_3(add_h, i32, env, i32, i32) diff --git a/target-tricore/op_helper.c b/target-tricore/op_helper.c index 2755d45..40d32af 100644 --- a/target-tricore/op_helper.c +++ b/target-tricore/op_helper.c @@ -265,6 +265,52 @@ uint32_t helper_addr_h_ssov(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l, return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL); } +uint32_t helper_addsur_h_ssov(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l, + uint32_t r2_h) +{ + int64_t mul_res0 = sextract64(r1, 0, 32); + int64_t mul_res1 = sextract64(r1, 32, 32); + int64_t r2_low = sextract64(r2_l, 0, 32); + int64_t r2_high = sextract64(r2_h, 0, 32); + int64_t result0, result1; + uint32_t ovf0, ovf1; + uint32_t avf0, avf1; + + ovf0 = ovf1 = 0; + + result0 = r2_low - mul_res0 + 0x8000; + result1 = r2_high + mul_res1 + 0x8000; + + avf0 = result0 * 2u; + avf0 = result0 ^ avf0; + avf1 = result1 * 2u; + avf1 = result1 ^ avf1; + + if (result0 > INT32_MAX) { + ovf0 = (1 << 31); + result0 = INT32_MAX; + } else if (result0 < INT32_MIN) { + ovf0 = (1 << 31); + result0 = INT32_MIN; + } + + if (result1 > INT32_MAX) { + ovf1 = (1 << 31); + result1 = INT32_MAX; + } else if (result1 < INT32_MIN) { + ovf1 = (1 << 31); + result1 = INT32_MIN; + } + + env->PSW_USB_V = ovf0 | ovf1; + env->PSW_USB_SV |= env->PSW_USB_V; + + env->PSW_USB_AV = avf0 | avf1; + env->PSW_USB_SAV |= env->PSW_USB_AV; + + return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL); +} + target_ulong helper_add_suov(CPUTriCoreState *env, target_ulong r1, target_ulong r2) @@ -854,6 +900,44 @@ uint32_t helper_addr_h(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l, return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL); } +uint32_t helper_addsur_h(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l, + uint32_t r2_h) +{ + int64_t mul_res0 = sextract64(r1, 0, 32); + int64_t mul_res1 = sextract64(r1, 32, 32); + int64_t r2_low = sextract64(r2_l, 0, 32); + int64_t r2_high = sextract64(r2_h, 0, 32); + int64_t result0, result1; + uint32_t ovf0, ovf1; + uint32_t avf0, avf1; + + ovf0 = ovf1 = 0; + + result0 = r2_low - mul_res0 + 0x8000; + result1 = r2_high + mul_res1 + 0x8000; + + if ((result0 > INT32_MAX) || (result0 < INT32_MIN)) { + ovf0 = (1 << 31); + } + + if ((result1 > INT32_MAX) || (result1 < INT32_MIN)) { + ovf1 = (1 << 31); + } + + env->PSW_USB_V = ovf0 | ovf1; + env->PSW_USB_SV |= env->PSW_USB_V; + + avf0 = result0 * 2u; + avf0 = result0 ^ avf0; + avf1 = result1 * 2u; + avf1 = result1 ^ avf1; + + env->PSW_USB_AV = avf0 | avf1; + env->PSW_USB_SAV |= env->PSW_USB_AV; + + return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL); +} + uint32_t helper_maddr_q(CPUTriCoreState *env, uint32_t r1, uint32_t r2, uint32_t r3, uint32_t n) { diff --git a/target-tricore/translate.c b/target-tricore/translate.c index 4f13e4f..f720cd7 100644 --- a/target-tricore/translate.c +++ b/target-tricore/translate.c @@ -641,6 +641,73 @@ gen_madd_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, tcg_temp_free_i64(temp64); } +static inline void +gen_maddsu_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, + TCGv r3, uint32_t n, uint32_t mode) +{ + TCGv temp = tcg_const_i32(n); + TCGv temp2 = tcg_temp_new(); + TCGv_i64 temp64 = tcg_temp_new_i64(); + switch (mode) { + case MODE_LL: + GEN_HELPER_LL(mul_h, temp64, r2, r3, temp); + break; + case MODE_LU: + GEN_HELPER_LU(mul_h, temp64, r2, r3, temp); + break; + case MODE_UL: + GEN_HELPER_UL(mul_h, temp64, r2, r3, temp); + break; + case MODE_UU: + GEN_HELPER_UU(mul_h, temp64, r2, r3, temp); + break; + } + tcg_gen_extr_i64_i32(temp, temp2, temp64); + gen_addsub64_h(ret_low, ret_high, r1_low, r1_high, temp, temp2, + tcg_gen_sub_tl, tcg_gen_add_tl); + tcg_temp_free(temp); + tcg_temp_free(temp2); + tcg_temp_free_i64(temp64); +} + +static inline void +gen_maddsum_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, + TCGv r3, uint32_t n, uint32_t mode) +{ + TCGv temp = tcg_const_i32(n); + TCGv_i64 temp64 = tcg_temp_new_i64(); + TCGv_i64 temp64_2 = tcg_temp_new_i64(); + TCGv_i64 temp64_3 = tcg_temp_new_i64(); + switch (mode) { + case MODE_LL: + GEN_HELPER_LL(mul_h, temp64, r2, r3, temp); + break; + case MODE_LU: + GEN_HELPER_LU(mul_h, temp64, r2, r3, temp); + break; + case MODE_UL: + GEN_HELPER_UL(mul_h, temp64, r2, r3, temp); + break; + case MODE_UU: + GEN_HELPER_UU(mul_h, temp64, r2, r3, temp); + break; + } + tcg_gen_concat_i32_i64(temp64_3, r1_low, r1_high); + tcg_gen_sari_i64(temp64_2, temp64, 32); /* high */ + tcg_gen_ext32s_i64(temp64, temp64); /* low */ + tcg_gen_sub_i64(temp64, temp64_2, temp64); + tcg_gen_shli_i64(temp64, temp64, 16); + + gen_add64_d(temp64_2, temp64_3, temp64); + /* write back result */ + tcg_gen_extr_i64_i32(ret_low, ret_high, temp64_2); + + tcg_temp_free(temp); + tcg_temp_free_i64(temp64); + tcg_temp_free_i64(temp64_2); + tcg_temp_free_i64(temp64_3); +} + static inline void gen_adds(TCGv ret, TCGv r1, TCGv r2); static inline void @@ -683,6 +750,85 @@ gen_madds_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, } +static inline void gen_subs(TCGv ret, TCGv r1, TCGv r2); + +static inline void +gen_maddsus_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, + TCGv r3, uint32_t n, uint32_t mode) +{ + TCGv temp = tcg_const_i32(n); + TCGv temp2 = tcg_temp_new(); + TCGv temp3 = tcg_temp_new(); + TCGv_i64 temp64 = tcg_temp_new_i64(); + + switch (mode) { + case MODE_LL: + GEN_HELPER_LL(mul_h, temp64, r2, r3, temp); + break; + case MODE_LU: + GEN_HELPER_LU(mul_h, temp64, r2, r3, temp); + break; + case MODE_UL: + GEN_HELPER_UL(mul_h, temp64, r2, r3, temp); + break; + case MODE_UU: + GEN_HELPER_UU(mul_h, temp64, r2, r3, temp); + break; + } + tcg_gen_extr_i64_i32(temp, temp2, temp64); + gen_subs(ret_low, r1_low, temp); + tcg_gen_mov_tl(temp, cpu_PSW_V); + tcg_gen_mov_tl(temp3, cpu_PSW_AV); + gen_adds(ret_high, r1_high, temp2); + /* combine v bits */ + tcg_gen_or_tl(cpu_PSW_V, cpu_PSW_V, temp); + /* combine av bits */ + tcg_gen_or_tl(cpu_PSW_AV, cpu_PSW_AV, temp3); + + tcg_temp_free(temp); + tcg_temp_free(temp2); + tcg_temp_free(temp3); + tcg_temp_free_i64(temp64); + +} + +static inline void +gen_maddsums_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, + TCGv r3, uint32_t n, uint32_t mode) +{ + TCGv temp = tcg_const_i32(n); + TCGv_i64 temp64 = tcg_temp_new_i64(); + TCGv_i64 temp64_2 = tcg_temp_new_i64(); + + switch (mode) { + case MODE_LL: + GEN_HELPER_LL(mul_h, temp64, r2, r3, temp); + break; + case MODE_LU: + GEN_HELPER_LU(mul_h, temp64, r2, r3, temp); + break; + case MODE_UL: + GEN_HELPER_UL(mul_h, temp64, r2, r3, temp); + break; + case MODE_UU: + GEN_HELPER_UU(mul_h, temp64, r2, r3, temp); + break; + } + tcg_gen_sari_i64(temp64_2, temp64, 32); /* high */ + tcg_gen_ext32s_i64(temp64, temp64); /* low */ + tcg_gen_sub_i64(temp64, temp64_2, temp64); + tcg_gen_shli_i64(temp64, temp64, 16); + tcg_gen_concat_i32_i64(temp64_2, r1_low, r1_high); + + gen_helper_add64_ssov(temp64, cpu_env, temp64_2, temp64); + tcg_gen_extr_i64_i32(ret_low, ret_high, temp64); + + tcg_temp_free(temp); + tcg_temp_free_i64(temp64); + tcg_temp_free_i64(temp64_2); +} + + static inline void gen_maddm_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) @@ -787,6 +933,36 @@ gen_maddr32_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) } static inline void +gen_maddsur32_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) +{ + TCGv temp = tcg_const_i32(n); + TCGv temp2 = tcg_temp_new(); + TCGv_i64 temp64 = tcg_temp_new_i64(); + switch (mode) { + case MODE_LL: + GEN_HELPER_LL(mul_h, temp64, r2, r3, temp); + break; + case MODE_LU: + GEN_HELPER_LU(mul_h, temp64, r2, r3, temp); + break; + case MODE_UL: + GEN_HELPER_UL(mul_h, temp64, r2, r3, temp); + break; + case MODE_UU: + GEN_HELPER_UU(mul_h, temp64, r2, r3, temp); + break; + } + tcg_gen_andi_tl(temp2, r1, 0xffff0000); + tcg_gen_shli_tl(temp, r1, 16); + gen_helper_addsur_h(ret, cpu_env, temp64, temp, temp2); + + tcg_temp_free(temp); + tcg_temp_free(temp2); + tcg_temp_free_i64(temp64); +} + + +static inline void gen_maddr64s_h(TCGv ret, TCGv r1_low, TCGv r1_high, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) { @@ -827,6 +1003,35 @@ gen_maddr32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) } static inline void +gen_maddsur32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode) +{ + TCGv temp = tcg_const_i32(n); + TCGv temp2 = tcg_temp_new(); + TCGv_i64 temp64 = tcg_temp_new_i64(); + switch (mode) { + case MODE_LL: + GEN_HELPER_LL(mul_h, temp64, r2, r3, temp); + break; + case MODE_LU: + GEN_HELPER_LU(mul_h, temp64, r2, r3, temp); + break; + case MODE_UL: + GEN_HELPER_UL(mul_h, temp64, r2, r3, temp); + break; + case MODE_UU: + GEN_HELPER_UU(mul_h, temp64, r2, r3, temp); + break; + } + tcg_gen_andi_tl(temp2, r1, 0xffff0000); + tcg_gen_shli_tl(temp, r1, 16); + gen_helper_addsur_h_ssov(ret, cpu_env, temp64, temp, temp2); + + tcg_temp_free(temp); + tcg_temp_free(temp2); + tcg_temp_free_i64(temp64); +} + +static inline void gen_maddr_q(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n) { TCGv temp = tcg_const_i32(n); @@ -6141,6 +6346,130 @@ static void decode_rrr1_maddq_h(CPUTriCoreState *env, DisasContext *ctx) tcg_temp_free(temp2); } +static void decode_rrr1_maddsu_h(CPUTriCoreState *env, DisasContext *ctx) +{ + uint32_t op2; + uint32_t r1, r2, r3, r4, n; + + op2 = MASK_OP_RRR1_OP2(ctx->opcode); + r1 = MASK_OP_RRR1_S1(ctx->opcode); + r2 = MASK_OP_RRR1_S2(ctx->opcode); + r3 = MASK_OP_RRR1_S3(ctx->opcode); + r4 = MASK_OP_RRR1_D(ctx->opcode); + n = MASK_OP_RRR1_N(ctx->opcode); + + switch (op2) { + case OPC2_32_RRR1_MADDSU_H_32_LL: + gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL); + break; + case OPC2_32_RRR1_MADDSU_H_32_LU: + gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU); + break; + case OPC2_32_RRR1_MADDSU_H_32_UL: + gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL); + break; + case OPC2_32_RRR1_MADDSU_H_32_UU: + gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU); + break; + case OPC2_32_RRR1_MADDSUS_H_32_LL: + gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + n, MODE_LL); + break; + case OPC2_32_RRR1_MADDSUS_H_32_LU: + gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + n, MODE_LU); + break; + case OPC2_32_RRR1_MADDSUS_H_32_UL: + gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + n, MODE_UL); + break; + case OPC2_32_RRR1_MADDSUS_H_32_UU: + gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + n, MODE_UU); + break; + case OPC2_32_RRR1_MADDSUM_H_64_LL: + gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + n, MODE_LL); + break; + case OPC2_32_RRR1_MADDSUM_H_64_LU: + gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + n, MODE_LU); + break; + case OPC2_32_RRR1_MADDSUM_H_64_UL: + gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + n, MODE_UL); + break; + case OPC2_32_RRR1_MADDSUM_H_64_UU: + gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + n, MODE_UU); + break; + case OPC2_32_RRR1_MADDSUMS_H_64_LL: + gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + n, MODE_LL); + break; + case OPC2_32_RRR1_MADDSUMS_H_64_LU: + gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + n, MODE_LU); + break; + case OPC2_32_RRR1_MADDSUMS_H_64_UL: + gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + n, MODE_UL); + break; + case OPC2_32_RRR1_MADDSUMS_H_64_UU: + gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3], + cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], + n, MODE_UU); + break; + case OPC2_32_RRR1_MADDSUR_H_16_LL: + gen_maddsur32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], + cpu_gpr_d[r2], n, MODE_LL); + break; + case OPC2_32_RRR1_MADDSUR_H_16_LU: + gen_maddsur32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], + cpu_gpr_d[r2], n, MODE_LU); + break; + case OPC2_32_RRR1_MADDSUR_H_16_UL: + gen_maddsur32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], + cpu_gpr_d[r2], n, MODE_UL); + break; + case OPC2_32_RRR1_MADDSUR_H_16_UU: + gen_maddsur32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], + cpu_gpr_d[r2], n, MODE_UU); + break; + case OPC2_32_RRR1_MADDSURS_H_16_LL: + gen_maddsur32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], + cpu_gpr_d[r2], n, MODE_LL); + break; + case OPC2_32_RRR1_MADDSURS_H_16_LU: + gen_maddsur32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], + cpu_gpr_d[r2], n, MODE_LU); + break; + case OPC2_32_RRR1_MADDSURS_H_16_UL: + gen_maddsur32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], + cpu_gpr_d[r2], n, MODE_UL); + break; + case OPC2_32_RRR1_MADDSURS_H_16_UU: + gen_maddsur32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1], + cpu_gpr_d[r2], n, MODE_UU); + break; + } +} + static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx) { int op1; @@ -6444,6 +6773,9 @@ static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx) case OPCM_32_RRR1_MADDQ_H: decode_rrr1_maddq_h(env, ctx); break; + case OPCM_32_RRR1_MADDSU_H: + decode_rrr1_maddsu_h(env, ctx); + break; } } -- 2.7.4