target-arm: A64: Add [UF]RSQRTE (reciprocal square root estimate)

author Alex Bennée <alex.bennee@linaro.org>

Mon, 17 Mar 2014 16:31:53 +0000 (16:31 +0000)

committer Peter Maydell <peter.maydell@linaro.org>

Mon, 17 Mar 2014 16:31:53 +0000 (16:31 +0000)
author Alex Bennée <alex.bennee@linaro.org>
Mon, 17 Mar 2014 16:31:53 +0000 (16:31 +0000)
committer Peter Maydell <peter.maydell@linaro.org>
Mon, 17 Mar 2014 16:31:53 +0000 (16:31 +0000)
diff --git a/target-arm/helper.c b/target-arm/helper.c

index 535fc8f35e53a97f8b3fa74ab0de4d594d671bb6..55077ed1b68481a5d2c43fc23f8fa3961d009313 100644 (file)
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -4721,12 +4721,12 @@ float64 HELPER(recpe_f64)(float64 input, void *fpstp)
  /* The algorithm that must be used to calculate the estimate
   * is specified by the ARM ARM.
   */
-static float64 recip_sqrt_estimate(float64 a, CPUARMState *env)
+static float64 recip_sqrt_estimate(float64 a, float_status *real_fp_status)
  {
      /* These calculations mustn't set any fp exception flags,
       * so we use a local copy of the fp_status.
       */
-    float_status dummy_status = env->vfp.standard_fp_status;
+    float_status dummy_status = *real_fp_status;
      float_status *s = &dummy_status;
      float64 q;
      int64_t q_int;
@@ -4773,49 +4773,64 @@ static float64 recip_sqrt_estimate(float64 a, CPUARMState *env)
      return float64_div(int64_to_float64(q_int, s), float64_256, s);
  }
  
-float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env)
+float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
  {
-    float_status *s = &env->vfp.standard_fp_status;
+    float_status *s = fpstp;
+    float32 f32 = float32_squash_input_denormal(input, s);
+    uint32_t val = float32_val(f32);
+    uint32_t f32_sbit = 0x80000000 & val;
+    int32_t f32_exp = extract32(val, 23, 8);
+    uint32_t f32_frac = extract32(val, 0, 23);
+    uint64_t f64_frac;
+    uint64_t val64;
      int result_exp;
      float64 f64;
-    uint32_t val;
-    uint64_t val64;
  
-    val = float32_val(a);
-
-    if (float32_is_any_nan(a)) {
-        if (float32_is_signaling_nan(a)) {
+    if (float32_is_any_nan(f32)) {
+        float32 nan = f32;
+        if (float32_is_signaling_nan(f32)) {
              float_raise(float_flag_invalid, s);
+            nan = float32_maybe_silence_nan(f32);
          }
-        return float32_default_nan;
-    } else if (float32_is_zero_or_denormal(a)) {
-        if (!float32_is_zero(a)) {
-            float_raise(float_flag_input_denormal, s);
+        if (s->default_nan_mode) {
+            nan =  float32_default_nan;
          }
+        return nan;
+    } else if (float32_is_zero(f32)) {
          float_raise(float_flag_divbyzero, s);
-        return float32_set_sign(float32_infinity, float32_is_neg(a));
-    } else if (float32_is_neg(a)) {
+        return float32_set_sign(float32_infinity, float32_is_neg(f32));
+    } else if (float32_is_neg(f32)) {
          float_raise(float_flag_invalid, s);
          return float32_default_nan;
-    } else if (float32_is_infinity(a)) {
+    } else if (float32_is_infinity(f32)) {
          return float32_zero;
      }
  
-    /* Normalize to a double-precision value between 0.25 and 1.0,
+    /* Scale and normalize to a double-precision value between 0.25 and 1.0,
       * preserving the parity of the exponent.  */
-    if ((val & 0x800000) == 0) {
-        f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
+
+    f64_frac = ((uint64_t) f32_frac) << 29;
+    if (f32_exp == 0) {
+        while (extract64(f64_frac, 51, 1) == 0) {
+            f64_frac = f64_frac << 1;
+            f32_exp = f32_exp-1;
+        }
+        f64_frac = extract64(f64_frac, 0, 51) << 1;
+    }
+
+    if (extract64(f32_exp, 0, 1) == 0) {
+        f64 = make_float64(((uint64_t) f32_sbit) << 32
                             | (0x3feULL << 52)
-                           | ((uint64_t)(val & 0x7fffff) << 29));
+                           | f64_frac);
      } else {
-        f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
+        f64 = make_float64(((uint64_t) f32_sbit) << 32
                             | (0x3fdULL << 52)
-                           | ((uint64_t)(val & 0x7fffff) << 29));
+                           | f64_frac);
      }
  
-    result_exp = (380 - ((val & 0x7f800000) >> 23)) / 2;
+    result_exp = (380 - f32_exp) / 2;
  
-    f64 = recip_sqrt_estimate(f64, env);
+    f64 = recip_sqrt_estimate(f64, s);
  
      val64 = float64_val(f64);
  
@@ -4824,6 +4839,69 @@ float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env)
      return make_float32(val);
  }
  
+float64 HELPER(rsqrte_f64)(float64 input, void *fpstp) /* FRSQRTE, f64 */
+{
+    float_status *s = fpstp; /* status that float_raise() is applied to */
+    float64 f64 = float64_squash_input_denormal(input, s);
+    uint64_t val = float64_val(f64);
+    uint64_t f64_sbit = 0x8000000000000000ULL & val; /* sign, bit 63 */
+    int64_t f64_exp = extract64(val, 52, 11); /* signed: may go below 0 */
+    uint64_t f64_frac = extract64(val, 0, 52); /* 52-bit fraction field */
+    int64_t result_exp;
+    uint64_t result_frac;
+
+    if (float64_is_any_nan(f64)) { /* NaN in, NaN out */
+        float64 nan = f64;
+        if (float64_is_signaling_nan(f64)) { /* SNaN: invalid + quieten */
+            float_raise(float_flag_invalid, s);
+            nan = float64_maybe_silence_nan(f64);
+        }
+        if (s->default_nan_mode) { /* overrides the propagated NaN */
+            nan =  float64_default_nan;
+        }
+        return nan;
+    } else if (float64_is_zero(f64)) { /* +/-0: infinity of same sign */
+        float_raise(float_flag_divbyzero, s);
+        return float64_set_sign(float64_infinity, float64_is_neg(f64));
+    } else if (float64_is_neg(f64)) { /* any other negative, incl. -inf */
+        float_raise(float_flag_invalid, s);
+        return float64_default_nan;
+    } else if (float64_is_infinity(f64)) { /* only +inf can reach here */
+        return float64_zero;
+    }
+
+    /* Scale and normalize to a double-precision value between 0.25 and 1.0,
+     * preserving the parity of the exponent.  */
+
+    if (f64_exp == 0) { /* zero was handled above, so this is a denormal */
+        while (extract64(f64_frac, 51, 1) == 0) { /* until bit 51 is set */
+            f64_frac = f64_frac << 1;
+            f64_exp = f64_exp - 1; /* one step down per shift */
+        }
+        f64_frac = extract64(f64_frac, 0, 51) << 1; /* drop that top bit */
+    }
+
+    if (extract64(f64_exp, 0, 1) == 0) { /* even exponent */
+        f64 = make_float64(f64_sbit
+                           | (0x3feULL << 52) /* scaled into [0.5, 1.0) */
+                           | f64_frac);
+    } else { /* odd exponent */
+        f64 = make_float64(f64_sbit
+                           | (0x3fdULL << 52) /* scaled into [0.25, 0.5) */
+                           | f64_frac);
+    }
+
+    result_exp = (3068 - f64_exp) / 2; /* exponent field of the result */
+
+    f64 = recip_sqrt_estimate(f64, s); /* helper works on a copy of *s */
+
+    result_frac = extract64(float64_val(f64), 0, 52); /* fraction only */
+
+    return make_float64(f64_sbit | /* 0 here: negatives returned above */
+                        ((result_exp & 0x7ff) << 52) |
+                        result_frac);
+}
+
  uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
  {
      float_status *s = fpstp;
@@ -4841,8 +4919,9 @@ uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
      return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
  }
  
-uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env)
+uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
  {
+    float_status *fpst = fpstp;
      float64 f64;
  
      if ((a & 0xc0000000) == 0) {
@@ -4857,7 +4936,7 @@ uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env)
                             | ((uint64_t)(a & 0x3fffffff) << 22));
      }
  
-    f64 = recip_sqrt_estimate(f64, env);
+    f64 = recip_sqrt_estimate(f64, fpst);
  
      return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
  }
diff --git a/target-arm/helper.h b/target-arm/helper.h

index f96a82415a8d0e5e796f43c2008195b0aa2222a9..a3d6f32b064217f124f2591faf68a21cb64f9a70 100644 (file)
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -169,9 +169,10 @@ DEF_HELPER_3(recps_f32, f32, f32, f32, env)
  DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
  DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
  DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
-DEF_HELPER_2(rsqrte_f32, f32, f32, env)
+DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
+DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
  DEF_HELPER_2(recpe_u32, i32, i32, ptr)
-DEF_HELPER_2(rsqrte_u32, i32, i32, env)
+DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr)
  DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
  
  DEF_HELPER_3(shl_cc, i32, env, i32, i32)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c

index 235f8805895a2ce7d9aaedaca45dcdbdfc15d5df..befffac2e3459203e05d7d091d0de10213e75d49 100644 (file)
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7146,6 +7146,9 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode,
              case 0x3f: /* FRECPX */
                  gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
                  break;
+            case 0x7d: /* FRSQRTE */
+                gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
+                break;
              default:
                  g_assert_not_reached();
              }
@@ -7181,6 +7184,9 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode,
              case 0x3f: /* FRECPX */
                  gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
                  break;
+            case 0x7d: /* FRSQRTE */
+                gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
+                break;
              default:
                  g_assert_not_reached();
              }
@@ -7378,6 +7384,7 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
          }
          case 0x3d: /* FRECPE */
          case 0x3f: /* FRECPX */
+        case 0x7d: /* FRSQRTE */
              handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
              return;
          case 0x1a: /* FCVTNS */
@@ -7404,9 +7411,6 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
              }
              handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
              return;
-        case 0x7d: /* FRSQRTE */
-            unsupported_encoding(s, insn);
-            return;
          default:
              unallocated_encoding(s);
              return;
@@ -9255,6 +9259,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
              }
              /* fall through */
          case 0x3d: /* FRECPE */
+        case 0x7d: /* FRSQRTE */
+            if (size == 3 && !is_q) {
+                unallocated_encoding(s);
+                return;
+            }
              handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
              return;
          case 0x56: /* FCVTXN, FCVTXN2 */
@@ -9297,9 +9306,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
              }
              break;
          case 0x7c: /* URSQRTE */
-        case 0x7d: /* FRSQRTE */
-            unsupported_encoding(s, insn);
-            return;
+            if (size == 3) {
+                unallocated_encoding(s);
+                return;
+            }
+            need_fpstatus = true;
+            break;
          default:
              unallocated_encoding(s);
              return;
@@ -9432,6 +9444,9 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
                  case 0x59: /* FRINTX */
                      gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
                      break;
+                case 0x7c: /* URSQRTE */
+                    gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
+                    break;
                  default:
                      g_assert_not_reached();
                  }
diff --git a/target-arm/translate.c b/target-arm/translate.c

index 3771953e029dd67d97c61e5c68be42f5f00ad93c..56e3b4bf7f2faa733e9f076ef4a6d99830fd77ec 100644 (file)
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -6689,8 +6689,12 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                              break;
                          }
                          case NEON_2RM_VRSQRTE:
-                            gen_helper_rsqrte_u32(tmp, tmp, cpu_env);
+                        {
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                            gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
+                            tcg_temp_free_ptr(fpstatus);
                              break;
+                        }
                          case NEON_2RM_VRECPE_F:
                          {
                              TCGv_ptr fpstatus = get_fpstatus_ptr(1);
@@ -6699,8 +6703,12 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
                              break;
                          }
                          case NEON_2RM_VRSQRTE_F:
-                            gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env);
+                        {
+                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
+                            gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
+                            tcg_temp_free_ptr(fpstatus);
                              break;
+                        }
                          case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
                              gen_vfp_sito(0, 1);
                              break;
author	Alex Bennée <alex.bennee@linaro.org>
	Mon, 17 Mar 2014 16:31:53 +0000 (16:31 +0000)
committer	Peter Maydell <peter.maydell@linaro.org>
	Mon, 17 Mar 2014 16:31:53 +0000 (16:31 +0000)
target-arm/helper.c		patch \| blob \| history
target-arm/helper.h		patch \| blob \| history
target-arm/translate-a64.c		patch \| blob \| history
target-arm/translate.c		patch \| blob \| history