From 1bccec25e10e9073e21c2f71cd16a0b3ffe06c39 Mon Sep 17 00:00:00 2001
From: Blue Swirl <blauwirbel@gmail.com>
Date: Mon, 1 Aug 2011 07:37:45 +0000
Subject: [PATCH] Sparc: split FPU and VIS op helpers

Move FPU op helpers to fop_helper.c. Move VIS op helpers to vis_helper.c,
compile it only for Sparc64.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
 Makefile.target           |   5 +-
 target-sparc/fop_helper.c | 394 ++++++++++++++++++++++++
 target-sparc/op_helper.c  | 743 ----------------------------------------------
 target-sparc/vis_helper.c | 403 +++++++++++++++++++++++++
 4 files changed, 800 insertions(+), 745 deletions(-)
 create mode 100644 target-sparc/fop_helper.c
 create mode 100644 target-sparc/vis_helper.c

diff --git a/Makefile.target b/Makefile.target
index 26c99ca..fc277b3 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -74,10 +74,11 @@ libobj-y += op_helper.o helper.o
 ifeq ($(TARGET_BASE_ARCH), i386)
 libobj-y += cpuid.o
 endif
+libobj-$(TARGET_SPARC64) += vis_helper.o
 libobj-$(CONFIG_NEED_MMU) += mmu.o
 libobj-$(TARGET_ARM) += neon_helper.o iwmmxt_helper.o
 ifeq ($(TARGET_BASE_ARCH), sparc)
-libobj-y += cpu_init.o
+libobj-y += fop_helper.o cpu_init.o
 endif
 libobj-$(TARGET_SPARC) += int32_helper.o
 libobj-$(TARGET_SPARC64) += int64_helper.o
@@ -96,7 +97,7 @@ tcg/tcg.o: cpu.h
 
 # HELPER_CFLAGS is used for all the code compiled with static register
 # variables
-op_helper.o user-exec.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
+op_helper.o fop_helper.o vis_helper.o user-exec.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
 
 # Note: this is a workaround. The real fix is to avoid compiling
 # cpu_signal_handler() in user-exec.c.
diff --git a/target-sparc/fop_helper.c b/target-sparc/fop_helper.c
new file mode 100644
index 0000000..ddd0af9
--- /dev/null
+++ b/target-sparc/fop_helper.c
@@ -0,0 +1,394 @@
+/*
+ * FPU op helpers
+ *
+ *  Copyright (c) 2003-2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "dyngen-exec.h"
+#include "helper.h"
+
+#define DT0 (env->dt0)
+#define DT1 (env->dt1)
+#define QT0 (env->qt0)
+#define QT1 (env->qt1)
+
+#define F_HELPER(name, p) void helper_f##name##p(void)
+
+#define F_BINOP(name)                                           \
+    float32 helper_f ## name ## s (float32 src1, float32 src2)  \
+    {                                                           \
+        return float32_ ## name (src1, src2, &env->fp_status);  \
+    }                                                           \
+    F_HELPER(name, d)                                           \
+    {                                                           \
+        DT0 = float64_ ## name (DT0, DT1, &env->fp_status);     \
+    }                                                           \
+    F_HELPER(name, q)                                           \
+    {                                                           \
+        QT0 = float128_ ## name (QT0, QT1, &env->fp_status);    \
+    }
+
+F_BINOP(add);
+F_BINOP(sub);
+F_BINOP(mul);
+F_BINOP(div);
+#undef F_BINOP
+
+void helper_fsmuld(float32 src1, float32 src2)
+{
+    DT0 = float64_mul(float32_to_float64(src1, &env->fp_status),
+                      float32_to_float64(src2, &env->fp_status),
+                      &env->fp_status);
+}
+
+void helper_fdmulq(void)
+{
+    QT0 = float128_mul(float64_to_float128(DT0, &env->fp_status),
+                       float64_to_float128(DT1, &env->fp_status),
+                       &env->fp_status);
+}
+
+float32 helper_fnegs(float32 src)
+{
+    return float32_chs(src);
+}
+
+#ifdef TARGET_SPARC64
+F_HELPER(neg, d)
+{
+    DT0 = float64_chs(DT1);
+}
+
+F_HELPER(neg, q)
+{
+    QT0 = float128_chs(QT1);
+}
+#endif
+
+/* Integer to float conversion.  */
+float32 helper_fitos(int32_t src)
+{
+    return int32_to_float32(src, &env->fp_status);
+}
+
+void helper_fitod(int32_t src)
+{
+    DT0 = int32_to_float64(src, &env->fp_status);
+}
+
+void helper_fitoq(int32_t src)
+{
+    QT0 = int32_to_float128(src, &env->fp_status);
+}
+
+#ifdef TARGET_SPARC64
+float32 helper_fxtos(void)
+{
+    return int64_to_float32(*((int64_t *)&DT1), &env->fp_status);
+}
+
+F_HELPER(xto, d)
+{
+    DT0 = int64_to_float64(*((int64_t *)&DT1), &env->fp_status);
+}
+
+F_HELPER(xto, q)
+{
+    QT0 = int64_to_float128(*((int64_t *)&DT1), &env->fp_status);
+}
+#endif
+#undef F_HELPER
+
+/* floating point conversion */
+float32 helper_fdtos(void)
+{
+    return float64_to_float32(DT1, &env->fp_status);
+}
+
+void helper_fstod(float32 src)
+{
+    DT0 = float32_to_float64(src, &env->fp_status);
+}
+
+float32 helper_fqtos(void)
+{
+    return float128_to_float32(QT1, &env->fp_status);
+}
+
+void helper_fstoq(float32 src)
+{
+    QT0 = float32_to_float128(src, &env->fp_status);
+}
+
+void helper_fqtod(void)
+{
+    DT0 = float128_to_float64(QT1, &env->fp_status);
+}
+
+void helper_fdtoq(void)
+{
+    QT0 = float64_to_float128(DT1, &env->fp_status);
+}
+
+/* Float to integer conversion.  */
+int32_t helper_fstoi(float32 src)
+{
+    return float32_to_int32_round_to_zero(src, &env->fp_status);
+}
+
+int32_t helper_fdtoi(void)
+{
+    return float64_to_int32_round_to_zero(DT1, &env->fp_status);
+}
+
+int32_t helper_fqtoi(void)
+{
+    return float128_to_int32_round_to_zero(QT1, &env->fp_status);
+}
+
+#ifdef TARGET_SPARC64
+void helper_fstox(float32 src)
+{
+    *((int64_t *)&DT0) = float32_to_int64_round_to_zero(src, &env->fp_status);
+}
+
+void helper_fdtox(void)
+{
+    *((int64_t *)&DT0) = float64_to_int64_round_to_zero(DT1, &env->fp_status);
+}
+
+void helper_fqtox(void)
+{
+    *((int64_t *)&DT0) = float128_to_int64_round_to_zero(QT1, &env->fp_status);
+}
+#endif
+
+float32 helper_fabss(float32 src)
+{
+    return float32_abs(src);
+}
+
+#ifdef TARGET_SPARC64
+void helper_fabsd(void)
+{
+    DT0 = float64_abs(DT1);
+}
+
+void helper_fabsq(void)
+{
+    QT0 = float128_abs(QT1);
+}
+#endif
+
+float32 helper_fsqrts(float32 src)
+{
+    return float32_sqrt(src, &env->fp_status);
+}
+
+void helper_fsqrtd(void)
+{
+    DT0 = float64_sqrt(DT1, &env->fp_status);
+}
+
+void helper_fsqrtq(void)
+{
+    QT0 = float128_sqrt(QT1, &env->fp_status);
+}
+
+#define GEN_FCMP(name, size, reg1, reg2, FS, E)                         \
+    void glue(helper_, name) (void)                                     \
+    {                                                                   \
+        env->fsr &= FSR_FTT_NMASK;                                      \
+        if (E && (glue(size, _is_any_nan)(reg1) ||                      \
+                  glue(size, _is_any_nan)(reg2)) &&                     \
+            (env->fsr & FSR_NVM)) {                                     \
+            env->fsr |= FSR_NVC;                                        \
+            env->fsr |= FSR_FTT_IEEE_EXCP;                              \
+            helper_raise_exception(env, TT_FP_EXCP);                    \
+        }                                                               \
+        switch (glue(size, _compare) (reg1, reg2, &env->fp_status)) {   \
+        case float_relation_unordered:                                  \
+            if ((env->fsr & FSR_NVM)) {                                 \
+                env->fsr |= FSR_NVC;                                    \
+                env->fsr |= FSR_FTT_IEEE_EXCP;                          \
+                helper_raise_exception(env, TT_FP_EXCP);                \
+            } else {                                                    \
+                env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);             \
+                env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS;                \
+                env->fsr |= FSR_NVA;                                    \
+            }                                                           \
+            break;                                                      \
+        case float_relation_less:                                       \
+            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
+            env->fsr |= FSR_FCC0 << FS;                                 \
+            break;                                                      \
+        case float_relation_greater:                                    \
+            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
+            env->fsr |= FSR_FCC1 << FS;                                 \
+            break;                                                      \
+        default:                                                        \
+            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
+            break;                                                      \
+        }                                                               \
+    }
+#define GEN_FCMPS(name, size, FS, E)                                    \
+    void glue(helper_, name)(float32 src1, float32 src2)                \
+    {                                                                   \
+        env->fsr &= FSR_FTT_NMASK;                                      \
+        if (E && (glue(size, _is_any_nan)(src1) ||                      \
+                  glue(size, _is_any_nan)(src2)) &&                     \
+            (env->fsr & FSR_NVM)) {                                     \
+            env->fsr |= FSR_NVC;                                        \
+            env->fsr |= FSR_FTT_IEEE_EXCP;                              \
+            helper_raise_exception(env, TT_FP_EXCP);                    \
+        }                                                               \
+        switch (glue(size, _compare) (src1, src2, &env->fp_status)) {   \
+        case float_relation_unordered:                                  \
+            if ((env->fsr & FSR_NVM)) {                                 \
+                env->fsr |= FSR_NVC;                                    \
+                env->fsr |= FSR_FTT_IEEE_EXCP;                          \
+                helper_raise_exception(env, TT_FP_EXCP);                \
+            } else {                                                    \
+                env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);             \
+                env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS;                \
+                env->fsr |= FSR_NVA;                                    \
+            }                                                           \
+            break;                                                      \
+        case float_relation_less:                                       \
+            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
+            env->fsr |= FSR_FCC0 << FS;                                 \
+            break;                                                      \
+        case float_relation_greater:                                    \
+            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
+            env->fsr |= FSR_FCC1 << FS;                                 \
+            break;                                                      \
+        default:                                                        \
+            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
+            break;                                                      \
+        }                                                               \
+    }
+
+GEN_FCMPS(fcmps, float32, 0, 0);
+GEN_FCMP(fcmpd, float64, DT0, DT1, 0, 0);
+
+GEN_FCMPS(fcmpes, float32, 0, 1);
+GEN_FCMP(fcmped, float64, DT0, DT1, 0, 1);
+
+GEN_FCMP(fcmpq, float128, QT0, QT1, 0, 0);
+GEN_FCMP(fcmpeq, float128, QT0, QT1, 0, 1);
+
+#ifdef TARGET_SPARC64
+GEN_FCMPS(fcmps_fcc1, float32, 22, 0);
+GEN_FCMP(fcmpd_fcc1, float64, DT0, DT1, 22, 0);
+GEN_FCMP(fcmpq_fcc1, float128, QT0, QT1, 22, 0);
+
+GEN_FCMPS(fcmps_fcc2, float32, 24, 0);
+GEN_FCMP(fcmpd_fcc2, float64, DT0, DT1, 24, 0);
+GEN_FCMP(fcmpq_fcc2, float128, QT0, QT1, 24, 0);
+
+GEN_FCMPS(fcmps_fcc3, float32, 26, 0);
+GEN_FCMP(fcmpd_fcc3, float64, DT0, DT1, 26, 0);
+GEN_FCMP(fcmpq_fcc3, float128, QT0, QT1, 26, 0);
+
+GEN_FCMPS(fcmpes_fcc1, float32, 22, 1);
+GEN_FCMP(fcmped_fcc1, float64, DT0, DT1, 22, 1);
+GEN_FCMP(fcmpeq_fcc1, float128, QT0, QT1, 22, 1);
+
+GEN_FCMPS(fcmpes_fcc2, float32, 24, 1);
+GEN_FCMP(fcmped_fcc2, float64, DT0, DT1, 24, 1);
+GEN_FCMP(fcmpeq_fcc2, float128, QT0, QT1, 24, 1);
+
+GEN_FCMPS(fcmpes_fcc3, float32, 26, 1);
+GEN_FCMP(fcmped_fcc3, float64, DT0, DT1, 26, 1);
+GEN_FCMP(fcmpeq_fcc3, float128, QT0, QT1, 26, 1);
+#endif
+#undef GEN_FCMPS
+
+void helper_check_ieee_exceptions(void)
+{
+    target_ulong status;
+
+    status = get_float_exception_flags(&env->fp_status);
+    if (status) {
+        /* Copy IEEE 754 flags into FSR */
+        if (status & float_flag_invalid) {
+            env->fsr |= FSR_NVC;
+        }
+        if (status & float_flag_overflow) {
+            env->fsr |= FSR_OFC;
+        }
+        if (status & float_flag_underflow) {
+            env->fsr |= FSR_UFC;
+        }
+        if (status & float_flag_divbyzero) {
+            env->fsr |= FSR_DZC;
+        }
+        if (status & float_flag_inexact) {
+            env->fsr |= FSR_NXC;
+        }
+
+        if ((env->fsr & FSR_CEXC_MASK) & ((env->fsr & FSR_TEM_MASK) >> 23)) {
+            /* Unmasked exception, generate a trap */
+            env->fsr |= FSR_FTT_IEEE_EXCP;
+            helper_raise_exception(env, TT_FP_EXCP);
+        } else {
+            /* Accumulate exceptions */
+            env->fsr |= (env->fsr & FSR_CEXC_MASK) << 5;
+        }
+    }
+}
+
+void helper_clear_float_exceptions(void)
+{
+    set_float_exception_flags(0, &env->fp_status);
+}
+
+static inline void set_fsr(void)
+{
+    int rnd_mode;
+
+    switch (env->fsr & FSR_RD_MASK) {
+    case FSR_RD_NEAREST:
+        rnd_mode = float_round_nearest_even;
+        break;
+    default:
+    case FSR_RD_ZERO:
+        rnd_mode = float_round_to_zero;
+        break;
+    case FSR_RD_POS:
+        rnd_mode = float_round_up;
+        break;
+    case FSR_RD_NEG:
+        rnd_mode = float_round_down;
+        break;
+    }
+    set_float_rounding_mode(rnd_mode, &env->fp_status);
+}
+
+void helper_ldfsr(uint32_t new_fsr)
+{
+    env->fsr = (new_fsr & FSR_LDFSR_MASK) | (env->fsr & FSR_LDFSR_OLDMASK);
+    set_fsr();
+}
+
+#ifdef TARGET_SPARC64
+void helper_ldxfsr(uint64_t new_fsr)
+{
+    env->fsr = (new_fsr & FSR_LDXFSR_MASK) | (env->fsr & FSR_LDXFSR_OLDMASK);
+    set_fsr();
+}
+#endif
diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c
index f2bca77..fd20366 100644
--- a/target-sparc/op_helper.c
+++ b/target-sparc/op_helper.c
@@ -333,655 +333,6 @@ void helper_check_align(target_ulong addr, uint32_t align)
     }
 }
 
-#define F_HELPER(name, p) void helper_f##name##p(void)
-
-#define F_BINOP(name)                                           \
-    float32 helper_f ## name ## s (float32 src1, float32 src2)  \
-    {                                                           \
-        return float32_ ## name (src1, src2, &env->fp_status);  \
-    }                                                           \
-    F_HELPER(name, d)                                           \
-    {                                                           \
-        DT0 = float64_ ## name (DT0, DT1, &env->fp_status);     \
-    }                                                           \
-    F_HELPER(name, q)                                           \
-    {                                                           \
-        QT0 = float128_ ## name (QT0, QT1, &env->fp_status);    \
-    }
-
-F_BINOP(add);
-F_BINOP(sub);
-F_BINOP(mul);
-F_BINOP(div);
-#undef F_BINOP
-
-void helper_fsmuld(float32 src1, float32 src2)
-{
-    DT0 = float64_mul(float32_to_float64(src1, &env->fp_status),
-                      float32_to_float64(src2, &env->fp_status),
-                      &env->fp_status);
-}
-
-void helper_fdmulq(void)
-{
-    QT0 = float128_mul(float64_to_float128(DT0, &env->fp_status),
-                       float64_to_float128(DT1, &env->fp_status),
-                       &env->fp_status);
-}
-
-float32 helper_fnegs(float32 src)
-{
-    return float32_chs(src);
-}
-
-#ifdef TARGET_SPARC64
-F_HELPER(neg, d)
-{
-    DT0 = float64_chs(DT1);
-}
-
-F_HELPER(neg, q)
-{
-    QT0 = float128_chs(QT1);
-}
-#endif
-
-/* Integer to float conversion.  */
-float32 helper_fitos(int32_t src)
-{
-    return int32_to_float32(src, &env->fp_status);
-}
-
-void helper_fitod(int32_t src)
-{
-    DT0 = int32_to_float64(src, &env->fp_status);
-}
-
-void helper_fitoq(int32_t src)
-{
-    QT0 = int32_to_float128(src, &env->fp_status);
-}
-
-#ifdef TARGET_SPARC64
-float32 helper_fxtos(void)
-{
-    return int64_to_float32(*((int64_t *)&DT1), &env->fp_status);
-}
-
-F_HELPER(xto, d)
-{
-    DT0 = int64_to_float64(*((int64_t *)&DT1), &env->fp_status);
-}
-
-F_HELPER(xto, q)
-{
-    QT0 = int64_to_float128(*((int64_t *)&DT1), &env->fp_status);
-}
-#endif
-#undef F_HELPER
-
-/* floating point conversion */
-float32 helper_fdtos(void)
-{
-    return float64_to_float32(DT1, &env->fp_status);
-}
-
-void helper_fstod(float32 src)
-{
-    DT0 = float32_to_float64(src, &env->fp_status);
-}
-
-float32 helper_fqtos(void)
-{
-    return float128_to_float32(QT1, &env->fp_status);
-}
-
-void helper_fstoq(float32 src)
-{
-    QT0 = float32_to_float128(src, &env->fp_status);
-}
-
-void helper_fqtod(void)
-{
-    DT0 = float128_to_float64(QT1, &env->fp_status);
-}
-
-void helper_fdtoq(void)
-{
-    QT0 = float64_to_float128(DT1, &env->fp_status);
-}
-
-/* Float to integer conversion.  */
-int32_t helper_fstoi(float32 src)
-{
-    return float32_to_int32_round_to_zero(src, &env->fp_status);
-}
-
-int32_t helper_fdtoi(void)
-{
-    return float64_to_int32_round_to_zero(DT1, &env->fp_status);
-}
-
-int32_t helper_fqtoi(void)
-{
-    return float128_to_int32_round_to_zero(QT1, &env->fp_status);
-}
-
-#ifdef TARGET_SPARC64
-void helper_fstox(float32 src)
-{
-    *((int64_t *)&DT0) = float32_to_int64_round_to_zero(src, &env->fp_status);
-}
-
-void helper_fdtox(void)
-{
-    *((int64_t *)&DT0) = float64_to_int64_round_to_zero(DT1, &env->fp_status);
-}
-
-void helper_fqtox(void)
-{
-    *((int64_t *)&DT0) = float128_to_int64_round_to_zero(QT1, &env->fp_status);
-}
-
-void helper_faligndata(void)
-{
-    uint64_t tmp;
-
-    tmp = (*((uint64_t *)&DT0)) << ((env->gsr & 7) * 8);
-    /* on many architectures a shift of 64 does nothing */
-    if ((env->gsr & 7) != 0) {
-        tmp |= (*((uint64_t *)&DT1)) >> (64 - (env->gsr & 7) * 8);
-    }
-    *((uint64_t *)&DT0) = tmp;
-}
-
-#ifdef HOST_WORDS_BIGENDIAN
-#define VIS_B64(n) b[7 - (n)]
-#define VIS_W64(n) w[3 - (n)]
-#define VIS_SW64(n) sw[3 - (n)]
-#define VIS_L64(n) l[1 - (n)]
-#define VIS_B32(n) b[3 - (n)]
-#define VIS_W32(n) w[1 - (n)]
-#else
-#define VIS_B64(n) b[n]
-#define VIS_W64(n) w[n]
-#define VIS_SW64(n) sw[n]
-#define VIS_L64(n) l[n]
-#define VIS_B32(n) b[n]
-#define VIS_W32(n) w[n]
-#endif
-
-typedef union {
-    uint8_t b[8];
-    uint16_t w[4];
-    int16_t sw[4];
-    uint32_t l[2];
-    uint64_t ll;
-    float64 d;
-} VIS64;
-
-typedef union {
-    uint8_t b[4];
-    uint16_t w[2];
-    uint32_t l;
-    float32 f;
-} VIS32;
-
-void helper_fpmerge(void)
-{
-    VIS64 s, d;
-
-    s.d = DT0;
-    d.d = DT1;
-
-    /* Reverse calculation order to handle overlap */
-    d.VIS_B64(7) = s.VIS_B64(3);
-    d.VIS_B64(6) = d.VIS_B64(3);
-    d.VIS_B64(5) = s.VIS_B64(2);
-    d.VIS_B64(4) = d.VIS_B64(2);
-    d.VIS_B64(3) = s.VIS_B64(1);
-    d.VIS_B64(2) = d.VIS_B64(1);
-    d.VIS_B64(1) = s.VIS_B64(0);
-    /* d.VIS_B64(0) = d.VIS_B64(0); */
-
-    DT0 = d.d;
-}
-
-void helper_fmul8x16(void)
-{
-    VIS64 s, d;
-    uint32_t tmp;
-
-    s.d = DT0;
-    d.d = DT1;
-
-#define PMUL(r)                                                 \
-    tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);       \
-    if ((tmp & 0xff) > 0x7f) {                                  \
-        tmp += 0x100;                                           \
-    }                                                           \
-    d.VIS_W64(r) = tmp >> 8;
-
-    PMUL(0);
-    PMUL(1);
-    PMUL(2);
-    PMUL(3);
-#undef PMUL
-
-    DT0 = d.d;
-}
-
-void helper_fmul8x16al(void)
-{
-    VIS64 s, d;
-    uint32_t tmp;
-
-    s.d = DT0;
-    d.d = DT1;
-
-#define PMUL(r)                                                 \
-    tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);       \
-    if ((tmp & 0xff) > 0x7f) {                                  \
-        tmp += 0x100;                                           \
-    }                                                           \
-    d.VIS_W64(r) = tmp >> 8;
-
-    PMUL(0);
-    PMUL(1);
-    PMUL(2);
-    PMUL(3);
-#undef PMUL
-
-    DT0 = d.d;
-}
-
-void helper_fmul8x16au(void)
-{
-    VIS64 s, d;
-    uint32_t tmp;
-
-    s.d = DT0;
-    d.d = DT1;
-
-#define PMUL(r)                                                 \
-    tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
-    if ((tmp & 0xff) > 0x7f) {                                  \
-        tmp += 0x100;                                           \
-    }                                                           \
-    d.VIS_W64(r) = tmp >> 8;
-
-    PMUL(0);
-    PMUL(1);
-    PMUL(2);
-    PMUL(3);
-#undef PMUL
-
-    DT0 = d.d;
-}
-
-void helper_fmul8sux16(void)
-{
-    VIS64 s, d;
-    uint32_t tmp;
-
-    s.d = DT0;
-    d.d = DT1;
-
-#define PMUL(r)                                                         \
-    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
-    if ((tmp & 0xff) > 0x7f) {                                          \
-        tmp += 0x100;                                                   \
-    }                                                                   \
-    d.VIS_W64(r) = tmp >> 8;
-
-    PMUL(0);
-    PMUL(1);
-    PMUL(2);
-    PMUL(3);
-#undef PMUL
-
-    DT0 = d.d;
-}
-
-void helper_fmul8ulx16(void)
-{
-    VIS64 s, d;
-    uint32_t tmp;
-
-    s.d = DT0;
-    d.d = DT1;
-
-#define PMUL(r)                                                         \
-    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
-    if ((tmp & 0xff) > 0x7f) {                                          \
-        tmp += 0x100;                                                   \
-    }                                                                   \
-    d.VIS_W64(r) = tmp >> 8;
-
-    PMUL(0);
-    PMUL(1);
-    PMUL(2);
-    PMUL(3);
-#undef PMUL
-
-    DT0 = d.d;
-}
-
-void helper_fmuld8sux16(void)
-{
-    VIS64 s, d;
-    uint32_t tmp;
-
-    s.d = DT0;
-    d.d = DT1;
-
-#define PMUL(r)                                                         \
-    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
-    if ((tmp & 0xff) > 0x7f) {                                          \
-        tmp += 0x100;                                                   \
-    }                                                                   \
-    d.VIS_L64(r) = tmp;
-
-    /* Reverse calculation order to handle overlap */
-    PMUL(1);
-    PMUL(0);
-#undef PMUL
-
-    DT0 = d.d;
-}
-
-void helper_fmuld8ulx16(void)
-{
-    VIS64 s, d;
-    uint32_t tmp;
-
-    s.d = DT0;
-    d.d = DT1;
-
-#define PMUL(r)                                                         \
-    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
-    if ((tmp & 0xff) > 0x7f) {                                          \
-        tmp += 0x100;                                                   \
-    }                                                                   \
-    d.VIS_L64(r) = tmp;
-
-    /* Reverse calculation order to handle overlap */
-    PMUL(1);
-    PMUL(0);
-#undef PMUL
-
-    DT0 = d.d;
-}
-
-void helper_fexpand(void)
-{
-    VIS32 s;
-    VIS64 d;
-
-    s.l = (uint32_t)(*(uint64_t *)&DT0 & 0xffffffff);
-    d.d = DT1;
-    d.VIS_W64(0) = s.VIS_B32(0) << 4;
-    d.VIS_W64(1) = s.VIS_B32(1) << 4;
-    d.VIS_W64(2) = s.VIS_B32(2) << 4;
-    d.VIS_W64(3) = s.VIS_B32(3) << 4;
-
-    DT0 = d.d;
-}
-
-#define VIS_HELPER(name, F)                             \
-    void name##16(void)                                 \
-    {                                                   \
-        VIS64 s, d;                                     \
-                                                        \
-        s.d = DT0;                                      \
-        d.d = DT1;                                      \
-                                                        \
-        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));   \
-        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));   \
-        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));   \
-        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));   \
-                                                        \
-        DT0 = d.d;                                      \
-    }                                                   \
-                                                        \
-    uint32_t name##16s(uint32_t src1, uint32_t src2)    \
-    {                                                   \
-        VIS32 s, d;                                     \
-                                                        \
-        s.l = src1;                                     \
-        d.l = src2;                                     \
-                                                        \
-        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));   \
-        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));   \
-                                                        \
-        return d.l;                                     \
-    }                                                   \
-                                                        \
-    void name##32(void)                                 \
-    {                                                   \
-        VIS64 s, d;                                     \
-                                                        \
-        s.d = DT0;                                      \
-        d.d = DT1;                                      \
-                                                        \
-        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));   \
-        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));   \
-                                                        \
-        DT0 = d.d;                                      \
-    }                                                   \
-                                                        \
-    uint32_t name##32s(uint32_t src1, uint32_t src2)    \
-    {                                                   \
-        VIS32 s, d;                                     \
-                                                        \
-        s.l = src1;                                     \
-        d.l = src2;                                     \
-                                                        \
-        d.l = F(d.l, s.l);                              \
-                                                        \
-        return d.l;                                     \
-    }
-
-#define FADD(a, b) ((a) + (b))
-#define FSUB(a, b) ((a) - (b))
-VIS_HELPER(helper_fpadd, FADD)
-VIS_HELPER(helper_fpsub, FSUB)
-
-#define VIS_CMPHELPER(name, F)                                    \
-    uint64_t name##16(void)                                       \
-    {                                                             \
-        VIS64 s, d;                                               \
-                                                                  \
-        s.d = DT0;                                                \
-        d.d = DT1;                                                \
-                                                                  \
-        d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
-        d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
-        d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
-        d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
-        d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
-                                                                  \
-        return d.ll;                                              \
-    }                                                             \
-                                                                  \
-    uint64_t name##32(void)                                       \
-    {                                                             \
-        VIS64 s, d;                                               \
-                                                                  \
-        s.d = DT0;                                                \
-        d.d = DT1;                                                \
-                                                                  \
-        d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
-        d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
-        d.VIS_L64(1) = 0;                                         \
-                                                                  \
-        return d.ll;                                              \
-    }
-
-#define FCMPGT(a, b) ((a) > (b))
-#define FCMPEQ(a, b) ((a) == (b))
-#define FCMPLE(a, b) ((a) <= (b))
-#define FCMPNE(a, b) ((a) != (b))
-
-VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
-VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
-VIS_CMPHELPER(helper_fcmple, FCMPLE)
-VIS_CMPHELPER(helper_fcmpne, FCMPNE)
-#endif
-
-void helper_check_ieee_exceptions(void)
-{
-    target_ulong status;
-
-    status = get_float_exception_flags(&env->fp_status);
-    if (status) {
-        /* Copy IEEE 754 flags into FSR */
-        if (status & float_flag_invalid) {
-            env->fsr |= FSR_NVC;
-        }
-        if (status & float_flag_overflow) {
-            env->fsr |= FSR_OFC;
-        }
-        if (status & float_flag_underflow) {
-            env->fsr |= FSR_UFC;
-        }
-        if (status & float_flag_divbyzero) {
-            env->fsr |= FSR_DZC;
-        }
-        if (status & float_flag_inexact) {
-            env->fsr |= FSR_NXC;
-        }
-
-        if ((env->fsr & FSR_CEXC_MASK) & ((env->fsr & FSR_TEM_MASK) >> 23)) {
-            /* Unmasked exception, generate a trap */
-            env->fsr |= FSR_FTT_IEEE_EXCP;
-            helper_raise_exception(env, TT_FP_EXCP);
-        } else {
-            /* Accumulate exceptions */
-            env->fsr |= (env->fsr & FSR_CEXC_MASK) << 5;
-        }
-    }
-}
-
-void helper_clear_float_exceptions(void)
-{
-    set_float_exception_flags(0, &env->fp_status);
-}
-
-float32 helper_fabss(float32 src)
-{
-    return float32_abs(src);
-}
-
-#ifdef TARGET_SPARC64
-void helper_fabsd(void)
-{
-    DT0 = float64_abs(DT1);
-}
-
-void helper_fabsq(void)
-{
-    QT0 = float128_abs(QT1);
-}
-#endif
-
-float32 helper_fsqrts(float32 src)
-{
-    return float32_sqrt(src, &env->fp_status);
-}
-
-void helper_fsqrtd(void)
-{
-    DT0 = float64_sqrt(DT1, &env->fp_status);
-}
-
-void helper_fsqrtq(void)
-{
-    QT0 = float128_sqrt(QT1, &env->fp_status);
-}
-
-#define GEN_FCMP(name, size, reg1, reg2, FS, E)                         \
-    void glue(helper_, name) (void)                                     \
-    {                                                                   \
-        env->fsr &= FSR_FTT_NMASK;                                      \
-        if (E && (glue(size, _is_any_nan)(reg1) ||                      \
-                  glue(size, _is_any_nan)(reg2)) &&                     \
-            (env->fsr & FSR_NVM)) {                                     \
-            env->fsr |= FSR_NVC;                                        \
-            env->fsr |= FSR_FTT_IEEE_EXCP;                              \
-            helper_raise_exception(env, TT_FP_EXCP);                    \
-        }                                                               \
-        switch (glue(size, _compare) (reg1, reg2, &env->fp_status)) {   \
-        case float_relation_unordered:                                  \
-            if ((env->fsr & FSR_NVM)) {                                 \
-                env->fsr |= FSR_NVC;                                    \
-                env->fsr |= FSR_FTT_IEEE_EXCP;                          \
-                helper_raise_exception(env, TT_FP_EXCP);                \
-            } else {                                                    \
-                env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);             \
-                env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS;                \
-                env->fsr |= FSR_NVA;                                    \
-            }                                                           \
-            break;                                                      \
-        case float_relation_less:                                       \
-            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
-            env->fsr |= FSR_FCC0 << FS;                                 \
-            break;                                                      \
-        case float_relation_greater:                                    \
-            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
-            env->fsr |= FSR_FCC1 << FS;                                 \
-            break;                                                      \
-        default:                                                        \
-            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
-            break;                                                      \
-        }                                                               \
-    }
-#define GEN_FCMPS(name, size, FS, E)                                    \
-    void glue(helper_, name)(float32 src1, float32 src2)                \
-    {                                                                   \
-        env->fsr &= FSR_FTT_NMASK;                                      \
-        if (E && (glue(size, _is_any_nan)(src1) ||                      \
-                  glue(size, _is_any_nan)(src2)) &&                     \
-            (env->fsr & FSR_NVM)) {                                     \
-            env->fsr |= FSR_NVC;                                        \
-            env->fsr |= FSR_FTT_IEEE_EXCP;                              \
-            helper_raise_exception(env, TT_FP_EXCP);                    \
-        }                                                               \
-        switch (glue(size, _compare) (src1, src2, &env->fp_status)) {   \
-        case float_relation_unordered:                                  \
-            if ((env->fsr & FSR_NVM)) {                                 \
-                env->fsr |= FSR_NVC;                                    \
-                env->fsr |= FSR_FTT_IEEE_EXCP;                          \
-                helper_raise_exception(env, TT_FP_EXCP);                \
-            } else {                                                    \
-                env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);             \
-                env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS;                \
-                env->fsr |= FSR_NVA;                                    \
-            }                                                           \
-            break;                                                      \
-        case float_relation_less:                                       \
-            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
-            env->fsr |= FSR_FCC0 << FS;                                 \
-            break;                                                      \
-        case float_relation_greater:                                    \
-            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
-            env->fsr |= FSR_FCC1 << FS;                                 \
-            break;                                                      \
-        default:                                                        \
-            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
-            break;                                                      \
-        }                                                               \
-    }
-
-GEN_FCMPS(fcmps, float32, 0, 0);
-GEN_FCMP(fcmpd, float64, DT0, DT1, 0, 0);
-
-GEN_FCMPS(fcmpes, float32, 0, 1);
-GEN_FCMP(fcmped, float64, DT0, DT1, 0, 1);
-
-GEN_FCMP(fcmpq, float128, QT0, QT1, 0, 0);
-GEN_FCMP(fcmpeq, float128, QT0, QT1, 0, 1);
-
 static uint32_t compute_all_flags(void)
 {
     return env->psr & PSR_ICC;
@@ -1580,33 +931,6 @@ int cpu_cwp_dec(CPUState *env1, int cwp)
     return ret;
 }
 
-#ifdef TARGET_SPARC64
-GEN_FCMPS(fcmps_fcc1, float32, 22, 0);
-GEN_FCMP(fcmpd_fcc1, float64, DT0, DT1, 22, 0);
-GEN_FCMP(fcmpq_fcc1, float128, QT0, QT1, 22, 0);
-
-GEN_FCMPS(fcmps_fcc2, float32, 24, 0);
-GEN_FCMP(fcmpd_fcc2, float64, DT0, DT1, 24, 0);
-GEN_FCMP(fcmpq_fcc2, float128, QT0, QT1, 24, 0);
-
-GEN_FCMPS(fcmps_fcc3, float32, 26, 0);
-GEN_FCMP(fcmpd_fcc3, float64, DT0, DT1, 26, 0);
-GEN_FCMP(fcmpq_fcc3, float128, QT0, QT1, 26, 0);
-
-GEN_FCMPS(fcmpes_fcc1, float32, 22, 1);
-GEN_FCMP(fcmped_fcc1, float64, DT0, DT1, 22, 1);
-GEN_FCMP(fcmpeq_fcc1, float128, QT0, QT1, 22, 1);
-
-GEN_FCMPS(fcmpes_fcc2, float32, 24, 1);
-GEN_FCMP(fcmped_fcc2, float64, DT0, DT1, 24, 1);
-GEN_FCMP(fcmpeq_fcc2, float128, QT0, QT1, 24, 1);
-
-GEN_FCMPS(fcmpes_fcc3, float32, 26, 1);
-GEN_FCMP(fcmped_fcc3, float64, DT0, DT1, 26, 1);
-GEN_FCMP(fcmpeq_fcc3, float128, QT0, QT1, 26, 1);
-#endif
-#undef GEN_FCMPS
-
 #if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY) &&   \
     defined(DEBUG_MXCC)
 static void dump_mxcc(CPUState *env)
@@ -3748,42 +3072,6 @@ void helper_stqf(target_ulong addr, int mem_idx)
 #endif
 }
 
-static inline void set_fsr(void)
-{
-    int rnd_mode;
-
-    switch (env->fsr & FSR_RD_MASK) {
-    case FSR_RD_NEAREST:
-        rnd_mode = float_round_nearest_even;
-        break;
-    default:
-    case FSR_RD_ZERO:
-        rnd_mode = float_round_to_zero;
-        break;
-    case FSR_RD_POS:
-        rnd_mode = float_round_up;
-        break;
-    case FSR_RD_NEG:
-        rnd_mode = float_round_down;
-        break;
-    }
-    set_float_rounding_mode(rnd_mode, &env->fp_status);
-}
-
-void helper_ldfsr(uint32_t new_fsr)
-{
-    env->fsr = (new_fsr & FSR_LDFSR_MASK) | (env->fsr & FSR_LDFSR_OLDMASK);
-    set_fsr();
-}
-
-#ifdef TARGET_SPARC64
-void helper_ldxfsr(uint64_t new_fsr)
-{
-    env->fsr = (new_fsr & FSR_LDXFSR_MASK) | (env->fsr & FSR_LDXFSR_OLDMASK);
-    set_fsr();
-}
-#endif
-
 #ifndef TARGET_SPARC64
 /* XXX: use another pointer for %iN registers to avoid slow wrapping
    handling ? */
@@ -3993,37 +3281,6 @@ void helper_wrcwp(target_ulong new_cwp)
     put_cwp64(new_cwp);
 }
 
-/* This function uses non-native bit order */
-#define GET_FIELD(X, FROM, TO)                                  \
-    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
-
-/* This function uses the order in the manuals, i.e. bit 0 is 2^0 */
-#define GET_FIELD_SP(X, FROM, TO)               \
-    GET_FIELD(X, 63 - (TO), 63 - (FROM))
-
-target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
-{
-    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
-        (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
-        (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
-        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
-        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
-        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
-        (((pixel_addr >> 55) & 1) << 4) |
-        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
-        GET_FIELD_SP(pixel_addr, 11, 12);
-}
-
-target_ulong helper_alignaddr(target_ulong addr, target_ulong offset)
-{
-    uint64_t tmp;
-
-    tmp = addr + offset;
-    env->gsr &= ~7ULL;
-    env->gsr |= tmp & 7ULL;
-    return tmp & ~7ULL;
-}
-
 static inline uint64_t *get_gregset(uint32_t pstate)
 {
     switch (pstate) {
diff --git a/target-sparc/vis_helper.c b/target-sparc/vis_helper.c
new file mode 100644
index 0000000..87a86ef
--- /dev/null
+++ b/target-sparc/vis_helper.c
@@ -0,0 +1,403 @@
+/*
+ * VIS op helpers
+ *
+ *  Copyright (c) 2003-2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "dyngen-exec.h"
+#include "helper.h"
+
+#define DT0 (env->dt0)
+#define DT1 (env->dt1)
+#define QT0 (env->qt0)
+#define QT1 (env->qt1)
+
+/* This function uses non-native bit order */
+#define GET_FIELD(X, FROM, TO)                                  \
+    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
+
+/* This function uses the order in the manuals, i.e. bit 0 is 2^0 */
+#define GET_FIELD_SP(X, FROM, TO)               \
+    GET_FIELD(X, 63 - (TO), 63 - (FROM))
+
+target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
+{
+    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
+        (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
+        (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
+        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
+        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
+        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
+        (((pixel_addr >> 55) & 1) << 4) |
+        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
+        GET_FIELD_SP(pixel_addr, 11, 12);
+}
+
+target_ulong helper_alignaddr(target_ulong addr, target_ulong offset)
+{
+    uint64_t tmp;
+
+    tmp = addr + offset;
+    env->gsr &= ~7ULL;
+    env->gsr |= tmp & 7ULL;
+    return tmp & ~7ULL;
+}
+
+void helper_faligndata(void)
+{
+    uint64_t tmp;
+
+    tmp = (*((uint64_t *)&DT0)) << ((env->gsr & 7) * 8);
+    /* on many architectures a shift of 64 does nothing */
+    if ((env->gsr & 7) != 0) {
+        tmp |= (*((uint64_t *)&DT1)) >> (64 - (env->gsr & 7) * 8);
+    }
+    *((uint64_t *)&DT0) = tmp;
+}
+
+#ifdef HOST_WORDS_BIGENDIAN
+#define VIS_B64(n) b[7 - (n)]
+#define VIS_W64(n) w[3 - (n)]
+#define VIS_SW64(n) sw[3 - (n)]
+#define VIS_L64(n) l[1 - (n)]
+#define VIS_B32(n) b[3 - (n)]
+#define VIS_W32(n) w[1 - (n)]
+#else
+#define VIS_B64(n) b[n]
+#define VIS_W64(n) w[n]
+#define VIS_SW64(n) sw[n]
+#define VIS_L64(n) l[n]
+#define VIS_B32(n) b[n]
+#define VIS_W32(n) w[n]
+#endif
+
+typedef union {
+    uint8_t b[8];
+    uint16_t w[4];
+    int16_t sw[4];
+    uint32_t l[2];
+    uint64_t ll;
+    float64 d;
+} VIS64;
+
+typedef union {
+    uint8_t b[4];
+    uint16_t w[2];
+    uint32_t l;
+    float32 f;
+} VIS32;
+
+void helper_fpmerge(void)
+{
+    VIS64 s, d;
+
+    s.d = DT0;
+    d.d = DT1;
+
+    /* Reverse calculation order to handle overlap */
+    d.VIS_B64(7) = s.VIS_B64(3);
+    d.VIS_B64(6) = d.VIS_B64(3);
+    d.VIS_B64(5) = s.VIS_B64(2);
+    d.VIS_B64(4) = d.VIS_B64(2);
+    d.VIS_B64(3) = s.VIS_B64(1);
+    d.VIS_B64(2) = d.VIS_B64(1);
+    d.VIS_B64(1) = s.VIS_B64(0);
+    /* d.VIS_B64(0) = d.VIS_B64(0); */
+
+    DT0 = d.d;
+}
+
+void helper_fmul8x16(void)
+{
+    VIS64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                 \
+    tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);       \
+    if ((tmp & 0xff) > 0x7f) {                                  \
+        tmp += 0x100;                                           \
+    }                                                           \
+    d.VIS_W64(r) = tmp >> 8;
+
+    PMUL(0);
+    PMUL(1);
+    PMUL(2);
+    PMUL(3);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+void helper_fmul8x16al(void)
+{
+    VIS64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                 \
+    tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);       \
+    if ((tmp & 0xff) > 0x7f) {                                  \
+        tmp += 0x100;                                           \
+    }                                                           \
+    d.VIS_W64(r) = tmp >> 8;
+
+    PMUL(0);
+    PMUL(1);
+    PMUL(2);
+    PMUL(3);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+void helper_fmul8x16au(void)
+{
+    VIS64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                 \
+    tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
+    if ((tmp & 0xff) > 0x7f) {                                  \
+        tmp += 0x100;                                           \
+    }                                                           \
+    d.VIS_W64(r) = tmp >> 8;
+
+    PMUL(0);
+    PMUL(1);
+    PMUL(2);
+    PMUL(3);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+void helper_fmul8sux16(void)
+{
+    VIS64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                         \
+    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
+    if ((tmp & 0xff) > 0x7f) {                                          \
+        tmp += 0x100;                                                   \
+    }                                                                   \
+    d.VIS_W64(r) = tmp >> 8;
+
+    PMUL(0);
+    PMUL(1);
+    PMUL(2);
+    PMUL(3);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+void helper_fmul8ulx16(void)
+{
+    VIS64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                         \
+    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
+    if ((tmp & 0xff) > 0x7f) {                                          \
+        tmp += 0x100;                                                   \
+    }                                                                   \
+    d.VIS_W64(r) = tmp >> 8;
+
+    PMUL(0);
+    PMUL(1);
+    PMUL(2);
+    PMUL(3);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+void helper_fmuld8sux16(void)
+{
+    VIS64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                         \
+    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
+    if ((tmp & 0xff) > 0x7f) {                                          \
+        tmp += 0x100;                                                   \
+    }                                                                   \
+    d.VIS_L64(r) = tmp;
+
+    /* Reverse calculation order to handle overlap */
+    PMUL(1);
+    PMUL(0);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+void helper_fmuld8ulx16(void)
+{
+    VIS64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                         \
+    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
+    if ((tmp & 0xff) > 0x7f) {                                          \
+        tmp += 0x100;                                                   \
+    }                                                                   \
+    d.VIS_L64(r) = tmp;
+
+    /* Reverse calculation order to handle overlap */
+    PMUL(1);
+    PMUL(0);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+void helper_fexpand(void)
+{
+    VIS32 s;
+    VIS64 d;
+
+    s.l = (uint32_t)(*(uint64_t *)&DT0 & 0xffffffff);
+    d.d = DT1;
+    d.VIS_W64(0) = s.VIS_B32(0) << 4;
+    d.VIS_W64(1) = s.VIS_B32(1) << 4;
+    d.VIS_W64(2) = s.VIS_B32(2) << 4;
+    d.VIS_W64(3) = s.VIS_B32(3) << 4;
+
+    DT0 = d.d;
+}
+
+#define VIS_HELPER(name, F)                             \
+    void name##16(void)                                 \
+    {                                                   \
+        VIS64 s, d;                                     \
+                                                        \
+        s.d = DT0;                                      \
+        d.d = DT1;                                      \
+                                                        \
+        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));   \
+        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));   \
+        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));   \
+        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));   \
+                                                        \
+        DT0 = d.d;                                      \
+    }                                                   \
+                                                        \
+    uint32_t name##16s(uint32_t src1, uint32_t src2)    \
+    {                                                   \
+        VIS32 s, d;                                     \
+                                                        \
+        s.l = src1;                                     \
+        d.l = src2;                                     \
+                                                        \
+        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));   \
+        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));   \
+                                                        \
+        return d.l;                                     \
+    }                                                   \
+                                                        \
+    void name##32(void)                                 \
+    {                                                   \
+        VIS64 s, d;                                     \
+                                                        \
+        s.d = DT0;                                      \
+        d.d = DT1;                                      \
+                                                        \
+        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));   \
+        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));   \
+                                                        \
+        DT0 = d.d;                                      \
+    }                                                   \
+                                                        \
+    uint32_t name##32s(uint32_t src1, uint32_t src2)    \
+    {                                                   \
+        VIS32 s, d;                                     \
+                                                        \
+        s.l = src1;                                     \
+        d.l = src2;                                     \
+                                                        \
+        d.l = F(d.l, s.l);                              \
+                                                        \
+        return d.l;                                     \
+    }
+
+#define FADD(a, b) ((a) + (b))
+#define FSUB(a, b) ((a) - (b))
+VIS_HELPER(helper_fpadd, FADD)
+VIS_HELPER(helper_fpsub, FSUB)
+
+#define VIS_CMPHELPER(name, F)                                    \
+    uint64_t name##16(void)                                       \
+    {                                                             \
+        VIS64 s, d;                                               \
+                                                                  \
+        s.d = DT0;                                                \
+        d.d = DT1;                                                \
+                                                                  \
+        d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
+        d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
+        d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
+        d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
+        d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
+                                                                  \
+        return d.ll;                                              \
+    }                                                             \
+                                                                  \
+    uint64_t name##32(void)                                       \
+    {                                                             \
+        VIS64 s, d;                                               \
+                                                                  \
+        s.d = DT0;                                                \
+        d.d = DT1;                                                \
+                                                                  \
+        d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
+        d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
+        d.VIS_L64(1) = 0;                                         \
+                                                                  \
+        return d.ll;                                              \
+    }
+
+#define FCMPGT(a, b) ((a) > (b))
+#define FCMPEQ(a, b) ((a) == (b))
+#define FCMPLE(a, b) ((a) <= (b))
+#define FCMPNE(a, b) ((a) != (b))
+
+VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
+VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
+VIS_CMPHELPER(helper_fcmple, FCMPLE)
+VIS_CMPHELPER(helper_fcmpne, FCMPNE)
-- 
2.7.4