valgrind.h: Update to latest upstream version
authorDavid King <amigadave@amigadave.com>
Fri, 16 May 2014 11:32:49 +0000 (12:32 +0100)
committerDavid King <amigadave@amigadave.com>
Wed, 4 Jun 2014 11:49:28 +0000 (12:49 +0100)
Avoid crashes in g_type_free_instance() on mingw64.

https://bugzilla.gnome.org/show_bug.cgi?id=730198

glib/valgrind.h

index 7bb1382..a4380d1 100644 (file)
@@ -12,7 +12,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2000-2012 Julian Seward.  All rights reserved.
+   Copyright (C) 2000-2013 Julian Seward.  All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
@@ -89,7 +89,7 @@
         || (__VALGRIND_MAJOR__ == 3 && __VALGRIND_MINOR__ >= 6))
 */
 #define __VALGRIND_MAJOR__    3
-#define __VALGRIND_MINOR__    8
+#define __VALGRIND_MINOR__    10
 
 
 #include <stdarg.h>
 #undef PLAT_x86_darwin
 #undef PLAT_amd64_darwin
 #undef PLAT_x86_win32
+#undef PLAT_amd64_win64
 #undef PLAT_x86_linux
 #undef PLAT_amd64_linux
 #undef PLAT_ppc32_linux
 #undef PLAT_ppc64_linux
 #undef PLAT_arm_linux
+#undef PLAT_arm64_linux
 #undef PLAT_s390x_linux
 #undef PLAT_mips32_linux
+#undef PLAT_mips64_linux
 
 
 #if defined(__APPLE__) && defined(__i386__)
 #  define PLAT_x86_darwin 1
 #elif defined(__APPLE__) && defined(__x86_64__)
 #  define PLAT_amd64_darwin 1
-#elif defined(__MINGW32__) || defined(__CYGWIN32__) \
+#elif (defined(__MINGW32__) && !defined(__MINGW64__)) \
+      || defined(__CYGWIN32__) \
       || (defined(_WIN32) && defined(_M_IX86))
 #  define PLAT_x86_win32 1
+#elif defined(__MINGW64__) \
+      || (defined(_WIN64) && defined(_M_X64))
+#  define PLAT_amd64_win64 1
 #elif defined(__linux__) && defined(__i386__)
 #  define PLAT_x86_linux 1
 #elif defined(__linux__) && defined(__x86_64__)
 #  define PLAT_ppc32_linux 1
 #elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__)
 #  define PLAT_ppc64_linux 1
-#elif defined(__linux__) && defined(__arm__)
+#elif defined(__linux__) && defined(__arm__) && !defined(__aarch64__)
 #  define PLAT_arm_linux 1
+#elif defined(__linux__) && defined(__aarch64__) && !defined(__arm__)
+#  define PLAT_arm64_linux 1
 #elif defined(__linux__) && defined(__s390__) && defined(__s390x__)
 #  define PLAT_s390x_linux 1
-#elif defined(__linux__) && defined(__mips__)
+#elif defined(__linux__) && defined(__mips__) && (__mips==64)
+#  define PLAT_mips64_linux 1
+#elif defined(__linux__) && defined(__mips__) && (__mips!=64)
 #  define PLAT_mips32_linux 1
 #else
 /* If we're not compiling for our target platform, don't generate
@@ -282,6 +293,15 @@ typedef
                      __SPECIAL_INSTRUCTION_PREAMBLE               \
                      /* call-noredir *%EAX */                     \
                      "xchgl %%edx,%%edx\n\t"
+
+#define VALGRIND_VEX_INJECT_IR()                                 \
+ do {                                                            \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
+                     "xchgl %%edi,%%edi\n\t"                     \
+                     : : : "cc", "memory"                        \
+                    );                                           \
+ } while (0)
+
 #endif /* PLAT_x86_linux || PLAT_x86_darwin || (PLAT_x86_win32 && __GNUC__) */
 
 /* ------------------------- x86-Win32 ------------------------- */
@@ -344,6 +364,13 @@ valgrind_do_client_request_expr(uintptr_t _zzq_default, uintptr_t _zzq_request,
 
 #define VALGRIND_CALL_NOREDIR_EAX ERROR
 
+#define VALGRIND_VEX_INJECT_IR()                                 \
+ do {                                                            \
+    __asm { __SPECIAL_INSTRUCTION_PREAMBLE                       \
+            __asm xchg edi,edi                                   \
+    }                                                            \
+ } while (0)
+
 #else
 #error Unsupported compiler.
 #endif
@@ -352,7 +379,8 @@ valgrind_do_client_request_expr(uintptr_t _zzq_default, uintptr_t _zzq_request,
 
 /* ------------------------ amd64-{linux,darwin} --------------- */
 
-#if defined(PLAT_amd64_linux)  ||  defined(PLAT_amd64_darwin)
+#if defined(PLAT_amd64_linux)  ||  defined(PLAT_amd64_darwin) \
+    ||  (defined(PLAT_amd64_win64) && defined(__GNUC__))
 
 typedef
    struct { 
@@ -403,8 +431,25 @@ typedef
                      __SPECIAL_INSTRUCTION_PREAMBLE               \
                      /* call-noredir *%RAX */                     \
                      "xchgq %%rdx,%%rdx\n\t"
+
+#define VALGRIND_VEX_INJECT_IR()                                 \
+ do {                                                            \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
+                     "xchgq %%rdi,%%rdi\n\t"                     \
+                     : : : "cc", "memory"                        \
+                    );                                           \
+ } while (0)
+
 #endif /* PLAT_amd64_linux || PLAT_amd64_darwin */
 
+/* ------------------------- amd64-Win64 ------------------------- */
+
+#if defined(PLAT_amd64_win64) && !defined(__GNUC__)
+
+#error Unsupported compiler.
+
+#endif /* PLAT_amd64_win64 */
+
 /* ------------------------ ppc32-linux ------------------------ */
 
 #if defined(PLAT_ppc32_linux)
@@ -416,8 +461,8 @@ typedef
    OrigFn;
 
 #define __SPECIAL_INSTRUCTION_PREAMBLE                            \
-                     "rlwinm 0,0,3,0,0  ; rlwinm 0,0,13,0,0\n\t"  \
-                     "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t"
+                    "rlwinm 0,0,3,0,31  ; rlwinm 0,0,13,0,31\n\t" \
+                    "rlwinm 0,0,29,0,31 ; rlwinm 0,0,19,0,31\n\t"
 
 #define VALGRIND_DO_CLIENT_REQUEST_EXPR(                          \
         _zzq_default, _zzq_request,                               \
@@ -442,7 +487,7 @@ typedef
                      "mr %0,3"     /*result*/                     \
                      : "=b" (_zzq_result)                         \
                      : "b" (_zzq_default), "b" (_zzq_ptr)         \
-                     : "cc", "memory", "r3", "r4", "r0");         \
+                     : "cc", "memory", "r3", "r4");               \
     _zzq_result;                                                  \
     })
 
@@ -455,7 +500,7 @@ typedef
                      "mr %0,3"                                    \
                      : "=b" (__addr)                              \
                      :                                            \
-                     : "cc", "memory", "r3", "r0"                 \
+                     : "cc", "memory", "r3"                       \
                     );                                            \
     _zzq_orig->nraddr = __addr;                                   \
   }
@@ -464,6 +509,14 @@ typedef
                      __SPECIAL_INSTRUCTION_PREAMBLE               \
                      /* branch-and-link-to-noredir *%R11 */       \
                      "or 3,3,3\n\t"
+
+#define VALGRIND_VEX_INJECT_IR()                                 \
+ do {                                                            \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
+                     "or 5,5,5\n\t"                              \
+                    );                                           \
+ } while (0)
+
 #endif /* PLAT_ppc32_linux */
 
 /* ------------------------ ppc64-linux ------------------------ */
@@ -504,7 +557,7 @@ typedef
                      "mr %0,3"     /*result*/                     \
                      : "=b" (_zzq_result)                         \
                      : "b" (_zzq_default), "b" (_zzq_ptr)         \
-                     : "cc", "memory", "r3", "r4", "r0");         \
+                     : "cc", "memory", "r3", "r4");               \
     _zzq_result;                                                  \
   })
 
@@ -517,7 +570,7 @@ typedef
                      "mr %0,3"                                    \
                      : "=b" (__addr)                              \
                      :                                            \
-                     : "cc", "memory", "r3", "r0"                 \
+                     : "cc", "memory", "r3"                       \
                     );                                            \
     _zzq_orig->nraddr = __addr;                                   \
     __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
@@ -526,7 +579,7 @@ typedef
                      "mr %0,3"                                    \
                      : "=b" (__addr)                              \
                      :                                            \
-                     : "cc", "memory", "r3", "r0"                 \
+                     : "cc", "memory", "r3"                       \
                     );                                            \
     _zzq_orig->r2 = __addr;                                       \
   }
@@ -536,6 +589,13 @@ typedef
                      /* branch-and-link-to-noredir *%R11 */       \
                      "or 3,3,3\n\t"
 
+#define VALGRIND_VEX_INJECT_IR()                                 \
+ do {                                                            \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
+                     "or 5,5,5\n\t"                              \
+                    );                                           \
+ } while (0)
+
 #endif /* PLAT_ppc64_linux */
 
 /* ------------------------- arm-linux ------------------------- */
@@ -596,8 +656,84 @@ typedef
                      /* branch-and-link-to-noredir *%R4 */        \
                      "orr r12, r12, r12\n\t"
 
+#define VALGRIND_VEX_INJECT_IR()                                 \
+ do {                                                            \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
+                     "orr r9, r9, r9\n\t"                        \
+                     : : : "cc", "memory"                        \
+                    );                                           \
+ } while (0)
+
 #endif /* PLAT_arm_linux */
 
+/* ------------------------ arm64-linux ------------------------- */
+
+#if defined(PLAT_arm64_linux)
+
+typedef
+   struct { 
+      unsigned long long int nraddr; /* where's the code? */
+   }
+   OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE                            \
+            "ror x12, x12, #3  ;  ror x12, x12, #13 \n\t"         \
+            "ror x12, x12, #51 ;  ror x12, x12, #61 \n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                          \
+        _zzq_default, _zzq_request,                               \
+        _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)    \
+                                                                  \
+  __extension__                                                   \
+  ({volatile unsigned long long int  _zzq_args[6];                \
+    volatile unsigned long long int  _zzq_result;                 \
+    _zzq_args[0] = (unsigned long long int)(_zzq_request);        \
+    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);           \
+    _zzq_args[2] = (unsigned long long int)(_zzq_arg2);           \
+    _zzq_args[3] = (unsigned long long int)(_zzq_arg3);           \
+    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);           \
+    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);           \
+    __asm__ volatile("mov x3, %1\n\t" /*default*/                 \
+                     "mov x4, %2\n\t" /*ptr*/                     \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* X3 = client_request ( X4 ) */             \
+                     "orr x10, x10, x10\n\t"                      \
+                     "mov %0, x3"     /*result*/                  \
+                     : "=r" (_zzq_result)                         \
+                     : "r" (_zzq_default), "r" (&_zzq_args[0])    \
+                     : "cc","memory", "x3", "x4");                \
+    _zzq_result;                                                  \
+  })
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                       \
+  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                   \
+    unsigned long long int __addr;                                \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* X3 = guest_NRADDR */                      \
+                     "orr x11, x11, x11\n\t"                      \
+                     "mov %0, x3"                                 \
+                     : "=r" (__addr)                              \
+                     :                                            \
+                     : "cc", "memory", "x3"                       \
+                    );                                            \
+    _zzq_orig->nraddr = __addr;                                   \
+  }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                    \
+                     __SPECIAL_INSTRUCTION_PREAMBLE               \
+                     /* branch-and-link-to-noredir X8 */          \
+                     "orr x12, x12, x12\n\t"
+
+#define VALGRIND_VEX_INJECT_IR()                                 \
+ do {                                                            \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
+                     "orr x9, x9, x9\n\t"                        \
+                     : : : "cc", "memory"                        \
+                    );                                           \
+ } while (0)
+
+#endif /* PLAT_arm64_linux */
+
 /* ------------------------ s390x-linux ------------------------ */
 
 #if defined(PLAT_s390x_linux)
@@ -621,6 +757,7 @@ typedef
 #define __CLIENT_REQUEST_CODE "lr 2,2\n\t"
 #define __GET_NR_CONTEXT_CODE "lr 3,3\n\t"
 #define __CALL_NO_REDIR_CODE  "lr 4,4\n\t"
+#define __VEX_INJECT_IR_CODE  "lr 5,5\n\t"
 
 #define VALGRIND_DO_CLIENT_REQUEST_EXPR(                         \
        _zzq_default, _zzq_request,                               \
@@ -666,6 +803,12 @@ typedef
                     __SPECIAL_INSTRUCTION_PREAMBLE               \
                     __CALL_NO_REDIR_CODE
 
+#define VALGRIND_VEX_INJECT_IR()                                 \
+ do {                                                            \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
+                     __VEX_INJECT_IR_CODE);                      \
+ } while (0)
+
 #endif /* PLAT_s390x_linux */
 
 /* ------------------------- mips32-linux ---------------- */
@@ -708,7 +851,7 @@ typedef
                      "move %0, $11\n\t"     /*result*/            \
                      : "=r" (_zzq_result)                         \
                      : "r" (_zzq_default), "r" (&_zzq_args[0])    \
-                     : "cc","memory", "t3", "t4");                \
+                     : "$11", "$12");                             \
     _zzq_result;                                                  \
   })
 
@@ -721,7 +864,7 @@ typedef
                      "move %0, $11"     /*result*/                \
                      : "=r" (__addr)                              \
                      :                                            \
-                     : "cc", "memory" , "t3"                      \
+                     : "$11"                                      \
                     );                                            \
     _zzq_orig->nraddr = __addr;                                   \
   }
@@ -729,9 +872,87 @@ typedef
 #define VALGRIND_CALL_NOREDIR_T9                                 \
                      __SPECIAL_INSTRUCTION_PREAMBLE              \
                      /* call-noredir *%t9 */                     \
-                     "or $15, $15, $15\n\t"                                             
+                     "or $15, $15, $15\n\t"
+
+#define VALGRIND_VEX_INJECT_IR()                                 \
+ do {                                                            \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE              \
+                     "or $11, $11, $11\n\t"                      \
+                    );                                           \
+ } while (0)
+
+
 #endif /* PLAT_mips32_linux */
 
+/* ------------------------- mips64-linux ---------------- */
+
+#if defined(PLAT_mips64_linux)
+
+typedef
+   struct {
+      unsigned long long nraddr; /* where's the code? */
+   }
+   OrigFn;
+
+/* dsll $0,$0, 3
+ * dsll $0,$0, 13
+ * dsll $0,$0, 29
+ * dsll $0,$0, 19*/
+#define __SPECIAL_INSTRUCTION_PREAMBLE                              \
+                     "dsll $0,$0, 3 ; dsll $0,$0,13\n\t"            \
+                     "dsll $0,$0,29 ; dsll $0,$0,19\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST_EXPR(                            \
+       _zzq_default, _zzq_request,                                  \
+       _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5)       \
+  __extension__                                                     \
+  ({ volatile unsigned long long int _zzq_args[6];                  \
+    volatile unsigned long long int _zzq_result;                    \
+    _zzq_args[0] = (unsigned long long int)(_zzq_request);          \
+    _zzq_args[1] = (unsigned long long int)(_zzq_arg1);             \
+    _zzq_args[2] = (unsigned long long int)(_zzq_arg2);             \
+    _zzq_args[3] = (unsigned long long int)(_zzq_arg3);             \
+    _zzq_args[4] = (unsigned long long int)(_zzq_arg4);             \
+    _zzq_args[5] = (unsigned long long int)(_zzq_arg5);             \
+        __asm__ volatile("move $11, %1\n\t" /*default*/             \
+                         "move $12, %2\n\t" /*ptr*/                 \
+                         __SPECIAL_INSTRUCTION_PREAMBLE             \
+                         /* $11 = client_request ( $12 ) */         \
+                         "or $13, $13, $13\n\t"                     \
+                         "move %0, $11\n\t"     /*result*/          \
+                         : "=r" (_zzq_result)                       \
+                         : "r" (_zzq_default), "r" (&_zzq_args[0])  \
+                         : "$11", "$12");                           \
+    _zzq_result;                                                    \
+  })
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval)                         \
+  { volatile OrigFn* _zzq_orig = &(_zzq_rlval);                     \
+    volatile unsigned long long int __addr;                         \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE                 \
+                     /* $11 = guest_NRADDR */                       \
+                     "or $14, $14, $14\n\t"                         \
+                     "move %0, $11"     /*result*/                  \
+                     : "=r" (__addr)                                \
+                     :                                              \
+                     : "$11");                                      \
+    _zzq_orig->nraddr = __addr;                                     \
+  }
+
+#define VALGRIND_CALL_NOREDIR_T9                                    \
+                     __SPECIAL_INSTRUCTION_PREAMBLE                 \
+                     /* call-noredir $25 */                         \
+                     "or $15, $15, $15\n\t"
+
+#define VALGRIND_VEX_INJECT_IR()                                    \
+ do {                                                               \
+    __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE                 \
+                     "or $11, $11, $11\n\t"                         \
+                    );                                              \
+ } while (0)
+
+#endif /* PLAT_mips64_linux */
+
 /* Insert assembly code for other platforms here... */
 
 #endif /* NVALGRIND */
@@ -3329,135 +3550,586 @@ typedef
 
 #endif /* PLAT_arm_linux */
 
-/* ------------------------- s390x-linux ------------------------- */
-
-#if defined(PLAT_s390x_linux)
+/* ------------------------ arm64-linux ------------------------ */
 
-/* Similar workaround as amd64 (see above), but we use r11 as frame
-   pointer and save the old r11 in r7. r11 might be used for
-   argvec, therefore we copy argvec in r1 since r1 is clobbered
-   after the call anyway.  */
-#if defined(__GNUC__) && defined(__GCC_HAVE_DWARF2_CFI_ASM)
-#  define __FRAME_POINTER                                         \
-      ,"d"(__builtin_dwarf_cfa())
-#  define VALGRIND_CFI_PROLOGUE                                   \
-      ".cfi_remember_state\n\t"                                   \
-      "lgr 1,%1\n\t" /* copy the argvec pointer in r1 */          \
-      "lgr 7,11\n\t"                                              \
-      "lgr 11,%2\n\t"                                             \
-      ".cfi_def_cfa r11, 0\n\t"
-#  define VALGRIND_CFI_EPILOGUE                                   \
-      "lgr 11, 7\n\t"                                             \
-      ".cfi_restore_state\n\t"
-#else
-#  define __FRAME_POINTER
-#  define VALGRIND_CFI_PROLOGUE                                   \
-      "lgr 1,%1\n\t"
-#  define VALGRIND_CFI_EPILOGUE
-#endif
+#if defined(PLAT_arm64_linux)
 
-/* Nb: On s390 the stack pointer is properly aligned *at all times*
-   according to the s390 GCC maintainer. (The ABI specification is not
-   precise in this regard.) Therefore, VALGRIND_ALIGN_STACK and
-   VALGRIND_RESTORE_STACK are not defined here. */
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS \
+     "x0", "x1", "x2", "x3","x4", "x5", "x6", "x7", "x8", "x9",   \
+     "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17",      \
+     "x18", "x19", "x20", "x30",                                  \
+     "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",  \
+     "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",      \
+     "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",      \
+     "v26", "v27", "v28", "v29", "v30", "v31"
+
+/* x21 is callee-saved, so we can use it to save and restore SP around
+   the hidden call. */
+#define VALGRIND_ALIGN_STACK               \
+      "mov x21, sp\n\t"                    \
+      "bic sp, x21, #15\n\t"
+#define VALGRIND_RESTORE_STACK             \
+      "mov sp,  x21\n\t"
 
-/* These regs are trashed by the hidden call. Note that we overwrite
-   r14 in s390_irgen_noredir (VEX/priv/guest_s390_irgen.c) to give the
-   function a proper return address. All others are ABI defined call
-   clobbers. */
-#define __CALLER_SAVED_REGS "0","1","2","3","4","5","14", \
-                           "f0","f1","f2","f3","f4","f5","f6","f7"
+/* These CALL_FN_ macros assume that on arm64-linux,
+   sizeof(unsigned long) == 8. */
 
-/* Nb: Although r11 is modified in the asm snippets below (inside 
-   VALGRIND_CFI_PROLOGUE) it is not listed in the clobber section, for
-   two reasons:
-   (1) r11 is restored in VALGRIND_CFI_EPILOGUE, so effectively it is not
-       modified
-   (2) GCC will complain that r11 cannot appear inside a clobber section,
-       when compiled with -O -fno-omit-frame-pointer
- */
+#define CALL_FN_W_v(lval, orig)                                   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[1];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0\n"                                           \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
 
-#define CALL_FN_W_v(lval, orig)                                  \
-   do {                                                          \
-      volatile OrigFn        _orig = (orig);                     \
-      volatile unsigned long  _argvec[1];                        \
-      volatile unsigned long _res;                               \
-      _argvec[0] = (unsigned long)_orig.nraddr;                  \
-      __asm__ volatile(                                          \
-         VALGRIND_CFI_PROLOGUE                                   \
-         "aghi 15,-160\n\t"                                      \
-         "lg 1, 0(1)\n\t"  /* target->r1 */                      \
-         VALGRIND_CALL_NOREDIR_R1                                \
-         "lgr %0, 2\n\t"                                         \
-         "aghi 15,160\n\t"                                       \
-         VALGRIND_CFI_EPILOGUE                                   \
-         : /*out*/   "=d" (_res)                                 \
-         : /*in*/    "d" (&_argvec[0]) __FRAME_POINTER           \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7"     \
-      );                                                         \
-      lval = (__typeof__(lval)) _res;                            \
+#define CALL_FN_W_W(lval, orig, arg1)                             \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[2];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0\n"                                           \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
    } while (0)
 
-/* The call abi has the arguments in r2-r6 and stack */
-#define CALL_FN_W_W(lval, orig, arg1)                            \
-   do {                                                          \
-      volatile OrigFn        _orig = (orig);                     \
-      volatile unsigned long _argvec[2];                         \
-      volatile unsigned long _res;                               \
-      _argvec[0] = (unsigned long)_orig.nraddr;                  \
-      _argvec[1] = (unsigned long)arg1;                          \
-      __asm__ volatile(                                          \
-         VALGRIND_CFI_PROLOGUE                                   \
-         "aghi 15,-160\n\t"                                      \
-         "lg 2, 8(1)\n\t"                                        \
-         "lg 1, 0(1)\n\t"                                        \
-         VALGRIND_CALL_NOREDIR_R1                                \
-         "lgr %0, 2\n\t"                                         \
-         "aghi 15,160\n\t"                                       \
-         VALGRIND_CFI_EPILOGUE                                   \
-         : /*out*/   "=d" (_res)                                 \
-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7"     \
-      );                                                         \
-      lval = (__typeof__(lval)) _res;                            \
+#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x1, [%1, #16] \n\t"                                 \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0\n"                                           \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
    } while (0)
 
-#define CALL_FN_W_WW(lval, orig, arg1, arg2)                     \
-   do {                                                          \
-      volatile OrigFn        _orig = (orig);                     \
-      volatile unsigned long _argvec[3];                         \
-      volatile unsigned long _res;                               \
-      _argvec[0] = (unsigned long)_orig.nraddr;                  \
-      _argvec[1] = (unsigned long)arg1;                          \
-      _argvec[2] = (unsigned long)arg2;                          \
-      __asm__ volatile(                                          \
-         VALGRIND_CFI_PROLOGUE                                   \
-         "aghi 15,-160\n\t"                                      \
-         "lg 2, 8(1)\n\t"                                        \
-         "lg 3,16(1)\n\t"                                        \
-         "lg 1, 0(1)\n\t"                                        \
-         VALGRIND_CALL_NOREDIR_R1                                \
-         "lgr %0, 2\n\t"                                         \
-         "aghi 15,160\n\t"                                       \
-         VALGRIND_CFI_EPILOGUE                                   \
-         : /*out*/   "=d" (_res)                                 \
-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7"     \
-      );                                                         \
-      lval = (__typeof__(lval)) _res;                            \
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[4];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x1, [%1, #16] \n\t"                                 \
+         "ldr x2, [%1, #24] \n\t"                                 \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0\n"                                           \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
    } while (0)
 
-#define CALL_FN_W_WWW(lval, orig, arg1, arg2, arg3)              \
-   do {                                                          \
-      volatile OrigFn        _orig = (orig);                     \
-      volatile unsigned long _argvec[4];                         \
-      volatile unsigned long _res;                               \
-      _argvec[0] = (unsigned long)_orig.nraddr;                  \
-      _argvec[1] = (unsigned long)arg1;                          \
-      _argvec[2] = (unsigned long)arg2;                          \
-      _argvec[3] = (unsigned long)arg3;                          \
-      __asm__ volatile(                                          \
-         VALGRIND_CFI_PROLOGUE                                   \
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[5];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x1, [%1, #16] \n\t"                                 \
+         "ldr x2, [%1, #24] \n\t"                                 \
+         "ldr x3, [%1, #32] \n\t"                                 \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0"                                             \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[6];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x1, [%1, #16] \n\t"                                 \
+         "ldr x2, [%1, #24] \n\t"                                 \
+         "ldr x3, [%1, #32] \n\t"                                 \
+         "ldr x4, [%1, #40] \n\t"                                 \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0"                                             \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[7];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x1, [%1, #16] \n\t"                                 \
+         "ldr x2, [%1, #24] \n\t"                                 \
+         "ldr x3, [%1, #32] \n\t"                                 \
+         "ldr x4, [%1, #40] \n\t"                                 \
+         "ldr x5, [%1, #48] \n\t"                                 \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0"                                             \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7)                            \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[8];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x1, [%1, #16] \n\t"                                 \
+         "ldr x2, [%1, #24] \n\t"                                 \
+         "ldr x3, [%1, #32] \n\t"                                 \
+         "ldr x4, [%1, #40] \n\t"                                 \
+         "ldr x5, [%1, #48] \n\t"                                 \
+         "ldr x6, [%1, #56] \n\t"                                 \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0"                                             \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[9];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x1, [%1, #16] \n\t"                                 \
+         "ldr x2, [%1, #24] \n\t"                                 \
+         "ldr x3, [%1, #32] \n\t"                                 \
+         "ldr x4, [%1, #40] \n\t"                                 \
+         "ldr x5, [%1, #48] \n\t"                                 \
+         "ldr x6, [%1, #56] \n\t"                                 \
+         "ldr x7, [%1, #64] \n\t"                                 \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0"                                             \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8,arg9)                  \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[10];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "sub sp, sp, #0x20 \n\t"                                 \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x1, [%1, #16] \n\t"                                 \
+         "ldr x2, [%1, #24] \n\t"                                 \
+         "ldr x3, [%1, #32] \n\t"                                 \
+         "ldr x4, [%1, #40] \n\t"                                 \
+         "ldr x5, [%1, #48] \n\t"                                 \
+         "ldr x6, [%1, #56] \n\t"                                 \
+         "ldr x7, [%1, #64] \n\t"                                 \
+         "ldr x8, [%1, #72] \n\t"                                 \
+         "str x8, [sp, #0]  \n\t"                                 \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0"                                             \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[11];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      _argvec[10] = (unsigned long)(arg10);                       \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "sub sp, sp, #0x20 \n\t"                                 \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x1, [%1, #16] \n\t"                                 \
+         "ldr x2, [%1, #24] \n\t"                                 \
+         "ldr x3, [%1, #32] \n\t"                                 \
+         "ldr x4, [%1, #40] \n\t"                                 \
+         "ldr x5, [%1, #48] \n\t"                                 \
+         "ldr x6, [%1, #56] \n\t"                                 \
+         "ldr x7, [%1, #64] \n\t"                                 \
+         "ldr x8, [%1, #72] \n\t"                                 \
+         "str x8, [sp, #0]  \n\t"                                 \
+         "ldr x8, [%1, #80] \n\t"                                 \
+         "str x8, [sp, #8]  \n\t"                                 \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0"                                             \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10,arg11)     \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[12];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      _argvec[10] = (unsigned long)(arg10);                       \
+      _argvec[11] = (unsigned long)(arg11);                       \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "sub sp, sp, #0x30 \n\t"                                 \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x1, [%1, #16] \n\t"                                 \
+         "ldr x2, [%1, #24] \n\t"                                 \
+         "ldr x3, [%1, #32] \n\t"                                 \
+         "ldr x4, [%1, #40] \n\t"                                 \
+         "ldr x5, [%1, #48] \n\t"                                 \
+         "ldr x6, [%1, #56] \n\t"                                 \
+         "ldr x7, [%1, #64] \n\t"                                 \
+         "ldr x8, [%1, #72] \n\t"                                 \
+         "str x8, [sp, #0]  \n\t"                                 \
+         "ldr x8, [%1, #80] \n\t"                                 \
+         "str x8, [sp, #8]  \n\t"                                 \
+         "ldr x8, [%1, #88] \n\t"                                 \
+         "str x8, [sp, #16] \n\t"                                 \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0"                                             \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10,arg11,     \
+                                  arg12)                          \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[13];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      _argvec[10] = (unsigned long)(arg10);                       \
+      _argvec[11] = (unsigned long)(arg11);                       \
+      _argvec[12] = (unsigned long)(arg12);                       \
+      __asm__ volatile(                                           \
+         VALGRIND_ALIGN_STACK                                     \
+         "sub sp, sp, #0x30 \n\t"                                 \
+         "ldr x0, [%1, #8] \n\t"                                  \
+         "ldr x1, [%1, #16] \n\t"                                 \
+         "ldr x2, [%1, #24] \n\t"                                 \
+         "ldr x3, [%1, #32] \n\t"                                 \
+         "ldr x4, [%1, #40] \n\t"                                 \
+         "ldr x5, [%1, #48] \n\t"                                 \
+         "ldr x6, [%1, #56] \n\t"                                 \
+         "ldr x7, [%1, #64] \n\t"                                 \
+         "ldr x8, [%1, #72] \n\t"                                 \
+         "str x8, [sp, #0]  \n\t"                                 \
+         "ldr x8, [%1, #80] \n\t"                                 \
+         "str x8, [sp, #8]  \n\t"                                 \
+         "ldr x8, [%1, #88] \n\t"                                 \
+         "str x8, [sp, #16] \n\t"                                 \
+         "ldr x8, [%1, #96] \n\t"                                 \
+         "str x8, [sp, #24] \n\t"                                 \
+         "ldr x8, [%1] \n\t"  /* target->x8 */                    \
+         VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8                   \
+         VALGRIND_RESTORE_STACK                                   \
+         "mov %0, x0"                                             \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21"   \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#endif /* PLAT_arm64_linux */
+
+/* ------------------------- s390x-linux ------------------------- */
+
+#if defined(PLAT_s390x_linux)
+
+/* Similar workaround as amd64 (see above), but we use r11 as frame
+   pointer and save the old r11 in r7. r11 might be used for
+   argvec, therefore we copy argvec in r1 since r1 is clobbered
+   after the call anyway.  */
+#if defined(__GNUC__) && defined(__GCC_HAVE_DWARF2_CFI_ASM)
+#  define __FRAME_POINTER                                         \
+      ,"d"(__builtin_dwarf_cfa())
+#  define VALGRIND_CFI_PROLOGUE                                   \
+      ".cfi_remember_state\n\t"                                   \
+      "lgr 1,%1\n\t" /* copy the argvec pointer in r1 */          \
+      "lgr 7,11\n\t"                                              \
+      "lgr 11,%2\n\t"                                             \
+      ".cfi_def_cfa r11, 0\n\t"
+#  define VALGRIND_CFI_EPILOGUE                                   \
+      "lgr 11, 7\n\t"                                             \
+      ".cfi_restore_state\n\t"
+#else
+#  define __FRAME_POINTER
+#  define VALGRIND_CFI_PROLOGUE                                   \
+      "lgr 1,%1\n\t"
+#  define VALGRIND_CFI_EPILOGUE
+#endif
+
+/* Nb: On s390 the stack pointer is properly aligned *at all times*
+   according to the s390 GCC maintainer. (The ABI specification is not
+   precise in this regard.) Therefore, VALGRIND_ALIGN_STACK and
+   VALGRIND_RESTORE_STACK are not defined here. */
+
+/* These regs are trashed by the hidden call. Note that we overwrite
+   r14 in s390_irgen_noredir (VEX/priv/guest_s390_irgen.c) to give the
+   function a proper return address. All others are ABI defined call
+   clobbers. */
+#define __CALLER_SAVED_REGS "0","1","2","3","4","5","14", \
+                           "f0","f1","f2","f3","f4","f5","f6","f7"
+
+/* Nb: Although r11 is modified in the asm snippets below (inside 
+   VALGRIND_CFI_PROLOGUE) it is not listed in the clobber section, for
+   two reasons:
+   (1) r11 is restored in VALGRIND_CFI_EPILOGUE, so effectively it is not
+       modified
+   (2) GCC will complain that r11 cannot appear inside a clobber section,
+       when compiled with -O -fno-omit-frame-pointer
+ */
+
+#define CALL_FN_W_v(lval, orig)                                  \
+   do {                                                          \
+      volatile OrigFn        _orig = (orig);                     \
+      volatile unsigned long  _argvec[1];                        \
+      volatile unsigned long _res;                               \
+      _argvec[0] = (unsigned long)_orig.nraddr;                  \
+      __asm__ volatile(                                          \
+         VALGRIND_CFI_PROLOGUE                                   \
+         "aghi 15,-160\n\t"                                      \
+         "lg 1, 0(1)\n\t"  /* target->r1 */                      \
+         VALGRIND_CALL_NOREDIR_R1                                \
+         "lgr %0, 2\n\t"                                         \
+         "aghi 15,160\n\t"                                       \
+         VALGRIND_CFI_EPILOGUE                                   \
+         : /*out*/   "=d" (_res)                                 \
+         : /*in*/    "d" (&_argvec[0]) __FRAME_POINTER           \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7"     \
+      );                                                         \
+      lval = (__typeof__(lval)) _res;                            \
+   } while (0)
+
+/* The call abi has the arguments in r2-r6 and stack */
+#define CALL_FN_W_W(lval, orig, arg1)                            \
+   do {                                                          \
+      volatile OrigFn        _orig = (orig);                     \
+      volatile unsigned long _argvec[2];                         \
+      volatile unsigned long _res;                               \
+      _argvec[0] = (unsigned long)_orig.nraddr;                  \
+      _argvec[1] = (unsigned long)arg1;                          \
+      __asm__ volatile(                                          \
+         VALGRIND_CFI_PROLOGUE                                   \
+         "aghi 15,-160\n\t"                                      \
+         "lg 2, 8(1)\n\t"                                        \
+         "lg 1, 0(1)\n\t"                                        \
+         VALGRIND_CALL_NOREDIR_R1                                \
+         "lgr %0, 2\n\t"                                         \
+         "aghi 15,160\n\t"                                       \
+         VALGRIND_CFI_EPILOGUE                                   \
+         : /*out*/   "=d" (_res)                                 \
+         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7"     \
+      );                                                         \
+      lval = (__typeof__(lval)) _res;                            \
+   } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1, arg2)                     \
+   do {                                                          \
+      volatile OrigFn        _orig = (orig);                     \
+      volatile unsigned long _argvec[3];                         \
+      volatile unsigned long _res;                               \
+      _argvec[0] = (unsigned long)_orig.nraddr;                  \
+      _argvec[1] = (unsigned long)arg1;                          \
+      _argvec[2] = (unsigned long)arg2;                          \
+      __asm__ volatile(                                          \
+         VALGRIND_CFI_PROLOGUE                                   \
+         "aghi 15,-160\n\t"                                      \
+         "lg 2, 8(1)\n\t"                                        \
+         "lg 3,16(1)\n\t"                                        \
+         "lg 1, 0(1)\n\t"                                        \
+         VALGRIND_CALL_NOREDIR_R1                                \
+         "lgr %0, 2\n\t"                                         \
+         "aghi 15,160\n\t"                                       \
+         VALGRIND_CFI_EPILOGUE                                   \
+         : /*out*/   "=d" (_res)                                 \
+         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7"     \
+      );                                                         \
+      lval = (__typeof__(lval)) _res;                            \
+   } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1, arg2, arg3)              \
+   do {                                                          \
+      volatile OrigFn        _orig = (orig);                     \
+      volatile unsigned long _argvec[4];                         \
+      volatile unsigned long _res;                               \
+      _argvec[0] = (unsigned long)_orig.nraddr;                  \
+      _argvec[1] = (unsigned long)arg1;                          \
+      _argvec[2] = (unsigned long)arg2;                          \
+      _argvec[3] = (unsigned long)arg3;                          \
+      __asm__ volatile(                                          \
+         VALGRIND_CFI_PROLOGUE                                   \
          "aghi 15,-160\n\t"                                      \
          "lg 2, 8(1)\n\t"                                        \
          "lg 3,16(1)\n\t"                                        \
@@ -3595,7 +4267,85 @@ typedef
          "lg 1, 0(1)\n\t"                                        \
          VALGRIND_CALL_NOREDIR_R1                                \
          "lgr %0, 2\n\t"                                         \
-         "aghi 15,176\n\t"                                       \
+         "aghi 15,176\n\t"                                       \
+         VALGRIND_CFI_EPILOGUE                                   \
+         : /*out*/   "=d" (_res)                                 \
+         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \
+      );                                                         \
+      lval = (__typeof__(lval)) _res;                            \
+   } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1, arg2, arg3, arg4, arg5,   \
+                     arg6, arg7 ,arg8)                           \
+   do {                                                          \
+      volatile OrigFn        _orig = (orig);                     \
+      volatile unsigned long _argvec[9];                         \
+      volatile unsigned long _res;                               \
+      _argvec[0] = (unsigned long)_orig.nraddr;                  \
+      _argvec[1] = (unsigned long)arg1;                          \
+      _argvec[2] = (unsigned long)arg2;                          \
+      _argvec[3] = (unsigned long)arg3;                          \
+      _argvec[4] = (unsigned long)arg4;                          \
+      _argvec[5] = (unsigned long)arg5;                          \
+      _argvec[6] = (unsigned long)arg6;                          \
+      _argvec[7] = (unsigned long)arg7;                          \
+      _argvec[8] = (unsigned long)arg8;                          \
+      __asm__ volatile(                                          \
+         VALGRIND_CFI_PROLOGUE                                   \
+         "aghi 15,-184\n\t"                                      \
+         "lg 2, 8(1)\n\t"                                        \
+         "lg 3,16(1)\n\t"                                        \
+         "lg 4,24(1)\n\t"                                        \
+         "lg 5,32(1)\n\t"                                        \
+         "lg 6,40(1)\n\t"                                        \
+         "mvc 160(8,15), 48(1)\n\t"                              \
+         "mvc 168(8,15), 56(1)\n\t"                              \
+         "mvc 176(8,15), 64(1)\n\t"                              \
+         "lg 1, 0(1)\n\t"                                        \
+         VALGRIND_CALL_NOREDIR_R1                                \
+         "lgr %0, 2\n\t"                                         \
+         "aghi 15,184\n\t"                                       \
+         VALGRIND_CFI_EPILOGUE                                   \
+         : /*out*/   "=d" (_res)                                 \
+         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
+         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \
+      );                                                         \
+      lval = (__typeof__(lval)) _res;                            \
+   } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1, arg2, arg3, arg4, arg5,   \
+                     arg6, arg7 ,arg8, arg9)                     \
+   do {                                                          \
+      volatile OrigFn        _orig = (orig);                     \
+      volatile unsigned long _argvec[10];                        \
+      volatile unsigned long _res;                               \
+      _argvec[0] = (unsigned long)_orig.nraddr;                  \
+      _argvec[1] = (unsigned long)arg1;                          \
+      _argvec[2] = (unsigned long)arg2;                          \
+      _argvec[3] = (unsigned long)arg3;                          \
+      _argvec[4] = (unsigned long)arg4;                          \
+      _argvec[5] = (unsigned long)arg5;                          \
+      _argvec[6] = (unsigned long)arg6;                          \
+      _argvec[7] = (unsigned long)arg7;                          \
+      _argvec[8] = (unsigned long)arg8;                          \
+      _argvec[9] = (unsigned long)arg9;                          \
+      __asm__ volatile(                                          \
+         VALGRIND_CFI_PROLOGUE                                   \
+         "aghi 15,-192\n\t"                                      \
+         "lg 2, 8(1)\n\t"                                        \
+         "lg 3,16(1)\n\t"                                        \
+         "lg 4,24(1)\n\t"                                        \
+         "lg 5,32(1)\n\t"                                        \
+         "lg 6,40(1)\n\t"                                        \
+         "mvc 160(8,15), 48(1)\n\t"                              \
+         "mvc 168(8,15), 56(1)\n\t"                              \
+         "mvc 176(8,15), 64(1)\n\t"                              \
+         "mvc 184(8,15), 72(1)\n\t"                              \
+         "lg 1, 0(1)\n\t"                                        \
+         VALGRIND_CALL_NOREDIR_R1                                \
+         "lgr %0, 2\n\t"                                         \
+         "aghi 15,192\n\t"                                       \
          VALGRIND_CFI_EPILOGUE                                   \
          : /*out*/   "=d" (_res)                                 \
          : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
@@ -3604,11 +4354,11 @@ typedef
       lval = (__typeof__(lval)) _res;                            \
    } while (0)
 
-#define CALL_FN_W_8W(lval, orig, arg1, arg2, arg3, arg4, arg5,   \
-                     arg6, arg7 ,arg8)                           \
+#define CALL_FN_W_10W(lval, orig, arg1, arg2, arg3, arg4, arg5,  \
+                     arg6, arg7 ,arg8, arg9, arg10)              \
    do {                                                          \
       volatile OrigFn        _orig = (orig);                     \
-      volatile unsigned long _argvec[9];                         \
+      volatile unsigned long _argvec[11];                        \
       volatile unsigned long _res;                               \
       _argvec[0] = (unsigned long)_orig.nraddr;                  \
       _argvec[1] = (unsigned long)arg1;                          \
@@ -3619,9 +4369,11 @@ typedef
       _argvec[6] = (unsigned long)arg6;                          \
       _argvec[7] = (unsigned long)arg7;                          \
       _argvec[8] = (unsigned long)arg8;                          \
+      _argvec[9] = (unsigned long)arg9;                          \
+      _argvec[10] = (unsigned long)arg10;                        \
       __asm__ volatile(                                          \
          VALGRIND_CFI_PROLOGUE                                   \
-         "aghi 15,-184\n\t"                                      \
+         "aghi 15,-200\n\t"                                      \
          "lg 2, 8(1)\n\t"                                        \
          "lg 3,16(1)\n\t"                                        \
          "lg 4,24(1)\n\t"                                        \
@@ -3630,10 +4382,12 @@ typedef
          "mvc 160(8,15), 48(1)\n\t"                              \
          "mvc 168(8,15), 56(1)\n\t"                              \
          "mvc 176(8,15), 64(1)\n\t"                              \
+         "mvc 184(8,15), 72(1)\n\t"                              \
+         "mvc 192(8,15), 80(1)\n\t"                              \
          "lg 1, 0(1)\n\t"                                        \
          VALGRIND_CALL_NOREDIR_R1                                \
          "lgr %0, 2\n\t"                                         \
-         "aghi 15,184\n\t"                                       \
+         "aghi 15,200\n\t"                                       \
          VALGRIND_CFI_EPILOGUE                                   \
          : /*out*/   "=d" (_res)                                 \
          : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
@@ -3642,11 +4396,11 @@ typedef
       lval = (__typeof__(lval)) _res;                            \
    } while (0)
 
-#define CALL_FN_W_9W(lval, orig, arg1, arg2, arg3, arg4, arg5,   \
-                     arg6, arg7 ,arg8, arg9)                     \
+#define CALL_FN_W_11W(lval, orig, arg1, arg2, arg3, arg4, arg5,  \
+                     arg6, arg7 ,arg8, arg9, arg10, arg11)       \
    do {                                                          \
       volatile OrigFn        _orig = (orig);                     \
-      volatile unsigned long _argvec[10];                        \
+      volatile unsigned long _argvec[12];                        \
       volatile unsigned long _res;                               \
       _argvec[0] = (unsigned long)_orig.nraddr;                  \
       _argvec[1] = (unsigned long)arg1;                          \
@@ -3658,9 +4412,11 @@ typedef
       _argvec[7] = (unsigned long)arg7;                          \
       _argvec[8] = (unsigned long)arg8;                          \
       _argvec[9] = (unsigned long)arg9;                          \
+      _argvec[10] = (unsigned long)arg10;                        \
+      _argvec[11] = (unsigned long)arg11;                        \
       __asm__ volatile(                                          \
          VALGRIND_CFI_PROLOGUE                                   \
-         "aghi 15,-192\n\t"                                      \
+         "aghi 15,-208\n\t"                                      \
          "lg 2, 8(1)\n\t"                                        \
          "lg 3,16(1)\n\t"                                        \
          "lg 4,24(1)\n\t"                                        \
@@ -3670,10 +4426,12 @@ typedef
          "mvc 168(8,15), 56(1)\n\t"                              \
          "mvc 176(8,15), 64(1)\n\t"                              \
          "mvc 184(8,15), 72(1)\n\t"                              \
+         "mvc 192(8,15), 80(1)\n\t"                              \
+         "mvc 200(8,15), 88(1)\n\t"                              \
          "lg 1, 0(1)\n\t"                                        \
          VALGRIND_CALL_NOREDIR_R1                                \
          "lgr %0, 2\n\t"                                         \
-         "aghi 15,192\n\t"                                       \
+         "aghi 15,208\n\t"                                       \
          VALGRIND_CFI_EPILOGUE                                   \
          : /*out*/   "=d" (_res)                                 \
          : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
@@ -3682,11 +4440,11 @@ typedef
       lval = (__typeof__(lval)) _res;                            \
    } while (0)
 
-#define CALL_FN_W_10W(lval, orig, arg1, arg2, arg3, arg4, arg5,  \
-                     arg6, arg7 ,arg8, arg9, arg10)              \
+#define CALL_FN_W_12W(lval, orig, arg1, arg2, arg3, arg4, arg5,  \
+                     arg6, arg7 ,arg8, arg9, arg10, arg11, arg12)\
    do {                                                          \
       volatile OrigFn        _orig = (orig);                     \
-      volatile unsigned long _argvec[11];                        \
+      volatile unsigned long _argvec[13];                        \
       volatile unsigned long _res;                               \
       _argvec[0] = (unsigned long)_orig.nraddr;                  \
       _argvec[1] = (unsigned long)arg1;                          \
@@ -3699,9 +4457,11 @@ typedef
       _argvec[8] = (unsigned long)arg8;                          \
       _argvec[9] = (unsigned long)arg9;                          \
       _argvec[10] = (unsigned long)arg10;                        \
+      _argvec[11] = (unsigned long)arg11;                        \
+      _argvec[12] = (unsigned long)arg12;                        \
       __asm__ volatile(                                          \
          VALGRIND_CFI_PROLOGUE                                   \
-         "aghi 15,-200\n\t"                                      \
+         "aghi 15,-216\n\t"                                      \
          "lg 2, 8(1)\n\t"                                        \
          "lg 3,16(1)\n\t"                                        \
          "lg 4,24(1)\n\t"                                        \
@@ -3712,10 +4472,12 @@ typedef
          "mvc 176(8,15), 64(1)\n\t"                              \
          "mvc 184(8,15), 72(1)\n\t"                              \
          "mvc 192(8,15), 80(1)\n\t"                              \
+         "mvc 200(8,15), 88(1)\n\t"                              \
+         "mvc 208(8,15), 96(1)\n\t"                              \
          "lg 1, 0(1)\n\t"                                        \
          VALGRIND_CALL_NOREDIR_R1                                \
          "lgr %0, 2\n\t"                                         \
-         "aghi 15,200\n\t"                                       \
+         "aghi 15,216\n\t"                                       \
          VALGRIND_CFI_EPILOGUE                                   \
          : /*out*/   "=d" (_res)                                 \
          : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
@@ -3724,102 +4486,551 @@ typedef
       lval = (__typeof__(lval)) _res;                            \
    } while (0)
 
-#define CALL_FN_W_11W(lval, orig, arg1, arg2, arg3, arg4, arg5,  \
-                     arg6, arg7 ,arg8, arg9, arg10, arg11)       \
-   do {                                                          \
-      volatile OrigFn        _orig = (orig);                     \
-      volatile unsigned long _argvec[12];                        \
-      volatile unsigned long _res;                               \
-      _argvec[0] = (unsigned long)_orig.nraddr;                  \
-      _argvec[1] = (unsigned long)arg1;                          \
-      _argvec[2] = (unsigned long)arg2;                          \
-      _argvec[3] = (unsigned long)arg3;                          \
-      _argvec[4] = (unsigned long)arg4;                          \
-      _argvec[5] = (unsigned long)arg5;                          \
-      _argvec[6] = (unsigned long)arg6;                          \
-      _argvec[7] = (unsigned long)arg7;                          \
-      _argvec[8] = (unsigned long)arg8;                          \
-      _argvec[9] = (unsigned long)arg9;                          \
-      _argvec[10] = (unsigned long)arg10;                        \
-      _argvec[11] = (unsigned long)arg11;                        \
-      __asm__ volatile(                                          \
-         VALGRIND_CFI_PROLOGUE                                   \
-         "aghi 15,-208\n\t"                                      \
-         "lg 2, 8(1)\n\t"                                        \
-         "lg 3,16(1)\n\t"                                        \
-         "lg 4,24(1)\n\t"                                        \
-         "lg 5,32(1)\n\t"                                        \
-         "lg 6,40(1)\n\t"                                        \
-         "mvc 160(8,15), 48(1)\n\t"                              \
-         "mvc 168(8,15), 56(1)\n\t"                              \
-         "mvc 176(8,15), 64(1)\n\t"                              \
-         "mvc 184(8,15), 72(1)\n\t"                              \
-         "mvc 192(8,15), 80(1)\n\t"                              \
-         "mvc 200(8,15), 88(1)\n\t"                              \
-         "lg 1, 0(1)\n\t"                                        \
-         VALGRIND_CALL_NOREDIR_R1                                \
-         "lgr %0, 2\n\t"                                         \
-         "aghi 15,208\n\t"                                       \
-         VALGRIND_CFI_EPILOGUE                                   \
-         : /*out*/   "=d" (_res)                                 \
-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \
-      );                                                         \
-      lval = (__typeof__(lval)) _res;                            \
+
+#endif /* PLAT_s390x_linux */
+
+/* ------------------------- mips32-linux ----------------------- */
+#if defined(PLAT_mips32_linux)
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS "$2", "$3", "$4", "$5", "$6",       \
+"$7", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \
+"$25", "$31"
+
+/* These CALL_FN_ macros assume that on mips-linux, sizeof(unsigned
+   long) == 4. */
+
+#define CALL_FN_W_v(lval, orig)                                   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[1];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      __asm__ volatile(                                           \
+         "subu $29, $29, 8 \n\t"                                  \
+         "sw $28, 0($29) \n\t"                                    \
+         "sw $31, 4($29) \n\t"                                    \
+         "subu $29, $29, 16 \n\t"                                 \
+         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
+         VALGRIND_CALL_NOREDIR_T9                                 \
+         "addu $29, $29, 16\n\t"                                  \
+         "lw $28, 0($29) \n\t"                                    \
+         "lw $31, 4($29) \n\t"                                    \
+         "addu $29, $29, 8 \n\t"                                  \
+         "move %0, $2\n"                                          \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1)                             \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+     volatile unsigned long _argvec[2];                           \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      __asm__ volatile(                                           \
+         "subu $29, $29, 8 \n\t"                                  \
+         "sw $28, 0($29) \n\t"                                    \
+         "sw $31, 4($29) \n\t"                                    \
+         "subu $29, $29, 16 \n\t"                                 \
+         "lw $4, 4(%1) \n\t"   /* arg1*/                          \
+         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
+         VALGRIND_CALL_NOREDIR_T9                                 \
+         "addu $29, $29, 16 \n\t"                                 \
+         "lw $28, 0($29) \n\t"                                    \
+         "lw $31, 4($29) \n\t"                                    \
+         "addu $29, $29, 8 \n\t"                                  \
+         "move %0, $2\n"                                          \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory",  __CALLER_SAVED_REGS               \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[3];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      __asm__ volatile(                                           \
+         "subu $29, $29, 8 \n\t"                                  \
+         "sw $28, 0($29) \n\t"                                    \
+         "sw $31, 4($29) \n\t"                                    \
+         "subu $29, $29, 16 \n\t"                                 \
+         "lw $4, 4(%1) \n\t"                                      \
+         "lw $5, 8(%1) \n\t"                                      \
+         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
+         VALGRIND_CALL_NOREDIR_T9                                 \
+         "addu $29, $29, 16 \n\t"                                 \
+         "lw $28, 0($29) \n\t"                                    \
+         "lw $31, 4($29) \n\t"                                    \
+         "addu $29, $29, 8 \n\t"                                  \
+         "move %0, $2\n"                                          \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3)                 \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[4];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      __asm__ volatile(                                           \
+         "subu $29, $29, 8 \n\t"                                  \
+         "sw $28, 0($29) \n\t"                                    \
+         "sw $31, 4($29) \n\t"                                    \
+         "subu $29, $29, 16 \n\t"                                 \
+         "lw $4, 4(%1) \n\t"                                      \
+         "lw $5, 8(%1) \n\t"                                      \
+         "lw $6, 12(%1) \n\t"                                     \
+         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
+         VALGRIND_CALL_NOREDIR_T9                                 \
+         "addu $29, $29, 16 \n\t"                                 \
+         "lw $28, 0($29) \n\t"                                    \
+         "lw $31, 4($29) \n\t"                                    \
+         "addu $29, $29, 8 \n\t"                                  \
+         "move %0, $2\n"                                          \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[5];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      __asm__ volatile(                                           \
+         "subu $29, $29, 8 \n\t"                                  \
+         "sw $28, 0($29) \n\t"                                    \
+         "sw $31, 4($29) \n\t"                                    \
+         "subu $29, $29, 16 \n\t"                                 \
+         "lw $4, 4(%1) \n\t"                                      \
+         "lw $5, 8(%1) \n\t"                                      \
+         "lw $6, 12(%1) \n\t"                                     \
+         "lw $7, 16(%1) \n\t"                                     \
+         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
+         VALGRIND_CALL_NOREDIR_T9                                 \
+         "addu $29, $29, 16 \n\t"                                 \
+         "lw $28, 0($29) \n\t"                                    \
+         "lw $31, 4($29) \n\t"                                    \
+         "addu $29, $29, 8 \n\t"                                  \
+         "move %0, $2\n"                                          \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5)        \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[6];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      __asm__ volatile(                                           \
+         "subu $29, $29, 8 \n\t"                                  \
+         "sw $28, 0($29) \n\t"                                    \
+         "sw $31, 4($29) \n\t"                                    \
+         "lw $4, 20(%1) \n\t"                                     \
+         "subu $29, $29, 24\n\t"                                  \
+         "sw $4, 16($29) \n\t"                                    \
+         "lw $4, 4(%1) \n\t"                                      \
+         "lw $5, 8(%1) \n\t"                                      \
+         "lw $6, 12(%1) \n\t"                                     \
+         "lw $7, 16(%1) \n\t"                                     \
+         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
+         VALGRIND_CALL_NOREDIR_T9                                 \
+         "addu $29, $29, 24 \n\t"                                 \
+         "lw $28, 0($29) \n\t"                                    \
+         "lw $31, 4($29) \n\t"                                    \
+         "addu $29, $29, 8 \n\t"                                  \
+         "move %0, $2\n"                                          \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[7];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      __asm__ volatile(                                           \
+         "subu $29, $29, 8 \n\t"                                  \
+         "sw $28, 0($29) \n\t"                                    \
+         "sw $31, 4($29) \n\t"                                    \
+         "lw $4, 20(%1) \n\t"                                     \
+         "subu $29, $29, 32\n\t"                                  \
+         "sw $4, 16($29) \n\t"                                    \
+         "lw $4, 24(%1) \n\t"                                     \
+         "nop\n\t"                                                \
+         "sw $4, 20($29) \n\t"                                    \
+         "lw $4, 4(%1) \n\t"                                      \
+         "lw $5, 8(%1) \n\t"                                      \
+         "lw $6, 12(%1) \n\t"                                     \
+         "lw $7, 16(%1) \n\t"                                     \
+         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
+         VALGRIND_CALL_NOREDIR_T9                                 \
+         "addu $29, $29, 32 \n\t"                                 \
+         "lw $28, 0($29) \n\t"                                    \
+         "lw $31, 4($29) \n\t"                                    \
+         "addu $29, $29, 8 \n\t"                                  \
+         "move %0, $2\n"                                          \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7)                            \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[8];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      __asm__ volatile(                                           \
+         "subu $29, $29, 8 \n\t"                                  \
+         "sw $28, 0($29) \n\t"                                    \
+         "sw $31, 4($29) \n\t"                                    \
+         "lw $4, 20(%1) \n\t"                                     \
+         "subu $29, $29, 32\n\t"                                  \
+         "sw $4, 16($29) \n\t"                                    \
+         "lw $4, 24(%1) \n\t"                                     \
+         "sw $4, 20($29) \n\t"                                    \
+         "lw $4, 28(%1) \n\t"                                     \
+         "sw $4, 24($29) \n\t"                                    \
+         "lw $4, 4(%1) \n\t"                                      \
+         "lw $5, 8(%1) \n\t"                                      \
+         "lw $6, 12(%1) \n\t"                                     \
+         "lw $7, 16(%1) \n\t"                                     \
+         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
+         VALGRIND_CALL_NOREDIR_T9                                 \
+         "addu $29, $29, 32 \n\t"                                 \
+         "lw $28, 0($29) \n\t"                                    \
+         "lw $31, 4($29) \n\t"                                    \
+         "addu $29, $29, 8 \n\t"                                  \
+         "move %0, $2\n"                                          \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8)                       \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[9];                          \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      __asm__ volatile(                                           \
+         "subu $29, $29, 8 \n\t"                                  \
+         "sw $28, 0($29) \n\t"                                    \
+         "sw $31, 4($29) \n\t"                                    \
+         "lw $4, 20(%1) \n\t"                                     \
+         "subu $29, $29, 40\n\t"                                  \
+         "sw $4, 16($29) \n\t"                                    \
+         "lw $4, 24(%1) \n\t"                                     \
+         "sw $4, 20($29) \n\t"                                    \
+         "lw $4, 28(%1) \n\t"                                     \
+         "sw $4, 24($29) \n\t"                                    \
+         "lw $4, 32(%1) \n\t"                                     \
+         "sw $4, 28($29) \n\t"                                    \
+         "lw $4, 4(%1) \n\t"                                      \
+         "lw $5, 8(%1) \n\t"                                      \
+         "lw $6, 12(%1) \n\t"                                     \
+         "lw $7, 16(%1) \n\t"                                     \
+         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
+         VALGRIND_CALL_NOREDIR_T9                                 \
+         "addu $29, $29, 40 \n\t"                                 \
+         "lw $28, 0($29) \n\t"                                    \
+         "lw $31, 4($29) \n\t"                                    \
+         "addu $29, $29, 8 \n\t"                                  \
+         "move %0, $2\n"                                          \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,   \
+                                 arg7,arg8,arg9)                  \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[10];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      __asm__ volatile(                                           \
+         "subu $29, $29, 8 \n\t"                                  \
+         "sw $28, 0($29) \n\t"                                    \
+         "sw $31, 4($29) \n\t"                                    \
+         "lw $4, 20(%1) \n\t"                                     \
+         "subu $29, $29, 40\n\t"                                  \
+         "sw $4, 16($29) \n\t"                                    \
+         "lw $4, 24(%1) \n\t"                                     \
+         "sw $4, 20($29) \n\t"                                    \
+         "lw $4, 28(%1) \n\t"                                     \
+         "sw $4, 24($29) \n\t"                                    \
+         "lw $4, 32(%1) \n\t"                                     \
+         "sw $4, 28($29) \n\t"                                    \
+         "lw $4, 36(%1) \n\t"                                     \
+         "sw $4, 32($29) \n\t"                                    \
+         "lw $4, 4(%1) \n\t"                                      \
+         "lw $5, 8(%1) \n\t"                                      \
+         "lw $6, 12(%1) \n\t"                                     \
+         "lw $7, 16(%1) \n\t"                                     \
+         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
+         VALGRIND_CALL_NOREDIR_T9                                 \
+         "addu $29, $29, 40 \n\t"                                 \
+         "lw $28, 0($29) \n\t"                                    \
+         "lw $31, 4($29) \n\t"                                    \
+         "addu $29, $29, 8 \n\t"                                  \
+         "move %0, $2\n"                                          \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6,  \
+                                  arg7,arg8,arg9,arg10)           \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[11];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      _argvec[10] = (unsigned long)(arg10);                       \
+      __asm__ volatile(                                           \
+         "subu $29, $29, 8 \n\t"                                  \
+         "sw $28, 0($29) \n\t"                                    \
+         "sw $31, 4($29) \n\t"                                    \
+         "lw $4, 20(%1) \n\t"                                     \
+         "subu $29, $29, 48\n\t"                                  \
+         "sw $4, 16($29) \n\t"                                    \
+         "lw $4, 24(%1) \n\t"                                     \
+         "sw $4, 20($29) \n\t"                                    \
+         "lw $4, 28(%1) \n\t"                                     \
+         "sw $4, 24($29) \n\t"                                    \
+         "lw $4, 32(%1) \n\t"                                     \
+         "sw $4, 28($29) \n\t"                                    \
+         "lw $4, 36(%1) \n\t"                                     \
+         "sw $4, 32($29) \n\t"                                    \
+         "lw $4, 40(%1) \n\t"                                     \
+         "sw $4, 36($29) \n\t"                                    \
+         "lw $4, 4(%1) \n\t"                                      \
+         "lw $5, 8(%1) \n\t"                                      \
+         "lw $6, 12(%1) \n\t"                                     \
+         "lw $7, 16(%1) \n\t"                                     \
+         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
+         VALGRIND_CALL_NOREDIR_T9                                 \
+         "addu $29, $29, 48 \n\t"                                 \
+         "lw $28, 0($29) \n\t"                                    \
+         "lw $31, 4($29) \n\t"                                    \
+         "addu $29, $29, 8 \n\t"                                  \
+         "move %0, $2\n"                                          \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
    } while (0)
 
-#define CALL_FN_W_12W(lval, orig, arg1, arg2, arg3, arg4, arg5,  \
-                     arg6, arg7 ,arg8, arg9, arg10, arg11, arg12)\
-   do {                                                          \
-      volatile OrigFn        _orig = (orig);                     \
-      volatile unsigned long _argvec[13];                        \
-      volatile unsigned long _res;                               \
-      _argvec[0] = (unsigned long)_orig.nraddr;                  \
-      _argvec[1] = (unsigned long)arg1;                          \
-      _argvec[2] = (unsigned long)arg2;                          \
-      _argvec[3] = (unsigned long)arg3;                          \
-      _argvec[4] = (unsigned long)arg4;                          \
-      _argvec[5] = (unsigned long)arg5;                          \
-      _argvec[6] = (unsigned long)arg6;                          \
-      _argvec[7] = (unsigned long)arg7;                          \
-      _argvec[8] = (unsigned long)arg8;                          \
-      _argvec[9] = (unsigned long)arg9;                          \
-      _argvec[10] = (unsigned long)arg10;                        \
-      _argvec[11] = (unsigned long)arg11;                        \
-      _argvec[12] = (unsigned long)arg12;                        \
-      __asm__ volatile(                                          \
-         VALGRIND_CFI_PROLOGUE                                   \
-         "aghi 15,-216\n\t"                                      \
-         "lg 2, 8(1)\n\t"                                        \
-         "lg 3,16(1)\n\t"                                        \
-         "lg 4,24(1)\n\t"                                        \
-         "lg 5,32(1)\n\t"                                        \
-         "lg 6,40(1)\n\t"                                        \
-         "mvc 160(8,15), 48(1)\n\t"                              \
-         "mvc 168(8,15), 56(1)\n\t"                              \
-         "mvc 176(8,15), 64(1)\n\t"                              \
-         "mvc 184(8,15), 72(1)\n\t"                              \
-         "mvc 192(8,15), 80(1)\n\t"                              \
-         "mvc 200(8,15), 88(1)\n\t"                              \
-         "mvc 208(8,15), 96(1)\n\t"                              \
-         "lg 1, 0(1)\n\t"                                        \
-         VALGRIND_CALL_NOREDIR_R1                                \
-         "lgr %0, 2\n\t"                                         \
-         "aghi 15,216\n\t"                                       \
-         VALGRIND_CFI_EPILOGUE                                   \
-         : /*out*/   "=d" (_res)                                 \
-         : /*in*/    "a" (&_argvec[0]) __FRAME_POINTER           \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \
-      );                                                         \
-      lval = (__typeof__(lval)) _res;                            \
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
+                                  arg6,arg7,arg8,arg9,arg10,      \
+                                  arg11)                          \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[12];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      _argvec[10] = (unsigned long)(arg10);                       \
+      _argvec[11] = (unsigned long)(arg11);                       \
+      __asm__ volatile(                                           \
+         "subu $29, $29, 8 \n\t"                                  \
+         "sw $28, 0($29) \n\t"                                    \
+         "sw $31, 4($29) \n\t"                                    \
+         "lw $4, 20(%1) \n\t"                                     \
+         "subu $29, $29, 48\n\t"                                  \
+         "sw $4, 16($29) \n\t"                                    \
+         "lw $4, 24(%1) \n\t"                                     \
+         "sw $4, 20($29) \n\t"                                    \
+         "lw $4, 28(%1) \n\t"                                     \
+         "sw $4, 24($29) \n\t"                                    \
+         "lw $4, 32(%1) \n\t"                                     \
+         "sw $4, 28($29) \n\t"                                    \
+         "lw $4, 36(%1) \n\t"                                     \
+         "sw $4, 32($29) \n\t"                                    \
+         "lw $4, 40(%1) \n\t"                                     \
+         "sw $4, 36($29) \n\t"                                    \
+         "lw $4, 44(%1) \n\t"                                     \
+         "sw $4, 40($29) \n\t"                                    \
+         "lw $4, 4(%1) \n\t"                                      \
+         "lw $5, 8(%1) \n\t"                                      \
+         "lw $6, 12(%1) \n\t"                                     \
+         "lw $7, 16(%1) \n\t"                                     \
+         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
+         VALGRIND_CALL_NOREDIR_T9                                 \
+         "addu $29, $29, 48 \n\t"                                 \
+         "lw $28, 0($29) \n\t"                                    \
+         "lw $31, 4($29) \n\t"                                    \
+         "addu $29, $29, 8 \n\t"                                  \
+         "move %0, $2\n"                                          \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "0" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
    } while (0)
 
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,       \
+                                  arg6,arg7,arg8,arg9,arg10,      \
+                                  arg11,arg12)                    \
+   do {                                                           \
+      volatile OrigFn        _orig = (orig);                      \
+      volatile unsigned long _argvec[13];                         \
+      volatile unsigned long _res;                                \
+      _argvec[0] = (unsigned long)_orig.nraddr;                   \
+      _argvec[1] = (unsigned long)(arg1);                         \
+      _argvec[2] = (unsigned long)(arg2);                         \
+      _argvec[3] = (unsigned long)(arg3);                         \
+      _argvec[4] = (unsigned long)(arg4);                         \
+      _argvec[5] = (unsigned long)(arg5);                         \
+      _argvec[6] = (unsigned long)(arg6);                         \
+      _argvec[7] = (unsigned long)(arg7);                         \
+      _argvec[8] = (unsigned long)(arg8);                         \
+      _argvec[9] = (unsigned long)(arg9);                         \
+      _argvec[10] = (unsigned long)(arg10);                       \
+      _argvec[11] = (unsigned long)(arg11);                       \
+      _argvec[12] = (unsigned long)(arg12);                       \
+      __asm__ volatile(                                           \
+         "subu $29, $29, 8 \n\t"                                  \
+         "sw $28, 0($29) \n\t"                                    \
+         "sw $31, 4($29) \n\t"                                    \
+         "lw $4, 20(%1) \n\t"                                     \
+         "subu $29, $29, 56\n\t"                                  \
+         "sw $4, 16($29) \n\t"                                    \
+         "lw $4, 24(%1) \n\t"                                     \
+         "sw $4, 20($29) \n\t"                                    \
+         "lw $4, 28(%1) \n\t"                                     \
+         "sw $4, 24($29) \n\t"                                    \
+         "lw $4, 32(%1) \n\t"                                     \
+         "sw $4, 28($29) \n\t"                                    \
+         "lw $4, 36(%1) \n\t"                                     \
+         "sw $4, 32($29) \n\t"                                    \
+         "lw $4, 40(%1) \n\t"                                     \
+         "sw $4, 36($29) \n\t"                                    \
+         "lw $4, 44(%1) \n\t"                                     \
+         "sw $4, 40($29) \n\t"                                    \
+         "lw $4, 48(%1) \n\t"                                     \
+         "sw $4, 44($29) \n\t"                                    \
+         "lw $4, 4(%1) \n\t"                                      \
+         "lw $5, 8(%1) \n\t"                                      \
+         "lw $6, 12(%1) \n\t"                                     \
+         "lw $7, 16(%1) \n\t"                                     \
+         "lw $25, 0(%1) \n\t"  /* target->t9 */                   \
+         VALGRIND_CALL_NOREDIR_T9                                 \
+         "addu $29, $29, 56 \n\t"                                 \
+         "lw $28, 0($29) \n\t"                                    \
+         "lw $31, 4($29) \n\t"                                    \
+         "addu $29, $29, 8 \n\t"                                  \
+         "move %0, $2\n"                                          \
+         : /*out*/   "=r" (_res)                                  \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
+      );                                                          \
+      lval = (__typeof__(lval)) _res;                             \
+   } while (0)
 
-#endif /* PLAT_s390x_linux */
+#endif /* PLAT_mips32_linux */
 
-/* ------------------------- mips-linux ------------------------- */
-#if defined(PLAT_mips32_linux)
+/* ------------------------- mips64-linux ------------------------- */
+
+#if defined(PLAT_mips64_linux)
 
 /* These regs are trashed by the hidden call. */
 #define __CALLER_SAVED_REGS "$2", "$3", "$4", "$5", "$6",       \
@@ -3836,20 +5047,12 @@ typedef
       volatile unsigned long _res;                                \
       _argvec[0] = (unsigned long)_orig.nraddr;                   \
       __asm__ volatile(                                           \
-         "subu $29, $29, 8 \n\t"                                  \
-         "sw $gp, 0($sp) \n\t"                                    \
-         "sw $ra, 4($sp) \n\t"                                    \
-        "subu $29, $29, 16 \n\t"                                 \
-         "lw $t9, 0(%1) \n\t"  /* target->t9 */                   \
+         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
          VALGRIND_CALL_NOREDIR_T9                                 \
-         "addu $29, $29, 16\n\t"                                  \
-         "lw $gp, 0($sp) \n\t"                                    \
-         "lw $ra, 4($sp) \n\t"                                    \
-         "addu $29, $29, 8 \n\t"                                  \
-         "move %0, $v0\n"                                         \
+         "move %0, $2\n"                                          \
          : /*out*/   "=r" (_res)                                  \
          : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
       );                                                          \
       lval = (__typeof__(lval)) _res;                             \
    } while (0)
@@ -3857,26 +5060,18 @@ typedef
 #define CALL_FN_W_W(lval, orig, arg1)                             \
    do {                                                           \
       volatile OrigFn        _orig = (orig);                      \
-     volatile unsigned long _argvec[2];                           \
+      volatile unsigned long _argvec[2];                          \
       volatile unsigned long _res;                                \
       _argvec[0] = (unsigned long)_orig.nraddr;                   \
       _argvec[1] = (unsigned long)(arg1);                         \
       __asm__ volatile(                                           \
-         "subu $29, $29, 8 \n\t"                                  \
-         "sw $gp, 0($sp) \n\t"                                    \
-         "sw $ra, 4($sp) \n\t"                                    \
-         "subu $29, $29, 16 \n\t"                                 \
-         "lw $a0, 4(%1) \n\t"   /* arg1*/                         \
-         "lw $t9, 0(%1) \n\t"  /* target->t9 */                   \
+         "ld $4, 8(%1)\n\t"   /* arg1*/                           \
+         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
          VALGRIND_CALL_NOREDIR_T9                                 \
-         "addu $29, $29, 16 \n\t"                                 \
-         "lw $gp, 0($sp) \n\t"                                    \
-         "lw $ra, 4($sp) \n\t"                                    \
-         "addu $29, $29, 8 \n\t"                                  \
-         "move %0, $v0\n"                                         \
+         "move %0, $2\n"                                          \
          : /*out*/   "=r" (_res)                                  \
-        : /*in*/    "0" (&_argvec[0])                             \
-         : /*trash*/ "cc", "memory",  __CALLER_SAVED_REGS         \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
       );                                                          \
       lval = (__typeof__(lval)) _res;                             \
    } while (0)
@@ -3890,22 +5085,14 @@ typedef
       _argvec[1] = (unsigned long)(arg1);                         \
       _argvec[2] = (unsigned long)(arg2);                         \
       __asm__ volatile(                                           \
-         "subu $29, $29, 8 \n\t"                                  \
-         "sw $gp, 0($sp) \n\t"                                    \
-         "sw $ra, 4($sp) \n\t"                                    \
-         "subu $29, $29, 16 \n\t"                                 \
-         "lw $a0, 4(%1) \n\t"                                     \
-         "lw $a1, 8(%1) \n\t"                                     \
-         "lw $t9, 0(%1) \n\t"  /* target->t9 */                   \
+         "ld $4, 8(%1)\n\t"                                       \
+         "ld $5, 16(%1)\n\t"                                      \
+         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
          VALGRIND_CALL_NOREDIR_T9                                 \
-         "addu $29, $29, 16 \n\t"                                 \
-         "lw $gp, 0($sp) \n\t"                                    \
-         "lw $ra, 4($sp) \n\t"                                    \
-         "addu $29, $29, 8 \n\t"                                  \
-         "move %0, $v0\n"                                         \
+         "move %0, $2\n"                                          \
          : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
       );                                                          \
       lval = (__typeof__(lval)) _res;                             \
    } while (0)
@@ -3920,23 +5107,15 @@ typedef
       _argvec[2] = (unsigned long)(arg2);                         \
       _argvec[3] = (unsigned long)(arg3);                         \
       __asm__ volatile(                                           \
-         "subu $29, $29, 8 \n\t"                                  \
-         "sw $gp, 0($sp) \n\t"                                    \
-         "sw $ra, 4($sp) \n\t"                                    \
-         "subu $29, $29, 16 \n\t"                                 \
-         "lw $a0, 4(%1) \n\t"                                     \
-         "lw $a1, 8(%1) \n\t"                                     \
-         "lw $a2, 12(%1) \n\t"                                    \
-         "lw $t9, 0(%1) \n\t"  /* target->t9 */                   \
+         "ld $4, 8(%1)\n\t"                                       \
+         "ld $5, 16(%1)\n\t"                                      \
+         "ld $6, 24(%1)\n\t"                                      \
+         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
          VALGRIND_CALL_NOREDIR_T9                                 \
-         "addu $29, $29, 16 \n\t"                                 \
-         "lw $gp, 0($sp) \n\t"                                    \
-         "lw $ra, 4($sp) \n\t"                                    \
-         "addu $29, $29, 8 \n\t"                                  \
-         "move %0, $v0\n"                                         \
+         "move %0, $2\n"                                          \
          : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
       );                                                          \
       lval = (__typeof__(lval)) _res;                             \
    } while (0)
@@ -3952,24 +5131,16 @@ typedef
       _argvec[3] = (unsigned long)(arg3);                         \
       _argvec[4] = (unsigned long)(arg4);                         \
       __asm__ volatile(                                           \
-         "subu $29, $29, 8 \n\t"                                  \
-         "sw $gp, 0($sp) \n\t"                                    \
-         "sw $ra, 4($sp) \n\t"                                    \
-         "subu $29, $29, 16 \n\t"                                 \
-         "lw $a0, 4(%1) \n\t"                                     \
-         "lw $a1, 8(%1) \n\t"                                     \
-         "lw $a2, 12(%1) \n\t"                                    \
-         "lw $a3, 16(%1) \n\t"                                    \
-         "lw $t9, 0(%1) \n\t"  /* target->t9 */                   \
+         "ld $4, 8(%1)\n\t"                                       \
+         "ld $5, 16(%1)\n\t"                                      \
+         "ld $6, 24(%1)\n\t"                                      \
+         "ld $7, 32(%1)\n\t"                                      \
+         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
          VALGRIND_CALL_NOREDIR_T9                                 \
-         "addu $29, $29, 16 \n\t"                                 \
-         "lw $gp, 0($sp) \n\t"                                    \
-         "lw $ra, 4($sp) \n\t"                                    \
-         "addu $29, $29, 8 \n\t"                                  \
-         "move %0, $v0\n"                                         \
+         "move %0, $2\n"                                          \
          : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
       );                                                          \
       lval = (__typeof__(lval)) _res;                             \
    } while (0)
@@ -3986,29 +5157,21 @@ typedef
       _argvec[4] = (unsigned long)(arg4);                         \
       _argvec[5] = (unsigned long)(arg5);                         \
       __asm__ volatile(                                           \
-         "subu $29, $29, 8 \n\t"                                  \
-         "sw $gp, 0($sp) \n\t"                                    \
-         "sw $ra, 4($sp) \n\t"                                    \
-         "lw $a0, 20(%1) \n\t"                                    \
-         "subu $sp, $sp, 24\n\t"                                  \
-         "sw $a0, 16($sp) \n\t"                                   \
-         "lw $a0, 4(%1) \n\t"                                     \
-         "lw $a1, 8(%1) \n\t"                                     \
-         "lw $a2, 12(%1) \n\t"                                    \
-         "lw $a3, 16(%1) \n\t"                                    \
-         "lw $t9, 0(%1) \n\t"  /* target->t9 */                   \
+         "ld $4, 8(%1)\n\t"                                       \
+         "ld $5, 16(%1)\n\t"                                      \
+         "ld $6, 24(%1)\n\t"                                      \
+         "ld $7, 32(%1)\n\t"                                      \
+         "ld $8, 40(%1)\n\t"                                      \
+         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
          VALGRIND_CALL_NOREDIR_T9                                 \
-         "addu $29, $29, 24 \n\t"                                 \
-         "lw $gp, 0($sp) \n\t"                                    \
-         "lw $ra, 4($sp) \n\t"                                    \
-         "addu $sp, $sp, 8 \n\t"                                  \
-         "move %0, $v0\n"                                         \
+         "move %0, $2\n"                                          \
          : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
       );                                                          \
       lval = (__typeof__(lval)) _res;                             \
    } while (0)
+
 #define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6)   \
    do {                                                           \
       volatile OrigFn        _orig = (orig);                      \
@@ -4022,29 +5185,18 @@ typedef
       _argvec[5] = (unsigned long)(arg5);                         \
       _argvec[6] = (unsigned long)(arg6);                         \
       __asm__ volatile(                                           \
-         "subu $29, $29, 8 \n\t"                                  \
-         "sw $gp, 0($sp) \n\t"                                    \
-         "sw $ra, 4($sp) \n\t"                                    \
-         "lw $a0, 20(%1) \n\t"                                    \
-         "subu $sp, $sp, 32\n\t"                                  \
-         "sw $a0, 16($sp) \n\t"                                   \
-         "lw $a0, 24(%1) \n\t"                                    \
-         "nop\n\t"                                                \
-         "sw $a0, 20($sp) \n\t"                                   \
-         "lw $a0, 4(%1) \n\t"                                     \
-         "lw $a1, 8(%1) \n\t"                                     \
-         "lw $a2, 12(%1) \n\t"                                    \
-         "lw $a3, 16(%1) \n\t"                                    \
-         "lw $t9, 0(%1) \n\t"  /* target->t9 */                   \
+         "ld $4, 8(%1)\n\t"                                       \
+         "ld $5, 16(%1)\n\t"                                      \
+         "ld $6, 24(%1)\n\t"                                      \
+         "ld $7, 32(%1)\n\t"                                      \
+         "ld $8, 40(%1)\n\t"                                      \
+         "ld $9, 48(%1)\n\t"                                      \
+         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
          VALGRIND_CALL_NOREDIR_T9                                 \
-         "addu $sp, $sp, 32 \n\t"                                 \
-         "lw $gp, 0($sp) \n\t"                                    \
-         "lw $ra, 4($sp) \n\t"                                    \
-         "addu $sp, $sp, 8 \n\t"                                  \
-         "move %0, $v0\n"                                         \
+         "move %0, $2\n"                                          \
          : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
       );                                                          \
       lval = (__typeof__(lval)) _res;                             \
    } while (0)
@@ -4064,30 +5216,19 @@ typedef
       _argvec[6] = (unsigned long)(arg6);                         \
       _argvec[7] = (unsigned long)(arg7);                         \
       __asm__ volatile(                                           \
-         "subu $29, $29, 8 \n\t"                                  \
-         "sw $gp, 0($sp) \n\t"                                    \
-         "sw $ra, 4($sp) \n\t"                                    \
-         "lw $a0, 20(%1) \n\t"                                    \
-         "subu $sp, $sp, 32\n\t"                                  \
-         "sw $a0, 16($sp) \n\t"                                   \
-         "lw $a0, 24(%1) \n\t"                                    \
-         "sw $a0, 20($sp) \n\t"                                   \
-         "lw $a0, 28(%1) \n\t"                                    \
-         "sw $a0, 24($sp) \n\t"                                   \
-         "lw $a0, 4(%1) \n\t"                                     \
-         "lw $a1, 8(%1) \n\t"                                     \
-         "lw $a2, 12(%1) \n\t"                                    \
-         "lw $a3, 16(%1) \n\t"                                    \
-         "lw $t9, 0(%1) \n\t"  /* target->t9 */                   \
+         "ld $4, 8(%1)\n\t"                                       \
+         "ld $5, 16(%1)\n\t"                                      \
+         "ld $6, 24(%1)\n\t"                                      \
+         "ld $7, 32(%1)\n\t"                                      \
+         "ld $8, 40(%1)\n\t"                                      \
+         "ld $9, 48(%1)\n\t"                                      \
+         "ld $10, 56(%1)\n\t"                                     \
+         "ld $25, 0(%1) \n\t"  /* target->t9 */                   \
          VALGRIND_CALL_NOREDIR_T9                                 \
-         "addu $sp, $sp, 32 \n\t"                                 \
-         "lw $gp, 0($sp) \n\t"                                    \
-         "lw $ra, 4($sp) \n\t"                                    \
-         "addu $sp, $sp, 8 \n\t"                                  \
-         "move %0, $v0\n"                                         \
+         "move %0, $2\n"                                          \
          : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
       );                                                          \
       lval = (__typeof__(lval)) _res;                             \
    } while (0)
@@ -4108,32 +5249,20 @@ typedef
       _argvec[7] = (unsigned long)(arg7);                         \
       _argvec[8] = (unsigned long)(arg8);                         \
       __asm__ volatile(                                           \
-         "subu $29, $29, 8 \n\t"                                  \
-         "sw $gp, 0($sp) \n\t"                                    \
-         "sw $ra, 4($sp) \n\t"                                    \
-         "lw $a0, 20(%1) \n\t"                                    \
-         "subu $sp, $sp, 40\n\t"                                  \
-         "sw $a0, 16($sp) \n\t"                                   \
-         "lw $a0, 24(%1) \n\t"                                    \
-         "sw $a0, 20($sp) \n\t"                                   \
-         "lw $a0, 28(%1) \n\t"                                    \
-         "sw $a0, 24($sp) \n\t"                                   \
-         "lw $a0, 32(%1) \n\t"                                    \
-         "sw $a0, 28($sp) \n\t"                                   \
-         "lw $a0, 4(%1) \n\t"                                     \
-         "lw $a1, 8(%1) \n\t"                                     \
-         "lw $a2, 12(%1) \n\t"                                    \
-         "lw $a3, 16(%1) \n\t"                                    \
-         "lw $t9, 0(%1) \n\t"  /* target->t9 */                   \
+         "ld $4, 8(%1)\n\t"                                       \
+         "ld $5, 16(%1)\n\t"                                      \
+         "ld $6, 24(%1)\n\t"                                      \
+         "ld $7, 32(%1)\n\t"                                      \
+         "ld $8, 40(%1)\n\t"                                      \
+         "ld $9, 48(%1)\n\t"                                      \
+         "ld $10, 56(%1)\n\t"                                     \
+         "ld $11, 64(%1)\n\t"                                     \
+         "ld $25, 0(%1) \n\t"  /* target->t9 */                   \
          VALGRIND_CALL_NOREDIR_T9                                 \
-         "addu $sp, $sp, 40 \n\t"                                 \
-         "lw $gp, 0($sp) \n\t"                                    \
-         "lw $ra, 4($sp) \n\t"                                    \
-         "addu $sp, $sp, 8 \n\t"                                  \
-         "move %0, $v0\n"                                         \
+         "move %0, $2\n"                                          \
          : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
       );                                                          \
       lval = (__typeof__(lval)) _res;                             \
    } while (0)
@@ -4155,34 +5284,24 @@ typedef
       _argvec[8] = (unsigned long)(arg8);                         \
       _argvec[9] = (unsigned long)(arg9);                         \
       __asm__ volatile(                                           \
-         "subu $29, $29, 8 \n\t"                                  \
-         "sw $gp, 0($sp) \n\t"                                    \
-         "sw $ra, 4($sp) \n\t"                                    \
-         "lw $a0, 20(%1) \n\t"                                    \
-         "subu $sp, $sp, 40\n\t"                                  \
-         "sw $a0, 16($sp) \n\t"                                   \
-         "lw $a0, 24(%1) \n\t"                                    \
-         "sw $a0, 20($sp) \n\t"                                   \
-         "lw $a0, 28(%1) \n\t"                                    \
-         "sw $a0, 24($sp) \n\t"                                   \
-         "lw $a0, 32(%1) \n\t"                                    \
-         "sw $a0, 28($sp) \n\t"                                   \
-         "lw $a0, 36(%1) \n\t"                                    \
-         "sw $a0, 32($sp) \n\t"                                   \
-         "lw $a0, 4(%1) \n\t"                                     \
-         "lw $a1, 8(%1) \n\t"                                     \
-         "lw $a2, 12(%1) \n\t"                                    \
-         "lw $a3, 16(%1) \n\t"                                    \
-         "lw $t9, 0(%1) \n\t"  /* target->t9 */                   \
+         "dsubu $29, $29, 8\n\t"                                  \
+         "ld $4, 72(%1)\n\t"                                      \
+         "sd $4, 0($29)\n\t"                                      \
+         "ld $4, 8(%1)\n\t"                                       \
+         "ld $5, 16(%1)\n\t"                                      \
+         "ld $6, 24(%1)\n\t"                                      \
+         "ld $7, 32(%1)\n\t"                                      \
+         "ld $8, 40(%1)\n\t"                                      \
+         "ld $9, 48(%1)\n\t"                                      \
+         "ld $10, 56(%1)\n\t"                                     \
+         "ld $11, 64(%1)\n\t"                                     \
+         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
          VALGRIND_CALL_NOREDIR_T9                                 \
-         "addu $sp, $sp, 40 \n\t"                                 \
-         "lw $gp, 0($sp) \n\t"                                    \
-         "lw $ra, 4($sp) \n\t"                                    \
-         "addu $sp, $sp, 8 \n\t"                                  \
-         "move %0, $v0\n"                                         \
+         "daddu $29, $29, 8\n\t"                                  \
+         "move %0, $2\n"                                          \
          : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
       );                                                          \
       lval = (__typeof__(lval)) _res;                             \
    } while (0)
@@ -4205,36 +5324,26 @@ typedef
       _argvec[9] = (unsigned long)(arg9);                         \
       _argvec[10] = (unsigned long)(arg10);                       \
       __asm__ volatile(                                           \
-         "subu $29, $29, 8 \n\t"                                  \
-         "sw $gp, 0($sp) \n\t"                                    \
-         "sw $ra, 4($sp) \n\t"                                    \
-         "lw $a0, 20(%1) \n\t"                                    \
-         "subu $sp, $sp, 48\n\t"                                  \
-         "sw $a0, 16($sp) \n\t"                                   \
-         "lw $a0, 24(%1) \n\t"                                    \
-         "sw $a0, 20($sp) \n\t"                                   \
-         "lw $a0, 28(%1) \n\t"                                    \
-         "sw $a0, 24($sp) \n\t"                                   \
-         "lw $a0, 32(%1) \n\t"                                    \
-         "sw $a0, 28($sp) \n\t"                                   \
-         "lw $a0, 36(%1) \n\t"                                    \
-         "sw $a0, 32($sp) \n\t"                                   \
-         "lw $a0, 40(%1) \n\t"                                    \
-         "sw $a0, 36($sp) \n\t"                                   \
-         "lw $a0, 4(%1) \n\t"                                     \
-         "lw $a1, 8(%1) \n\t"                                     \
-         "lw $a2, 12(%1) \n\t"                                    \
-         "lw $a3, 16(%1) \n\t"                                    \
-         "lw $t9, 0(%1) \n\t"  /* target->t9 */                   \
+         "dsubu $29, $29, 16\n\t"                                 \
+         "ld $4, 72(%1)\n\t"                                      \
+         "sd $4, 0($29)\n\t"                                      \
+         "ld $4, 80(%1)\n\t"                                      \
+         "sd $4, 8($29)\n\t"                                      \
+         "ld $4, 8(%1)\n\t"                                       \
+         "ld $5, 16(%1)\n\t"                                      \
+         "ld $6, 24(%1)\n\t"                                      \
+         "ld $7, 32(%1)\n\t"                                      \
+         "ld $8, 40(%1)\n\t"                                      \
+         "ld $9, 48(%1)\n\t"                                      \
+         "ld $10, 56(%1)\n\t"                                     \
+         "ld $11, 64(%1)\n\t"                                     \
+         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
          VALGRIND_CALL_NOREDIR_T9                                 \
-         "addu $sp, $sp, 48 \n\t"                                 \
-         "lw $gp, 0($sp) \n\t"                                    \
-         "lw $ra, 4($sp) \n\t"                                    \
-         "addu $sp, $sp, 8 \n\t"                                  \
-         "move %0, $v0\n"                                         \
+         "daddu $29, $29, 16\n\t"                                 \
+         "move %0, $2\n"                                          \
          : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
       );                                                          \
       lval = (__typeof__(lval)) _res;                             \
    } while (0)
@@ -4259,38 +5368,28 @@ typedef
       _argvec[10] = (unsigned long)(arg10);                       \
       _argvec[11] = (unsigned long)(arg11);                       \
       __asm__ volatile(                                           \
-         "subu $29, $29, 8 \n\t"                                  \
-         "sw $gp, 0($sp) \n\t"                                    \
-         "sw $ra, 4($sp) \n\t"                                    \
-         "lw $a0, 20(%1) \n\t"                                    \
-         "subu $sp, $sp, 48\n\t"                                  \
-         "sw $a0, 16($sp) \n\t"                                   \
-         "lw $a0, 24(%1) \n\t"                                    \
-         "sw $a0, 20($sp) \n\t"                                   \
-         "lw $a0, 28(%1) \n\t"                                    \
-         "sw $a0, 24($sp) \n\t"                                   \
-         "lw $a0, 32(%1) \n\t"                                    \
-         "sw $a0, 28($sp) \n\t"                                   \
-         "lw $a0, 36(%1) \n\t"                                    \
-         "sw $a0, 32($sp) \n\t"                                   \
-         "lw $a0, 40(%1) \n\t"                                    \
-         "sw $a0, 36($sp) \n\t"                                   \
-         "lw $a0, 44(%1) \n\t"                                    \
-         "sw $a0, 40($sp) \n\t"                                   \
-         "lw $a0, 4(%1) \n\t"                                     \
-         "lw $a1, 8(%1) \n\t"                                     \
-         "lw $a2, 12(%1) \n\t"                                    \
-         "lw $a3, 16(%1) \n\t"                                    \
-         "lw $t9, 0(%1) \n\t"  /* target->t9 */                   \
+         "dsubu $29, $29, 24\n\t"                                 \
+         "ld $4, 72(%1)\n\t"                                      \
+         "sd $4, 0($29)\n\t"                                      \
+         "ld $4, 80(%1)\n\t"                                      \
+         "sd $4, 8($29)\n\t"                                      \
+         "ld $4, 88(%1)\n\t"                                      \
+         "sd $4, 16($29)\n\t"                                     \
+         "ld $4, 8(%1)\n\t"                                       \
+         "ld $5, 16(%1)\n\t"                                      \
+         "ld $6, 24(%1)\n\t"                                      \
+         "ld $7, 32(%1)\n\t"                                      \
+         "ld $8, 40(%1)\n\t"                                      \
+         "ld $9, 48(%1)\n\t"                                      \
+         "ld $10, 56(%1)\n\t"                                     \
+         "ld $11, 64(%1)\n\t"                                     \
+         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
          VALGRIND_CALL_NOREDIR_T9                                 \
-         "addu $sp, $sp, 48 \n\t"                                 \
-         "lw $gp, 0($sp) \n\t"                                    \
-         "lw $ra, 4($sp) \n\t"                                    \
-         "addu $sp, $sp, 8 \n\t"                                  \
-         "move %0, $v0\n"                                         \
+         "daddu $29, $29, 24\n\t"                                 \
+         "move %0, $2\n"                                          \
          : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
       );                                                          \
       lval = (__typeof__(lval)) _res;                             \
    } while (0)
@@ -4316,45 +5415,35 @@ typedef
       _argvec[11] = (unsigned long)(arg11);                       \
       _argvec[12] = (unsigned long)(arg12);                       \
       __asm__ volatile(                                           \
-         "subu $29, $29, 8 \n\t"                                  \
-         "sw $gp, 0($sp) \n\t"                                    \
-         "sw $ra, 4($sp) \n\t"                                    \
-         "lw $a0, 20(%1) \n\t"                                    \
-         "subu $sp, $sp, 56\n\t"                                  \
-         "sw $a0, 16($sp) \n\t"                                   \
-         "lw $a0, 24(%1) \n\t"                                    \
-         "sw $a0, 20($sp) \n\t"                                   \
-         "lw $a0, 28(%1) \n\t"                                    \
-         "sw $a0, 24($sp) \n\t"                                   \
-         "lw $a0, 32(%1) \n\t"                                    \
-         "sw $a0, 28($sp) \n\t"                                   \
-         "lw $a0, 36(%1) \n\t"                                    \
-         "sw $a0, 32($sp) \n\t"                                   \
-         "lw $a0, 40(%1) \n\t"                                    \
-         "sw $a0, 36($sp) \n\t"                                   \
-         "lw $a0, 44(%1) \n\t"                                    \
-         "sw $a0, 40($sp) \n\t"                                   \
-         "lw $a0, 48(%1) \n\t"                                    \
-         "sw $a0, 44($sp) \n\t"                                   \
-         "lw $a0, 4(%1) \n\t"                                     \
-         "lw $a1, 8(%1) \n\t"                                     \
-         "lw $a2, 12(%1) \n\t"                                    \
-         "lw $a3, 16(%1) \n\t"                                    \
-         "lw $t9, 0(%1) \n\t"  /* target->t9 */                   \
+         "dsubu $29, $29, 32\n\t"                                 \
+         "ld $4, 72(%1)\n\t"                                      \
+         "sd $4, 0($29)\n\t"                                      \
+         "ld $4, 80(%1)\n\t"                                      \
+         "sd $4, 8($29)\n\t"                                      \
+         "ld $4, 88(%1)\n\t"                                      \
+         "sd $4, 16($29)\n\t"                                     \
+         "ld $4, 96(%1)\n\t"                                      \
+         "sd $4, 24($29)\n\t"                                     \
+         "ld $4, 8(%1)\n\t"                                       \
+         "ld $5, 16(%1)\n\t"                                      \
+         "ld $6, 24(%1)\n\t"                                      \
+         "ld $7, 32(%1)\n\t"                                      \
+         "ld $8, 40(%1)\n\t"                                      \
+         "ld $9, 48(%1)\n\t"                                      \
+         "ld $10, 56(%1)\n\t"                                     \
+         "ld $11, 64(%1)\n\t"                                     \
+         "ld $25, 0(%1)\n\t"  /* target->t9 */                    \
          VALGRIND_CALL_NOREDIR_T9                                 \
-         "addu $sp, $sp, 56 \n\t"                                 \
-         "lw $gp, 0($sp) \n\t"                                    \
-         "lw $ra, 4($sp) \n\t"                                    \
-         "addu $sp, $sp, 8 \n\t"                                  \
-         "move %0, $v0\n"                                         \
+         "daddu $29, $29, 32\n\t"                                 \
+         "move %0, $2\n"                                          \
          : /*out*/   "=r" (_res)                                  \
-         : /*in*/    "0" (&_argvec[0])                            \
-         : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS          \
+         : /*in*/    "r" (&_argvec[0])                            \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS                \
       );                                                          \
       lval = (__typeof__(lval)) _res;                             \
    } while (0)
 
-#endif /* PLAT_mips32_linux */
+#endif /* PLAT_mips64_linux */
 
 
 /* ------------------------------------------------------------------ */
@@ -4400,8 +5489,8 @@ typedef
              errors. */
           VG_USERREQ__COUNT_ERRORS = 0x1201,
 
-          /* Allows a string (gdb monitor command) to be passed to the tool
-             Used for interaction with vgdb/gdb */
+          /* Allows the client program and/or gdbserver to execute a monitor
+             command. */
           VG_USERREQ__GDB_MONITOR_COMMAND = 0x1202,
 
           /* These are useful and can be interpreted by any tool that
@@ -4448,7 +5537,10 @@ typedef
              disablement indicator.  Hence 1 disables or further
              disables errors, and -1 moves back towards enablement.
              Other values are not allowed. */
-          VG_USERREQ__CHANGE_ERR_DISABLEMENT = 0x1801
+          VG_USERREQ__CHANGE_ERR_DISABLEMENT = 0x1801,
+
+          /* Initialise IR injection */
+          VG_USERREQ__VEX_INIT_FOR_IRI = 0x1901
    } Vg_ClientRequest;
 
 #if !defined(__GNUC__)
@@ -4495,14 +5587,14 @@ VALGRIND_PRINTF(const char *format, ...)
 #if defined(NVALGRIND)
    return 0;
 #else /* NVALGRIND */
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) || defined(__MINGW64__)
    uintptr_t _qzz_res;
 #else
    unsigned long _qzz_res;
 #endif
    va_list vargs;
    va_start(vargs, format);
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) || defined(__MINGW64__)
    _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0,
                               VG_USERREQ__PRINTF_VALIST_BY_REF,
                               (uintptr_t)format,
@@ -4533,14 +5625,14 @@ VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
 #if defined(NVALGRIND)
    return 0;
 #else /* NVALGRIND */
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) || defined(__MINGW64__)
    uintptr_t _qzz_res;
 #else
    unsigned long _qzz_res;
 #endif
    va_list vargs;
    va_start(vargs, format);
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) || defined(__MINGW64__)
    _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0,
                               VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF,
                               (uintptr_t)format,
@@ -4826,9 +5918,20 @@ VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
     VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__CHANGE_ERR_DISABLEMENT, \
                                     -1, 0, 0, 0, 0)
 
+/* Execute a monitor command from the client program.
+   If a connection is opened with GDB, the output will be sent
+   according to the output mode set for vgdb.
+   If no connection is opened, output will go to the log output.
+   Returns 1 if command not recognised, 0 otherwise. */
+#define VALGRIND_MONITOR_COMMAND(command)                               \
+   VALGRIND_DO_CLIENT_REQUEST_EXPR(0, VG_USERREQ__GDB_MONITOR_COMMAND, \
+                                   command, 0, 0, 0, 0)
+
+
 #undef PLAT_x86_darwin
 #undef PLAT_amd64_darwin
 #undef PLAT_x86_win32
+#undef PLAT_amd64_win64
 #undef PLAT_x86_linux
 #undef PLAT_amd64_linux
 #undef PLAT_ppc32_linux
@@ -4836,5 +5939,6 @@ VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
 #undef PLAT_arm_linux
 #undef PLAT_s390x_linux
 #undef PLAT_mips32_linux
+#undef PLAT_mips64_linux
 
 #endif   /* __VALGRIND_H */