UPDATE : valgrind version update and add files for SDK package
[sdk/tools/upstream/valgrind.git] / VEX/priv/main_main.c
index 1e80972..22305ef 100644
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2013 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
 */
 
 #include "libvex.h"
-#include "libvex_emwarn.h"
+#include "libvex_emnote.h"
 #include "libvex_guest_x86.h"
 #include "libvex_guest_amd64.h"
 #include "libvex_guest_arm.h"
 #include "libvex_guest_ppc32.h"
 #include "libvex_guest_ppc64.h"
+#include "libvex_guest_s390x.h"
+#include "libvex_guest_mips32.h"
+#include "libvex_guest_mips64.h"
 
 #include "main_globals.h"
 #include "main_util.h"
 #include "host_amd64_defs.h"
 #include "host_ppc_defs.h"
 #include "host_arm_defs.h"
+#include "host_s390_defs.h"
+#include "host_mips_defs.h"
 
 #include "guest_generic_bb_to_IR.h"
 #include "guest_x86_defs.h"
 #include "guest_amd64_defs.h"
 #include "guest_arm_defs.h"
 #include "guest_ppc_defs.h"
+#include "guest_s390_defs.h"
+#include "guest_mips_defs.h"
 
 #include "host_generic_simd128.h"
 
 /* --------- fwds ... --------- */
 
 static Bool   are_valid_hwcaps ( VexArch arch, UInt hwcaps );
-static HChar* show_hwcaps ( VexArch arch, UInt hwcaps );
+static const HChar* show_hwcaps ( VexArch arch, UInt hwcaps );
+
+
+/* --------- helpers --------- */
+
+__attribute__((noinline))
+static UInt udiv32 ( UInt x, UInt y ) { return x/y; }
+__attribute__((noinline))
+static  Int sdiv32 (  Int x,  Int y ) { return x/y; }
 
 
 /* --------- Initialise the library. --------- */
@@ -76,7 +91,7 @@ void LibVEX_default_VexControl ( /*OUT*/ VexControl* vcon )
 {
    vcon->iropt_verbosity            = 0;
    vcon->iropt_level                = 2;
-   vcon->iropt_precise_memory_exns  = False;
+   vcon->iropt_register_updates     = VexRegUpdUnwindregsAtMemAccess;
    vcon->iropt_unroll_thresh        = 120;
    vcon->guest_max_insns            = 60;
    vcon->guest_chase_thresh         = 10;
@@ -144,6 +159,7 @@ void LibVEX_Init (
    vassert(8 == sizeof(Addr64));
    vassert(16 == sizeof(U128));
    vassert(16 == sizeof(V128));
+   vassert(32 == sizeof(U256));
 
    vassert(sizeof(void*) == 4 || sizeof(void*) == 8);
    vassert(sizeof(void*) == sizeof(int*));
@@ -152,6 +168,27 @@ void LibVEX_Init (
    vassert(VEX_HOST_WORDSIZE == sizeof(void*));
    vassert(VEX_HOST_WORDSIZE == sizeof(HWord));
 
+   /* These take a lot of space, so make sure we don't have
+      any unnoticed size regressions. */
+   if (VEX_HOST_WORDSIZE == 4) {
+      vassert(sizeof(IRExpr) == 16);
+      vassert(sizeof(IRStmt) == 20 /* x86 */
+              || sizeof(IRStmt) == 24 /* arm */);
+   } else {
+      vassert(sizeof(IRExpr) == 32);
+      vassert(sizeof(IRStmt) == 32);
+   }
+
+   /* Check that signed integer division on the host rounds towards
+      zero.  If not, h_calc_sdiv32_w_arm_semantics() won't work
+      correctly. */
+   /* 100.0 / 7.0 == 14.2857 */
+   vassert(udiv32(100, 7) == 14);
+   vassert(sdiv32(100, 7) == 14);
+   vassert(sdiv32(-100, 7) == -14); /* and not -15 */
+   vassert(sdiv32(100, -7) == -14); /* ditto */
+   vassert(sdiv32(-100, -7) == 14); /* not sure what this proves */
+
    /* Really start up .. */
    vex_debuglevel         = debuglevel;
    vex_valgrind_support   = valgrind_support;
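
The udiv32/sdiv32 helpers added above are noinline, presumably so the divisions stay genuine run-time operations the compiler cannot fold away; the vasserts then confirm that the host's signed integer division truncates toward zero, which h_calc_sdiv32_w_arm_semantics() depends on. As a standalone illustration of the property being checked (plain C99, independent of VEX):

   #include <assert.h>

   int main(void)
   {
      /* C99 6.5.5: integer division discards the fractional part,
         i.e. it truncates toward zero rather than flooring. */
      assert( 100 /  7 ==  14);
      assert(-100 /  7 == -14);   /* not -15 */
      assert( 100 / -7 == -14);
      assert(-100 / -7 ==  14);
      return 0;
   }
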
@@ -180,10 +217,12 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
    HInstr*      (*directReload) ( HInstr*, HReg, Short );
    void         (*ppInstr)      ( HInstr*, Bool );
    void         (*ppReg)        ( HReg );
-   HInstrArray* (*iselSB)       ( IRSB*, VexArch, VexArchInfo*, 
-                                                  VexAbiInfo* );
-   Int          (*emit)         ( UChar*, Int, HInstr*, Bool, void* );
-   IRExpr*      (*specHelper)   ( HChar*, IRExpr**, IRStmt**, Int );
+   HInstrArray* (*iselSB)       ( IRSB*, VexArch, VexArchInfo*, VexAbiInfo*,
+                                  Int, Int, Bool, Bool, Addr64 );
+   Int          (*emit)         ( /*MB_MOD*/Bool*,
+                                  UChar*, Int, HInstr*, Bool,
+                                  void*, void*, void*, void* );
+   IRExpr*      (*specHelper)   ( const HChar*, IRExpr**, IRStmt**, Int );
    Bool         (*preciseMemExnsFn) ( Int, Int );
 
    DisOneInstrFn disInstrFn;
@@ -194,11 +233,13 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
    HInstrArray*    vcode;
    HInstrArray*    rcode;
    Int             i, j, k, out_used, guest_sizeB;
-   Int             offB_TISTART, offB_TILEN;
-   UChar           insn_bytes[32];
+   Int             offB_TISTART, offB_TILEN, offB_GUEST_IP, szB_GUEST_IP;
+   Int             offB_HOST_EvC_COUNTER, offB_HOST_EvC_FAILADDR;
+   UChar           insn_bytes[128];
    IRType          guest_word_type;
    IRType          host_word_type;
-   Bool            mode64;
+   Bool            mode64, chainingAllowed;
+   Addr64          max_ga;
 
    guest_layout           = NULL;
    available_real_regs    = NULL;
@@ -220,11 +261,28 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
    host_word_type         = Ity_INVALID;
    offB_TISTART           = 0;
    offB_TILEN             = 0;
+   offB_GUEST_IP          = 0;
+   szB_GUEST_IP           = 0;
+   offB_HOST_EvC_COUNTER  = 0;
+   offB_HOST_EvC_FAILADDR = 0;
    mode64                 = False;
+   chainingAllowed        = False;
 
    vex_traceflags = vta->traceflags;
 
    vassert(vex_initdone);
+   vassert(vta->needs_self_check  != NULL);
+   vassert(vta->disp_cp_xassisted != NULL);
+   /* Both the chainers and the indir are either NULL or non-NULL. */
+   if (vta->disp_cp_chain_me_to_slowEP        != NULL) {
+      vassert(vta->disp_cp_chain_me_to_fastEP != NULL);
+      vassert(vta->disp_cp_xindir             != NULL);
+      chainingAllowed = True;
+   } else {
+      vassert(vta->disp_cp_chain_me_to_fastEP == NULL);
+      vassert(vta->disp_cp_xindir             == NULL);
+   }
+
    vexSetAllocModeTEMP_and_clear();
    vexAllocSanityCheck();
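
The assertions above spell out the contract for the new chaining hooks in VexTranslateArgs: needs_self_check and disp_cp_xassisted must always be supplied, while disp_cp_chain_me_to_slowEP, disp_cp_chain_me_to_fastEP and disp_cp_xindir are either all non-NULL (enabling chained direct transfers) or all NULL (forcing the assisted-transfer scheme). A client-side sketch; the my_disp_* entry points are hypothetical stand-ins for the real dispatcher labels, and the rest of vta is assumed to be filled in as before:

   /* Hypothetical dispatcher entry points -- names are illustrative only. */
   extern void my_disp_chain_me_slowEP ( void );
   extern void my_disp_chain_me_fastEP ( void );
   extern void my_disp_xindir          ( void );
   extern void my_disp_xassisted       ( void );

   static void setup_chaining ( VexTranslateArgs* vta, Bool allow_chaining )
   {
      vta->disp_cp_xassisted = (void*)&my_disp_xassisted;  /* always required */
      if (allow_chaining) {
         vta->disp_cp_chain_me_to_slowEP = (void*)&my_disp_chain_me_slowEP;
         vta->disp_cp_chain_me_to_fastEP = (void*)&my_disp_chain_me_fastEP;
         vta->disp_cp_xindir             = (void*)&my_disp_xindir;
      } else {
         /* All three NULL: LibVEX_Translate keeps chainingAllowed False. */
         vta->disp_cp_chain_me_to_slowEP = NULL;
         vta->disp_cp_chain_me_to_fastEP = NULL;
         vta->disp_cp_xindir             = NULL;
      }
   }
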
 
@@ -249,11 +307,12 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
          ppInstr      = (void(*)(HInstr*, Bool)) ppX86Instr;
          ppReg        = (void(*)(HReg)) ppHRegX86;
          iselSB       = iselSB_X86;
-         emit         = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_X86Instr;
+         emit         = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+                               void*,void*,void*,void*))
+                        emit_X86Instr;
          host_is_bigendian = False;
          host_word_type    = Ity_I32;
          vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_host.hwcaps));
-         vassert(vta->dispatch != NULL); /* jump-to-dispatcher scheme */
          break;
 
       case VexArchAMD64:
@@ -271,11 +330,12 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
          ppInstr     = (void(*)(HInstr*, Bool)) ppAMD64Instr;
          ppReg       = (void(*)(HReg)) ppHRegAMD64;
          iselSB      = iselSB_AMD64;
-         emit        = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_AMD64Instr;
+         emit        = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+                               void*,void*,void*,void*))
+                       emit_AMD64Instr;
          host_is_bigendian = False;
          host_word_type    = Ity_I64;
          vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_host.hwcaps));
-         vassert(vta->dispatch != NULL); /* jump-to-dispatcher scheme */
          break;
 
       case VexArchPPC32:
@@ -290,11 +350,12 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
          ppInstr     = (void(*)(HInstr*,Bool)) ppPPCInstr;
          ppReg       = (void(*)(HReg)) ppHRegPPC;
          iselSB      = iselSB_PPC;
-         emit        = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_PPCInstr;
+         emit        = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+                               void*,void*,void*,void*))
+                       emit_PPCInstr;
          host_is_bigendian = True;
          host_word_type    = Ity_I32;
          vassert(are_valid_hwcaps(VexArchPPC32, vta->archinfo_host.hwcaps));
-         vassert(vta->dispatch == NULL); /* return-to-dispatcher scheme */
          break;
 
       case VexArchPPC64:
@@ -309,11 +370,31 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
          ppInstr     = (void(*)(HInstr*, Bool)) ppPPCInstr;
          ppReg       = (void(*)(HReg)) ppHRegPPC;
          iselSB      = iselSB_PPC;
-         emit        = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_PPCInstr;
+         emit        = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+                               void*,void*,void*,void*))
+                       emit_PPCInstr;
          host_is_bigendian = True;
          host_word_type    = Ity_I64;
          vassert(are_valid_hwcaps(VexArchPPC64, vta->archinfo_host.hwcaps));
-         vassert(vta->dispatch == NULL); /* return-to-dispatcher scheme */
+         break;
+
+      case VexArchS390X:
+         mode64      = True;
+         getAllocableRegs_S390 ( &n_available_real_regs,
+                                 &available_real_regs, mode64 );
+         isMove      = (Bool(*)(HInstr*,HReg*,HReg*)) isMove_S390Instr;
+         getRegUsage = (void(*)(HRegUsage*,HInstr*, Bool)) getRegUsage_S390Instr;
+         mapRegs     = (void(*)(HRegRemap*,HInstr*, Bool)) mapRegs_S390Instr;
+         genSpill    = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genSpill_S390;
+         genReload   = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genReload_S390;
+         ppInstr     = (void(*)(HInstr*, Bool)) ppS390Instr;
+         ppReg       = (void(*)(HReg)) ppHRegS390;
+         iselSB      = iselSB_S390;
+         emit        = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+                               void*,void*,void*,void*)) emit_S390Instr;
+         host_is_bigendian = True;
+         host_word_type    = Ity_I64;
+         vassert(are_valid_hwcaps(VexArchS390X, vta->archinfo_host.hwcaps));
          break;
 
       case VexArchARM:
@@ -328,11 +409,60 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
          ppInstr     = (void(*)(HInstr*, Bool)) ppARMInstr;
          ppReg       = (void(*)(HReg)) ppHRegARM;
          iselSB      = iselSB_ARM;
-         emit        = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_ARMInstr;
+         emit        = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+                               void*,void*,void*,void*))
+                       emit_ARMInstr;
          host_is_bigendian = False;
          host_word_type    = Ity_I32;
          vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_host.hwcaps));
-         vassert(vta->dispatch == NULL); /* return-to-dispatcher scheme */
+         break;
+
+      case VexArchMIPS32:
+         mode64      = False;
+         getAllocableRegs_MIPS ( &n_available_real_regs,
+                                &available_real_regs, mode64 );
+         isMove      = (Bool(*)(HInstr*,HReg*,HReg*)) isMove_MIPSInstr;
+         getRegUsage = (void(*)(HRegUsage*,HInstr*, Bool)) getRegUsage_MIPSInstr;
+         mapRegs     = (void(*)(HRegRemap*,HInstr*, Bool)) mapRegs_MIPSInstr;
+         genSpill    = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genSpill_MIPS;
+         genReload   = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genReload_MIPS;
+         ppInstr     = (void(*)(HInstr*, Bool)) ppMIPSInstr;
+         ppReg       = (void(*)(HReg)) ppHRegMIPS;
+         iselSB      = iselSB_MIPS;
+         emit        = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+                               void*,void*,void*,void*))
+                       emit_MIPSInstr;
+#        if defined(VKI_LITTLE_ENDIAN)
+         host_is_bigendian = False;
+#        elif defined(VKI_BIG_ENDIAN)
+         host_is_bigendian = True;
+#        endif
+         host_word_type    = Ity_I32;
+         vassert(are_valid_hwcaps(VexArchMIPS32, vta->archinfo_host.hwcaps));
+         break;
+
+      case VexArchMIPS64:
+         mode64      = True;
+         getAllocableRegs_MIPS ( &n_available_real_regs,
+                                 &available_real_regs, mode64 );
+         isMove      = (Bool(*)(HInstr*,HReg*,HReg*)) isMove_MIPSInstr;
+         getRegUsage = (void(*)(HRegUsage*,HInstr*, Bool)) getRegUsage_MIPSInstr;
+         mapRegs     = (void(*)(HRegRemap*,HInstr*, Bool)) mapRegs_MIPSInstr;
+         genSpill    = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genSpill_MIPS;
+         genReload   = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genReload_MIPS;
+         ppInstr     = (void(*)(HInstr*, Bool)) ppMIPSInstr;
+         ppReg       = (void(*)(HReg)) ppHRegMIPS;
+         iselSB      = iselSB_MIPS;
+         emit        = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+                               void*,void*,void*,void*))
+                       emit_MIPSInstr;
+#        if defined(VKI_LITTLE_ENDIAN)
+         host_is_bigendian = False;
+#        elif defined(VKI_BIG_ENDIAN)
+         host_is_bigendian = True;
+#        endif
+         host_word_type    = Ity_I64;
+         vassert(are_valid_hwcaps(VexArchMIPS64, vta->archinfo_host.hwcaps));
          break;
 
       default:
@@ -343,14 +473,18 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
    switch (vta->arch_guest) {
 
       case VexArchX86:
-         preciseMemExnsFn = guest_x86_state_requires_precise_mem_exns;
-         disInstrFn       = disInstr_X86;
-         specHelper       = guest_x86_spechelper;
-         guest_sizeB      = sizeof(VexGuestX86State);
-         guest_word_type  = Ity_I32;
-         guest_layout     = &x86guest_layout;
-         offB_TISTART     = offsetof(VexGuestX86State,guest_TISTART);
-         offB_TILEN       = offsetof(VexGuestX86State,guest_TILEN);
+         preciseMemExnsFn       = guest_x86_state_requires_precise_mem_exns;
+         disInstrFn             = disInstr_X86;
+         specHelper             = guest_x86_spechelper;
+         guest_sizeB            = sizeof(VexGuestX86State);
+         guest_word_type        = Ity_I32;
+         guest_layout           = &x86guest_layout;
+         offB_TISTART           = offsetof(VexGuestX86State,guest_TISTART);
+         offB_TILEN             = offsetof(VexGuestX86State,guest_TILEN);
+         offB_GUEST_IP          = offsetof(VexGuestX86State,guest_EIP);
+         szB_GUEST_IP           = sizeof( ((VexGuestX86State*)0)->guest_EIP );
+         offB_HOST_EvC_COUNTER  = offsetof(VexGuestX86State,host_EvC_COUNTER);
+         offB_HOST_EvC_FAILADDR = offsetof(VexGuestX86State,host_EvC_FAILADDR);
          vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_guest.hwcaps));
          vassert(0 == sizeof(VexGuestX86State) % 16);
          vassert(sizeof( ((VexGuestX86State*)0)->guest_TISTART) == 4);
@@ -359,14 +493,18 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
          break;
 
       case VexArchAMD64:
-         preciseMemExnsFn = guest_amd64_state_requires_precise_mem_exns;
-         disInstrFn       = disInstr_AMD64;
-         specHelper       = guest_amd64_spechelper;
-         guest_sizeB      = sizeof(VexGuestAMD64State);
-         guest_word_type  = Ity_I64;
-         guest_layout     = &amd64guest_layout;
-         offB_TISTART     = offsetof(VexGuestAMD64State,guest_TISTART);
-         offB_TILEN       = offsetof(VexGuestAMD64State,guest_TILEN);
+         preciseMemExnsFn       = guest_amd64_state_requires_precise_mem_exns;
+         disInstrFn             = disInstr_AMD64;
+         specHelper             = guest_amd64_spechelper;
+         guest_sizeB            = sizeof(VexGuestAMD64State);
+         guest_word_type        = Ity_I64;
+         guest_layout           = &amd64guest_layout;
+         offB_TISTART           = offsetof(VexGuestAMD64State,guest_TISTART);
+         offB_TILEN             = offsetof(VexGuestAMD64State,guest_TILEN);
+         offB_GUEST_IP          = offsetof(VexGuestAMD64State,guest_RIP);
+         szB_GUEST_IP           = sizeof( ((VexGuestAMD64State*)0)->guest_RIP );
+         offB_HOST_EvC_COUNTER  = offsetof(VexGuestAMD64State,host_EvC_COUNTER);
+         offB_HOST_EvC_FAILADDR = offsetof(VexGuestAMD64State,host_EvC_FAILADDR);
          vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_guest.hwcaps));
          vassert(0 == sizeof(VexGuestAMD64State) % 16);
          vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TISTART ) == 8);
@@ -375,14 +513,18 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
          break;
 
       case VexArchPPC32:
-         preciseMemExnsFn = guest_ppc32_state_requires_precise_mem_exns;
-         disInstrFn       = disInstr_PPC;
-         specHelper       = guest_ppc32_spechelper;
-         guest_sizeB      = sizeof(VexGuestPPC32State);
-         guest_word_type  = Ity_I32;
-         guest_layout     = &ppc32Guest_layout;
-         offB_TISTART     = offsetof(VexGuestPPC32State,guest_TISTART);
-         offB_TILEN       = offsetof(VexGuestPPC32State,guest_TILEN);
+         preciseMemExnsFn       = guest_ppc32_state_requires_precise_mem_exns;
+         disInstrFn             = disInstr_PPC;
+         specHelper             = guest_ppc32_spechelper;
+         guest_sizeB            = sizeof(VexGuestPPC32State);
+         guest_word_type        = Ity_I32;
+         guest_layout           = &ppc32Guest_layout;
+         offB_TISTART           = offsetof(VexGuestPPC32State,guest_TISTART);
+         offB_TILEN             = offsetof(VexGuestPPC32State,guest_TILEN);
+         offB_GUEST_IP          = offsetof(VexGuestPPC32State,guest_CIA);
+         szB_GUEST_IP           = sizeof( ((VexGuestPPC32State*)0)->guest_CIA );
+         offB_HOST_EvC_COUNTER  = offsetof(VexGuestPPC32State,host_EvC_COUNTER);
+         offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC32State,host_EvC_FAILADDR);
          vassert(are_valid_hwcaps(VexArchPPC32, vta->archinfo_guest.hwcaps));
          vassert(0 == sizeof(VexGuestPPC32State) % 16);
          vassert(sizeof( ((VexGuestPPC32State*)0)->guest_TISTART ) == 4);
@@ -391,14 +533,18 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
          break;
 
       case VexArchPPC64:
-         preciseMemExnsFn = guest_ppc64_state_requires_precise_mem_exns;
-         disInstrFn       = disInstr_PPC;
-         specHelper       = guest_ppc64_spechelper;
-         guest_sizeB      = sizeof(VexGuestPPC64State);
-         guest_word_type  = Ity_I64;
-         guest_layout     = &ppc64Guest_layout;
-         offB_TISTART     = offsetof(VexGuestPPC64State,guest_TISTART);
-         offB_TILEN       = offsetof(VexGuestPPC64State,guest_TILEN);
+         preciseMemExnsFn       = guest_ppc64_state_requires_precise_mem_exns;
+         disInstrFn             = disInstr_PPC;
+         specHelper             = guest_ppc64_spechelper;
+         guest_sizeB            = sizeof(VexGuestPPC64State);
+         guest_word_type        = Ity_I64;
+         guest_layout           = &ppc64Guest_layout;
+         offB_TISTART           = offsetof(VexGuestPPC64State,guest_TISTART);
+         offB_TILEN             = offsetof(VexGuestPPC64State,guest_TILEN);
+         offB_GUEST_IP          = offsetof(VexGuestPPC64State,guest_CIA);
+         szB_GUEST_IP           = sizeof( ((VexGuestPPC64State*)0)->guest_CIA );
+         offB_HOST_EvC_COUNTER  = offsetof(VexGuestPPC64State,host_EvC_COUNTER);
+         offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC64State,host_EvC_FAILADDR);
          vassert(are_valid_hwcaps(VexArchPPC64, vta->archinfo_guest.hwcaps));
          vassert(0 == sizeof(VexGuestPPC64State) % 16);
          vassert(sizeof( ((VexGuestPPC64State*)0)->guest_TISTART    ) == 8);
@@ -407,15 +553,39 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
          vassert(sizeof( ((VexGuestPPC64State*)0)->guest_NRADDR_GPR2) == 8);
          break;
 
+      case VexArchS390X:
+         preciseMemExnsFn = guest_s390x_state_requires_precise_mem_exns;
+         disInstrFn       = disInstr_S390;
+         specHelper       = guest_s390x_spechelper;
+         guest_sizeB      = sizeof(VexGuestS390XState);
+         guest_word_type  = Ity_I64;
+         guest_layout     = &s390xGuest_layout;
+         offB_TISTART     = offsetof(VexGuestS390XState,guest_TISTART);
+         offB_TILEN       = offsetof(VexGuestS390XState,guest_TILEN);
+         offB_GUEST_IP          = offsetof(VexGuestS390XState,guest_IA);
+         szB_GUEST_IP           = sizeof( ((VexGuestS390XState*)0)->guest_IA);
+         offB_HOST_EvC_COUNTER  = offsetof(VexGuestS390XState,host_EvC_COUNTER);
+         offB_HOST_EvC_FAILADDR = offsetof(VexGuestS390XState,host_EvC_FAILADDR);
+         vassert(are_valid_hwcaps(VexArchS390X, vta->archinfo_guest.hwcaps));
+         vassert(0 == sizeof(VexGuestS390XState) % 16);
+         vassert(sizeof( ((VexGuestS390XState*)0)->guest_TISTART    ) == 8);
+         vassert(sizeof( ((VexGuestS390XState*)0)->guest_TILEN      ) == 8);
+         vassert(sizeof( ((VexGuestS390XState*)0)->guest_NRADDR     ) == 8);
+         break;
+
       case VexArchARM:
-         preciseMemExnsFn = guest_arm_state_requires_precise_mem_exns;
-         disInstrFn       = disInstr_ARM;
-         specHelper       = guest_arm_spechelper;
-         guest_sizeB      = sizeof(VexGuestARMState);
-         guest_word_type  = Ity_I32;
-         guest_layout     = &armGuest_layout;
-         offB_TISTART     = offsetof(VexGuestARMState,guest_TISTART);
-         offB_TILEN       = offsetof(VexGuestARMState,guest_TILEN);
+         preciseMemExnsFn       = guest_arm_state_requires_precise_mem_exns;
+         disInstrFn             = disInstr_ARM;
+         specHelper             = guest_arm_spechelper;
+         guest_sizeB            = sizeof(VexGuestARMState);
+         guest_word_type        = Ity_I32;
+         guest_layout           = &armGuest_layout;
+         offB_TISTART           = offsetof(VexGuestARMState,guest_TISTART);
+         offB_TILEN             = offsetof(VexGuestARMState,guest_TILEN);
+         offB_GUEST_IP          = offsetof(VexGuestARMState,guest_R15T);
+         szB_GUEST_IP           = sizeof( ((VexGuestARMState*)0)->guest_R15T );
+         offB_HOST_EvC_COUNTER  = offsetof(VexGuestARMState,host_EvC_COUNTER);
+         offB_HOST_EvC_FAILADDR = offsetof(VexGuestARMState,host_EvC_FAILADDR);
          vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_guest.hwcaps));
          vassert(0 == sizeof(VexGuestARMState) % 16);
          vassert(sizeof( ((VexGuestARMState*)0)->guest_TISTART) == 4);
@@ -423,10 +593,57 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
          vassert(sizeof( ((VexGuestARMState*)0)->guest_NRADDR ) == 4);
          break;
 
+      case VexArchMIPS32:
+         preciseMemExnsFn       = guest_mips32_state_requires_precise_mem_exns;
+         disInstrFn             = disInstr_MIPS;
+         specHelper             = guest_mips32_spechelper;
+         guest_sizeB            = sizeof(VexGuestMIPS32State);
+         guest_word_type        = Ity_I32;
+         guest_layout           = &mips32Guest_layout;
+         offB_TISTART           = offsetof(VexGuestMIPS32State,guest_TISTART);
+         offB_TILEN             = offsetof(VexGuestMIPS32State,guest_TILEN);
+         offB_GUEST_IP          = offsetof(VexGuestMIPS32State,guest_PC);
+         szB_GUEST_IP           = sizeof( ((VexGuestMIPS32State*)0)->guest_PC );
+         offB_HOST_EvC_COUNTER  = offsetof(VexGuestMIPS32State,host_EvC_COUNTER);
+         offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS32State,host_EvC_FAILADDR);
+         vassert(are_valid_hwcaps(VexArchMIPS32, vta->archinfo_guest.hwcaps));
+         vassert(0 == sizeof(VexGuestMIPS32State) % 16);
+         vassert(sizeof( ((VexGuestMIPS32State*)0)->guest_TISTART) == 4);
+         vassert(sizeof( ((VexGuestMIPS32State*)0)->guest_TILEN  ) == 4);
+         vassert(sizeof( ((VexGuestMIPS32State*)0)->guest_NRADDR ) == 4);
+         break;
+
+      case VexArchMIPS64:
+         preciseMemExnsFn       = guest_mips64_state_requires_precise_mem_exns;
+         disInstrFn             = disInstr_MIPS;
+         specHelper             = guest_mips64_spechelper;
+         guest_sizeB            = sizeof(VexGuestMIPS64State);
+         guest_word_type        = Ity_I64;
+         guest_layout           = &mips64Guest_layout;
+         offB_TISTART           = offsetof(VexGuestMIPS64State,guest_TISTART);
+         offB_TILEN             = offsetof(VexGuestMIPS64State,guest_TILEN);
+         offB_GUEST_IP          = offsetof(VexGuestMIPS64State,guest_PC);
+         szB_GUEST_IP           = sizeof( ((VexGuestMIPS64State*)0)->guest_PC );
+         offB_HOST_EvC_COUNTER  = offsetof(VexGuestMIPS64State,host_EvC_COUNTER);
+         offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS64State,host_EvC_FAILADDR);
+         vassert(are_valid_hwcaps(VexArchMIPS64, vta->archinfo_guest.hwcaps));
+         vassert(0 == sizeof(VexGuestMIPS64State) % 16);
+         vassert(sizeof( ((VexGuestMIPS64State*)0)->guest_TISTART) == 8);
+         vassert(sizeof( ((VexGuestMIPS64State*)0)->guest_TILEN  ) == 8);
+         vassert(sizeof( ((VexGuestMIPS64State*)0)->guest_NRADDR ) == 8);
+         break;
+
       default:
          vpanic("LibVEX_Translate: unsupported guest insn set");
    }
 
+   /* Set up result struct. */
+   VexTranslateResult res;
+   res.status         = VexTransOK;
+   res.n_sc_extents   = 0;
+   res.offs_profInc   = -1;
+   res.n_guest_instrs = 0;
+
    /* yet more sanity checks ... */
    if (vta->arch_guest == vta->arch_host) {
       /* doesn't necessarily have to be true, but if it isn't it means
@@ -443,20 +660,25 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
                    "------------------------\n\n");
 
    irsb = bb_to_IR ( vta->guest_extents,
+                     &res.n_sc_extents,
+                     &res.n_guest_instrs,
                      vta->callback_opaque,
                      disInstrFn,
                      vta->guest_bytes, 
                      vta->guest_bytes_addr,
                      vta->chase_into_ok,
                      host_is_bigendian,
+                     vta->sigill_diag,
                      vta->arch_guest,
                      &vta->archinfo_guest,
                      &vta->abiinfo_both,
                      guest_word_type,
-                     vta->do_self_check,
+                     vta->needs_self_check,
                      vta->preamble_function,
                      offB_TISTART,
-                     offB_TILEN );
+                     offB_TILEN,
+                     offB_GUEST_IP,
+                     szB_GUEST_IP );
 
    vexAllocSanityCheck();
 
@@ -464,7 +686,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
       /* Access failure. */
       vexSetAllocModeTEMP_and_clear();
       vex_traceflags = 0;
-      return VexTransAccessFail;
+      res.status = VexTransAccessFail; return res;
    }
 
    vassert(vta->guest_extents->n_used >= 1 && vta->guest_extents->n_used <= 3);
@@ -521,6 +743,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
       irsb = vta->instrument1(vta->callback_opaque,
                               irsb, guest_layout, 
                               vta->guest_extents,
+                              &vta->archinfo_host,
                               guest_word_type, host_word_type);
    vexAllocSanityCheck();
 
@@ -528,6 +751,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
       irsb = vta->instrument2(vta->callback_opaque,
                               irsb, guest_layout,
                               vta->guest_extents,
+                              &vta->archinfo_host,
                               guest_word_type, host_word_type);
       
    if (vex_traceflags & VEX_TRACE_INST) {
@@ -563,7 +787,7 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
 
    /* Turn it into virtual-registerised code.  Build trees -- this
       also throws away any dead bindings. */
-   ado_treebuild_BB( irsb );
+   max_ga = ado_treebuild_BB( irsb, preciseMemExnsFn );
 
    if (vta->finaltidy) {
       irsb = vta->finaltidy(irsb);
@@ -580,7 +804,10 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
    }
 
    /* HACK */
-   if (0) { *(vta->host_bytes_used) = 0; return VexTransOK; }
+   if (0) {
+      *(vta->host_bytes_used) = 0;
+      res.status = VexTransOK; return res;
+   }
    /* end HACK */
 
    if (vex_traceflags & VEX_TRACE_VCODE)
@@ -588,8 +815,19 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
                    " Instruction selection "
                    "------------------------\n");
 
-   vcode = iselSB ( irsb, vta->arch_host, &vta->archinfo_host, 
-                                          &vta->abiinfo_both );
+   /* No guest has its IP field at offset zero.  If this fails it
+      means some transformation pass somewhere failed to update/copy
+      irsb->offsIP properly. */
+   vassert(irsb->offsIP >= 16);
+
+   vcode = iselSB ( irsb, vta->arch_host,
+                    &vta->archinfo_host, 
+                    &vta->abiinfo_both,
+                    offB_HOST_EvC_COUNTER,
+                    offB_HOST_EvC_FAILADDR,
+                    chainingAllowed,
+                    vta->addProfInc,
+                    max_ga );
 
    vexAllocSanityCheck();
 
@@ -628,7 +866,10 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
    }
 
    /* HACK */
-   if (0) { *(vta->host_bytes_used) = 0; return VexTransOK; }
+   if (0) { 
+      *(vta->host_bytes_used) = 0;
+      res.status = VexTransOK; return res;
+   }
    /* end HACK */
 
    /* Assemble */
@@ -640,12 +881,19 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
 
    out_used = 0; /* tracks along the host_bytes array */
    for (i = 0; i < rcode->arr_used; i++) {
-      if (vex_traceflags & VEX_TRACE_ASM) {
-         ppInstr(rcode->arr[i], mode64);
+      HInstr* hi           = rcode->arr[i];
+      Bool    hi_isProfInc = False;
+      if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) {
+         ppInstr(hi, mode64);
          vex_printf("\n");
       }
-      j = (*emit)( insn_bytes, 32, rcode->arr[i], mode64, vta->dispatch );
-      if (vex_traceflags & VEX_TRACE_ASM) {
+      j = emit( &hi_isProfInc,
+                insn_bytes, sizeof insn_bytes, hi, mode64,
+                vta->disp_cp_chain_me_to_slowEP,
+                vta->disp_cp_chain_me_to_fastEP,
+                vta->disp_cp_xindir,
+                vta->disp_cp_xassisted );
+      if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) {
          for (k = 0; k < j; k++)
             if (insn_bytes[k] < 16)
                vex_printf("0%x ",  (UInt)insn_bytes[k]);
@@ -653,14 +901,23 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
                vex_printf("%x ", (UInt)insn_bytes[k]);
          vex_printf("\n\n");
       }
-      if (out_used + j > vta->host_bytes_size) {
+      if (UNLIKELY(out_used + j > vta->host_bytes_size)) {
          vexSetAllocModeTEMP_and_clear();
          vex_traceflags = 0;
-         return VexTransOutputFull;
+         res.status = VexTransOutputFull;
+         return res;
       }
-      for (k = 0; k < j; k++) {
-         vta->host_bytes[out_used] = insn_bytes[k];
-         out_used++;
+      if (UNLIKELY(hi_isProfInc)) {
+         vassert(vta->addProfInc); /* else where did it come from? */
+         vassert(res.offs_profInc == -1); /* there can be only one (tm) */
+         vassert(out_used >= 0);
+         res.offs_profInc = out_used;
+      }
+      { UChar* dst = &vta->host_bytes[out_used];
+        for (k = 0; k < j; k++) {
+           dst[k] = insn_bytes[k];
+        }
+        out_used += j;
       }
       vassert(out_used <= vta->host_bytes_size);
    }
@@ -670,17 +927,174 @@ VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
 
    vexSetAllocModeTEMP_and_clear();
 
+   if (vex_traceflags) {
+      /* Print the expansion ratio for this SB. */
+      j = 0; /* total guest bytes */
+      for (i = 0; i < vta->guest_extents->n_used; i++) {
+         j += vta->guest_extents->len[i];
+      }
+      if (1) vex_printf("VexExpansionRatio %d %d   %d :10\n\n",
+                        j, out_used, (10 * out_used) / (j == 0 ? 1 : j));
+   }
+
    vex_traceflags = 0;
-   return VexTransOK;
+   res.status = VexTransOK;
+   return res;
+}
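
LibVEX_Translate now returns a VexTranslateResult structure rather than a bare status code, so a caller gets the self-check extent count, the guest instruction count and the profiler-inc offset along with the status. A caller-side sketch (illustrative; assumes vta has been fully set up as before):

   VexTranslateResult tres = LibVEX_Translate ( &vta );
   switch (tres.status) {
      case VexTransOK:
         break;
      case VexTransAccessFail:
         /* guest bytes were not readable; drop this translation */
         break;
      case VexTransOutputFull:
         /* host_bytes was too small; retry with a bigger buffer */
         break;
   }
   if (tres.offs_profInc >= 0) {
      /* a profiler-inc point was emitted at this offset into host_bytes;
         it can later be redirected with LibVEX_PatchProfInc. */
   }
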
+
+
+/* --------- Chain/Unchain XDirects. --------- */
+
+VexInvalRange LibVEX_Chain ( VexArch arch_host,
+                             void*   place_to_chain,
+                             void*   disp_cp_chain_me_EXPECTED,
+                             void*   place_to_jump_to )
+{
+   VexInvalRange (*chainXDirect)(void*, void*, void*) = NULL;
+   switch (arch_host) {
+      case VexArchX86:
+         chainXDirect = chainXDirect_X86; break;
+      case VexArchAMD64:
+         chainXDirect = chainXDirect_AMD64; break;
+      case VexArchARM:
+         chainXDirect = chainXDirect_ARM; break;
+      case VexArchS390X:
+         chainXDirect = chainXDirect_S390; break;
+      case VexArchPPC32:
+         return chainXDirect_PPC(place_to_chain,
+                                 disp_cp_chain_me_EXPECTED,
+                                 place_to_jump_to, False/*!mode64*/);
+      case VexArchPPC64:
+         return chainXDirect_PPC(place_to_chain,
+                                 disp_cp_chain_me_EXPECTED,
+                                 place_to_jump_to, True/*mode64*/);
+      case VexArchMIPS32:
+         return chainXDirect_MIPS(place_to_chain,
+                                  disp_cp_chain_me_EXPECTED,
+                                  place_to_jump_to, False/*!mode64*/);
+      case VexArchMIPS64:
+         return chainXDirect_MIPS(place_to_chain,
+                                  disp_cp_chain_me_EXPECTED,
+                                  place_to_jump_to, True/*mode64*/);
+      default:
+         vassert(0);
+   }
+   vassert(chainXDirect);
+   VexInvalRange vir
+      = chainXDirect(place_to_chain, disp_cp_chain_me_EXPECTED,
+                     place_to_jump_to);
+   return vir;
+}
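
Together with LibVEX_UnChain below, this lets the client flip an XDirect transfer between its unchained form (a call to the chain-me stub) and a direct jump to the destination translation. A minimal sketch, assuming the client already tracks the patch site, the stub address and the target code address; the returned VexInvalRange describes the host code range whose instruction cache needs flushing:

   static void chain_and_unchain_example ( void* to_patch,
                                           void* chain_me_stub,
                                           void* target_code )
   {
      VexInvalRange vir;
      /* Rewrite the call-to-stub at to_patch into a direct jump to
         target_code; arch must match the host the code was emitted for. */
      vir = LibVEX_Chain ( VexArchAMD64, to_patch,
                           chain_me_stub, target_code );
      /* ... later, if target_code is discarded, undo the chaining ... */
      vir = LibVEX_UnChain ( VexArchAMD64, to_patch,
                             target_code, chain_me_stub );
      (void)vir;
   }
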
+
+VexInvalRange LibVEX_UnChain ( VexArch arch_host,
+                               void*   place_to_unchain,
+                               void*   place_to_jump_to_EXPECTED,
+                               void*   disp_cp_chain_me )
+{
+   VexInvalRange (*unchainXDirect)(void*, void*, void*) = NULL;
+   switch (arch_host) {
+      case VexArchX86:
+         unchainXDirect = unchainXDirect_X86; break;
+      case VexArchAMD64:
+         unchainXDirect = unchainXDirect_AMD64; break;
+      case VexArchARM:
+         unchainXDirect = unchainXDirect_ARM; break;
+      case VexArchS390X:
+         unchainXDirect = unchainXDirect_S390; break;
+      case VexArchPPC32:
+         return unchainXDirect_PPC(place_to_unchain,
+                                   place_to_jump_to_EXPECTED,
+                                   disp_cp_chain_me, False/*!mode64*/);
+      case VexArchPPC64:
+         return unchainXDirect_PPC(place_to_unchain,
+                                   place_to_jump_to_EXPECTED,
+                                   disp_cp_chain_me, True/*mode64*/);
+      case VexArchMIPS32:
+         return unchainXDirect_MIPS(place_to_unchain,
+                                    place_to_jump_to_EXPECTED,
+                                    disp_cp_chain_me, False/*!mode64*/);
+      case VexArchMIPS64:
+         return unchainXDirect_MIPS(place_to_unchain,
+                                    place_to_jump_to_EXPECTED,
+                                    disp_cp_chain_me, True/*mode64*/);
+      default:
+         vassert(0);
+   }
+   vassert(unchainXDirect);
+   VexInvalRange vir
+      = unchainXDirect(place_to_unchain, place_to_jump_to_EXPECTED,
+                       disp_cp_chain_me);
+   return vir;
+}
+
+Int LibVEX_evCheckSzB ( VexArch arch_host )
+{
+   static Int cached = 0; /* DO NOT MAKE NON-STATIC */
+   if (UNLIKELY(cached == 0)) {
+      switch (arch_host) {
+         case VexArchX86:
+            cached = evCheckSzB_X86(); break;
+         case VexArchAMD64:
+            cached = evCheckSzB_AMD64(); break;
+         case VexArchARM:
+            cached = evCheckSzB_ARM(); break;
+         case VexArchS390X:
+            cached = evCheckSzB_S390(); break;
+         case VexArchPPC32:
+         case VexArchPPC64:
+            cached = evCheckSzB_PPC(); break;
+         case VexArchMIPS32:
+         case VexArchMIPS64:
+            cached = evCheckSzB_MIPS(); break;
+         default:
+            vassert(0);
+      }
+   }
+   return cached;
+}
+
+VexInvalRange LibVEX_PatchProfInc ( VexArch arch_host,
+                                    void*   place_to_patch,
+                                    ULong*  location_of_counter )
+{
+   VexInvalRange (*patchProfInc)(void*,ULong*) = NULL;
+   switch (arch_host) {
+      case VexArchX86:
+         patchProfInc = patchProfInc_X86; break;
+      case VexArchAMD64:
+         patchProfInc = patchProfInc_AMD64; break;
+      case VexArchARM:
+         patchProfInc = patchProfInc_ARM; break;
+      case VexArchS390X:
+         patchProfInc = patchProfInc_S390; break;
+      case VexArchPPC32:
+         return patchProfInc_PPC(place_to_patch,
+                                 location_of_counter, False/*!mode64*/);
+      case VexArchPPC64:
+         return patchProfInc_PPC(place_to_patch,
+                                 location_of_counter, True/*mode64*/);
+      case VexArchMIPS32:
+         return patchProfInc_MIPS(place_to_patch,
+                                  location_of_counter, False/*!mode64*/);
+      case VexArchMIPS64:
+         return patchProfInc_MIPS(place_to_patch,
+                                  location_of_counter, True/*mode64*/);
+      default:
+         vassert(0);
+   }
+   vassert(patchProfInc);
+   VexInvalRange vir
+      = patchProfInc(place_to_patch, location_of_counter);
+   return vir;
 }
 
 
 /* --------- Emulation warnings. --------- */
 
-HChar* LibVEX_EmWarn_string ( VexEmWarn ew )
+const HChar* LibVEX_EmNote_string ( VexEmNote ew )
 {
    switch (ew) {
-     case EmWarn_NONE: 
+     case EmNote_NONE: 
         return "none";
      case EmWarn_X86_x87exns:
         return "Unmasking x87 FP exceptions";
@@ -700,8 +1114,32 @@ HChar* LibVEX_EmWarn_string ( VexEmWarn ew )
         return "PPC64 function redirection stack overflow";
      case EmWarn_PPC64_redir_underflow:
         return "PPC64 function redirection stack underflow";
+     case EmWarn_S390X_fpext_rounding:
+        return "The specified rounding mode cannot be supported. That\n"
+               "  feature requires the floating point extension facility,\n"
+               "  which is not available on this host. Continuing using\n"
+               "  the rounding mode from FPC. Results may differ!";
+     case EmWarn_S390X_invalid_rounding:
+        return "The specified rounding mode is invalid.\n"
+               "  Continuing using 'round to nearest'. Results may differ!";
+     case EmFail_S390X_stfle:
+        return "Instruction stfle is not supported on this host";
+     case EmFail_S390X_stckf:
+        return "Instruction stckf is not supported on this host";
+     case EmFail_S390X_ecag:
+        return "Instruction ecag is not supported on this host";
+     case EmFail_S390X_fpext:
+        return "Encountered an instruction that requires the floating "
+               "point extension facility.\n"
+               "  That facility is not available on this host";
+     case EmFail_S390X_invalid_PFPO_rounding_mode:
+        return "The rounding mode specified in GPR 0 for PFPO instruction"
+               " is invalid";
+     case EmFail_S390X_invalid_PFPO_function:
+        return "The function code specified in GPR 0 for PFPO instruction"
+               " is invalid";
      default: 
-        vpanic("LibVEX_EmWarn_string: unknown warning");
+        vpanic("LibVEX_EmNote_string: unknown warning");
    }
 }
 
@@ -716,13 +1154,16 @@ const HChar* LibVEX_ppVexArch ( VexArch arch )
       case VexArchARM:      return "ARM";
       case VexArchPPC32:    return "PPC32";
       case VexArchPPC64:    return "PPC64";
+      case VexArchS390X:    return "S390X";
+      case VexArchMIPS32:   return "MIPS32";
+      case VexArchMIPS64:   return "MIPS64";
       default:              return "VexArch???";
    }
 }
 
 const HChar* LibVEX_ppVexHwCaps ( VexArch arch, UInt hwcaps )
 {
-   HChar* str = show_hwcaps(arch,hwcaps);
+   const HChar* str = show_hwcaps(arch,hwcaps);
    return str ? str : "INVALID";
 }
 
@@ -730,11 +1171,15 @@ const HChar* LibVEX_ppVexHwCaps ( VexArch arch, UInt hwcaps )
 /* Write default settings info *vai. */
 void LibVEX_default_VexArchInfo ( /*OUT*/VexArchInfo* vai )
 {
-   vai->hwcaps             = 0;
-   vai->ppc_cache_line_szB = 0;
-   vai->ppc_dcbz_szB       = 0;
-   vai->ppc_dcbzl_szB      = 0;
-
+   vai->hwcaps              = 0;
+   vai->ppc_icache_line_szB = 0;
+   vai->ppc_dcbz_szB        = 0;
+   vai->ppc_dcbzl_szB       = 0;
+
+   vai->hwcache_info.num_levels = 0;
+   vai->hwcache_info.num_caches = 0;
+   vai->hwcache_info.caches = NULL;
+   vai->hwcache_info.icaches_maintain_coherence = True;  // whatever
 }
 
 /* Write default settings info *vbi. */
@@ -755,59 +1200,93 @@ void LibVEX_default_VexAbiInfo ( /*OUT*/VexAbiInfo* vbi )
    be NULL for invalid combinations of flags, so these functions also
    serve as a way to validate hwcaps values. */
 
-static HChar* show_hwcaps_x86 ( UInt hwcaps ) 
+static const HChar* show_hwcaps_x86 ( UInt hwcaps ) 
 {
-   /* Monotonic, SSE3 > SSE2 > SSE1 > baseline. */
+   /* Monotonic, LZCNT > SSE3 > SSE2 > SSE1 > MMXEXT > baseline. */
    switch (hwcaps) {
       case 0:
          return "x86-sse0";
-      case VEX_HWCAPS_X86_SSE1:
-         return "x86-sse1";
-      case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2:
-         return "x86-sse1-sse2";
-      case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
+      case VEX_HWCAPS_X86_MMXEXT:
+         return "x86-mmxext";
+      case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1:
+         return "x86-mmxext-sse1";
+      case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2:
+         return "x86-mmxext-sse1-sse2";
+      case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
            | VEX_HWCAPS_X86_LZCNT:
-         return "x86-sse1-sse2-lzcnt";
-      case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
+         return "x86-mmxext-sse1-sse2-lzcnt";
+      case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
            | VEX_HWCAPS_X86_SSE3:
-         return "x86-sse1-sse2-sse3";
-      case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
+         return "x86-mmxext-sse1-sse2-sse3";
+      case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
            | VEX_HWCAPS_X86_SSE3 | VEX_HWCAPS_X86_LZCNT:
-         return "x86-sse1-sse2-sse3-lzcnt";
+         return "x86-mmxext-sse1-sse2-sse3-lzcnt";
       default:
          return NULL;
    }
 }
 
-static HChar* show_hwcaps_amd64 ( UInt hwcaps )
+static const HChar* show_hwcaps_amd64 ( UInt hwcaps )
 {
    /* SSE3 and CX16 are orthogonal and > baseline, although we really
       don't expect to come across anything which can do SSE3 but can't
       do CX16.  Still, we can handle that case.  LZCNT is similarly
       orthogonal. */
-   switch (hwcaps) {
-      case 0:
-         return "amd64-sse2";
-      case VEX_HWCAPS_AMD64_SSE3:
-         return "amd64-sse3";
-      case VEX_HWCAPS_AMD64_CX16:
-         return "amd64-sse2-cx16";
-      case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16:
-         return "amd64-sse3-cx16";
-      case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_LZCNT:
-         return "amd64-sse3-lzcnt";
-      case VEX_HWCAPS_AMD64_CX16 | VEX_HWCAPS_AMD64_LZCNT:
-         return "amd64-sse2-cx16-lzcnt";
-      case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16
-           | VEX_HWCAPS_AMD64_LZCNT:
-         return "amd64-sse3-cx16-lzcnt";
 
-      default:
-         return NULL;
+   /* Throw out obviously stupid cases: */
+   Bool have_sse3 = (hwcaps & VEX_HWCAPS_AMD64_SSE3) != 0;
+   Bool have_avx  = (hwcaps & VEX_HWCAPS_AMD64_AVX)  != 0;
+   Bool have_bmi  = (hwcaps & VEX_HWCAPS_AMD64_BMI)  != 0;
+   Bool have_avx2 = (hwcaps & VEX_HWCAPS_AMD64_AVX2) != 0;
+   /* AVX without SSE3 */
+   if (have_avx && !have_sse3)
+      return NULL;
+   /* AVX2 or BMI without AVX */
+   if ((have_avx2 || have_bmi) && !have_avx)
+      return NULL;
+
+   /* This isn't threadsafe.  We might need to fix it at some point. */
+   static HChar buf[100] = { 0 };
+   if (buf[0] != 0) return buf; /* already constructed */
+
+   vex_bzero(buf, sizeof(buf));
+
+   HChar* p = &buf[0];
+
+   p = p + vex_sprintf(p, "%s", "amd64");
+   if (hwcaps == 0) {
+      /* special-case the baseline case */
+      p = p + vex_sprintf(p, "%s", "-sse2");
+      goto out;
+   }
+   if (hwcaps & VEX_HWCAPS_AMD64_CX16) {
+      p = p + vex_sprintf(p, "%s", "-cx16");
+   }
+   if (hwcaps & VEX_HWCAPS_AMD64_LZCNT) {
+      p = p + vex_sprintf(p, "%s", "-lzcnt");
+   }
+   if (hwcaps & VEX_HWCAPS_AMD64_RDTSCP) {
+      p = p + vex_sprintf(p, "%s", "-rdtscp");
    }
+   if (hwcaps & VEX_HWCAPS_AMD64_SSE3) {
+      p = p + vex_sprintf(p, "%s", "-sse3");
+   }
+   if (hwcaps & VEX_HWCAPS_AMD64_AVX) {
+      p = p + vex_sprintf(p, "%s", "-avx");
+   }
+   if (hwcaps & VEX_HWCAPS_AMD64_AVX2) {
+      p = p + vex_sprintf(p, "%s", "-avx2");
+   }
+   if (hwcaps & VEX_HWCAPS_AMD64_BMI) {
+      p = p + vex_sprintf(p, "%s", "-bmi");
+   }
+
+  out:
+   vassert(buf[sizeof(buf)-1] == 0);
+   return buf;
 }
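
The amd64 hwcaps string is now assembled dynamically in a fixed append order (cx16, lzcnt, rdtscp, sse3, avx, avx2, bmi) rather than selected from an enumerated switch, so new feature bits compose without adding cases. Going through the public wrapper LibVEX_ppVexHwCaps shown above (illustrative only):

   const HChar* s = LibVEX_ppVexHwCaps ( VexArchAMD64,
                                         VEX_HWCAPS_AMD64_CX16
                                         | VEX_HWCAPS_AMD64_SSE3
                                         | VEX_HWCAPS_AMD64_AVX );
   /* s is "amd64-cx16-sse3-avx", following the append order above; a
      rejected combination such as AVX without SSE3 yields "INVALID". */
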
 
-static HChar* show_hwcaps_ppc32 ( UInt hwcaps )
+static const HChar* show_hwcaps_ppc32 ( UInt hwcaps )
 {
    /* Monotonic with complications.  Basically V > F > baseline,
       but once you have F then you can have FX or GX too. */
@@ -815,6 +1294,9 @@ static HChar* show_hwcaps_ppc32 ( UInt hwcaps )
    const UInt V  = VEX_HWCAPS_PPC32_V;
    const UInt FX = VEX_HWCAPS_PPC32_FX;
    const UInt GX = VEX_HWCAPS_PPC32_GX;
+   const UInt VX = VEX_HWCAPS_PPC32_VX;
+   const UInt DFP = VEX_HWCAPS_PPC32_DFP;
+   const UInt ISA2_07 = VEX_HWCAPS_PPC32_ISA2_07;
          UInt c  = hwcaps;
    if (c == 0)           return "ppc32-int";
    if (c == F)           return "ppc32-int-flt";
@@ -825,16 +1307,24 @@ static HChar* show_hwcaps_ppc32 ( UInt hwcaps )
    if (c == (F|V|FX))    return "ppc32-int-flt-vmx-FX";
    if (c == (F|V|GX))    return "ppc32-int-flt-vmx-GX";
    if (c == (F|V|FX|GX)) return "ppc32-int-flt-vmx-FX-GX";
+   if (c == (F|V|FX|GX|DFP))    return "ppc32-int-flt-vmx-FX-GX-DFP";
+   if (c == (F|V|FX|GX|VX|DFP)) return "ppc32-int-flt-vmx-FX-GX-VX-DFP";
+   if (c == (F|V|FX|GX|VX|DFP|ISA2_07))
+      return "ppc32-int-flt-vmx-FX-GX-VX-DFP-ISA2_07";
+
    return NULL;
 }
 
-static HChar* show_hwcaps_ppc64 ( UInt hwcaps )
+static const HChar* show_hwcaps_ppc64 ( UInt hwcaps )
 {
    /* Monotonic with complications.  Basically V > baseline(==F),
       but once you have F then you can have FX or GX too. */
    const UInt V  = VEX_HWCAPS_PPC64_V;
    const UInt FX = VEX_HWCAPS_PPC64_FX;
    const UInt GX = VEX_HWCAPS_PPC64_GX;
+   const UInt VX = VEX_HWCAPS_PPC64_VX;
+   const UInt DFP = VEX_HWCAPS_PPC64_DFP;
+   const UInt ISA2_07 = VEX_HWCAPS_PPC64_ISA2_07;
          UInt c  = hwcaps;
    if (c == 0)         return "ppc64-int-flt";
    if (c == FX)        return "ppc64-int-flt-FX";
@@ -844,10 +1334,14 @@ static HChar* show_hwcaps_ppc64 ( UInt hwcaps )
    if (c == (V|FX))    return "ppc64-int-flt-vmx-FX";
    if (c == (V|GX))    return "ppc64-int-flt-vmx-GX";
    if (c == (V|FX|GX)) return "ppc64-int-flt-vmx-FX-GX";
+   if (c == (V|FX|GX|DFP))    return "ppc64-int-flt-vmx-FX-GX-DFP";
+   if (c == (V|FX|GX|VX|DFP)) return "ppc64-int-flt-vmx-FX-GX-VX-DFP";
+   if (c == (V|FX|GX|VX|DFP|ISA2_07))
+      return "ppc64-int-flt-vmx-FX-GX-VX-DFP-ISA2_07";
    return NULL;
 }
 
-static HChar* show_hwcaps_arm ( UInt hwcaps )
+static const HChar* show_hwcaps_arm ( UInt hwcaps )
 {
    Bool N = ((hwcaps & VEX_HWCAPS_ARM_NEON) != 0);
    Bool vfp = ((hwcaps & (VEX_HWCAPS_ARM_VFP |
@@ -887,15 +1381,100 @@ static HChar* show_hwcaps_arm ( UInt hwcaps )
    return NULL;
 }
 
+static const HChar* show_hwcaps_s390x ( UInt hwcaps )
+{
+   static const HChar prefix[] = "s390x";
+   static const struct {
+      UInt  hwcaps_bit;
+      HChar name[6];
+   } hwcaps_list[] = {
+      { VEX_HWCAPS_S390X_LDISP, "ldisp" },
+      { VEX_HWCAPS_S390X_EIMM,  "eimm" },
+      { VEX_HWCAPS_S390X_GIE,   "gie" },
+      { VEX_HWCAPS_S390X_DFP,   "dfp" },
+      { VEX_HWCAPS_S390X_FGX,   "fgx" },
+      { VEX_HWCAPS_S390X_STFLE, "stfle" },
+      { VEX_HWCAPS_S390X_ETF2,  "etf2" },
+      { VEX_HWCAPS_S390X_ETF3,  "etf3" },
+      { VEX_HWCAPS_S390X_STCKF, "stckf" },
+      { VEX_HWCAPS_S390X_FPEXT, "fpext" },
+      { VEX_HWCAPS_S390X_LSC,   "lsc" },
+      { VEX_HWCAPS_S390X_PFPO,  "pfpo" },
+   };
+#define NUM_HWCAPS (sizeof hwcaps_list / sizeof hwcaps_list[0])
+   static HChar buf[sizeof prefix + 
+                    NUM_HWCAPS * (sizeof hwcaps_list[0].name + 1) +
+                    1];  // '\0'
+   HChar *p;
+   UInt i;
+
+   if (buf[0] != '\0') return buf;  /* already constructed */
+
+   hwcaps = VEX_HWCAPS_S390X(hwcaps);
+
+   p = buf + vex_sprintf(buf, "%s", prefix);
+   for (i = 0 ; i < NUM_HWCAPS; ++i) {
+      if (hwcaps & hwcaps_list[i].hwcaps_bit)
+         p = p + vex_sprintf(p, "-%s", hwcaps_list[i].name);
+   }
+
+   /* If there are no facilities, add "zarch" */
+   if (hwcaps == 0)
+     vex_sprintf(p, "-%s", "zarch");
+
+   return buf;
+}
+
+static const HChar* show_hwcaps_mips32 ( UInt hwcaps )
+{
+   /* MIPS baseline. */
+   if (VEX_MIPS_COMP_ID(hwcaps) == VEX_PRID_COMP_MIPS) {
+      /* MIPS baseline with dspr2. */
+      if (VEX_MIPS_PROC_DSP2(hwcaps)) {
+         return "MIPS-baseline-dspr2";
+      }
+      /* MIPS baseline with dsp. */
+      if (VEX_MIPS_PROC_DSP(hwcaps)) {
+         return "MIPS-baseline-dsp";
+      }
+      return "MIPS-baseline";
+   }
+
+   /* Broadcom baseline. */
+   if (VEX_MIPS_COMP_ID(hwcaps) == VEX_PRID_COMP_BROADCOM) {
+      return "Broadcom-baseline";
+   }
+
+   /* Netlogic baseline. */
+   if (VEX_MIPS_COMP_ID(hwcaps) == VEX_PRID_COMP_NETLOGIC) {
+      return "Netlogic-baseline";
+   }
+
+   /* Cavium baseline. */
+   if (VEX_MIPS_COMP_ID(hwcaps) == VEX_PRID_COMP_CAVIUM) {
+      return "Cavium-baseline";
+   }
+
+   return NULL;
+}
+
+static const HChar* show_hwcaps_mips64 ( UInt hwcaps )
+{
+   return "mips64-baseline";
+}
+
 /* ---- */
-static HChar* show_hwcaps ( VexArch arch, UInt hwcaps )
+static const HChar* show_hwcaps ( VexArch arch, UInt hwcaps )
 {
    switch (arch) {
-      case VexArchX86:   return show_hwcaps_x86(hwcaps);
-      case VexArchAMD64: return show_hwcaps_amd64(hwcaps);
-      case VexArchPPC32: return show_hwcaps_ppc32(hwcaps);
-      case VexArchPPC64: return show_hwcaps_ppc64(hwcaps);
-      case VexArchARM:   return show_hwcaps_arm(hwcaps);
+      case VexArchX86:    return show_hwcaps_x86(hwcaps);
+      case VexArchAMD64:  return show_hwcaps_amd64(hwcaps);
+      case VexArchPPC32:  return show_hwcaps_ppc32(hwcaps);
+      case VexArchPPC64:  return show_hwcaps_ppc64(hwcaps);
+      case VexArchARM:    return show_hwcaps_arm(hwcaps);
+      case VexArchS390X:  return show_hwcaps_s390x(hwcaps);
+      case VexArchMIPS32: return show_hwcaps_mips32(hwcaps);
+      case VexArchMIPS64: return show_hwcaps_mips64(hwcaps);
       default: return NULL;
    }
 }