Merge branch 'mingw' of git://qemu.weilnetz.de/qemu

author Aurelien Jarno <aurelien@aurel32.net>

Sat, 13 Apr 2013 12:31:54 +0000 (14:31 +0200)

committer Aurelien Jarno <aurelien@aurel32.net>

Sat, 13 Apr 2013 12:31:54 +0000 (14:31 +0200)
author Aurelien Jarno <aurelien@aurel32.net>
Sat, 13 Apr 2013 12:31:54 +0000 (14:31 +0200)
committer Aurelien Jarno <aurelien@aurel32.net>
Sat, 13 Apr 2013 12:31:54 +0000 (14:31 +0200)
diff --git a/block/qcow.c b/block/qcow.c

index 13d396b89a5d7f0cb7c1a36c5e6d7dbc1825db1e..3278e552bf220625da77e716f6cc1acd3e867487 100644 (file)
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -25,7 +25,7 @@
  #include "block/block_int.h"
  #include "qemu/module.h"
  #include <zlib.h>
-#include "block/aes.h"
+#include "qemu/aes.h"
  #include "migration/migration.h"
  
  /**************************************************************/
diff --git a/block/qcow2.c b/block/qcow2.c

index 7e7d775b37285df8813d10b81ec55ba000373864..1d180732e9a3e952b5fbcf7d10bdc4acffec0d4c 100644 (file)
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -25,7 +25,7 @@
  #include "block/block_int.h"
  #include "qemu/module.h"
  #include <zlib.h>
-#include "block/aes.h"
+#include "qemu/aes.h"
  #include "block/qcow2.h"
  #include "qemu/error-report.h"
  #include "qapi/qmp/qerror.h"
diff --git a/block/qcow2.h b/block/qcow2.h

index bf8db2abd3577ca6783ea3cf4f364c24afd99fca..94218432f34e3da0ac93c4690e4f295e22098cd2 100644 (file)
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -25,7 +25,7 @@
  #ifndef BLOCK_QCOW2_H
  #define BLOCK_QCOW2_H
  
-#include "block/aes.h"
+#include "qemu/aes.h"
  #include "block/coroutine.h"
  
  //#define DEBUG_ALLOC
diff --git a/configure b/configure

index 1ed939a3ac9f9344795ba884827694b75af61b4a..73df1814063eb566a6703df8d8e6d293a8a39d81 100755 (executable)
--- a/configure
+++ b/configure
@@ -3471,9 +3471,6 @@ echo "ARCH=$ARCH" >> $config_host_mak
  if test "$debug_tcg" = "yes" ; then
    echo "CONFIG_DEBUG_TCG=y" >> $config_host_mak
  fi
-if test "$debug" = "yes" ; then
-  echo "CONFIG_DEBUG_EXEC=y" >> $config_host_mak
-fi
  if test "$strip_opt" = "yes" ; then
    echo "STRIP=${strip}" >> $config_host_mak
  fi
diff --git a/cpu-exec.c b/cpu-exec.c

index aa8fa893d9cafffbc13fcc1e5dce713d15e23648..31c089dac0a7db66ac4d6270c293d1294bbc598a 100644 (file)
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -23,8 +23,6 @@
  #include "qemu/atomic.h"
  #include "sysemu/qtest.h"
  
-//#define CONFIG_DEBUG_EXEC
-
  bool qemu_cpu_has_work(CPUState *cpu)
  {
      return cpu_has_work(cpu);
@@ -575,7 +573,7 @@ int cpu_exec(CPUArchState *env)
                      env->exception_index = EXCP_INTERRUPT;
                      cpu_loop_exit(env);
                  }
-#if defined(DEBUG_DISAS) || defined(CONFIG_DEBUG_EXEC)
+#if defined(DEBUG_DISAS)
                  if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
                      /* restore flags in standard format */
  #if defined(TARGET_I386)
@@ -590,7 +588,7 @@ int cpu_exec(CPUArchState *env)
                      log_cpu_state(env, 0);
  #endif
                  }
-#endif /* DEBUG_DISAS || CONFIG_DEBUG_EXEC */
+#endif /* DEBUG_DISAS */
                  spin_lock(&tcg_ctx.tb_ctx.tb_lock);
                  tb = tb_find_fast(env);
                  /* Note: we do it here to avoid a gcc bug on Mac OS X when
@@ -602,11 +600,10 @@ int cpu_exec(CPUArchState *env)
                      next_tb = 0;
                      tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
                  }
-#ifdef CONFIG_DEBUG_EXEC
-                qemu_log_mask(CPU_LOG_EXEC, "Trace %p [" TARGET_FMT_lx "] %s\n",
-                             tb->tc_ptr, tb->pc,
-                             lookup_symbol(tb->pc));
-#endif
+                if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
+                    qemu_log("Trace %p [" TARGET_FMT_lx "] %s\n",
+                             tb->tc_ptr, tb->pc, lookup_symbol(tb->pc));
+                }
                  /* see if we can patch the calling TB. When the TB
                     spans two pages, we cannot safely do a direct
                     jump. */
diff --git a/disas/i386.c b/disas/i386.c

index 73cc06f1c37e583b7e620a19cfb13ab26260de05..47f1f2ea616f4961d0fba26f9ed959097276da91 100644 (file)
--- a/disas/i386.c
+++ b/disas/i386.c
@@ -664,6 +664,13 @@ fetch_data(struct disassemble_info *info, bfd_byte *addr)
  #define PREGRP95  NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 95 } }
  #define PREGRP96  NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 96 } }
  #define PREGRP97  NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 97 } }
+#define PREGRP98  NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 98 } }
+#define PREGRP99  NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 99 } }
+#define PREGRP100 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 100 } }
+#define PREGRP101 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 101 } }
+#define PREGRP102 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 102 } }
+#define PREGRP103 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 103 } }
+#define PREGRP104 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 104 } }
  
  
  #define X86_64_0  NULL, { { NULL, X86_64_SPECIAL }, { NULL, 0 } }
@@ -1440,7 +1447,7 @@ static const unsigned char threebyte_0x38_uses_DATA_prefix[256] = {
    /* a0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* af */
    /* b0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* bf */
    /* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
-  /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */
+  /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1, /* df */
    /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
    /* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */
    /*       -------------------------------        */
@@ -1503,7 +1510,7 @@ static const unsigned char threebyte_0x3a_uses_DATA_prefix[256] = {
    /* 10 */ 0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0, /* 1f */
    /* 20 */ 1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 2f */
    /* 30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 3f */
-  /* 40 */ 1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 4f */
+  /* 40 */ 1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0, /* 4f */
    /* 50 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 5f */
    /* 60 */ 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, /* 6f */
    /* 70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 7f */
@@ -1512,7 +1519,7 @@ static const unsigned char threebyte_0x3a_uses_DATA_prefix[256] = {
    /* a0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* af */
    /* b0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* bf */
    /* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
-  /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */
+  /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, /* df */
    /* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
    /* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */
    /*       -------------------------------        */
@@ -2710,6 +2717,63 @@ static const struct dis386 prefix_user_table[][4] = {
      { "punpckldq",{ MX, EMq } },
      { "(bad)", { XX } },
    },
+
+  /* PREGRP98 */
+  {
+    { "(bad)", { XX } },
+    { "(bad)", { XX } },
+    { "pclmulqdq", { XM, EXx, Ib } },
+    { "(bad)", { XX } },
+  },
+
+  /* PREGRP99 */
+  {
+    { "(bad)", { XX } },
+    { "(bad)", { XX } },
+    { "aesimc", { XM, EXx } },
+    { "(bad)", { XX } },
+  },
+
+  /* PREGRP100 */
+  {
+    { "(bad)", { XX } },
+    { "(bad)", { XX } },
+    { "aesenc", { XM, EXx } },
+    { "(bad)", { XX } },
+  },
+
+  /* PREGRP101 */
+  {
+    { "(bad)", { XX } },
+    { "(bad)", { XX } },
+    { "aesenclast", { XM, EXx } },
+    { "(bad)", { XX } },
+  },
+
+  /* PREGRP102 */
+  {
+    { "(bad)", { XX } },
+    { "(bad)", { XX } },
+    { "aesdec", { XM, EXx } },
+    { "(bad)", { XX } },
+  },
+
+  /* PREGRP103 */
+  {
+    { "(bad)", { XX } },
+    { "(bad)", { XX } },
+    { "aesdeclast", { XM, EXx } },
+    { "(bad)", { XX } },
+  },
+
+  /* PREGRP104 */
+  {
+    { "(bad)", { XX } },
+    { "(bad)", { XX } },
+    { "aeskeygenassist", { XM, EXx, Ib } },
+    { "(bad)", { XX } },
+  },
+
  };
  
  static const struct dis386 x86_64_table[][2] = {
@@ -2981,11 +3045,11 @@ static const struct dis386 three_byte_table[][256] = {
      { "(bad)", { XX } },
      { "(bad)", { XX } },
      { "(bad)", { XX } },
-    { "(bad)", { XX } },
-    { "(bad)", { XX } },
-    { "(bad)", { XX } },
-    { "(bad)", { XX } },
-    { "(bad)", { XX } },
+    { PREGRP99 },
+    { PREGRP100 },
+    { PREGRP101 },
+    { PREGRP102 },
+    { PREGRP103 },
      /* e0 */
      { "(bad)", { XX } },
      { "(bad)", { XX } },
@@ -3102,7 +3166,7 @@ static const struct dis386 three_byte_table[][256] = {
      { PREGRP84 },
      { PREGRP85 },
      { "(bad)", { XX } },
-    { "(bad)", { XX } },
+    { PREGRP98 },
      { "(bad)", { XX } },
      { "(bad)", { XX } },
      { "(bad)", { XX } },
@@ -3276,7 +3340,7 @@ static const struct dis386 three_byte_table[][256] = {
      { "(bad)", { XX } },
      { "(bad)", { XX } },
      { "(bad)", { XX } },
-    { "(bad)", { XX } },
+    { PREGRP104 },
      /* e0 */
      { "(bad)", { XX } },
      { "(bad)", { XX } },
diff --git a/docs/bootindex.txt b/docs/bootindex.txt

index 803ebfc8314ecc587239b7e19b73d2123839a71c..f84fac7200340e3b078589360f0495e3665ba71c 100644 (file)
--- a/docs/bootindex.txt
+++ b/docs/bootindex.txt
@@ -9,7 +9,7 @@ still be bootable.
  
  == Example ==
  
-Lets assume we have QEMU machine with two NICs (virtio, e1000) and two
+Let's assume we have a QEMU machine with two NICs (virtio, e1000) and two
  disks (IDE, virtio):
  
  qemu -drive file=disk1.img,if=none,id=disk1
@@ -20,7 +20,7 @@ qemu -drive file=disk1.img,if=none,id=disk1
       -netdev type=user,id=net1 -device e1000,netdev=net1,bootindex=1
  
  Given the command above, firmware should try to boot from the e1000 NIC
-first.  If this fails, it should try the virtio NIC next, if this fails
+first.  If this fails, it should try the virtio NIC next; if this fails
  too, it should try the virtio disk, and then the IDE disk.
  
  == Limitations ==
@@ -28,7 +28,7 @@ too, it should try the virtio disk, and then the IDE disk.
  1. Some firmware has limitations on which devices can be considered for
  booting.  For instance, the PC BIOS boot specification allows only one
  disk to be bootable.  If boot from disk fails for some reason, the BIOS
-won't retry booting from other disk.  It still can try to boot from
+won't retry booting from another disk.  It can still try to boot from
  floppy or net, though.
  
  2. Sometimes, firmware cannot map the device path QEMU wants firmware to
@@ -36,8 +36,8 @@ boot from to a boot method.  It doesn't happen for devices the firmware
  can natively boot from, but if firmware relies on an option ROM for
  booting, and the same option ROM is used for booting from more then one
  device, the firmware may not be able to ask the option ROM to boot from
-a particular device reliably.  For instance with PC BIOS, if a SCSI HBA
+a particular device reliably.  For instance with the PC BIOS, if a SCSI HBA
  has three bootable devices target1, target3, target5 connected to it,
  the option ROM will have a boot method for each of them, but it is not
  possible to map from boot method back to a specific target.  This is a
-shortcoming of PC BIOS boot specification.
+shortcoming of the PC BIOS boot specification.
diff --git a/docs/ccid.txt b/docs/ccid.txt

index 450a66ad99757b7c5a6091aed75c775841503335..8bbaa940c315662284ffd90c6b94817e06ad3634 100644 (file)
--- a/docs/ccid.txt
+++ b/docs/ccid.txt
@@ -24,7 +24,7 @@ information see the specification:
   Revision 1.1
   April 22rd, 2005
  
-Smartcard are used for authentication, single sign on, decryption in
+Smartcards are used for authentication, single sign on, decryption in
  public/private schemes and digital signatures. A smartcard reader on the client
  cannot be used on a guest with simple usb passthrough since it will then not be
  available on the client, possibly locking the computer when it is "removed". On
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c

index 6efb2f063dbe5c6929bc0002f9762801ad015525..9fdf009da5bd01a7b6b5dd33a3561e14a9e96c6b 100644 (file)
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -494,10 +494,10 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
      blkcfg.heads = s->conf->heads;
      /*
       * We must ensure that the block device capacity is a multiple of
-     * the logical block size. If that is not the case, lets use
+     * the logical block size. If that is not the case, let's use
       * sector_mask to adopt the geometry to have a correct picture.
       * For those devices where the capacity is ok for the given geometry
-     * we dont touch the sector value of the geometry, since some devices
+     * we don't touch the sector value of the geometry, since some devices
       * (like s390 dasd) need a specific value. Here the capacity is already
       * cyls*heads*secs*blk_size and the sector value is not block size
       * divided by 512 - instead it is the amount of blk_size blocks
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c

index 566b4bfa413a8b845eb9d9a6a7d55ed0fdf0509a..67dbe68cdca8bf8e667ba0e7a85692f6e53e79c5 100644 (file)
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -172,7 +172,7 @@ static uint32_t nvic_readl(nvic_state *s, uint32_t offset)
          return 10000;
      case 0xd00: /* CPUID Base.  */
          return cpu_single_env->cp15.c0_cpuid;
-    case 0xd04: /* Interrypt Control State.  */
+    case 0xd04: /* Interrupt Control State.  */
          /* VECTACTIVE */
          val = s->gic.running_irq[0];
          if (val == 1023) {
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c

index 76b63e2ca6e842f9d2c9293ae4b734294b2b0146..a49e4401cd6466fa445a2a5e42655e243aa29ecd 100644 (file)
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -76,7 +76,7 @@ static void ccw_init(QEMUMachineInitArgs *args)
      }
      my_ram_size = my_ram_size >> (20 + shift) << (20 + shift);
  
-    /* lets propagate the changed ram size into the global variable. */
+    /* let's propagate the changed ram size into the global variable. */
      ram_size = my_ram_size;
  
      /* get a BUS */
diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c

index f82c0e19640bd170091d43f40b98b6de9cc729a8..46aec999a63ac9f7b3c33fdf5cc417c4595cf092 100644 (file)
--- a/hw/s390x/s390-virtio.c
+++ b/hw/s390x/s390-virtio.c
@@ -240,7 +240,7 @@ static void s390_init(QEMUMachineInitArgs *args)
      }
      my_ram_size = my_ram_size >> (20 + shift) << (20 + shift);
  
-    /* lets propagate the changed ram size into the global variable. */
+    /* let's propagate the changed ram size into the global variable. */
      ram_size = my_ram_size;
  
      /* get a BUS */
diff --git a/hw/sd/sd.c b/hw/sd/sd.c

index 66c4014fbe0144bbf66dd7c234e209482cd5ffb5..2e0ef3e5aa5236e23d66450b12bf9578e8fc0677 100644 (file)
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -308,7 +308,7 @@ static void sd_set_csd(SDState *sd, uint64_t size)
          sd->csd[13] = 0x40;
          sd->csd[14] = 0x00;
          sd->csd[15] = 0x00;
-        sd->ocr |= 1 << 30;    /* High Capacity SD Memort Card */
+        sd->ocr |= 1 << 30;     /* High Capacity SD Memory Card */
      }
  }
  
@@ -1541,7 +1541,7 @@ void sd_write_data(SDState *sd, uint8_t value)
  
      case 25:   /* CMD25:  WRITE_MULTIPLE_BLOCK */
          if (sd->data_offset == 0) {
-            /* Start of the block - lets check the address is valid */
+            /* Start of the block - let's check the address is valid */
              if (sd->data_start + sd->blk_len > sd->size) {
                  sd->card_status |= ADDRESS_ERROR;
                  break;
diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c

index 256a58c601340b0cf90a2063f163296d57a5b1bc..fe82bd62fec5fb02b466f1d42502154749cd09b5 100644 (file)
--- a/hw/sh4/r2d.c
+++ b/hw/sh4/r2d.c
@@ -258,7 +258,7 @@ static void r2d_init(QEMUMachineInitArgs *args)
      vmstate_register_ram_global(sdram);
      memory_region_add_subregion(address_space_mem, SDRAM_BASE, sdram);
      /* Register peripherals */
-    s = sh7750_init(env, address_space_mem);
+    s = sh7750_init(cpu, address_space_mem);
      irq = r2d_fpga_init(address_space_mem, 0x04000000, sh7750_irl(s));
  
      dev = qdev_create(NULL, "sh_pci");
diff --git a/hw/sh4/sh7750.c b/hw/sh4/sh7750.c

index 2218b9ce6ad992ef17bf5917de3fd3e37cb98faa..03e8bd1776579e676ec84f9668c600b3343c7016 100644 (file)
--- a/hw/sh4/sh7750.c
+++ b/hw/sh4/sh7750.c
@@ -44,7 +44,7 @@ typedef struct SH7750State {
      MemoryRegion iomem_ffc;
      MemoryRegion mmct_iomem;
      /* CPU */
-    CPUSH4State *cpu;
+    SuperHCPU *cpu;
      /* Peripheral frequency in Hz */
      uint32_t periph_freq;
      /* SDRAM controller */
@@ -79,7 +79,7 @@ typedef struct SH7750State {
  
  static inline int has_bcr3_and_bcr4(SH7750State * s)
  {
-       return (s->cpu->features & SH_FEATURE_BCR3_AND_BCR4);
+    return s->cpu->env.features & SH_FEATURE_BCR3_AND_BCR4;
  }
  /**********************************************************************
   I/O ports
@@ -271,21 +271,21 @@ static uint32_t sh7750_mem_readl(void *opaque, hwaddr addr)
          ignore_access("long read", addr);
          return 0;
      case SH7750_MMUCR_A7:
-       return s->cpu->mmucr;
+        return s->cpu->env.mmucr;
      case SH7750_PTEH_A7:
-       return s->cpu->pteh;
+        return s->cpu->env.pteh;
      case SH7750_PTEL_A7:
-       return s->cpu->ptel;
+        return s->cpu->env.ptel;
      case SH7750_TTB_A7:
-       return s->cpu->ttb;
+        return s->cpu->env.ttb;
      case SH7750_TEA_A7:
-       return s->cpu->tea;
+        return s->cpu->env.tea;
      case SH7750_TRA_A7:
-       return s->cpu->tra;
+        return s->cpu->env.tra;
      case SH7750_EXPEVT_A7:
-       return s->cpu->expevt;
+        return s->cpu->env.expevt;
      case SH7750_INTEVT_A7:
-       return s->cpu->intevt;
+        return s->cpu->env.intevt;
      case SH7750_CCR_A7:
         return s->ccr;
      case 0x1f000030:           /* Processor version */
@@ -409,37 +409,38 @@ static void sh7750_mem_writel(void *opaque, hwaddr addr,
         return;
      case SH7750_MMUCR_A7:
          if (mem_value & MMUCR_TI) {
-            cpu_sh4_invalidate_tlb(s->cpu);
+            cpu_sh4_invalidate_tlb(&s->cpu->env);
          }
-        s->cpu->mmucr = mem_value & ~MMUCR_TI;
+        s->cpu->env.mmucr = mem_value & ~MMUCR_TI;
          return;
      case SH7750_PTEH_A7:
          /* If asid changes, clear all registered tlb entries. */
-       if ((s->cpu->pteh & 0xff) != (mem_value & 0xff))
-           tlb_flush(s->cpu, 1);
-       s->cpu->pteh = mem_value;
-       return;
+        if ((s->cpu->env.pteh & 0xff) != (mem_value & 0xff)) {
+            tlb_flush(&s->cpu->env, 1);
+        }
+        s->cpu->env.pteh = mem_value;
+        return;
      case SH7750_PTEL_A7:
-       s->cpu->ptel = mem_value;
-       return;
+        s->cpu->env.ptel = mem_value;
+        return;
      case SH7750_PTEA_A7:
-       s->cpu->ptea = mem_value & 0x0000000f;
-       return;
+        s->cpu->env.ptea = mem_value & 0x0000000f;
+        return;
      case SH7750_TTB_A7:
-       s->cpu->ttb = mem_value;
-       return;
+        s->cpu->env.ttb = mem_value;
+        return;
      case SH7750_TEA_A7:
-       s->cpu->tea = mem_value;
-       return;
+        s->cpu->env.tea = mem_value;
+        return;
      case SH7750_TRA_A7:
-       s->cpu->tra = mem_value & 0x000007ff;
-       return;
+        s->cpu->env.tra = mem_value & 0x000007ff;
+        return;
      case SH7750_EXPEVT_A7:
-       s->cpu->expevt = mem_value & 0x000007ff;
-       return;
+        s->cpu->env.expevt = mem_value & 0x000007ff;
+        return;
      case SH7750_INTEVT_A7:
-       s->cpu->intevt = mem_value & 0x000007ff;
-       return;
+        s->cpu->env.intevt = mem_value & 0x000007ff;
+        return;
      case SH7750_CCR_A7:
         s->ccr = mem_value;
         return;
@@ -651,20 +652,20 @@ static uint64_t sh7750_mmct_read(void *opaque, hwaddr addr,
          /* do nothing */
         break;
      case MM_ITLB_ADDR:
-        ret = cpu_sh4_read_mmaped_itlb_addr(s->cpu, addr);
+        ret = cpu_sh4_read_mmaped_itlb_addr(&s->cpu->env, addr);
          break;
      case MM_ITLB_DATA:
-        ret = cpu_sh4_read_mmaped_itlb_data(s->cpu, addr);
+        ret = cpu_sh4_read_mmaped_itlb_data(&s->cpu->env, addr);
          break;
      case MM_OCACHE_ADDR:
      case MM_OCACHE_DATA:
          /* do nothing */
         break;
      case MM_UTLB_ADDR:
-        ret = cpu_sh4_read_mmaped_utlb_addr(s->cpu, addr);
+        ret = cpu_sh4_read_mmaped_utlb_addr(&s->cpu->env, addr);
          break;
      case MM_UTLB_DATA:
-        ret = cpu_sh4_read_mmaped_utlb_data(s->cpu, addr);
+        ret = cpu_sh4_read_mmaped_utlb_data(&s->cpu->env, addr);
          break;
      default:
          abort();
@@ -694,10 +695,10 @@ static void sh7750_mmct_write(void *opaque, hwaddr addr,
          /* do nothing */
         break;
      case MM_ITLB_ADDR:
-        cpu_sh4_write_mmaped_itlb_addr(s->cpu, addr, mem_value);
+        cpu_sh4_write_mmaped_itlb_addr(&s->cpu->env, addr, mem_value);
          break;
      case MM_ITLB_DATA:
-        cpu_sh4_write_mmaped_itlb_data(s->cpu, addr, mem_value);
+        cpu_sh4_write_mmaped_itlb_data(&s->cpu->env, addr, mem_value);
          abort();
         break;
      case MM_OCACHE_ADDR:
@@ -705,10 +706,10 @@ static void sh7750_mmct_write(void *opaque, hwaddr addr,
          /* do nothing */
         break;
      case MM_UTLB_ADDR:
-        cpu_sh4_write_mmaped_utlb_addr(s->cpu, addr, mem_value);
+        cpu_sh4_write_mmaped_utlb_addr(&s->cpu->env, addr, mem_value);
         break;
      case MM_UTLB_DATA:
-        cpu_sh4_write_mmaped_utlb_data(s->cpu, addr, mem_value);
+        cpu_sh4_write_mmaped_utlb_data(&s->cpu->env, addr, mem_value);
         break;
      default:
          abort();
@@ -722,7 +723,7 @@ static const MemoryRegionOps sh7750_mmct_ops = {
      .endianness = DEVICE_NATIVE_ENDIAN,
  };
  
-SH7750State *sh7750_init(CPUSH4State * cpu, MemoryRegion *sysmem)
+SH7750State *sh7750_init(SuperHCPU *cpu, MemoryRegion *sysmem)
  {
      SH7750State *s;
  
@@ -768,7 +769,7 @@ SH7750State *sh7750_init(CPUSH4State * cpu, MemoryRegion *sysmem)
                              _INTC_ARRAY(vectors),
                              _INTC_ARRAY(groups));
  
-    cpu->intc_handle = &s->intc;
+    cpu->env.intc_handle = &s->intc;
  
      sh_serial_init(sysmem, 0x1fe00000,
                     0, s->periph_freq, serial_hds[0],
@@ -794,19 +795,19 @@ SH7750State *sh7750_init(CPUSH4State * cpu, MemoryRegion *sysmem)
                 s->intc.irqs[TMU2_TUNI],
                 s->intc.irqs[TMU2_TICPI]);
  
-    if (cpu->id & (SH_CPU_SH7750 | SH_CPU_SH7750S | SH_CPU_SH7751)) {
+    if (cpu->env.id & (SH_CPU_SH7750 | SH_CPU_SH7750S | SH_CPU_SH7751)) {
          sh_intc_register_sources(&s->intc,
                                  _INTC_ARRAY(vectors_dma4),
                                  _INTC_ARRAY(groups_dma4));
      }
  
-    if (cpu->id & (SH_CPU_SH7750R | SH_CPU_SH7751R)) {
+    if (cpu->env.id & (SH_CPU_SH7750R | SH_CPU_SH7751R)) {
          sh_intc_register_sources(&s->intc,
                                  _INTC_ARRAY(vectors_dma8),
                                  _INTC_ARRAY(groups_dma8));
      }
  
-    if (cpu->id & (SH_CPU_SH7750R | SH_CPU_SH7751 | SH_CPU_SH7751R)) {
+    if (cpu->env.id & (SH_CPU_SH7750R | SH_CPU_SH7751 | SH_CPU_SH7751R)) {
          sh_intc_register_sources(&s->intc,
                                  _INTC_ARRAY(vectors_tmu34),
                                  NULL, 0);
@@ -816,13 +817,13 @@ SH7750State *sh7750_init(CPUSH4State * cpu, MemoryRegion *sysmem)
                     NULL, NULL);
      }
  
-    if (cpu->id & (SH_CPU_SH7751_ALL)) {
+    if (cpu->env.id & (SH_CPU_SH7751_ALL)) {
          sh_intc_register_sources(&s->intc,
                                  _INTC_ARRAY(vectors_pci),
                                  _INTC_ARRAY(groups_pci));
      }
  
-    if (cpu->id & (SH_CPU_SH7750S | SH_CPU_SH7750R | SH_CPU_SH7751_ALL)) {
+    if (cpu->env.id & (SH_CPU_SH7750S | SH_CPU_SH7750R | SH_CPU_SH7751_ALL)) {
          sh_intc_register_sources(&s->intc,
                                  _INTC_ARRAY(vectors_irlm),
                                  NULL, 0);
diff --git a/hw/sh4/shix.c b/hw/sh4/shix.c

index c23d4afb10f17e825ef6c5935e666c221d55ab28..ffac621a8c53517c9ac3eaa5e70b4edd68d2ce31 100644 (file)
--- a/hw/sh4/shix.c
+++ b/hw/sh4/shix.c
@@ -41,7 +41,7 @@ static void shix_init(QEMUMachineInitArgs *args)
  {
      const char *cpu_model = args->cpu_model;
      int ret;
-    CPUSH4State *env;
+    SuperHCPU *cpu;
      struct SH7750State *s;
      MemoryRegion *sysmem = get_system_memory();
      MemoryRegion *rom = g_new(MemoryRegion, 1);
@@ -51,7 +51,11 @@ static void shix_init(QEMUMachineInitArgs *args)
          cpu_model = "any";
  
      printf("Initializing CPU\n");
-    env = cpu_init(cpu_model);
+    cpu = cpu_sh4_init(cpu_model);
+    if (cpu == NULL) {
+        fprintf(stderr, "Unable to find CPU definition\n");
+        exit(1);
+    }
  
      /* Allocate memory space */
      printf("Allocating ROM\n");
@@ -81,7 +85,7 @@ static void shix_init(QEMUMachineInitArgs *args)
      }
  
      /* Register peripherals */
-    s = sh7750_init(env, sysmem);
+    s = sh7750_init(cpu, sysmem);
      /* XXXXX Check success */
      tc58128_init(s, "shix_linux_nand.bin", NULL);
      fprintf(stderr, "initialization terminated\n");
diff --git a/include/block/aes.h b/include/block/aes.h

deleted file mode 100644 (file)

index a0167eb..0000000
--- a/include/block/aes.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef QEMU_AES_H
-#define QEMU_AES_H
-
-#define AES_MAXNR 14
-#define AES_BLOCK_SIZE 16
-
-struct aes_key_st {
-    uint32_t rd_key[4 *(AES_MAXNR + 1)];
-    int rounds;
-};
-typedef struct aes_key_st AES_KEY;
-
-int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
-       AES_KEY *key);
-int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
-       AES_KEY *key);
-
-void AES_encrypt(const unsigned char *in, unsigned char *out,
-       const AES_KEY *key);
-void AES_decrypt(const unsigned char *in, unsigned char *out,
-       const AES_KEY *key);
-void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
-                    const unsigned long length, const AES_KEY *key,
-                    unsigned char *ivec, const int enc);
-
-#endif
diff --git a/include/hw/sh4/sh.h b/include/hw/sh4/sh.h

index 87c378f1eb252f8364e2654847c67fcb91f587f3..e61de9acc6507385a0a751b44dc5ccae69522aaa 100644 (file)
--- a/include/hw/sh4/sh.h
+++ b/include/hw/sh4/sh.h
@@ -11,7 +11,7 @@
  struct SH7750State;
  struct MemoryRegion;
  
-struct SH7750State *sh7750_init(CPUSH4State * cpu, struct MemoryRegion *sysmem);
+struct SH7750State *sh7750_init(SuperHCPU *cpu, struct MemoryRegion *sysmem);
  
  typedef struct {
      /* The callback will be triggered if any of the designated lines change */
diff --git a/include/qemu/aes.h b/include/qemu/aes.h

new file mode 100644 (file)

index 0000000..e79c707
--- /dev/null
+++ b/include/qemu/aes.h
@@ -0,0 +1,45 @@
+#ifndef QEMU_AES_H
+#define QEMU_AES_H
+
+#define AES_MAXNR 14
+#define AES_BLOCK_SIZE 16
+
+struct aes_key_st {
+    uint32_t rd_key[4 *(AES_MAXNR + 1)]; /* 4 words x (AES_MAXNR + 1) */
+    int rounds; /* presumably the number of rounds in use -- TODO confirm */
+};
+typedef struct aes_key_st AES_KEY; /* typedef alias for the struct above */
+
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+       AES_KEY *key);
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+       AES_KEY *key);
+
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+       const AES_KEY *key);
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+       const AES_KEY *key);
+void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                    const unsigned long length, const AES_KEY *key,
+                    unsigned char *ivec, const int enc);
+
+/*
+AES_Te0[x] = S [x].[02, 01, 01, 03];
+AES_Te1[x] = S [x].[03, 02, 01, 01];
+AES_Te2[x] = S [x].[01, 03, 02, 01];
+AES_Te3[x] = S [x].[01, 01, 03, 02];
+AES_Te4[x] = S [x].[01, 01, 01, 01];
+
+AES_Td0[x] = Si[x].[0e, 09, 0d, 0b];
+AES_Td1[x] = Si[x].[0b, 0e, 09, 0d];
+AES_Td2[x] = Si[x].[0d, 0b, 0e, 09];
+AES_Td3[x] = Si[x].[09, 0d, 0b, 0e];
+AES_Td4[x] = Si[x].[01, 01, 01, 01];
+*/
+
+extern const uint32_t AES_Te0[256], AES_Te1[256], AES_Te2[256],
+                      AES_Te3[256], AES_Te4[256];
+extern const uint32_t AES_Td0[256], AES_Td1[256], AES_Td2[256],
+                      AES_Td3[256], AES_Td4[256];
+
+#endif
diff --git a/linux-user/syscall.c b/linux-user/syscall.c

index ee82a2da4ef73fb35e70e73bffbaeb3bef9f9d53..1f07621ffe2d00febfa016f3f9ab475a1f6c663a 100644 (file)
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8823,8 +8823,17 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
  #endif
  #if defined(TARGET_NR_eventfd2)
      case TARGET_NR_eventfd2:
-        ret = get_errno(eventfd(arg1, arg2));
+    {
+        int host_flags = arg2 & (~(TARGET_O_NONBLOCK | TARGET_O_CLOEXEC));
+        if (arg2 & TARGET_O_NONBLOCK) {
+            host_flags |= O_NONBLOCK;
+        }
+        if (arg2 & TARGET_O_CLOEXEC) {
+            host_flags |= O_CLOEXEC;
+        }
+        ret = get_errno(eventfd(arg1, host_flags));
          break;
+    }
  #endif
  #endif /* CONFIG_EVENTFD  */
  #if defined(CONFIG_FALLOCATE) && defined(TARGET_NR_fallocate)
diff --git a/target-i386/cpu.c b/target-i386/cpu.c

index 356378c11000e912db82ac4ac88eef71c59b380b..6dd993f8473b2a34448670629f9a3b8d9ae9afef 100644 (file)
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -387,16 +387,15 @@ typedef struct x86_def_t {
            CPUID_PSE36 (needed for Solaris) */
            /* missing:
            CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */
-#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | \
-          CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | CPUID_EXT_SSE41 | \
-          CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | CPUID_EXT_MOVBE | \
-          CPUID_EXT_HYPERVISOR)
+#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
+          CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \
+          CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
+          CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR)
            /* missing:
-          CPUID_EXT_PCLMULQDQ, CPUID_EXT_DTES64, CPUID_EXT_DSCPL,
-          CPUID_EXT_VMX, CPUID_EXT_SMX, CPUID_EXT_EST, CPUID_EXT_TM2,
-          CPUID_EXT_CID, CPUID_EXT_FMA, CPUID_EXT_XTPR, CPUID_EXT_PDCM,
-          CPUID_EXT_PCID, CPUID_EXT_DCA, CPUID_EXT_X2APIC,
-          CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AES, CPUID_EXT_XSAVE,
+          CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
+          CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
+          CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
+          CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_XSAVE,
            CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
            CPUID_EXT_RDRAND */
  #define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \
@@ -688,7 +687,7 @@ static x86_def_t builtin_x86_defs[] = {
               CPUID_DE | CPUID_FP87,
          .ext_features = CPUID_EXT_AES | CPUID_EXT_POPCNT | CPUID_EXT_SSE42 |
               CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 |
-             CPUID_EXT_SSE3,
+             CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3,
          .ext2_features = CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
          .ext3_features = CPUID_EXT3_LAHF_LM,
          .xlevel = 0x8000000A,
diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c

index 29a8fb68f8930cf4879e3e1ea8b4802fed519005..c0427fec7a080e163ee4c086e1e367cc545b9b1c 100644 (file)
--- a/target-i386/fpu_helper.c
+++ b/target-i386/fpu_helper.c
@@ -20,6 +20,7 @@
  #include <math.h>
  #include "cpu.h"
  #include "helper.h"
+#include "qemu/aes.h"
  #include "qemu/host-utils.h"
  
  #if !defined(CONFIG_USER_ONLY)
diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h

index a11dba15669b4d64496f3b6998abed87b91c1c26..eb24b5f721102cf28918feb6f2e54b5cc6a3c7a2 100644 (file)
--- a/target-i386/ops_sse.h
+++ b/target-i386/ops_sse.h
@@ -2179,6 +2179,117 @@ target_ulong helper_popcnt(CPUX86State *env, target_ulong n, uint32_t type)
      return POPCOUNT(n, 5);
  #endif
  }
+
+void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                    uint32_t ctrl)
+{
+    uint64_t ah, al, b, resh, resl;
+
+    ah = 0;
+    al = d->Q((ctrl & 1) != 0);
+    b = s->Q((ctrl & 16) != 0);
+    resh = resl = 0;
+
+    while (b) {
+        if (b & 1) {
+            resl ^= al;
+            resh ^= ah;
+        }
+        ah = (ah << 1) | (al >> 63);
+        al <<= 1;
+        b >>= 1;
+    }
+
+    d->Q(0) = resl;
+    d->Q(1) = resh;
+}
+
+/* AES-NI op helpers */
+static const uint8_t aes_shifts[16] = {
+    0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
+};
+
+static const uint8_t aes_ishifts[16] = {
+    0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
+};
+
+void glue(helper_aesdec, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    int i;
+    Reg st = *d;
+    Reg rk = *s;
+
+    for (i = 0 ; i < 4 ; i++) {
+        d->L(i) = rk.L(i) ^ bswap32(AES_Td0[st.B(aes_ishifts[4*i+0])] ^
+                                    AES_Td1[st.B(aes_ishifts[4*i+1])] ^
+                                    AES_Td2[st.B(aes_ishifts[4*i+2])] ^
+                                    AES_Td3[st.B(aes_ishifts[4*i+3])]);
+    }
+}
+
+void glue(helper_aesdeclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    int i;
+    Reg st = *d;
+    Reg rk = *s;
+
+    for (i = 0; i < 16; i++) {
+        d->B(i) = rk.B(i) ^ (AES_Td4[st.B(aes_ishifts[i])] & 0xff);
+    }
+}
+
+void glue(helper_aesenc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    int i;
+    Reg st = *d;
+    Reg rk = *s;
+
+    for (i = 0 ; i < 4 ; i++) {
+        d->L(i) = rk.L(i) ^ bswap32(AES_Te0[st.B(aes_shifts[4*i+0])] ^
+                                    AES_Te1[st.B(aes_shifts[4*i+1])] ^
+                                    AES_Te2[st.B(aes_shifts[4*i+2])] ^
+                                    AES_Te3[st.B(aes_shifts[4*i+3])]);
+    }
+}
+
+void glue(helper_aesenclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    int i;
+    Reg st = *d;
+    Reg rk = *s;
+
+    for (i = 0; i < 16; i++) {
+        d->B(i) = rk.B(i) ^ (AES_Te4[st.B(aes_shifts[i])] & 0xff);
+    }
+
+}
+
+void glue(helper_aesimc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    int i;
+    Reg tmp = *s;
+
+    for (i = 0 ; i < 4 ; i++) {
+        d->L(i) = bswap32(AES_Td0[AES_Te4[tmp.B(4*i+0)] & 0xff] ^
+                          AES_Td1[AES_Te4[tmp.B(4*i+1)] & 0xff] ^
+                          AES_Td2[AES_Te4[tmp.B(4*i+2)] & 0xff] ^
+                          AES_Td3[AES_Te4[tmp.B(4*i+3)] & 0xff]);
+    }
+}
+
+void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                          uint32_t ctrl)
+{
+    int i;
+    Reg tmp = *s;
+
+    for (i = 0 ; i < 4 ; i++) {
+        d->B(i) = AES_Te4[tmp.B(i + 4)] & 0xff;
+        d->B(i + 8) = AES_Te4[tmp.B(i + 12)] & 0xff;
+    }
+    d->L(1) = (d->L(0) << 24 | d->L(0) >> 8) ^ ctrl;
+    d->L(3) = (d->L(2) << 24 | d->L(2) >> 8) ^ ctrl;
+}
  #endif
  
  #undef SHIFT
diff --git a/target-i386/ops_sse_header.h b/target-i386/ops_sse_header.h

index 401eac6f00b4c0be461fbf07b4bda74f8e75d9fe..a68c7cc0c9e08e36baf26357502f5c68e635bfdb 100644 (file)
--- a/target-i386/ops_sse_header.h
+++ b/target-i386/ops_sse_header.h
@@ -336,6 +336,17 @@ DEF_HELPER_3(crc32, tl, i32, tl, i32)
  DEF_HELPER_3(popcnt, tl, env, tl, i32)
  #endif
  
+/* AES-NI op helpers */
+#if SHIFT == 1
+DEF_HELPER_3(glue(aesdec, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesdeclast, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesenc, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesenclast, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesimc, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env, Reg, Reg, i32)
+DEF_HELPER_4(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, i32)
+#endif
+
  #undef SHIFT
  #undef Reg
  #undef SUFFIX
diff --git a/target-i386/translate.c b/target-i386/translate.c

index 7596a90dc4d49b0c3e43524ece23b79fc1637b53..233f24ff41055ca83744543c7286441a8b56e21a 100644 (file)
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -3147,6 +3147,9 @@ struct SSEOpHelper_eppi {
  #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
  #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
  #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
+#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
+        CPUID_EXT_PCLMULQDQ }
+#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
  
  static const struct SSEOpHelper_epp sse_op_table6[256] = {
      [0x00] = SSSE3_OP(pshufb),
@@ -3195,6 +3198,11 @@ static const struct SSEOpHelper_epp sse_op_table6[256] = {
      [0x3f] = SSE41_OP(pmaxud),
      [0x40] = SSE41_OP(pmulld),
      [0x41] = SSE41_OP(phminposuw),
+    [0xdb] = AESNI_OP(aesimc),
+    [0xdc] = AESNI_OP(aesenc),
+    [0xdd] = AESNI_OP(aesenclast),
+    [0xde] = AESNI_OP(aesdec),
+    [0xdf] = AESNI_OP(aesdeclast),
  };
  
  static const struct SSEOpHelper_eppi sse_op_table7[256] = {
@@ -3216,10 +3224,12 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = {
      [0x40] = SSE41_OP(dpps),
      [0x41] = SSE41_OP(dppd),
      [0x42] = SSE41_OP(mpsadbw),
+    [0x44] = PCLMULQDQ_OP(pclmulqdq),
      [0x60] = SSE42_OP(pcmpestrm),
      [0x61] = SSE42_OP(pcmpestri),
      [0x62] = SSE42_OP(pcmpistrm),
      [0x63] = SSE42_OP(pcmpistri),
+    [0xdf] = AESNI_OP(aeskeygenassist),
  };
  
  static void gen_sse(CPUX86State *env, DisasContext *s, int b,
diff --git a/target-microblaze/mmu.c b/target-microblaze/mmu.c

index 53ad263a051b9b678fe236a32e79422694ac80b9..73bf8059be36810e6eea3ecb4bbbdab68d9fdd3d 100644 (file)
--- a/target-microblaze/mmu.c
+++ b/target-microblaze/mmu.c
@@ -114,7 +114,7 @@ unsigned int mmu_translate(struct microblaze_mmu *mmu,
              tlb_ex = d & TLB_EX;
              tlb_wr = d & TLB_WR;
  
-            /* Now lets see if there is a zone that overrides the protbits.  */
+            /* Now let's see if there is a zone that overrides the protbits.  */
              tlb_zsel = (d >> 4) & 0xf;
              t0 = mmu->regs[MMU_R_ZPR] >> (30 - (tlb_zsel * 2));
              t0 &= 0x3;
diff --git a/target-moxie/cpu.c b/target-moxie/cpu.c

index c0855f0573c4c56de5ca76af03ea66e29a7e3008..f2b0791b91bca154c4977d0d6cf1c0f878c276d7 100644 (file)
--- a/target-moxie/cpu.c
+++ b/target-moxie/cpu.c
@@ -97,7 +97,7 @@ static void moxie_cpu_class_init(ObjectClass *oc, void *data)
  
      cc->class_by_name = moxie_cpu_class_by_name;
  
-    dc->vmsd = &vmstate_moxie_cpu;
+    cpu_class_set_vmsd(cc, &vmstate_moxie_cpu);
      cc->do_interrupt = moxie_cpu_do_interrupt;
  }
  
diff --git a/target-moxie/cpu.h b/target-moxie/cpu.h

index 988729a06fb8d1963bb03405d74f6b5b14712d02..a9d9ace3035e44a4de3ebb47e8be3a5947b5b36c 100644 (file)
--- a/target-moxie/cpu.h
+++ b/target-moxie/cpu.h
@@ -28,8 +28,6 @@
  
  #define TARGET_HAS_ICE 1
  
-#define CPU_SAVE_VERSION 1
-
  #define ELF_MACHINE     0xFEED /* EM_MOXIE */
  
  #define MOXIE_EX_DIV0        0
diff --git a/target-moxie/machine.c b/target-moxie/machine.c

index 5bfdb2886a29f49e1bfab11bab5500025624cab5..0f5992b6a0a3f9b26eb202a3f826b5a3ab543030 100644 (file)
--- a/target-moxie/machine.c
+++ b/target-moxie/machine.c
@@ -3,7 +3,7 @@
  
  const VMStateDescription vmstate_moxie_cpu = {
      .name = "cpu",
-    .version_id = CPU_SAVE_VERSION,
+    .version_id = 1,
      .minimum_version_id = 1,
      .minimum_version_id_old = 1,
      .fields      = (VMStateField[]) {
@@ -16,13 +16,3 @@ const VMStateDescription vmstate_moxie_cpu = {
          VMSTATE_END_OF_LIST()
      }
  };
-
-void cpu_save(QEMUFile *f, void *opaque)
-{
-    vmstate_save_state(f, &vmstate_moxie_cpu, opaque);
-}
-
-int cpu_load(QEMUFile *f, void *opaque, int version_id)
-{
-    return vmstate_load_state(f, &vmstate_moxie_cpu, opaque, version_id);
-}
diff --git a/target-s390x/mem_helper.c b/target-s390x/mem_helper.c

index 372334b3c8ffb9256b80b453add36f4b90f15d59..02bc432ce795d3f95fdd4957a0231fa4eea19f9b 100644 (file)
--- a/target-s390x/mem_helper.c
+++ b/target-s390x/mem_helper.c
@@ -344,7 +344,7 @@ uint64_t HELPER(srst)(CPUS390XState *env, uint64_t r0, uint64_t end,
      env->retxl = str;
  
      /* Lest we fail to service interrupts in a timely manner, limit the
-       amount of work we're willing to do.  For now, lets cap at 8k.  */
+       amount of work we're willing to do.  For now, let's cap at 8k.  */
      for (len = 0; len < 0x2000; ++len) {
          if (str + len == end) {
              /* Character not found.  R1 & R2 are unmodified.  */
@@ -375,7 +375,7 @@ uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
      s2 = fix_address(env, s2);
  
      /* Lest we fail to service interrupts in a timely manner, limit the
-       amount of work we're willing to do.  For now, lets cap at 8k.  */
+       amount of work we're willing to do.  For now, let's cap at 8k.  */
      for (len = 0; len < 0x2000; ++len) {
          uint8_t v1 = cpu_ldub_data(env, s1 + len);
          uint8_t v2 = cpu_ldub_data(env, s2 + len);
@@ -424,7 +424,7 @@ uint64_t HELPER(mvst)(CPUS390XState *env, uint64_t c, uint64_t d, uint64_t s)
      s = fix_address(env, s);
  
      /* Lest we fail to service interrupts in a timely manner, limit the
-       amount of work we're willing to do.  For now, lets cap at 8k.  */
+       amount of work we're willing to do.  For now, let's cap at 8k.  */
      for (len = 0; len < 0x2000; ++len) {
          uint8_t v = cpu_ldub_data(env, s + len);
          cpu_stb_data(env, d + len, v);
@@ -708,7 +708,7 @@ uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
      uint64_t cksm = (uint32_t)r1;
  
      /* Lest we fail to service interrupts in a timely manner, limit the
-       amount of work we're willing to do.  For now, lets cap at 8k.  */
+       amount of work we're willing to do.  For now, let's cap at 8k.  */
      max_len = (src_len > 0x2000 ? 0x2000 : src_len);
  
      /* Process full words as available.  */
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c

index e12a15221a1a68f69a4891ea891a0afdd0886695..7d6f777081b0df7f268c5ba3164f816502d5df52 100644 (file)
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -24,16 +24,18 @@
   * THE SOFTWARE.
   */
  
+/* We only support generating code for 64-bit mode.  */
+#if TCG_TARGET_REG_BITS != 64
+#error "unsupported code generation mode"
+#endif
+
  /* ??? The translation blocks produced by TCG are generally small enough to
     be entirely reachable with a 16-bit displacement.  Leaving the option for
     a 32-bit displacement here Just In Case.  */
  #define USE_LONG_BRANCHES 0
  
  #define TCG_CT_CONST_32    0x0100
-#define TCG_CT_CONST_NEG   0x0200
-#define TCG_CT_CONST_ADDI  0x0400
  #define TCG_CT_CONST_MULI  0x0800
-#define TCG_CT_CONST_ANDI  0x1000
  #define TCG_CT_CONST_ORI   0x2000
  #define TCG_CT_CONST_XORI  0x4000
  #define TCG_CT_CONST_CMPI  0x8000
@@ -63,6 +65,7 @@
  typedef enum S390Opcode {
      RIL_AFI     = 0xc209,
      RIL_AGFI    = 0xc208,
+    RIL_ALFI    = 0xc20b,
      RIL_ALGFI   = 0xc20a,
      RIL_BRASL   = 0xc005,
      RIL_BRCL    = 0xc004,
@@ -84,6 +87,8 @@ typedef enum S390Opcode {
      RIL_NILF    = 0xc00b,
      RIL_OIHF    = 0xc00c,
      RIL_OILF    = 0xc00d,
+    RIL_SLFI    = 0xc205,
+    RIL_SLGFI   = 0xc204,
      RIL_XIHF    = 0xc006,
      RIL_XILF    = 0xc007,
  
@@ -118,8 +123,12 @@ typedef enum S390Opcode {
      RIE_CLGIJ   = 0xec7d,
      RIE_CLRJ    = 0xec77,
      RIE_CRJ     = 0xec76,
+    RIE_RISBG   = 0xec55,
  
      RRE_AGR     = 0xb908,
+    RRE_ALGR    = 0xb90a,
+    RRE_ALCR    = 0xb998,
+    RRE_ALCGR   = 0xb988,
      RRE_CGR     = 0xb920,
      RRE_CLGR    = 0xb921,
      RRE_DLGR    = 0xb987,
@@ -137,14 +146,22 @@ typedef enum S390Opcode {
      RRE_LRVR    = 0xb91f,
      RRE_LRVGR   = 0xb90f,
      RRE_LTGR    = 0xb902,
+    RRE_MLGR    = 0xb986,
      RRE_MSGR    = 0xb90c,
      RRE_MSR     = 0xb252,
      RRE_NGR     = 0xb980,
      RRE_OGR     = 0xb981,
      RRE_SGR     = 0xb909,
+    RRE_SLGR    = 0xb90b,
+    RRE_SLBR    = 0xb999,
+    RRE_SLBGR   = 0xb989,
      RRE_XGR     = 0xb982,
  
+    RRF_LOCR    = 0xb9f2,
+    RRF_LOCGR   = 0xb9e2,
+
      RR_AR       = 0x1a,
+    RR_ALR      = 0x1e,
      RR_BASR     = 0x0d,
      RR_BCR      = 0x07,
      RR_CLR      = 0x15,
@@ -156,6 +173,7 @@ typedef enum S390Opcode {
      RR_NR       = 0x14,
      RR_OR       = 0x16,
      RR_SR       = 0x1b,
+    RR_SLR      = 0x1f,
      RR_XR       = 0x17,
  
      RSY_RLL     = 0xeb1d,
@@ -172,6 +190,7 @@ typedef enum S390Opcode {
      RXY_AY      = 0xe35a,
      RXY_CG      = 0xe320,
      RXY_CY      = 0xe359,
+    RXY_LAY     = 0xe371,
      RXY_LB      = 0xe376,
      RXY_LG      = 0xe304,
      RXY_LGB     = 0xe377,
@@ -198,6 +217,7 @@ typedef enum S390Opcode {
      RX_A        = 0x5a,
      RX_C        = 0x59,
      RX_L        = 0x58,
+    RX_LA       = 0x41,
      RX_LH       = 0x48,
      RX_ST       = 0x50,
      RX_STC      = 0x42,
@@ -252,9 +272,6 @@ static const int tcg_target_call_iarg_regs[] = {
  
  static const int tcg_target_call_oarg_regs[] = {
      TCG_REG_R2,
-#if TCG_TARGET_REG_BITS == 32
-    TCG_REG_R3
-#endif
  };
  
  #define S390_CC_EQ      8
@@ -329,6 +346,7 @@ static uint8_t *tb_ret_addr;
  #define FACILITY_LONG_DISP     (1ULL << (63 - 18))
  #define FACILITY_EXT_IMM       (1ULL << (63 - 21))
  #define FACILITY_GEN_INST_EXT  (1ULL << (63 - 34))
+#define FACILITY_LOAD_ON_COND   (1ULL << (63 - 45))
  
  static uint64_t facilities;
  
@@ -376,6 +394,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
          tcg_regset_set32(ct->u.regs, 0, 0xffff);
          tcg_regset_reset_reg (ct->u.regs, TCG_REG_R2);
          tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3);
+        tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4);
          break;
      case 'a':                  /* force R2 for division */
          ct->ct |= TCG_CT_REG;
@@ -387,21 +406,12 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
          tcg_regset_clear(ct->u.regs);
          tcg_regset_set_reg(ct->u.regs, TCG_REG_R3);
          break;
-    case 'N':                  /* force immediate negate */
-        ct->ct |= TCG_CT_CONST_NEG;
-        break;
      case 'W':                  /* force 32-bit ("word") immediate */
          ct->ct |= TCG_CT_CONST_32;
          break;
-    case 'I':
-        ct->ct |= TCG_CT_CONST_ADDI;
-        break;
      case 'K':
          ct->ct |= TCG_CT_CONST_MULI;
          break;
-    case 'A':
-        ct->ct |= TCG_CT_CONST_ANDI;
-        break;
      case 'O':
          ct->ct |= TCG_CT_CONST_ORI;
          break;
@@ -420,63 +430,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
      return 0;
  }
  
-/* Immediates to be used with logical AND.  This is an optimization only,
-   since a full 64-bit immediate AND can always be performed with 4 sequential
-   NI[LH][LH] instructions.  What we're looking for is immediates that we
-   can load efficiently, and the immediate load plus the reg-reg AND is
-   smaller than the sequential NI's.  */
-
-static int tcg_match_andi(int ct, tcg_target_ulong val)
-{
-    int i;
-
-    if (facilities & FACILITY_EXT_IMM) {
-        if (ct & TCG_CT_CONST_32) {
-            /* All 32-bit ANDs can be performed with 1 48-bit insn.  */
-            return 1;
-        }
-
-        /* Zero-extensions.  */
-        if (val == 0xff || val == 0xffff || val == 0xffffffff) {
-            return 1;
-        }
-    } else {
-        if (ct & TCG_CT_CONST_32) {
-            val = (uint32_t)val;
-        } else if (val == 0xffffffff) {
-            return 1;
-        }
-    }
-
-    /* Try all 32-bit insns that can perform it in one go.  */
-    for (i = 0; i < 4; i++) {
-        tcg_target_ulong mask = ~(0xffffull << i*16);
-        if ((val & mask) == mask) {
-            return 1;
-        }
-    }
-
-    /* Look for 16-bit values performing the mask.  These are better
-       to load with LLI[LH][LH].  */
-    for (i = 0; i < 4; i++) {
-        tcg_target_ulong mask = 0xffffull << i*16;
-        if ((val & mask) == val) {
-            return 0;
-        }
-    }
-
-    /* Look for 32-bit values performing the 64-bit mask.  These
-       are better to load with LLI[LH]F, or if extended immediates
-       not available, with a pair of LLI insns.  */
-    if ((ct & TCG_CT_CONST_32) == 0) {
-        if (val <= 0xffffffff || (val & 0xffffffff) == 0) {
-            return 0;
-        }
-    }
-
-    return 1;
-}
-
  /* Immediates to be used with logical OR.  This is an optimization only,
     since a full 64-bit immediate OR can always be performed with 4 sequential
     OI[LH][LH] instructions.  What we're looking for is immediates that we
@@ -571,25 +524,12 @@ static int tcg_target_const_match(tcg_target_long val,
      }
  
      /* Handle the modifiers.  */
-    if (ct & TCG_CT_CONST_NEG) {
-        val = -val;
-    }
      if (ct & TCG_CT_CONST_32) {
          val = (int32_t)val;
      }
  
      /* The following are mutually exclusive.  */
-    if (ct & TCG_CT_CONST_ADDI) {
-        /* Immediates that may be used with add.  If we have the
-           extended-immediates facility then we have ADD IMMEDIATE
-           with signed and unsigned 32-bit, otherwise we have only
-           ADD HALFWORD IMMEDIATE with a signed 16-bit.  */
-        if (facilities & FACILITY_EXT_IMM) {
-            return val == (int32_t)val || val == (uint32_t)val;
-        } else {
-            return val == (int16_t)val;
-        }
-    } else if (ct & TCG_CT_CONST_MULI) {
+    if (ct & TCG_CT_CONST_MULI) {
          /* Immediates that may be used with multiply.  If we have the
             general-instruction-extensions, then we have MULTIPLY SINGLE
             IMMEDIATE with a signed 32-bit, otherwise we have only
@@ -599,8 +539,6 @@ static int tcg_target_const_match(tcg_target_long val,
          } else {
              return val == (int16_t)val;
          }
-    } else if (ct & TCG_CT_CONST_ANDI) {
-        return tcg_match_andi(ct, val);
      } else if (ct & TCG_CT_CONST_ORI) {
          return tcg_match_ori(ct, val);
      } else if (ct & TCG_CT_CONST_XORI) {
@@ -625,6 +563,12 @@ static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
      tcg_out32(s, (op << 16) | (r1 << 4) | r2);
  }
  
+static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op,
+                             TCGReg r1, TCGReg r2, int m3)
+{
+    tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
+}
+
  static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
  {
      tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
@@ -770,7 +714,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
      /* If we get here, both the high and low parts have non-zero bits.  */
  
      /* Recurse to load the lower 32-bits.  */
-    tcg_out_movi(s, TCG_TYPE_I32, ret, sval);
+    tcg_out_movi(s, TCG_TYPE_I64, ret, uval & 0xffffffff);
  
      /* Insert data into the high 32-bits.  */
      uval = uval >> 31 >> 1;
@@ -804,10 +748,11 @@ static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
                          tcg_target_long ofs)
  {
      if (ofs < -0x80000 || ofs >= 0x80000) {
-        /* Combine the low 16 bits of the offset with the actual load insn;
-           the high 48 bits must come from an immediate load.  */
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs & ~0xffff);
-        ofs &= 0xffff;
+        /* Combine the low 20 bits of the offset with the actual load insn;
+           the high 44 bits must come from an immediate load.  */
+        tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
+        ofs = low;
  
          /* If we were already given an index register, add it in.  */
          if (index != TCG_REG_NONE) {
@@ -866,6 +811,15 @@ static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs)
      tcg_out_ld(s, type, dest, dest, addr & 0xffff);
  }
  
+static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
+                                 int msb, int lsb, int ofs, int z)
+{
+    /* Format RIE-f */
+    tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src);
+    tcg_out16(s, (msb << 8) | (z << 7) | lsb);
+    tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff));
+}
+
  static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
  {
      if (facilities & FACILITY_EXT_IMM) {
@@ -956,30 +910,37 @@ static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
      tcg_out_insn(s, RRE, LLGFR, dest, src);
  }
  
-static inline void tgen32_addi(TCGContext *s, TCGReg dest, int32_t val)
+/* Accept bit patterns like these:
+    0....01....1
+    1....10....0
+    1..10..01..1
+    0..01..10..0
+   Copied from gcc sources.  */
+static inline bool risbg_mask(uint64_t c)
  {
-    if (val == (int16_t)val) {
-        tcg_out_insn(s, RI, AHI, dest, val);
-    } else {
-        tcg_out_insn(s, RIL, AFI, dest, val);
-    }
-}
-
-static inline void tgen64_addi(TCGContext *s, TCGReg dest, int64_t val)
-{
-    if (val == (int16_t)val) {
-        tcg_out_insn(s, RI, AGHI, dest, val);
-    } else if (val == (int32_t)val) {
-        tcg_out_insn(s, RIL, AGFI, dest, val);
-    } else if (val == (uint32_t)val) {
-        tcg_out_insn(s, RIL, ALGFI, dest, val);
-    } else {
-        tcg_abort();
-    }
-
+    uint64_t lsb;
+    /* We don't change the number of transitions by inverting,
+       so make sure we start with the LSB zero.  */
+    if (c & 1) {
+        c = ~c;
+    }
+    /* Reject all zeros or all ones.  */
+    if (c == 0) {
+        return false;
+    }
+    /* Find the first transition.  */
+    lsb = c & -c;
+    /* Invert to look for a second transition.  */
+    c = ~c;
+    /* Erase the first transition.  */
+    c &= -lsb;
+    /* Find the second transition, if any.  */
+    lsb = c & -c;
+    /* Match if all the bits are 1's, or if c is zero.  */
+    return c == -lsb;
  }
  
-static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
  {
      static const S390Opcode ni_insns[4] = {
          RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
@@ -987,63 +948,64 @@ static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
      static const S390Opcode nif_insns[2] = {
          RIL_NILF, RIL_NIHF
      };
-
+    uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
      int i;
  
-    /* Look for no-op.  */
-    if (val == -1) {
-        return;
-    }
-
      /* Look for the zero-extensions.  */
-    if (val == 0xffffffff) {
+    if ((val & valid) == 0xffffffff) {
          tgen_ext32u(s, dest, dest);
          return;
      }
-
      if (facilities & FACILITY_EXT_IMM) {
-        if (val == 0xff) {
+        if ((val & valid) == 0xff) {
              tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
              return;
          }
-        if (val == 0xffff) {
+        if ((val & valid) == 0xffff) {
              tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
              return;
          }
+    }
  
-        /* Try all 32-bit insns that can perform it in one go.  */
-        for (i = 0; i < 4; i++) {
-            tcg_target_ulong mask = ~(0xffffull << i*16);
-            if ((val & mask) == mask) {
-                tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
-                return;
-            }
+    /* Try all 32-bit insns that can perform it in one go.  */
+    for (i = 0; i < 4; i++) {
+        tcg_target_ulong mask = ~(0xffffull << i*16);
+        if (((val | ~valid) & mask) == mask) {
+            tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+            return;
          }
+    }
  
-        /* Try all 48-bit insns that can perform it in one go.  */
-        if (facilities & FACILITY_EXT_IMM) {
-            for (i = 0; i < 2; i++) {
-                tcg_target_ulong mask = ~(0xffffffffull << i*32);
-                if ((val & mask) == mask) {
-                    tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
-                    return;
-                }
+    /* Try all 48-bit insns that can perform it in one go.  */
+    if (facilities & FACILITY_EXT_IMM) {
+        for (i = 0; i < 2; i++) {
+            tcg_target_ulong mask = ~(0xffffffffull << i*32);
+            if (((val | ~valid) & mask) == mask) {
+                tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+                return;
              }
          }
+    }
+    if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) {
+        int msb, lsb;
+        if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
+            /* Wraparound: start at the low run of 1s, end at the high run.  */
+            msb = 64 - ctz64(~val);
+            lsb = clz64(~val) - 1;
+        } else {
+            msb = clz64(val);
+            lsb = 63 - ctz64(val);
+        }
+        tcg_out_risbg(s, dest, dest, msb, lsb, 0, 1);
+        return;
+    }
  
-        /* Perform the AND via sequential modifications to the high and low
-           parts.  Do this via recursion to handle 16-bit vs 32-bit masks in
-           each half.  */
-        tgen64_andi(s, dest, val | 0xffffffff00000000ull);
-        tgen64_andi(s, dest, val | 0x00000000ffffffffull);
+    /* Fall back to loading the constant.  */
+    tcg_out_movi(s, type, TCG_TMP0, val);
+    if (type == TCG_TYPE_I32) {
+        tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
      } else {
-        /* With no extended-immediate facility, just emit the sequence.  */
-        for (i = 0; i < 4; i++) {
-            tcg_target_ulong mask = 0xffffull << i*16;
-            if ((val & mask) != mask) {
-                tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
-            }
-        }
+        tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
      }
  }
  
@@ -1156,9 +1118,9 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
  }
  
  static void tgen_setcond(TCGContext *s, TCGType type, TCGCond c,
-                         TCGReg dest, TCGReg r1, TCGArg c2, int c2const)
+                         TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
  {
-    int cc = tgen_cmp(s, type, c, r1, c2, c2const);
+    int cc = tgen_cmp(s, type, c, c1, c2, c2const);
  
      /* Emit: r1 = 1; if (cc) goto over; r1 = 0; over:  */
      tcg_out_movi(s, type, dest, 1);
@@ -1166,6 +1128,36 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond c,
      tcg_out_movi(s, type, dest, 0);
  }
  
+static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
+                         TCGReg c1, TCGArg c2, int c2const, TCGReg r3)
+{
+    int cc;
+    if (facilities & FACILITY_LOAD_ON_COND) {
+        cc = tgen_cmp(s, type, c, c1, c2, c2const);
+        tcg_out_insn(s, RRF, LOCGR, dest, r3, cc);
+    } else {
+        c = tcg_invert_cond(c);
+        cc = tgen_cmp(s, type, c, c1, c2, c2const);
+
+        /* Emit: if (cc) goto over; dest = r3; over:  */
+        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
+        tcg_out_insn(s, RRE, LGR, dest, r3);
+    }
+}
+
+bool tcg_target_deposit_valid(int ofs, int len)
+{
+    return (facilities & FACILITY_GEN_INST_EXT) != 0;
+}
+
+static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
+                         int ofs, int len)
+{
+    int lsb = (63 - ofs);
+    int msb = lsb - (len - 1);
+    tcg_out_risbg(s, dest, src, msb, lsb, ofs, 0);
+}
+
  static void tgen_gotoi(TCGContext *s, int cc, tcg_target_long dest)
  {
      tcg_target_long off = (dest - (tcg_target_long)s->code_ptr) >> 1;
@@ -1239,7 +1231,7 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
      int cc;
  
      if (facilities & FACILITY_GEN_INST_EXT) {
-        bool is_unsigned = (c > TCG_COND_GT);
+        bool is_unsigned = is_unsigned_cond(c);
          bool in_range;
          S390Opcode opc;
  
@@ -1405,37 +1397,29 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data,
  }
  
  #if defined(CONFIG_SOFTMMU)
-static void tgen64_andi_tmp(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
+                                    TCGReg addr_reg, int mem_index, int opc,
+                                    uint16_t **label2_ptr_p, int is_store)
  {
-    if (tcg_match_andi(0, val)) {
-        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
-        tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
-    } else {
-        tgen64_andi(s, dest, val);
-    }
-}
-
-static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
-                                  TCGReg addr_reg, int mem_index, int opc,
-                                  uint16_t **label2_ptr_p, int is_store)
-{
-    const TCGReg arg0 = TCG_REG_R2;
-    const TCGReg arg1 = TCG_REG_R3;
+    const TCGReg arg0 = tcg_target_call_iarg_regs[0];
+    const TCGReg arg1 = tcg_target_call_iarg_regs[1];
+    const TCGReg arg2 = tcg_target_call_iarg_regs[2];
+    const TCGReg arg3 = tcg_target_call_iarg_regs[3];
      int s_bits = opc & 3;
      uint16_t *label1_ptr;
      tcg_target_long ofs;
  
      if (TARGET_LONG_BITS == 32) {
-        tgen_ext32u(s, arg0, addr_reg);
+        tgen_ext32u(s, arg1, addr_reg);
      } else {
-        tcg_out_mov(s, TCG_TYPE_I64, arg0, addr_reg);
+        tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg);
      }
  
-    tcg_out_sh64(s, RSY_SRLG, arg1, addr_reg, TCG_REG_NONE,
+    tcg_out_sh64(s, RSY_SRLG, arg2, addr_reg, TCG_REG_NONE,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
  
-    tgen64_andi_tmp(s, arg0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    tgen64_andi_tmp(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+    tgen_andi(s, TCG_TYPE_I64, arg1, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+    tgen_andi(s, TCG_TYPE_I64, arg2, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
  
      if (is_store) {
          ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
@@ -1445,15 +1429,15 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
      assert(ofs < 0x80000);
  
      if (TARGET_LONG_BITS == 32) {
-        tcg_out_mem(s, RX_C, RXY_CY, arg0, arg1, TCG_AREG0, ofs);
+        tcg_out_mem(s, RX_C, RXY_CY, arg1, arg2, TCG_AREG0, ofs);
      } else {
-        tcg_out_mem(s, 0, RXY_CG, arg0, arg1, TCG_AREG0, ofs);
+        tcg_out_mem(s, 0, RXY_CG, arg1, arg2, TCG_AREG0, ofs);
      }
  
      if (TARGET_LONG_BITS == 32) {
-        tgen_ext32u(s, arg0, addr_reg);
+        tgen_ext32u(s, arg1, addr_reg);
      } else {
-        tcg_out_mov(s, TCG_TYPE_I64, arg0, addr_reg);
+        tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg);
      }
  
      label1_ptr = (uint16_t*)s->code_ptr;
@@ -1467,56 +1451,42 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
             for the calling convention.  */
          switch (opc) {
          case LD_UINT8:
-            tgen_ext8u(s, TCG_TYPE_I64, arg1, data_reg);
+            tgen_ext8u(s, TCG_TYPE_I64, arg2, data_reg);
              break;
          case LD_UINT16:
-            tgen_ext16u(s, TCG_TYPE_I64, arg1, data_reg);
+            tgen_ext16u(s, TCG_TYPE_I64, arg2, data_reg);
              break;
          case LD_UINT32:
-            tgen_ext32u(s, arg1, data_reg);
+            tgen_ext32u(s, arg2, data_reg);
              break;
          case LD_UINT64:
-            tcg_out_mov(s, TCG_TYPE_I64, arg1, data_reg);
+            tcg_out_mov(s, TCG_TYPE_I64, arg2, data_reg);
              break;
          default:
              tcg_abort();
          }
-        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, mem_index);
-        /* XXX/FIXME: suboptimal */
-        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
-                    tcg_target_call_iarg_regs[2]);
-        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                    tcg_target_call_iarg_regs[1]);
-        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
-                    tcg_target_call_iarg_regs[0]);
-        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
-                    TCG_AREG0);
+        tcg_out_movi(s, TCG_TYPE_I32, arg3, mem_index);
+        tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0);
          tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]);
      } else {
-        tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
-        /* XXX/FIXME: suboptimal */
-        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                    tcg_target_call_iarg_regs[1]);
-        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
-                    tcg_target_call_iarg_regs[0]);
-        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
-                    TCG_AREG0);
+        tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
+        tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0);
          tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]);
  
          /* sign extension */
          switch (opc) {
          case LD_INT8:
-            tgen_ext8s(s, TCG_TYPE_I64, data_reg, arg0);
+            tgen_ext8s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
              break;
          case LD_INT16:
-            tgen_ext16s(s, TCG_TYPE_I64, data_reg, arg0);
+            tgen_ext16s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
              break;
          case LD_INT32:
-            tgen_ext32s(s, data_reg, arg0);
+            tgen_ext32s(s, data_reg, TCG_REG_R2);
              break;
          default:
              /* unsigned -> just copy */
-            tcg_out_mov(s, TCG_TYPE_I64, data_reg, arg0);
+            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
              break;
          }
      }
@@ -1533,7 +1503,9 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
      ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
      assert(ofs < 0x80000);
  
-    tcg_out_mem(s, 0, RXY_AG, arg0, arg1, TCG_AREG0, ofs);
+    tcg_out_mem(s, 0, RXY_AG, arg1, arg2, TCG_AREG0, ofs);
+
+    return arg1;
  }
  
  static void tcg_finish_qemu_ldst(TCGContext* s, uint16_t *label2_ptr)
@@ -1579,10 +1551,10 @@ static void tcg_out_qemu_ld(TCGContext* s, const TCGArg* args, int opc)
  #if defined(CONFIG_SOFTMMU)
      mem_index = *args;
  
-    tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
-                          opc, &label2_ptr, 0);
+    addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
+                                     opc, &label2_ptr, 0);
  
-    tcg_out_qemu_ld_direct(s, opc, data_reg, TCG_REG_R2, TCG_REG_NONE, 0);
+    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0);
  
      tcg_finish_qemu_ldst(s, label2_ptr);
  #else
@@ -1608,10 +1580,10 @@ static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, int opc)
  #if defined(CONFIG_SOFTMMU)
      mem_index = *args;
  
-    tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
-                          opc, &label2_ptr, 1);
+    addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
+                                     opc, &label2_ptr, 1);
  
-    tcg_out_qemu_st_direct(s, opc, data_reg, TCG_REG_R2, TCG_REG_NONE, 0);
+    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0);
  
      tcg_finish_qemu_ldst(s, label2_ptr);
  #else
@@ -1620,19 +1592,15 @@ static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, int opc)
  #endif
  }
  
-#if TCG_TARGET_REG_BITS == 64
  # define OP_32_64(x) \
          case glue(glue(INDEX_op_,x),_i32): \
          case glue(glue(INDEX_op_,x),_i64)
-#else
-# define OP_32_64(x) \
-        case glue(glue(INDEX_op_,x),_i32)
-#endif
  
  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                  const TCGArg *args, const int *const_args)
  {
      S390Opcode op;
+    TCGArg a0, a1, a2;
  
      switch (opc) {
      case INDEX_op_exit_tb:
@@ -1708,23 +1676,38 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
          break;
  
      case INDEX_op_add_i32:
+        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
          if (const_args[2]) {
-            tgen32_addi(s, args[0], args[2]);
+        do_addi_32:
+            if (a0 == a1) {
+                if (a2 == (int16_t)a2) {
+                    tcg_out_insn(s, RI, AHI, a0, a2);
+                    break;
+                }
+                if (facilities & FACILITY_EXT_IMM) {
+                    tcg_out_insn(s, RIL, AFI, a0, a2);
+                    break;
+                }
+            }
+            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
+        } else if (a0 == a1) {
+            tcg_out_insn(s, RR, AR, a0, a2);
          } else {
-            tcg_out_insn(s, RR, AR, args[0], args[2]);
+            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
          }
          break;
      case INDEX_op_sub_i32:
+        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
          if (const_args[2]) {
-            tgen32_addi(s, args[0], -args[2]);
-        } else {
-            tcg_out_insn(s, RR, SR, args[0], args[2]);
+            a2 = -a2;
+            goto do_addi_32;
          }
+        tcg_out_insn(s, RR, SR, args[0], args[2]);
          break;
  
      case INDEX_op_and_i32:
          if (const_args[2]) {
-            tgen64_andi(s, args[0], args[2] | 0xffffffff00000000ull);
+            tgen_andi(s, TCG_TYPE_I32, args[0], args[2]);
          } else {
              tcg_out_insn(s, RR, NR, args[0], args[2]);
          }
@@ -1824,6 +1807,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
          tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
          break;
  
+    case INDEX_op_add2_i32:
+        /* ??? Make use of ALFI.  */
+        tcg_out_insn(s, RR, ALR, args[0], args[4]);
+        tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
+        break;
+    case INDEX_op_sub2_i32:
+        /* ??? Make use of SLFI.  */
+        tcg_out_insn(s, RR, SLR, args[0], args[4]);
+        tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
+        break;
+
      case INDEX_op_br:
          tgen_branch(s, S390_CC_ALWAYS, args[0]);
          break;
@@ -1836,6 +1830,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
          tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
                       args[2], const_args[2]);
          break;
+    case INDEX_op_movcond_i32:
+        tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
+                     args[2], const_args[2], args[3]);
+        break;
  
      case INDEX_op_qemu_ld8u:
          tcg_out_qemu_ld(s, args, LD_UINT8);
@@ -1870,7 +1868,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
          tcg_out_qemu_st(s, args, LD_UINT64);
          break;
  
-#if TCG_TARGET_REG_BITS == 64
      case INDEX_op_mov_i64:
          tcg_out_mov(s, TCG_TYPE_I64, args[0], args[1]);
          break;
@@ -1899,15 +1896,39 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
          break;
  
      case INDEX_op_add_i64:
+        a0 = args[0], a1 = args[1], a2 = args[2];
          if (const_args[2]) {
-            tgen64_addi(s, args[0], args[2]);
+        do_addi_64:
+            if (a0 == a1) {
+                if (a2 == (int16_t)a2) {
+                    tcg_out_insn(s, RI, AGHI, a0, a2);
+                    break;
+                }
+                if (facilities & FACILITY_EXT_IMM) {
+                    if (a2 == (int32_t)a2) {
+                        tcg_out_insn(s, RIL, AGFI, a0, a2);
+                        break;
+                    } else if (a2 == (uint32_t)a2) {
+                        tcg_out_insn(s, RIL, ALGFI, a0, a2);
+                        break;
+                    } else if (-a2 == (uint32_t)-a2) {
+                        tcg_out_insn(s, RIL, SLGFI, a0, -a2);
+                        break;
+                    }
+                }
+            }
+            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
+        } else if (a0 == a1) {
+            tcg_out_insn(s, RRE, AGR, a0, a2);
          } else {
-            tcg_out_insn(s, RRE, AGR, args[0], args[2]);
+            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
          }
          break;
      case INDEX_op_sub_i64:
+        a0 = args[0], a1 = args[1], a2 = args[2];
          if (const_args[2]) {
-            tgen64_addi(s, args[0], -args[2]);
+            a2 = -a2;
+            goto do_addi_64;
          } else {
              tcg_out_insn(s, RRE, SGR, args[0], args[2]);
          }
@@ -1915,7 +1936,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
  
      case INDEX_op_and_i64:
          if (const_args[2]) {
-            tgen64_andi(s, args[0], args[2]);
+            tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
          } else {
              tcg_out_insn(s, RRE, NGR, args[0], args[2]);
          }
@@ -1964,6 +1985,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
      case INDEX_op_divu2_i64:
          tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
          break;
+    case INDEX_op_mulu2_i64:
+        tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
+        break;
  
      case INDEX_op_shl_i64:
          op = RSY_SLLG;
@@ -2020,6 +2044,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
          tgen_ext32u(s, args[0], args[1]);
          break;
  
+    case INDEX_op_add2_i64:
+        /* ??? Make use of ALGFI and SLGFI.  */
+        tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
+        tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
+        break;
+    case INDEX_op_sub2_i64:
+        /* ??? Make use of ALGFI and SLGFI.  */
+        tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
+        tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
+        break;
+
      case INDEX_op_brcond_i64:
          tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
                      args[1], const_args[1], args[3]);
@@ -2028,6 +2063,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
          tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
                       args[2], const_args[2]);
          break;
+    case INDEX_op_movcond_i64:
+        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
+                     args[2], const_args[2], args[3]);
+        break;
  
      case INDEX_op_qemu_ld32u:
          tcg_out_qemu_ld(s, args, LD_UINT32);
@@ -2035,7 +2074,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
      case INDEX_op_qemu_ld32s:
          tcg_out_qemu_ld(s, args, LD_INT32);
          break;
-#endif /* TCG_TARGET_REG_BITS == 64 */
+
+    OP_32_64(deposit):
+        tgen_deposit(s, args[0], args[2], args[3], args[4]);
+        break;
  
      default:
          fprintf(stderr,"unimplemented opc 0x%x\n",opc);
@@ -2061,14 +2103,14 @@ static const TCGTargetOpDef s390_op_defs[] = {
      { INDEX_op_st16_i32, { "r", "r" } },
      { INDEX_op_st_i32, { "r", "r" } },
  
-    { INDEX_op_add_i32, { "r", "0", "rWI" } },
-    { INDEX_op_sub_i32, { "r", "0", "rWNI" } },
+    { INDEX_op_add_i32, { "r", "r", "ri" } },
+    { INDEX_op_sub_i32, { "r", "0", "ri" } },
      { INDEX_op_mul_i32, { "r", "0", "rK" } },
  
      { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } },
      { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } },
  
-    { INDEX_op_and_i32, { "r", "0", "rWA" } },
+    { INDEX_op_and_i32, { "r", "0", "ri" } },
      { INDEX_op_or_i32, { "r", "0", "rWO" } },
      { INDEX_op_xor_i32, { "r", "0", "rWX" } },
  
@@ -2089,8 +2131,13 @@ static const TCGTargetOpDef s390_op_defs[] = {
      { INDEX_op_bswap16_i32, { "r", "r" } },
      { INDEX_op_bswap32_i32, { "r", "r" } },
  
+    { INDEX_op_add2_i32, { "r", "r", "0", "1", "r", "r" } },
+    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "r", "r" } },
+
      { INDEX_op_brcond_i32, { "r", "rWC" } },
      { INDEX_op_setcond_i32, { "r", "r", "rWC" } },
+    { INDEX_op_movcond_i32, { "r", "r", "rWC", "r", "0" } },
+    { INDEX_op_deposit_i32, { "r", "0", "r" } },
  
      { INDEX_op_qemu_ld8u, { "r", "L" } },
      { INDEX_op_qemu_ld8s, { "r", "L" } },
@@ -2104,7 +2151,6 @@ static const TCGTargetOpDef s390_op_defs[] = {
      { INDEX_op_qemu_st32, { "L", "L" } },
      { INDEX_op_qemu_st64, { "L", "L" } },
  
-#if defined(__s390x__)
      { INDEX_op_mov_i64, { "r", "r" } },
      { INDEX_op_movi_i64, { "r" } },
  
@@ -2121,14 +2167,15 @@ static const TCGTargetOpDef s390_op_defs[] = {
      { INDEX_op_st32_i64, { "r", "r" } },
      { INDEX_op_st_i64, { "r", "r" } },
  
-    { INDEX_op_add_i64, { "r", "0", "rI" } },
-    { INDEX_op_sub_i64, { "r", "0", "rNI" } },
+    { INDEX_op_add_i64, { "r", "r", "ri" } },
+    { INDEX_op_sub_i64, { "r", "0", "ri" } },
      { INDEX_op_mul_i64, { "r", "0", "rK" } },
  
      { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } },
      { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } },
+    { INDEX_op_mulu2_i64, { "b", "a", "0", "r" } },
  
-    { INDEX_op_and_i64, { "r", "0", "rA" } },
+    { INDEX_op_and_i64, { "r", "0", "ri" } },
      { INDEX_op_or_i64, { "r", "0", "rO" } },
      { INDEX_op_xor_i64, { "r", "0", "rX" } },
  
@@ -2152,12 +2199,16 @@ static const TCGTargetOpDef s390_op_defs[] = {
      { INDEX_op_bswap32_i64, { "r", "r" } },
      { INDEX_op_bswap64_i64, { "r", "r" } },
  
+    { INDEX_op_add2_i64, { "r", "r", "0", "1", "r", "r" } },
+    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "r", "r" } },
+
      { INDEX_op_brcond_i64, { "r", "rC" } },
      { INDEX_op_setcond_i64, { "r", "r", "rC" } },
+    { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } },
+    { INDEX_op_deposit_i64, { "r", "0", "r" } },
  
      { INDEX_op_qemu_ld32u, { "r", "L" } },
      { INDEX_op_qemu_ld32s, { "r", "L" } },
-#endif
  
      { -1 },
  };
@@ -2302,17 +2353,24 @@ static void tcg_target_init(TCGContext *s)
      tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
  
      tcg_add_target_add_op_defs(s390_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
-                  CPU_TEMP_BUF_NLONGS * sizeof(long));
  }
  
  static void tcg_target_qemu_prologue(TCGContext *s)
  {
+    tcg_target_long frame_size;
+
      /* stmg %r6,%r15,48(%r15) (save registers) */
      tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
  
-    /* aghi %r15,-160 (stack frame) */
-    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -160);
+    /* aghi %r15,-frame_size */
+    frame_size = TCG_TARGET_CALL_STACK_OFFSET;
+    frame_size += TCG_STATIC_CALL_ARGS_SIZE;
+    frame_size += CPU_TEMP_BUF_NLONGS * sizeof(long);
+    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -frame_size);
+
+    tcg_set_frame(s, TCG_REG_CALL_STACK,
+                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
+                  CPU_TEMP_BUF_NLONGS * sizeof(long));
  
      if (GUEST_BASE >= 0x80000) {
          tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
@@ -2325,8 +2383,9 @@ static void tcg_target_qemu_prologue(TCGContext *s)
  
      tb_ret_addr = s->code_ptr;
  
-    /* lmg %r6,%r15,208(%r15) (restore registers) */
-    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 208);
+    /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
+    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
+                 frame_size + 48);
  
      /* br %r14 (return) */
      tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h

index 40211e68f10b9bc0d3bba425a02ae5ab648f439f..42ca36c0e958e6c10a55fd4e230d0eaaae4923da 100644 (file)
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -63,14 +63,13 @@ typedef enum TCGReg {
  #define TCG_TARGET_HAS_eqv_i32          0
  #define TCG_TARGET_HAS_nand_i32         0
  #define TCG_TARGET_HAS_nor_i32          0
-#define TCG_TARGET_HAS_deposit_i32      0
-#define TCG_TARGET_HAS_movcond_i32      0
-#define TCG_TARGET_HAS_add2_i32         0
-#define TCG_TARGET_HAS_sub2_i32         0
+#define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_movcond_i32      1
+#define TCG_TARGET_HAS_add2_i32         1
+#define TCG_TARGET_HAS_sub2_i32         1
  #define TCG_TARGET_HAS_mulu2_i32        0
  #define TCG_TARGET_HAS_muls2_i32        0
  
-#if TCG_TARGET_REG_BITS == 64
  #define TCG_TARGET_HAS_div2_i64         1
  #define TCG_TARGET_HAS_rot_i64          1
  #define TCG_TARGET_HAS_ext8s_i64        1
@@ -89,18 +88,21 @@ typedef enum TCGReg {
  #define TCG_TARGET_HAS_eqv_i64          0
  #define TCG_TARGET_HAS_nand_i64         0
  #define TCG_TARGET_HAS_nor_i64          0
-#define TCG_TARGET_HAS_deposit_i64      0
-#define TCG_TARGET_HAS_movcond_i64      0
-#define TCG_TARGET_HAS_add2_i64         0
-#define TCG_TARGET_HAS_sub2_i64         0
-#define TCG_TARGET_HAS_mulu2_i64        0
+#define TCG_TARGET_HAS_deposit_i64      1
+#define TCG_TARGET_HAS_movcond_i64      1
+#define TCG_TARGET_HAS_add2_i64         1
+#define TCG_TARGET_HAS_sub2_i64         1
+#define TCG_TARGET_HAS_mulu2_i64        1
  #define TCG_TARGET_HAS_muls2_i64        0
-#endif
+
+extern bool tcg_target_deposit_valid(int ofs, int len);
+#define TCG_TARGET_deposit_i32_valid  tcg_target_deposit_valid
+#define TCG_TARGET_deposit_i64_valid  tcg_target_deposit_valid
  
  /* used for function call generation */
  #define TCG_REG_CALL_STACK             TCG_REG_R15
  #define TCG_TARGET_STACK_ALIGN         8
-#define TCG_TARGET_CALL_STACK_OFFSET   0
+#define TCG_TARGET_CALL_STACK_OFFSET   160
  
  #define TCG_TARGET_EXTEND_ARGS 1
  
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c

index 2d561b32e16024f9182967c0d0ab5f0b5ac8018c..d1241b569213aad0cd8bee6f3be2ea4b64e60c3e 100644 (file)
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -40,14 +40,6 @@
  /* Bitfield n...m (in 32 bit value). */
  #define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m)
  
-/* Used for function call generation. */
-#define TCG_REG_CALL_STACK              TCG_REG_R4
-#define TCG_TARGET_STACK_ALIGN          16
-#define TCG_TARGET_CALL_STACK_OFFSET    0
-
-/* TODO: documentation. */
-static uint8_t *tb_ret_addr;
-
  /* Macros used in tcg_target_op_defs. */
  #define R       "r"
  #define RI      "ri"
@@ -513,7 +505,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
          tcg_out_op_t(s, INDEX_op_ld_i64);
          tcg_out_r(s, ret);
          tcg_out_r(s, arg1);
-        assert(arg2 == (uint32_t)arg2);
+        assert(arg2 == (int32_t)arg2);
          tcg_out32(s, arg2);
  #else
          TODO();
@@ -636,7 +628,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
      case INDEX_op_st_i64:
          tcg_out_r(s, args[0]);
          tcg_out_r(s, args[1]);
-        assert(args[2] == (uint32_t)args[2]);
+        assert(args[2] == (int32_t)args[2]);
          tcg_out32(s, args[2]);
          break;
      case INDEX_op_add_i32:
@@ -904,15 +896,19 @@ static void tcg_target_init(TCGContext *s)
      /* TODO: Which registers should be set here? */
      tcg_regset_set32(tcg_target_call_clobber_regs, 0,
                       BIT(TCG_TARGET_NB_REGS) - 1);
+
      tcg_regset_clear(s->reserved_regs);
      tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
      tcg_add_target_add_op_defs(tcg_target_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
+
+    /* We use negative offsets from "sp" so that we can distinguish
+       stores that might pretend to be call arguments.  */
+    tcg_set_frame(s, TCG_REG_CALL_STACK,
+                  -CPU_TEMP_BUF_NLONGS * sizeof(long),
                    CPU_TEMP_BUF_NLONGS * sizeof(long));
  }
  
  /* Generate global QEMU prologue and epilogue code. */
-static void tcg_target_qemu_prologue(TCGContext *s)
+static inline void tcg_target_qemu_prologue(TCGContext *s)
  {
-    tb_ret_addr = s->code_ptr;
  }
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h

index 1f17576f549c1990d3b1b41a958e34c37382a3a4..0395bbb8e42e26b363fb04e3d1baf5e52b934731 100644 (file)
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -127,7 +127,6 @@ typedef enum {
      TCG_REG_R5,
      TCG_REG_R6,
      TCG_REG_R7,
-    TCG_AREG0 = TCG_REG_R7,
  #if TCG_TARGET_NB_REGS >= 16
      TCG_REG_R8,
      TCG_REG_R9,
@@ -160,6 +159,13 @@ typedef enum {
      TCG_CONST = UINT8_MAX
  } TCGReg;
  
+#define TCG_AREG0                       (TCG_TARGET_NB_REGS - 2)
+
+/* Used for function call generation. */
+#define TCG_REG_CALL_STACK              (TCG_TARGET_NB_REGS - 1)
+#define TCG_TARGET_CALL_STACK_OFFSET    0
+#define TCG_TARGET_STACK_ALIGN          16
+
  void tci_disas(uint8_t opc);
  
  tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr);
diff --git a/tci.c b/tci.c

index 2b2c11f25960a60e9b55dceb27396377d7bac293..c742c8df5cbbef96f77d6c7b9ccbd2ccf99a912a 100644 (file)
--- a/tci.c
+++ b/tci.c
@@ -51,11 +51,6 @@ typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
                                      tcg_target_ulong);
  #endif
  
-/* TCI can optionally use a global register variable for env. */
-#if !defined(AREG0)
-CPUArchState *env;
-#endif
-
  /* Targets which don't use GETPC also don't need tci_tb_ptr
     which makes them a little faster. */
  #if defined(GETPC)
@@ -117,6 +112,7 @@ static void tci_write_reg(TCGReg index, tcg_target_ulong value)
  {
      assert(index < ARRAY_SIZE(tci_reg));
      assert(index != TCG_AREG0);
+    assert(index != TCG_REG_CALL_STACK);
      tci_reg[index] = value;
  }
  
@@ -182,7 +178,7 @@ static tcg_target_ulong tci_read_i(uint8_t **tb_ptr)
      return value;
  }
  
-/* Read constant (32 bit) from bytecode. */
+/* Read unsigned constant (32 bit) from bytecode. */
  static uint32_t tci_read_i32(uint8_t **tb_ptr)
  {
      uint32_t value = *(uint32_t *)(*tb_ptr);
@@ -190,6 +186,14 @@ static uint32_t tci_read_i32(uint8_t **tb_ptr)
      return value;
  }
  
+/* Read signed constant (32 bit) from bytecode. */
+static int32_t tci_read_s32(uint8_t **tb_ptr)
+{
+    int32_t value = *(int32_t *)(*tb_ptr);
+    *tb_ptr += sizeof(value);
+    return value;
+}
+
  #if TCG_TARGET_REG_BITS == 64
  /* Read constant (64 bit) from bytecode. */
  static uint64_t tci_read_i64(uint8_t **tb_ptr)
@@ -430,18 +434,17 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
  }
  
  /* Interpret pseudo code in tb. */
-tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
+tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
  {
+    long tcg_temps[CPU_TEMP_BUF_NLONGS];
+    uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS);
      tcg_target_ulong next_tb = 0;
  
-    env = cpustate;
      tci_reg[TCG_AREG0] = (tcg_target_ulong)env;
+    tci_reg[TCG_REG_CALL_STACK] = sp_value;
      assert(tb_ptr);
  
      for (;;) {
-#if defined(GETPC)
-        tci_tb_ptr = (uintptr_t)tb_ptr;
-#endif
          TCGOpcode opc = tb_ptr[0];
  #if !defined(NDEBUG)
          uint8_t op_size = tb_ptr[1];
@@ -464,6 +467,10 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
          uint64_t v64;
  #endif
  
+#if defined(GETPC)
+        tci_tb_ptr = (uintptr_t)tb_ptr;
+#endif
+
          /* Skip opcode and size entry. */
          tb_ptr += 2;
  
@@ -550,7 +557,7 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
          case INDEX_op_ld8u_i32:
              t0 = *tb_ptr++;
              t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_i32(&tb_ptr);
+            t2 = tci_read_s32(&tb_ptr);
              tci_write_reg8(t0, *(uint8_t *)(t1 + t2));
              break;
          case INDEX_op_ld8s_i32:
@@ -563,25 +570,26 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
          case INDEX_op_ld_i32:
              t0 = *tb_ptr++;
              t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_i32(&tb_ptr);
+            t2 = tci_read_s32(&tb_ptr);
              tci_write_reg32(t0, *(uint32_t *)(t1 + t2));
              break;
          case INDEX_op_st8_i32:
              t0 = tci_read_r8(&tb_ptr);
              t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_i32(&tb_ptr);
+            t2 = tci_read_s32(&tb_ptr);
              *(uint8_t *)(t1 + t2) = t0;
              break;
          case INDEX_op_st16_i32:
              t0 = tci_read_r16(&tb_ptr);
              t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_i32(&tb_ptr);
+            t2 = tci_read_s32(&tb_ptr);
              *(uint16_t *)(t1 + t2) = t0;
              break;
          case INDEX_op_st_i32:
              t0 = tci_read_r32(&tb_ptr);
              t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_i32(&tb_ptr);
+            t2 = tci_read_s32(&tb_ptr);
+            assert(t1 != sp_value || (int32_t)t2 < 0);
              *(uint32_t *)(t1 + t2) = t0;
              break;
  
@@ -818,7 +826,7 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
          case INDEX_op_ld8u_i64:
              t0 = *tb_ptr++;
              t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_i32(&tb_ptr);
+            t2 = tci_read_s32(&tb_ptr);
              tci_write_reg8(t0, *(uint8_t *)(t1 + t2));
              break;
          case INDEX_op_ld8s_i64:
@@ -829,43 +837,44 @@ tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
          case INDEX_op_ld32u_i64:
              t0 = *tb_ptr++;
              t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_i32(&tb_ptr);
+            t2 = tci_read_s32(&tb_ptr);
              tci_write_reg32(t0, *(uint32_t *)(t1 + t2));
              break;
          case INDEX_op_ld32s_i64:
              t0 = *tb_ptr++;
              t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_i32(&tb_ptr);
+            t2 = tci_read_s32(&tb_ptr);
              tci_write_reg32s(t0, *(int32_t *)(t1 + t2));
              break;
          case INDEX_op_ld_i64:
              t0 = *tb_ptr++;
              t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_i32(&tb_ptr);
+            t2 = tci_read_s32(&tb_ptr);
              tci_write_reg64(t0, *(uint64_t *)(t1 + t2));
              break;
          case INDEX_op_st8_i64:
              t0 = tci_read_r8(&tb_ptr);
              t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_i32(&tb_ptr);
+            t2 = tci_read_s32(&tb_ptr);
              *(uint8_t *)(t1 + t2) = t0;
              break;
          case INDEX_op_st16_i64:
              t0 = tci_read_r16(&tb_ptr);
              t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_i32(&tb_ptr);
+            t2 = tci_read_s32(&tb_ptr);
              *(uint16_t *)(t1 + t2) = t0;
              break;
          case INDEX_op_st32_i64:
              t0 = tci_read_r32(&tb_ptr);
              t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_i32(&tb_ptr);
+            t2 = tci_read_s32(&tb_ptr);
              *(uint32_t *)(t1 + t2) = t0;
              break;
          case INDEX_op_st_i64:
              t0 = tci_read_r64(&tb_ptr);
              t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_i32(&tb_ptr);
+            t2 = tci_read_s32(&tb_ptr);
+            assert(t1 != sp_value || (int32_t)t2 < 0);
              *(uint64_t *)(t1 + t2) = t0;
              break;
  
diff --git a/util/aes.c b/util/aes.c

index 1da7bff1c96a4ddbf40bf4c5c06b8d92a891e627..91e97fa6e7f414733742296873616f30fd04a630 100644 (file)
--- a/util/aes.c
+++ b/util/aes.c
@@ -28,7 +28,7 @@
   * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   */
  #include "qemu-common.h"
-#include "block/aes.h"
+#include "qemu/aes.h"
  
  #ifndef NDEBUG
  #define NDEBUG
@@ -44,20 +44,20 @@ typedef uint8_t u8;
  # define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >>  8); (ct)[3] = (u8)(st); }
  
  /*
-Te0[x] = S [x].[02, 01, 01, 03];
-Te1[x] = S [x].[03, 02, 01, 01];
-Te2[x] = S [x].[01, 03, 02, 01];
-Te3[x] = S [x].[01, 01, 03, 02];
-Te4[x] = S [x].[01, 01, 01, 01];
+AES_Te0[x] = S [x].[02, 01, 01, 03];
+AES_Te1[x] = S [x].[03, 02, 01, 01];
+AES_Te2[x] = S [x].[01, 03, 02, 01];
+AES_Te3[x] = S [x].[01, 01, 03, 02];
+AES_Te4[x] = S [x].[01, 01, 01, 01];
  
-Td0[x] = Si[x].[0e, 09, 0d, 0b];
-Td1[x] = Si[x].[0b, 0e, 09, 0d];
-Td2[x] = Si[x].[0d, 0b, 0e, 09];
-Td3[x] = Si[x].[09, 0d, 0b, 0e];
-Td4[x] = Si[x].[01, 01, 01, 01];
+AES_Td0[x] = Si[x].[0e, 09, 0d, 0b];
+AES_Td1[x] = Si[x].[0b, 0e, 09, 0d];
+AES_Td2[x] = Si[x].[0d, 0b, 0e, 09];
+AES_Td3[x] = Si[x].[09, 0d, 0b, 0e];
+AES_Td4[x] = Si[x].[01, 01, 01, 01];
  */
  
-static const u32 Te0[256] = {
+const uint32_t AES_Te0[256] = {
      0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
      0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
      0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
@@ -123,7 +123,7 @@ static const u32 Te0[256] = {
      0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
      0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
  };
-static const u32 Te1[256] = {
+const uint32_t AES_Te1[256] = {
      0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
      0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
      0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
@@ -189,7 +189,7 @@ static const u32 Te1[256] = {
      0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
      0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
  };
-static const u32 Te2[256] = {
+const uint32_t AES_Te2[256] = {
      0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
      0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
      0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
@@ -255,7 +255,7 @@ static const u32 Te2[256] = {
      0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
      0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
  };
-static const u32 Te3[256] = {
+const uint32_t AES_Te3[256] = {
  
      0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
      0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
@@ -322,7 +322,7 @@ static const u32 Te3[256] = {
      0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
      0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
  };
-static const u32 Te4[256] = {
+const uint32_t AES_Te4[256] = {
      0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
      0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
      0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
@@ -388,7 +388,7 @@ static const u32 Te4[256] = {
      0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
      0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
  };
-static const u32 Td0[256] = {
+const uint32_t AES_Td0[256] = {
      0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
      0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
      0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
@@ -454,7 +454,7 @@ static const u32 Td0[256] = {
      0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
      0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
  };
-static const u32 Td1[256] = {
+const uint32_t AES_Td1[256] = {
      0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
      0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
      0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
@@ -520,7 +520,7 @@ static const u32 Td1[256] = {
      0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
      0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
  };
-static const u32 Td2[256] = {
+const uint32_t AES_Td2[256] = {
      0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
      0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
      0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
@@ -587,7 +587,7 @@ static const u32 Td2[256] = {
      0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
      0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
  };
-static const u32 Td3[256] = {
+const uint32_t AES_Td3[256] = {
      0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
      0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
      0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
@@ -653,7 +653,7 @@ static const u32 Td3[256] = {
      0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
      0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
  };
-static const u32 Td4[256] = {
+const uint32_t AES_Td4[256] = {
      0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
      0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
      0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
@@ -757,10 +757,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
                 while (1) {
                         temp  = rk[3];
                         rk[4] = rk[0] ^
-                               (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
-                               (Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^
-                               (Te4[(temp      ) & 0xff] & 0x0000ff00) ^
-                               (Te4[(temp >> 24)       ] & 0x000000ff) ^
+                                (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+                                (AES_Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^
+                                (AES_Te4[(temp      ) & 0xff] & 0x0000ff00) ^
+                                (AES_Te4[(temp >> 24)       ] & 0x000000ff) ^
                                 rcon[i];
                         rk[5] = rk[1] ^ rk[4];
                         rk[6] = rk[2] ^ rk[5];
@@ -777,10 +777,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
                 while (1) {
                         temp = rk[ 5];
                         rk[ 6] = rk[ 0] ^
-                               (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
-                               (Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^
-                               (Te4[(temp      ) & 0xff] & 0x0000ff00) ^
-                               (Te4[(temp >> 24)       ] & 0x000000ff) ^
+                                (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+                                (AES_Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^
+                                (AES_Te4[(temp      ) & 0xff] & 0x0000ff00) ^
+                                (AES_Te4[(temp >> 24)       ] & 0x000000ff) ^
                                 rcon[i];
                         rk[ 7] = rk[ 1] ^ rk[ 6];
                         rk[ 8] = rk[ 2] ^ rk[ 7];
@@ -799,10 +799,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
                 while (1) {
                         temp = rk[ 7];
                         rk[ 8] = rk[ 0] ^
-                               (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
-                               (Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^
-                               (Te4[(temp      ) & 0xff] & 0x0000ff00) ^
-                               (Te4[(temp >> 24)       ] & 0x000000ff) ^
+                                (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+                                (AES_Te4[(temp >>  8) & 0xff] & 0x00ff0000) ^
+                                (AES_Te4[(temp      ) & 0xff] & 0x0000ff00) ^
+                                (AES_Te4[(temp >> 24)       ] & 0x000000ff) ^
                                 rcon[i];
                         rk[ 9] = rk[ 1] ^ rk[ 8];
                         rk[10] = rk[ 2] ^ rk[ 9];
@@ -812,10 +812,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
                         }
                         temp = rk[11];
                         rk[12] = rk[ 4] ^
-                               (Te4[(temp >> 24)       ] & 0xff000000) ^
-                               (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
-                               (Te4[(temp >>  8) & 0xff] & 0x0000ff00) ^
-                               (Te4[(temp      ) & 0xff] & 0x000000ff);
+                                (AES_Te4[(temp >> 24)       ] & 0xff000000) ^
+                                (AES_Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
+                                (AES_Te4[(temp >>  8) & 0xff] & 0x0000ff00) ^
+                                (AES_Te4[(temp      ) & 0xff] & 0x000000ff);
                         rk[13] = rk[ 5] ^ rk[12];
                         rk[14] = rk[ 6] ^ rk[13];
                         rk[15] = rk[ 7] ^ rk[14];
@@ -854,25 +854,25 @@ int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
         for (i = 1; i < (key->rounds); i++) {
                 rk += 4;
                 rk[0] =
-                       Td0[Te4[(rk[0] >> 24)       ] & 0xff] ^
-                       Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
-                       Td2[Te4[(rk[0] >>  8) & 0xff] & 0xff] ^
-                       Td3[Te4[(rk[0]      ) & 0xff] & 0xff];
+                        AES_Td0[AES_Te4[(rk[0] >> 24)       ] & 0xff] ^
+                        AES_Td1[AES_Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
+                        AES_Td2[AES_Te4[(rk[0] >>  8) & 0xff] & 0xff] ^
+                        AES_Td3[AES_Te4[(rk[0]      ) & 0xff] & 0xff];
                 rk[1] =
-                       Td0[Te4[(rk[1] >> 24)       ] & 0xff] ^
-                       Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
-                       Td2[Te4[(rk[1] >>  8) & 0xff] & 0xff] ^
-                       Td3[Te4[(rk[1]      ) & 0xff] & 0xff];
+                        AES_Td0[AES_Te4[(rk[1] >> 24)       ] & 0xff] ^
+                        AES_Td1[AES_Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
+                        AES_Td2[AES_Te4[(rk[1] >>  8) & 0xff] & 0xff] ^
+                        AES_Td3[AES_Te4[(rk[1]      ) & 0xff] & 0xff];
                 rk[2] =
-                       Td0[Te4[(rk[2] >> 24)       ] & 0xff] ^
-                       Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
-                       Td2[Te4[(rk[2] >>  8) & 0xff] & 0xff] ^
-                       Td3[Te4[(rk[2]      ) & 0xff] & 0xff];
+                        AES_Td0[AES_Te4[(rk[2] >> 24)       ] & 0xff] ^
+                        AES_Td1[AES_Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
+                        AES_Td2[AES_Te4[(rk[2] >>  8) & 0xff] & 0xff] ^
+                        AES_Td3[AES_Te4[(rk[2]      ) & 0xff] & 0xff];
                 rk[3] =
-                       Td0[Te4[(rk[3] >> 24)       ] & 0xff] ^
-                       Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
-                       Td2[Te4[(rk[3] >>  8) & 0xff] & 0xff] ^
-                       Td3[Te4[(rk[3]      ) & 0xff] & 0xff];
+                        AES_Td0[AES_Te4[(rk[3] >> 24)       ] & 0xff] ^
+                        AES_Td1[AES_Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
+                        AES_Td2[AES_Te4[(rk[3] >>  8) & 0xff] & 0xff] ^
+                        AES_Td3[AES_Te4[(rk[3]      ) & 0xff] & 0xff];
         }
         return 0;
  }
@@ -904,72 +904,72 @@ void AES_encrypt(const unsigned char *in, unsigned char *out,
         s3 = GETU32(in + 12) ^ rk[3];
  #ifdef FULL_UNROLL
         /* round 1: */
-       t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
-       t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
-       t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
-       t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
+        t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >>  8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[ 4];
+        t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >>  8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[ 5];
+        t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >>  8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[ 6];
+        t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >>  8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[ 7];
         /* round 2: */
-       s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
-       s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
-       s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
-       s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
+        s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >>  8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[ 8];
+        s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >>  8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[ 9];
+        s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >>  8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[10];
+        s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >>  8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[11];
         /* round 3: */
-       t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
-       t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
-       t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
-       t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
+        t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >>  8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[12];
+        t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >>  8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[13];
+        t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >>  8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[14];
+        t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >>  8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[15];
         /* round 4: */
-       s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
-       s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
-       s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
-       s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
+        s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >>  8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[16];
+        s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >>  8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[17];
+        s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >>  8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[18];
+        s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >>  8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[19];
         /* round 5: */
-       t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
-       t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
-       t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
-       t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
+        t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >>  8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[20];
+        t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >>  8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[21];
+        t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >>  8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[22];
+        t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >>  8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[23];
         /* round 6: */
-       s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
-       s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
-       s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
-       s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
+        s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >>  8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[24];
+        s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >>  8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[25];
+        s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >>  8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[26];
+        s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >>  8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[27];
         /* round 7: */
-       t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
-       t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
-       t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
-       t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
+        t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >>  8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[28];
+        t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >>  8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[29];
+        t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >>  8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[30];
+        t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >>  8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[31];
         /* round 8: */
-       s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
-       s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
-       s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
-       s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
+        s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >>  8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[32];
+        s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >>  8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[33];
+        s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >>  8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[34];
+        s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >>  8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[35];
         /* round 9: */
-       t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
-       t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
-       t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
-       t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
+        t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >>  8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[36];
+        t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >>  8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[37];
+        t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >>  8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[38];
+        t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >>  8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[39];
      if (key->rounds > 10) {
          /* round 10: */
-        s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
-        s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
-        s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
-        s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
+        s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >>  8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[40];
+        s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >>  8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[41];
+        s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >>  8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[42];
+        s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >>  8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[43];
          /* round 11: */
-        t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
-        t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
-        t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
-        t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
+        t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >>  8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[44];
+        t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >>  8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[45];
+        t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >>  8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[46];
+        t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >>  8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[47];
          if (key->rounds > 12) {
              /* round 12: */
-            s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
-            s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
-            s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
-            s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
+            s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >>  8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[48];
+            s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >>  8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[49];
+            s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >>  8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[50];
+            s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >>  8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[51];
              /* round 13: */
-            t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
-            t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
-            t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
-            t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
+            t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >>  8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[52];
+            t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >>  8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[53];
+            t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >>  8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[54];
+            t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >>  8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[55];
          }
      }
      rk += key->rounds << 2;
@@ -980,28 +980,28 @@ void AES_encrypt(const unsigned char *in, unsigned char *out,
      r = key->rounds >> 1;
      for (;;) {
          t0 =
-            Te0[(s0 >> 24)       ] ^
-            Te1[(s1 >> 16) & 0xff] ^
-            Te2[(s2 >>  8) & 0xff] ^
-            Te3[(s3      ) & 0xff] ^
+            AES_Te0[(s0 >> 24)       ] ^
+            AES_Te1[(s1 >> 16) & 0xff] ^
+            AES_Te2[(s2 >>  8) & 0xff] ^
+            AES_Te3[(s3      ) & 0xff] ^
              rk[4];
          t1 =
-            Te0[(s1 >> 24)       ] ^
-            Te1[(s2 >> 16) & 0xff] ^
-            Te2[(s3 >>  8) & 0xff] ^
-            Te3[(s0      ) & 0xff] ^
+            AES_Te0[(s1 >> 24)       ] ^
+            AES_Te1[(s2 >> 16) & 0xff] ^
+            AES_Te2[(s3 >>  8) & 0xff] ^
+            AES_Te3[(s0      ) & 0xff] ^
              rk[5];
          t2 =
-            Te0[(s2 >> 24)       ] ^
-            Te1[(s3 >> 16) & 0xff] ^
-            Te2[(s0 >>  8) & 0xff] ^
-            Te3[(s1      ) & 0xff] ^
+            AES_Te0[(s2 >> 24)       ] ^
+            AES_Te1[(s3 >> 16) & 0xff] ^
+            AES_Te2[(s0 >>  8) & 0xff] ^
+            AES_Te3[(s1      ) & 0xff] ^
              rk[6];
          t3 =
-            Te0[(s3 >> 24)       ] ^
-            Te1[(s0 >> 16) & 0xff] ^
-            Te2[(s1 >>  8) & 0xff] ^
-            Te3[(s2      ) & 0xff] ^
+            AES_Te0[(s3 >> 24)       ] ^
+            AES_Te1[(s0 >> 16) & 0xff] ^
+            AES_Te2[(s1 >>  8) & 0xff] ^
+            AES_Te3[(s2      ) & 0xff] ^
              rk[7];
  
          rk += 8;
@@ -1010,28 +1010,28 @@ void AES_encrypt(const unsigned char *in, unsigned char *out,
          }
  
          s0 =
-            Te0[(t0 >> 24)       ] ^
-            Te1[(t1 >> 16) & 0xff] ^
-            Te2[(t2 >>  8) & 0xff] ^
-            Te3[(t3      ) & 0xff] ^
+            AES_Te0[(t0 >> 24)       ] ^
+            AES_Te1[(t1 >> 16) & 0xff] ^
+            AES_Te2[(t2 >>  8) & 0xff] ^
+            AES_Te3[(t3      ) & 0xff] ^
              rk[0];
          s1 =
-            Te0[(t1 >> 24)       ] ^
-            Te1[(t2 >> 16) & 0xff] ^
-            Te2[(t3 >>  8) & 0xff] ^
-            Te3[(t0      ) & 0xff] ^
+            AES_Te0[(t1 >> 24)       ] ^
+            AES_Te1[(t2 >> 16) & 0xff] ^
+            AES_Te2[(t3 >>  8) & 0xff] ^
+            AES_Te3[(t0      ) & 0xff] ^
              rk[1];
          s2 =
-            Te0[(t2 >> 24)       ] ^
-            Te1[(t3 >> 16) & 0xff] ^
-            Te2[(t0 >>  8) & 0xff] ^
-            Te3[(t1      ) & 0xff] ^
+            AES_Te0[(t2 >> 24)       ] ^
+            AES_Te1[(t3 >> 16) & 0xff] ^
+            AES_Te2[(t0 >>  8) & 0xff] ^
+            AES_Te3[(t1      ) & 0xff] ^
              rk[2];
          s3 =
-            Te0[(t3 >> 24)       ] ^
-            Te1[(t0 >> 16) & 0xff] ^
-            Te2[(t1 >>  8) & 0xff] ^
-            Te3[(t2      ) & 0xff] ^
+            AES_Te0[(t3 >> 24)       ] ^
+            AES_Te1[(t0 >> 16) & 0xff] ^
+            AES_Te2[(t1 >>  8) & 0xff] ^
+            AES_Te3[(t2      ) & 0xff] ^
              rk[3];
      }
  #endif /* ?FULL_UNROLL */
@@ -1040,31 +1040,31 @@ void AES_encrypt(const unsigned char *in, unsigned char *out,
          * map cipher state to byte array block:
          */
         s0 =
-               (Te4[(t0 >> 24)       ] & 0xff000000) ^
-               (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
-               (Te4[(t2 >>  8) & 0xff] & 0x0000ff00) ^
-               (Te4[(t3      ) & 0xff] & 0x000000ff) ^
+                (AES_Te4[(t0 >> 24)       ] & 0xff000000) ^
+                (AES_Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+                (AES_Te4[(t2 >>  8) & 0xff] & 0x0000ff00) ^
+                (AES_Te4[(t3      ) & 0xff] & 0x000000ff) ^
                 rk[0];
         PUTU32(out     , s0);
         s1 =
-               (Te4[(t1 >> 24)       ] & 0xff000000) ^
-               (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
-               (Te4[(t3 >>  8) & 0xff] & 0x0000ff00) ^
-               (Te4[(t0      ) & 0xff] & 0x000000ff) ^
+                (AES_Te4[(t1 >> 24)       ] & 0xff000000) ^
+                (AES_Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+                (AES_Te4[(t3 >>  8) & 0xff] & 0x0000ff00) ^
+                (AES_Te4[(t0      ) & 0xff] & 0x000000ff) ^
                 rk[1];
         PUTU32(out +  4, s1);
         s2 =
-               (Te4[(t2 >> 24)       ] & 0xff000000) ^
-               (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
-               (Te4[(t0 >>  8) & 0xff] & 0x0000ff00) ^
-               (Te4[(t1      ) & 0xff] & 0x000000ff) ^
+                (AES_Te4[(t2 >> 24)       ] & 0xff000000) ^
+                (AES_Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+                (AES_Te4[(t0 >>  8) & 0xff] & 0x0000ff00) ^
+                (AES_Te4[(t1      ) & 0xff] & 0x000000ff) ^
                 rk[2];
         PUTU32(out +  8, s2);
         s3 =
-               (Te4[(t3 >> 24)       ] & 0xff000000) ^
-               (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
-               (Te4[(t1 >>  8) & 0xff] & 0x0000ff00) ^
-               (Te4[(t2      ) & 0xff] & 0x000000ff) ^
+                (AES_Te4[(t3 >> 24)       ] & 0xff000000) ^
+                (AES_Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+                (AES_Te4[(t1 >>  8) & 0xff] & 0x0000ff00) ^
+                (AES_Te4[(t2      ) & 0xff] & 0x000000ff) ^
                 rk[3];
         PUTU32(out + 12, s3);
  }
@@ -1095,72 +1095,72 @@ void AES_decrypt(const unsigned char *in, unsigned char *out,
      s3 = GETU32(in + 12) ^ rk[3];
  #ifdef FULL_UNROLL
      /* round 1: */
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
+    t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >>  8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[ 4];
+    t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >>  8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[ 5];
+    t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >>  8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[ 6];
+    t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >>  8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[ 7];
      /* round 2: */
-    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
-    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
-    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
-    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
+    s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >>  8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[ 8];
+    s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >>  8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[ 9];
+    s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >>  8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[10];
+    s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >>  8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[11];
      /* round 3: */
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
+    t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >>  8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[12];
+    t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >>  8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[13];
+    t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >>  8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[14];
+    t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >>  8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[15];
      /* round 4: */
-    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
-    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
-    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
-    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
+    s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >>  8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[16];
+    s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >>  8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[17];
+    s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >>  8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[18];
+    s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >>  8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[19];
      /* round 5: */
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
+    t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >>  8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[20];
+    t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >>  8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[21];
+    t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >>  8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[22];
+    t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >>  8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[23];
      /* round 6: */
-    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
-    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
-    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
-    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
+    s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >>  8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[24];
+    s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >>  8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[25];
+    s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >>  8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[26];
+    s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >>  8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[27];
      /* round 7: */
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
+    t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >>  8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[28];
+    t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >>  8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[29];
+    t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >>  8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[30];
+    t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >>  8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[31];
      /* round 8: */
-    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
-    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
-    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
-    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
+    s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >>  8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[32];
+    s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >>  8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[33];
+    s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >>  8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[34];
+    s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >>  8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[35];
      /* round 9: */
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
+    t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >>  8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[36];
+    t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >>  8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[37];
+    t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >>  8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[38];
+    t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >>  8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[39];
      if (key->rounds > 10) {
          /* round 10: */
-        s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
-        s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
-        s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
-        s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
+        s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >>  8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[40];
+        s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >>  8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[41];
+        s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >>  8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[42];
+        s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >>  8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[43];
          /* round 11: */
-        t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
-        t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
-        t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
-        t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
+        t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >>  8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[44];
+        t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >>  8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[45];
+        t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >>  8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[46];
+        t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >>  8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[47];
          if (key->rounds > 12) {
              /* round 12: */
-            s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
-            s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
-            s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
-            s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
+            s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >>  8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[48];
+            s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >>  8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[49];
+            s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >>  8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[50];
+            s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >>  8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[51];
              /* round 13: */
-            t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
-            t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
-            t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
-            t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
+            t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >>  8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[52];
+            t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >>  8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[53];
+            t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >>  8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[54];
+            t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >>  8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[55];
          }
      }
         rk += key->rounds << 2;
@@ -1171,28 +1171,28 @@ void AES_decrypt(const unsigned char *in, unsigned char *out,
      r = key->rounds >> 1;
      for (;;) {
          t0 =
-            Td0[(s0 >> 24)       ] ^
-            Td1[(s3 >> 16) & 0xff] ^
-            Td2[(s2 >>  8) & 0xff] ^
-            Td3[(s1      ) & 0xff] ^
+            AES_Td0[(s0 >> 24)       ] ^
+            AES_Td1[(s3 >> 16) & 0xff] ^
+            AES_Td2[(s2 >>  8) & 0xff] ^
+            AES_Td3[(s1      ) & 0xff] ^
              rk[4];
          t1 =
-            Td0[(s1 >> 24)       ] ^
-            Td1[(s0 >> 16) & 0xff] ^
-            Td2[(s3 >>  8) & 0xff] ^
-            Td3[(s2      ) & 0xff] ^
+            AES_Td0[(s1 >> 24)       ] ^
+            AES_Td1[(s0 >> 16) & 0xff] ^
+            AES_Td2[(s3 >>  8) & 0xff] ^
+            AES_Td3[(s2      ) & 0xff] ^
              rk[5];
          t2 =
-            Td0[(s2 >> 24)       ] ^
-            Td1[(s1 >> 16) & 0xff] ^
-            Td2[(s0 >>  8) & 0xff] ^
-            Td3[(s3      ) & 0xff] ^
+            AES_Td0[(s2 >> 24)       ] ^
+            AES_Td1[(s1 >> 16) & 0xff] ^
+            AES_Td2[(s0 >>  8) & 0xff] ^
+            AES_Td3[(s3      ) & 0xff] ^
              rk[6];
          t3 =
-            Td0[(s3 >> 24)       ] ^
-            Td1[(s2 >> 16) & 0xff] ^
-            Td2[(s1 >>  8) & 0xff] ^
-            Td3[(s0      ) & 0xff] ^
+            AES_Td0[(s3 >> 24)       ] ^
+            AES_Td1[(s2 >> 16) & 0xff] ^
+            AES_Td2[(s1 >>  8) & 0xff] ^
+            AES_Td3[(s0      ) & 0xff] ^
              rk[7];
  
          rk += 8;
@@ -1201,28 +1201,28 @@ void AES_decrypt(const unsigned char *in, unsigned char *out,
          }
  
          s0 =
-            Td0[(t0 >> 24)       ] ^
-            Td1[(t3 >> 16) & 0xff] ^
-            Td2[(t2 >>  8) & 0xff] ^
-            Td3[(t1      ) & 0xff] ^
+            AES_Td0[(t0 >> 24)       ] ^
+            AES_Td1[(t3 >> 16) & 0xff] ^
+            AES_Td2[(t2 >>  8) & 0xff] ^
+            AES_Td3[(t1      ) & 0xff] ^
              rk[0];
          s1 =
-            Td0[(t1 >> 24)       ] ^
-            Td1[(t0 >> 16) & 0xff] ^
-            Td2[(t3 >>  8) & 0xff] ^
-            Td3[(t2      ) & 0xff] ^
+            AES_Td0[(t1 >> 24)       ] ^
+            AES_Td1[(t0 >> 16) & 0xff] ^
+            AES_Td2[(t3 >>  8) & 0xff] ^
+            AES_Td3[(t2      ) & 0xff] ^
              rk[1];
          s2 =
-            Td0[(t2 >> 24)       ] ^
-            Td1[(t1 >> 16) & 0xff] ^
-            Td2[(t0 >>  8) & 0xff] ^
-            Td3[(t3      ) & 0xff] ^
+            AES_Td0[(t2 >> 24)       ] ^
+            AES_Td1[(t1 >> 16) & 0xff] ^
+            AES_Td2[(t0 >>  8) & 0xff] ^
+            AES_Td3[(t3      ) & 0xff] ^
              rk[2];
          s3 =
-            Td0[(t3 >> 24)       ] ^
-            Td1[(t2 >> 16) & 0xff] ^
-            Td2[(t1 >>  8) & 0xff] ^
-            Td3[(t0      ) & 0xff] ^
+            AES_Td0[(t3 >> 24)       ] ^
+            AES_Td1[(t2 >> 16) & 0xff] ^
+            AES_Td2[(t1 >>  8) & 0xff] ^
+            AES_Td3[(t0      ) & 0xff] ^
              rk[3];
      }
  #endif /* ?FULL_UNROLL */
@@ -1231,31 +1231,31 @@ void AES_decrypt(const unsigned char *in, unsigned char *out,
          * map cipher state to byte array block:
          */
         s0 =
-               (Td4[(t0 >> 24)       ] & 0xff000000) ^
-               (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
-               (Td4[(t2 >>  8) & 0xff] & 0x0000ff00) ^
-               (Td4[(t1      ) & 0xff] & 0x000000ff) ^
+                (AES_Td4[(t0 >> 24)       ] & 0xff000000) ^
+                (AES_Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+                (AES_Td4[(t2 >>  8) & 0xff] & 0x0000ff00) ^
+                (AES_Td4[(t1      ) & 0xff] & 0x000000ff) ^
                 rk[0];
         PUTU32(out     , s0);
         s1 =
-               (Td4[(t1 >> 24)       ] & 0xff000000) ^
-               (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
-               (Td4[(t3 >>  8) & 0xff] & 0x0000ff00) ^
-               (Td4[(t2      ) & 0xff] & 0x000000ff) ^
+                (AES_Td4[(t1 >> 24)       ] & 0xff000000) ^
+                (AES_Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+                (AES_Td4[(t3 >>  8) & 0xff] & 0x0000ff00) ^
+                (AES_Td4[(t2      ) & 0xff] & 0x000000ff) ^
                 rk[1];
         PUTU32(out +  4, s1);
         s2 =
-               (Td4[(t2 >> 24)       ] & 0xff000000) ^
-               (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
-               (Td4[(t0 >>  8) & 0xff] & 0x0000ff00) ^
-               (Td4[(t3      ) & 0xff] & 0x000000ff) ^
+                (AES_Td4[(t2 >> 24)       ] & 0xff000000) ^
+                (AES_Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+                (AES_Td4[(t0 >>  8) & 0xff] & 0x0000ff00) ^
+                (AES_Td4[(t3      ) & 0xff] & 0x000000ff) ^
                 rk[2];
         PUTU32(out +  8, s2);
         s3 =
-               (Td4[(t3 >> 24)       ] & 0xff000000) ^
-               (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
-               (Td4[(t1 >>  8) & 0xff] & 0x0000ff00) ^
-               (Td4[(t0      ) & 0xff] & 0x000000ff) ^
+                (AES_Td4[(t3 >> 24)       ] & 0xff000000) ^
+                (AES_Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+                (AES_Td4[(t1 >>  8) & 0xff] & 0x0000ff00) ^
+                (AES_Td4[(t0      ) & 0xff] & 0x000000ff) ^
                 rk[3];
         PUTU32(out + 12, s3);
  }
author	Aurelien Jarno <aurelien@aurel32.net>
	Sat, 13 Apr 2013 12:31:54 +0000 (14:31 +0200)
committer	Aurelien Jarno <aurelien@aurel32.net>
	Sat, 13 Apr 2013 12:31:54 +0000 (14:31 +0200)
block/qcow.c		patch \| blob \| history
block/qcow2.c		patch \| blob \| history
block/qcow2.h		patch \| blob \| history
configure		patch \| blob \| history
cpu-exec.c		patch \| blob \| history
disas/i386.c		patch \| blob \| history
docs/bootindex.txt		patch \| blob \| history
docs/ccid.txt		patch \| blob \| history
hw/block/virtio-blk.c		patch \| blob \| history
hw/intc/armv7m_nvic.c		patch \| blob \| history
hw/s390x/s390-virtio-ccw.c		patch \| blob \| history
hw/s390x/s390-virtio.c		patch \| blob \| history
hw/sd/sd.c		patch \| blob \| history
hw/sh4/r2d.c		patch \| blob \| history
hw/sh4/sh7750.c		patch \| blob \| history
hw/sh4/shix.c		patch \| blob \| history
include/block/aes.h	[deleted file]	patch \| blob \| history
include/hw/sh4/sh.h		patch \| blob \| history
include/qemu/aes.h	[new file with mode: 0644]	patch \| blob
linux-user/syscall.c		patch \| blob \| history
target-i386/cpu.c		patch \| blob \| history
target-i386/fpu_helper.c		patch \| blob \| history
target-i386/ops_sse.h		patch \| blob \| history
target-i386/ops_sse_header.h		patch \| blob \| history
target-i386/translate.c		patch \| blob \| history
target-microblaze/mmu.c		patch \| blob \| history
target-moxie/cpu.c		patch \| blob \| history
target-moxie/cpu.h		patch \| blob \| history
target-moxie/machine.c		patch \| blob \| history
target-s390x/mem_helper.c		patch \| blob \| history
tcg/s390/tcg-target.c		patch \| blob \| history
tcg/s390/tcg-target.h		patch \| blob \| history
tcg/tci/tcg-target.c		patch \| blob \| history
tcg/tci/tcg-target.h		patch \| blob \| history
tci.c		patch \| blob \| history
util/aes.c		patch \| blob \| history