#include "block/block_int.h"
#include "qemu/module.h"
#include <zlib.h>
-#include "block/aes.h"
+#include "qemu/aes.h"
#include "migration/migration.h"
/**************************************************************/
#include "block/block_int.h"
#include "qemu/module.h"
#include <zlib.h>
-#include "block/aes.h"
+#include "qemu/aes.h"
#include "block/qcow2.h"
#include "qemu/error-report.h"
#include "qapi/qmp/qerror.h"
#ifndef BLOCK_QCOW2_H
#define BLOCK_QCOW2_H
-#include "block/aes.h"
+#include "qemu/aes.h"
#include "block/coroutine.h"
//#define DEBUG_ALLOC
if test "$debug_tcg" = "yes" ; then
echo "CONFIG_DEBUG_TCG=y" >> $config_host_mak
fi
-if test "$debug" = "yes" ; then
- echo "CONFIG_DEBUG_EXEC=y" >> $config_host_mak
-fi
if test "$strip_opt" = "yes" ; then
echo "STRIP=${strip}" >> $config_host_mak
fi
#include "qemu/atomic.h"
#include "sysemu/qtest.h"
-//#define CONFIG_DEBUG_EXEC
-
bool qemu_cpu_has_work(CPUState *cpu)
{
return cpu_has_work(cpu);
env->exception_index = EXCP_INTERRUPT;
cpu_loop_exit(env);
}
-#if defined(DEBUG_DISAS) || defined(CONFIG_DEBUG_EXEC)
+#if defined(DEBUG_DISAS)
if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
/* restore flags in standard format */
#if defined(TARGET_I386)
log_cpu_state(env, 0);
#endif
}
-#endif /* DEBUG_DISAS || CONFIG_DEBUG_EXEC */
+#endif /* DEBUG_DISAS */
spin_lock(&tcg_ctx.tb_ctx.tb_lock);
tb = tb_find_fast(env);
/* Note: we do it here to avoid a gcc bug on Mac OS X when
next_tb = 0;
tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
}
-#ifdef CONFIG_DEBUG_EXEC
- qemu_log_mask(CPU_LOG_EXEC, "Trace %p [" TARGET_FMT_lx "] %s\n",
- tb->tc_ptr, tb->pc,
- lookup_symbol(tb->pc));
-#endif
+ if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
+ qemu_log("Trace %p [" TARGET_FMT_lx "] %s\n",
+ tb->tc_ptr, tb->pc, lookup_symbol(tb->pc));
+ }
/* see if we can patch the calling TB. When the TB
spans two pages, we cannot safely do a direct
jump. */
#define PREGRP95 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 95 } }
#define PREGRP96 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 96 } }
#define PREGRP97 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 97 } }
+#define PREGRP98 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 98 } }
+#define PREGRP99 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 99 } }
+#define PREGRP100 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 100 } }
+#define PREGRP101 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 101 } }
+#define PREGRP102 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 102 } }
+#define PREGRP103 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 103 } }
+#define PREGRP104 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 104 } }
#define X86_64_0 NULL, { { NULL, X86_64_SPECIAL }, { NULL, 0 } }
/* a0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* af */
/* b0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* bf */
/* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
- /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */
+ /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1, /* df */
/* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
/* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */
/* ------------------------------- */
/* 10 */ 0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0, /* 1f */
/* 20 */ 1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 2f */
/* 30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 3f */
- /* 40 */ 1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 4f */
+ /* 40 */ 1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0, /* 4f */
/* 50 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 5f */
/* 60 */ 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, /* 6f */
/* 70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 7f */
/* a0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* af */
/* b0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* bf */
/* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
- /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */
+ /* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, /* df */
/* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
/* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */
/* ------------------------------- */
{ "punpckldq",{ MX, EMq } },
{ "(bad)", { XX } },
},
+
+ /* PREGRP98 */
+ {
+ { "(bad)", { XX } },
+ { "(bad)", { XX } },
+ { "pclmulqdq", { XM, EXx, Ib } },
+ { "(bad)", { XX } },
+ },
+
+ /* PREGRP99 */
+ {
+ { "(bad)", { XX } },
+ { "(bad)", { XX } },
+ { "aesimc", { XM, EXx } },
+ { "(bad)", { XX } },
+ },
+
+ /* PREGRP100 */
+ {
+ { "(bad)", { XX } },
+ { "(bad)", { XX } },
+ { "aesenc", { XM, EXx } },
+ { "(bad)", { XX } },
+ },
+
+ /* PREGRP101 */
+ {
+ { "(bad)", { XX } },
+ { "(bad)", { XX } },
+ { "aesenclast", { XM, EXx } },
+ { "(bad)", { XX } },
+ },
+
+ /* PREGRP102 */
+ {
+ { "(bad)", { XX } },
+ { "(bad)", { XX } },
+ { "aesdec", { XM, EXx } },
+ { "(bad)", { XX } },
+ },
+
+ /* PREGRP103 */
+ {
+ { "(bad)", { XX } },
+ { "(bad)", { XX } },
+ { "aesdeclast", { XM, EXx } },
+ { "(bad)", { XX } },
+ },
+
+ /* PREGRP104 */
+ {
+ { "(bad)", { XX } },
+ { "(bad)", { XX } },
+ { "aeskeygenassist", { XM, EXx, Ib } },
+ { "(bad)", { XX } },
+ },
+
};
static const struct dis386 x86_64_table[][2] = {
{ "(bad)", { XX } },
{ "(bad)", { XX } },
{ "(bad)", { XX } },
- { "(bad)", { XX } },
- { "(bad)", { XX } },
- { "(bad)", { XX } },
- { "(bad)", { XX } },
- { "(bad)", { XX } },
+ { PREGRP99 },
+ { PREGRP100 },
+ { PREGRP101 },
+ { PREGRP102 },
+ { PREGRP103 },
/* e0 */
{ "(bad)", { XX } },
{ "(bad)", { XX } },
{ PREGRP84 },
{ PREGRP85 },
{ "(bad)", { XX } },
- { "(bad)", { XX } },
+ { PREGRP98 },
{ "(bad)", { XX } },
{ "(bad)", { XX } },
{ "(bad)", { XX } },
{ "(bad)", { XX } },
{ "(bad)", { XX } },
{ "(bad)", { XX } },
- { "(bad)", { XX } },
+ { PREGRP104 },
/* e0 */
{ "(bad)", { XX } },
{ "(bad)", { XX } },
== Example ==
-Lets assume we have QEMU machine with two NICs (virtio, e1000) and two
+Let's assume we have a QEMU machine with two NICs (virtio, e1000) and two
disks (IDE, virtio):
qemu -drive file=disk1.img,if=none,id=disk1
-netdev type=user,id=net1 -device e1000,netdev=net1,bootindex=1
Given the command above, firmware should try to boot from the e1000 NIC
-first. If this fails, it should try the virtio NIC next, if this fails
+first. If this fails, it should try the virtio NIC next; if this fails
too, it should try the virtio disk, and then the IDE disk.
== Limitations ==
1. Some firmware has limitations on which devices can be considered for
booting. For instance, the PC BIOS boot specification allows only one
disk to be bootable. If boot from disk fails for some reason, the BIOS
-won't retry booting from other disk. It still can try to boot from
+won't retry booting from other disk. It can still try to boot from
floppy or net, though.
2. Sometimes, firmware cannot map the device path QEMU wants firmware to
can natively boot from, but if firmware relies on an option ROM for
booting, and the same option ROM is used for booting from more then one
device, the firmware may not be able to ask the option ROM to boot from
-a particular device reliably. For instance with PC BIOS, if a SCSI HBA
+a particular device reliably. For instance with the PC BIOS, if a SCSI HBA
has three bootable devices target1, target3, target5 connected to it,
the option ROM will have a boot method for each of them, but it is not
possible to map from boot method back to a specific target. This is a
-shortcoming of PC BIOS boot specification.
+shortcoming of the PC BIOS boot specification.
Revision 1.1
April 22rd, 2005
-Smartcard are used for authentication, single sign on, decryption in
+Smartcards are used for authentication, single sign on, decryption in
public/private schemes and digital signatures. A smartcard reader on the client
cannot be used on a guest with simple usb passthrough since it will then not be
available on the client, possibly locking the computer when it is "removed". On
blkcfg.heads = s->conf->heads;
/*
* We must ensure that the block device capacity is a multiple of
- * the logical block size. If that is not the case, lets use
+ * the logical block size. If that is not the case, let's use
* sector_mask to adopt the geometry to have a correct picture.
* For those devices where the capacity is ok for the given geometry
- * we dont touch the sector value of the geometry, since some devices
+ * we don't touch the sector value of the geometry, since some devices
* (like s390 dasd) need a specific value. Here the capacity is already
* cyls*heads*secs*blk_size and the sector value is not block size
* divided by 512 - instead it is the amount of blk_size blocks
return 10000;
case 0xd00: /* CPUID Base. */
return cpu_single_env->cp15.c0_cpuid;
- case 0xd04: /* Interrypt Control State. */
+ case 0xd04: /* Interrupt Control State. */
/* VECTACTIVE */
val = s->gic.running_irq[0];
if (val == 1023) {
}
my_ram_size = my_ram_size >> (20 + shift) << (20 + shift);
- /* lets propagate the changed ram size into the global variable. */
+ /* let's propagate the changed ram size into the global variable. */
ram_size = my_ram_size;
/* get a BUS */
}
my_ram_size = my_ram_size >> (20 + shift) << (20 + shift);
- /* lets propagate the changed ram size into the global variable. */
+ /* let's propagate the changed ram size into the global variable. */
ram_size = my_ram_size;
/* get a BUS */
sd->csd[13] = 0x40;
sd->csd[14] = 0x00;
sd->csd[15] = 0x00;
- sd->ocr |= 1 << 30; /* High Capacity SD Memort Card */
+ sd->ocr |= 1 << 30; /* High Capacity SD Memory Card */
}
}
case 25: /* CMD25: WRITE_MULTIPLE_BLOCK */
if (sd->data_offset == 0) {
- /* Start of the block - lets check the address is valid */
+ /* Start of the block - let's check the address is valid */
if (sd->data_start + sd->blk_len > sd->size) {
sd->card_status |= ADDRESS_ERROR;
break;
vmstate_register_ram_global(sdram);
memory_region_add_subregion(address_space_mem, SDRAM_BASE, sdram);
/* Register peripherals */
- s = sh7750_init(env, address_space_mem);
+ s = sh7750_init(cpu, address_space_mem);
irq = r2d_fpga_init(address_space_mem, 0x04000000, sh7750_irl(s));
dev = qdev_create(NULL, "sh_pci");
MemoryRegion iomem_ffc;
MemoryRegion mmct_iomem;
/* CPU */
- CPUSH4State *cpu;
+ SuperHCPU *cpu;
/* Peripheral frequency in Hz */
uint32_t periph_freq;
/* SDRAM controller */
static inline int has_bcr3_and_bcr4(SH7750State * s)
{
- return (s->cpu->features & SH_FEATURE_BCR3_AND_BCR4);
+ return s->cpu->env.features & SH_FEATURE_BCR3_AND_BCR4;
}
/**********************************************************************
I/O ports
ignore_access("long read", addr);
return 0;
case SH7750_MMUCR_A7:
- return s->cpu->mmucr;
+ return s->cpu->env.mmucr;
case SH7750_PTEH_A7:
- return s->cpu->pteh;
+ return s->cpu->env.pteh;
case SH7750_PTEL_A7:
- return s->cpu->ptel;
+ return s->cpu->env.ptel;
case SH7750_TTB_A7:
- return s->cpu->ttb;
+ return s->cpu->env.ttb;
case SH7750_TEA_A7:
- return s->cpu->tea;
+ return s->cpu->env.tea;
case SH7750_TRA_A7:
- return s->cpu->tra;
+ return s->cpu->env.tra;
case SH7750_EXPEVT_A7:
- return s->cpu->expevt;
+ return s->cpu->env.expevt;
case SH7750_INTEVT_A7:
- return s->cpu->intevt;
+ return s->cpu->env.intevt;
case SH7750_CCR_A7:
return s->ccr;
case 0x1f000030: /* Processor version */
return;
case SH7750_MMUCR_A7:
if (mem_value & MMUCR_TI) {
- cpu_sh4_invalidate_tlb(s->cpu);
+ cpu_sh4_invalidate_tlb(&s->cpu->env);
}
- s->cpu->mmucr = mem_value & ~MMUCR_TI;
+ s->cpu->env.mmucr = mem_value & ~MMUCR_TI;
return;
case SH7750_PTEH_A7:
/* If asid changes, clear all registered tlb entries. */
- if ((s->cpu->pteh & 0xff) != (mem_value & 0xff))
- tlb_flush(s->cpu, 1);
- s->cpu->pteh = mem_value;
- return;
+ if ((s->cpu->env.pteh & 0xff) != (mem_value & 0xff)) {
+ tlb_flush(&s->cpu->env, 1);
+ }
+ s->cpu->env.pteh = mem_value;
+ return;
case SH7750_PTEL_A7:
- s->cpu->ptel = mem_value;
- return;
+ s->cpu->env.ptel = mem_value;
+ return;
case SH7750_PTEA_A7:
- s->cpu->ptea = mem_value & 0x0000000f;
- return;
+ s->cpu->env.ptea = mem_value & 0x0000000f;
+ return;
case SH7750_TTB_A7:
- s->cpu->ttb = mem_value;
- return;
+ s->cpu->env.ttb = mem_value;
+ return;
case SH7750_TEA_A7:
- s->cpu->tea = mem_value;
- return;
+ s->cpu->env.tea = mem_value;
+ return;
case SH7750_TRA_A7:
- s->cpu->tra = mem_value & 0x000007ff;
- return;
+ s->cpu->env.tra = mem_value & 0x000007ff;
+ return;
case SH7750_EXPEVT_A7:
- s->cpu->expevt = mem_value & 0x000007ff;
- return;
+ s->cpu->env.expevt = mem_value & 0x000007ff;
+ return;
case SH7750_INTEVT_A7:
- s->cpu->intevt = mem_value & 0x000007ff;
- return;
+ s->cpu->env.intevt = mem_value & 0x000007ff;
+ return;
case SH7750_CCR_A7:
s->ccr = mem_value;
return;
/* do nothing */
break;
case MM_ITLB_ADDR:
- ret = cpu_sh4_read_mmaped_itlb_addr(s->cpu, addr);
+ ret = cpu_sh4_read_mmaped_itlb_addr(&s->cpu->env, addr);
break;
case MM_ITLB_DATA:
- ret = cpu_sh4_read_mmaped_itlb_data(s->cpu, addr);
+ ret = cpu_sh4_read_mmaped_itlb_data(&s->cpu->env, addr);
break;
case MM_OCACHE_ADDR:
case MM_OCACHE_DATA:
/* do nothing */
break;
case MM_UTLB_ADDR:
- ret = cpu_sh4_read_mmaped_utlb_addr(s->cpu, addr);
+ ret = cpu_sh4_read_mmaped_utlb_addr(&s->cpu->env, addr);
break;
case MM_UTLB_DATA:
- ret = cpu_sh4_read_mmaped_utlb_data(s->cpu, addr);
+ ret = cpu_sh4_read_mmaped_utlb_data(&s->cpu->env, addr);
break;
default:
abort();
/* do nothing */
break;
case MM_ITLB_ADDR:
- cpu_sh4_write_mmaped_itlb_addr(s->cpu, addr, mem_value);
+ cpu_sh4_write_mmaped_itlb_addr(&s->cpu->env, addr, mem_value);
break;
case MM_ITLB_DATA:
- cpu_sh4_write_mmaped_itlb_data(s->cpu, addr, mem_value);
+ cpu_sh4_write_mmaped_itlb_data(&s->cpu->env, addr, mem_value);
abort();
break;
case MM_OCACHE_ADDR:
/* do nothing */
break;
case MM_UTLB_ADDR:
- cpu_sh4_write_mmaped_utlb_addr(s->cpu, addr, mem_value);
+ cpu_sh4_write_mmaped_utlb_addr(&s->cpu->env, addr, mem_value);
break;
case MM_UTLB_DATA:
- cpu_sh4_write_mmaped_utlb_data(s->cpu, addr, mem_value);
+ cpu_sh4_write_mmaped_utlb_data(&s->cpu->env, addr, mem_value);
break;
default:
abort();
.endianness = DEVICE_NATIVE_ENDIAN,
};
-SH7750State *sh7750_init(CPUSH4State * cpu, MemoryRegion *sysmem)
+SH7750State *sh7750_init(SuperHCPU *cpu, MemoryRegion *sysmem)
{
SH7750State *s;
_INTC_ARRAY(vectors),
_INTC_ARRAY(groups));
- cpu->intc_handle = &s->intc;
+ cpu->env.intc_handle = &s->intc;
sh_serial_init(sysmem, 0x1fe00000,
0, s->periph_freq, serial_hds[0],
s->intc.irqs[TMU2_TUNI],
s->intc.irqs[TMU2_TICPI]);
- if (cpu->id & (SH_CPU_SH7750 | SH_CPU_SH7750S | SH_CPU_SH7751)) {
+ if (cpu->env.id & (SH_CPU_SH7750 | SH_CPU_SH7750S | SH_CPU_SH7751)) {
sh_intc_register_sources(&s->intc,
_INTC_ARRAY(vectors_dma4),
_INTC_ARRAY(groups_dma4));
}
- if (cpu->id & (SH_CPU_SH7750R | SH_CPU_SH7751R)) {
+ if (cpu->env.id & (SH_CPU_SH7750R | SH_CPU_SH7751R)) {
sh_intc_register_sources(&s->intc,
_INTC_ARRAY(vectors_dma8),
_INTC_ARRAY(groups_dma8));
}
- if (cpu->id & (SH_CPU_SH7750R | SH_CPU_SH7751 | SH_CPU_SH7751R)) {
+ if (cpu->env.id & (SH_CPU_SH7750R | SH_CPU_SH7751 | SH_CPU_SH7751R)) {
sh_intc_register_sources(&s->intc,
_INTC_ARRAY(vectors_tmu34),
NULL, 0);
NULL, NULL);
}
- if (cpu->id & (SH_CPU_SH7751_ALL)) {
+ if (cpu->env.id & (SH_CPU_SH7751_ALL)) {
sh_intc_register_sources(&s->intc,
_INTC_ARRAY(vectors_pci),
_INTC_ARRAY(groups_pci));
}
- if (cpu->id & (SH_CPU_SH7750S | SH_CPU_SH7750R | SH_CPU_SH7751_ALL)) {
+ if (cpu->env.id & (SH_CPU_SH7750S | SH_CPU_SH7750R | SH_CPU_SH7751_ALL)) {
sh_intc_register_sources(&s->intc,
_INTC_ARRAY(vectors_irlm),
NULL, 0);
{
const char *cpu_model = args->cpu_model;
int ret;
- CPUSH4State *env;
+ SuperHCPU *cpu;
struct SH7750State *s;
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *rom = g_new(MemoryRegion, 1);
cpu_model = "any";
printf("Initializing CPU\n");
- env = cpu_init(cpu_model);
+ cpu = cpu_sh4_init(cpu_model);
+ if (cpu == NULL) {
+ fprintf(stderr, "Unable to find CPU definition\n");
+ exit(1);
+ }
/* Allocate memory space */
printf("Allocating ROM\n");
}
/* Register peripherals */
- s = sh7750_init(env, sysmem);
+ s = sh7750_init(cpu, sysmem);
/* XXXXX Check success */
tc58128_init(s, "shix_linux_nand.bin", NULL);
fprintf(stderr, "initialization terminated\n");
+++ /dev/null
-#ifndef QEMU_AES_H
-#define QEMU_AES_H
-
-#define AES_MAXNR 14
-#define AES_BLOCK_SIZE 16
-
-struct aes_key_st {
- uint32_t rd_key[4 *(AES_MAXNR + 1)];
- int rounds;
-};
-typedef struct aes_key_st AES_KEY;
-
-int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
- AES_KEY *key);
-int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
- AES_KEY *key);
-
-void AES_encrypt(const unsigned char *in, unsigned char *out,
- const AES_KEY *key);
-void AES_decrypt(const unsigned char *in, unsigned char *out,
- const AES_KEY *key);
-void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
- const unsigned long length, const AES_KEY *key,
- unsigned char *ivec, const int enc);
-
-#endif
struct SH7750State;
struct MemoryRegion;
-struct SH7750State *sh7750_init(CPUSH4State * cpu, struct MemoryRegion *sysmem);
+struct SH7750State *sh7750_init(SuperHCPU *cpu, struct MemoryRegion *sysmem);
typedef struct {
/* The callback will be triggered if any of the designated lines change */
--- /dev/null
+#ifndef QEMU_AES_H
+#define QEMU_AES_H
+
+#define AES_MAXNR 14
+#define AES_BLOCK_SIZE 16
+
+struct aes_key_st {
+ uint32_t rd_key[4 *(AES_MAXNR + 1)];
+ int rounds;
+};
+typedef struct aes_key_st AES_KEY;
+
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key);
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key);
+
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ const unsigned long length, const AES_KEY *key,
+ unsigned char *ivec, const int enc);
+
+/*
+AES_Te0[x] = S [x].[02, 01, 01, 03];
+AES_Te1[x] = S [x].[03, 02, 01, 01];
+AES_Te2[x] = S [x].[01, 03, 02, 01];
+AES_Te3[x] = S [x].[01, 01, 03, 02];
+AES_Te4[x] = S [x].[01, 01, 01, 01];
+
+AES_Td0[x] = Si[x].[0e, 09, 0d, 0b];
+AES_Td1[x] = Si[x].[0b, 0e, 09, 0d];
+AES_Td2[x] = Si[x].[0d, 0b, 0e, 09];
+AES_Td3[x] = Si[x].[09, 0d, 0b, 0e];
+AES_Td4[x] = Si[x].[01, 01, 01, 01];
+*/
+
+extern const uint32_t AES_Te0[256], AES_Te1[256], AES_Te2[256],
+ AES_Te3[256], AES_Te4[256];
+extern const uint32_t AES_Td0[256], AES_Td1[256], AES_Td2[256],
+ AES_Td3[256], AES_Td4[256];
+
+#endif
#endif
#if defined(TARGET_NR_eventfd2)
case TARGET_NR_eventfd2:
- ret = get_errno(eventfd(arg1, arg2));
+ {
+ int host_flags = arg2 & (~(TARGET_O_NONBLOCK | TARGET_O_CLOEXEC));
+ if (arg2 & TARGET_O_NONBLOCK) {
+ host_flags |= O_NONBLOCK;
+ }
+ if (arg2 & TARGET_O_CLOEXEC) {
+ host_flags |= O_CLOEXEC;
+ }
+ ret = get_errno(eventfd(arg1, host_flags));
break;
+ }
#endif
#endif /* CONFIG_EVENTFD */
#if defined(CONFIG_FALLOCATE) && defined(TARGET_NR_fallocate)
CPUID_PSE36 (needed for Solaris) */
/* missing:
CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */
-#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | \
- CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | CPUID_EXT_SSE41 | \
- CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | CPUID_EXT_MOVBE | \
- CPUID_EXT_HYPERVISOR)
+#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
+ CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \
+ CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
+ CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR)
/* missing:
- CPUID_EXT_PCLMULQDQ, CPUID_EXT_DTES64, CPUID_EXT_DSCPL,
- CPUID_EXT_VMX, CPUID_EXT_SMX, CPUID_EXT_EST, CPUID_EXT_TM2,
- CPUID_EXT_CID, CPUID_EXT_FMA, CPUID_EXT_XTPR, CPUID_EXT_PDCM,
- CPUID_EXT_PCID, CPUID_EXT_DCA, CPUID_EXT_X2APIC,
- CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AES, CPUID_EXT_XSAVE,
+ CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
+ CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
+ CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
+ CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_XSAVE,
CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
CPUID_EXT_RDRAND */
#define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \
CPUID_DE | CPUID_FP87,
.ext_features = CPUID_EXT_AES | CPUID_EXT_POPCNT | CPUID_EXT_SSE42 |
CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 |
- CPUID_EXT_SSE3,
+ CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3,
.ext2_features = CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
.ext3_features = CPUID_EXT3_LAHF_LM,
.xlevel = 0x8000000A,
#include <math.h>
#include "cpu.h"
#include "helper.h"
+#include "qemu/aes.h"
#include "qemu/host-utils.h"
#if !defined(CONFIG_USER_ONLY)
return POPCOUNT(n, 5);
#endif
}
+
+void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+ uint32_t ctrl)
+{
+ uint64_t ah, al, b, resh, resl;
+
+ ah = 0;
+ al = d->Q((ctrl & 1) != 0);
+ b = s->Q((ctrl & 16) != 0);
+ resh = resl = 0;
+
+ while (b) {
+ if (b & 1) {
+ resl ^= al;
+ resh ^= ah;
+ }
+ ah = (ah << 1) | (al >> 63);
+ al <<= 1;
+ b >>= 1;
+ }
+
+ d->Q(0) = resl;
+ d->Q(1) = resh;
+}
+
+/* AES-NI op helpers */
+static const uint8_t aes_shifts[16] = {
+ 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
+};
+
+static const uint8_t aes_ishifts[16] = {
+ 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
+};
+
+void glue(helper_aesdec, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+ int i;
+ Reg st = *d;
+ Reg rk = *s;
+
+ for (i = 0 ; i < 4 ; i++) {
+ d->L(i) = rk.L(i) ^ bswap32(AES_Td0[st.B(aes_ishifts[4*i+0])] ^
+ AES_Td1[st.B(aes_ishifts[4*i+1])] ^
+ AES_Td2[st.B(aes_ishifts[4*i+2])] ^
+ AES_Td3[st.B(aes_ishifts[4*i+3])]);
+ }
+}
+
+void glue(helper_aesdeclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+ int i;
+ Reg st = *d;
+ Reg rk = *s;
+
+ for (i = 0; i < 16; i++) {
+ d->B(i) = rk.B(i) ^ (AES_Td4[st.B(aes_ishifts[i])] & 0xff);
+ }
+}
+
+void glue(helper_aesenc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+ int i;
+ Reg st = *d;
+ Reg rk = *s;
+
+ for (i = 0 ; i < 4 ; i++) {
+ d->L(i) = rk.L(i) ^ bswap32(AES_Te0[st.B(aes_shifts[4*i+0])] ^
+ AES_Te1[st.B(aes_shifts[4*i+1])] ^
+ AES_Te2[st.B(aes_shifts[4*i+2])] ^
+ AES_Te3[st.B(aes_shifts[4*i+3])]);
+ }
+}
+
+void glue(helper_aesenclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+ int i;
+ Reg st = *d;
+ Reg rk = *s;
+
+ for (i = 0; i < 16; i++) {
+ d->B(i) = rk.B(i) ^ (AES_Te4[st.B(aes_shifts[i])] & 0xff);
+ }
+
+}
+
+void glue(helper_aesimc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+ int i;
+ Reg tmp = *s;
+
+ for (i = 0 ; i < 4 ; i++) {
+ d->L(i) = bswap32(AES_Td0[AES_Te4[tmp.B(4*i+0)] & 0xff] ^
+ AES_Td1[AES_Te4[tmp.B(4*i+1)] & 0xff] ^
+ AES_Td2[AES_Te4[tmp.B(4*i+2)] & 0xff] ^
+ AES_Td3[AES_Te4[tmp.B(4*i+3)] & 0xff]);
+ }
+}
+
+void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+ uint32_t ctrl)
+{
+ int i;
+ Reg tmp = *s;
+
+ for (i = 0 ; i < 4 ; i++) {
+ d->B(i) = AES_Te4[tmp.B(i + 4)] & 0xff;
+ d->B(i + 8) = AES_Te4[tmp.B(i + 12)] & 0xff;
+ }
+ d->L(1) = (d->L(0) << 24 | d->L(0) >> 8) ^ ctrl;
+ d->L(3) = (d->L(2) << 24 | d->L(2) >> 8) ^ ctrl;
+}
#endif
#undef SHIFT
DEF_HELPER_3(popcnt, tl, env, tl, i32)
#endif
+/* AES-NI op helpers */
+#if SHIFT == 1
+DEF_HELPER_3(glue(aesdec, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesdeclast, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesenc, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesenclast, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesimc, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env, Reg, Reg, i32)
+DEF_HELPER_4(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, i32)
+#endif
+
#undef SHIFT
#undef Reg
#undef SUFFIX
#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
+#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
+ CPUID_EXT_PCLMULQDQ }
+#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
static const struct SSEOpHelper_epp sse_op_table6[256] = {
[0x00] = SSSE3_OP(pshufb),
[0x3f] = SSE41_OP(pmaxud),
[0x40] = SSE41_OP(pmulld),
[0x41] = SSE41_OP(phminposuw),
+ [0xdb] = AESNI_OP(aesimc),
+ [0xdc] = AESNI_OP(aesenc),
+ [0xdd] = AESNI_OP(aesenclast),
+ [0xde] = AESNI_OP(aesdec),
+ [0xdf] = AESNI_OP(aesdeclast),
};
static const struct SSEOpHelper_eppi sse_op_table7[256] = {
[0x40] = SSE41_OP(dpps),
[0x41] = SSE41_OP(dppd),
[0x42] = SSE41_OP(mpsadbw),
+ [0x44] = PCLMULQDQ_OP(pclmulqdq),
[0x60] = SSE42_OP(pcmpestrm),
[0x61] = SSE42_OP(pcmpestri),
[0x62] = SSE42_OP(pcmpistrm),
[0x63] = SSE42_OP(pcmpistri),
+ [0xdf] = AESNI_OP(aeskeygenassist),
};
static void gen_sse(CPUX86State *env, DisasContext *s, int b,
tlb_ex = d & TLB_EX;
tlb_wr = d & TLB_WR;
- /* Now lets see if there is a zone that overrides the protbits. */
+ /* Now let's see if there is a zone that overrides the protbits. */
tlb_zsel = (d >> 4) & 0xf;
t0 = mmu->regs[MMU_R_ZPR] >> (30 - (tlb_zsel * 2));
t0 &= 0x3;
cc->class_by_name = moxie_cpu_class_by_name;
- dc->vmsd = &vmstate_moxie_cpu;
+ cpu_class_set_vmsd(cc, &vmstate_moxie_cpu);
cc->do_interrupt = moxie_cpu_do_interrupt;
}
#define TARGET_HAS_ICE 1
-#define CPU_SAVE_VERSION 1
-
#define ELF_MACHINE 0xFEED /* EM_MOXIE */
#define MOXIE_EX_DIV0 0
const VMStateDescription vmstate_moxie_cpu = {
.name = "cpu",
- .version_id = CPU_SAVE_VERSION,
+ .version_id = 1,
.minimum_version_id = 1,
.minimum_version_id_old = 1,
.fields = (VMStateField[]) {
VMSTATE_END_OF_LIST()
}
};
-
-void cpu_save(QEMUFile *f, void *opaque)
-{
- vmstate_save_state(f, &vmstate_moxie_cpu, opaque);
-}
-
-int cpu_load(QEMUFile *f, void *opaque, int version_id)
-{
- return vmstate_load_state(f, &vmstate_moxie_cpu, opaque, version_id);
-}
env->retxl = str;
/* Lest we fail to service interrupts in a timely manner, limit the
- amount of work we're willing to do. For now, lets cap at 8k. */
+ amount of work we're willing to do. For now, let's cap at 8k. */
for (len = 0; len < 0x2000; ++len) {
if (str + len == end) {
/* Character not found. R1 & R2 are unmodified. */
s2 = fix_address(env, s2);
/* Lest we fail to service interrupts in a timely manner, limit the
- amount of work we're willing to do. For now, lets cap at 8k. */
+ amount of work we're willing to do. For now, let's cap at 8k. */
for (len = 0; len < 0x2000; ++len) {
uint8_t v1 = cpu_ldub_data(env, s1 + len);
uint8_t v2 = cpu_ldub_data(env, s2 + len);
s = fix_address(env, s);
/* Lest we fail to service interrupts in a timely manner, limit the
- amount of work we're willing to do. For now, lets cap at 8k. */
+ amount of work we're willing to do. For now, let's cap at 8k. */
for (len = 0; len < 0x2000; ++len) {
uint8_t v = cpu_ldub_data(env, s + len);
cpu_stb_data(env, d + len, v);
uint64_t cksm = (uint32_t)r1;
/* Lest we fail to service interrupts in a timely manner, limit the
- amount of work we're willing to do. For now, lets cap at 8k. */
+ amount of work we're willing to do. For now, let's cap at 8k. */
max_len = (src_len > 0x2000 ? 0x2000 : src_len);
/* Process full words as available. */
* THE SOFTWARE.
*/
+/* We only support generating code for 64-bit mode. */
+#if TCG_TARGET_REG_BITS != 64
+#error "unsupported code generation mode"
+#endif
+
/* ??? The translation blocks produced by TCG are generally small enough to
be entirely reachable with a 16-bit displacement. Leaving the option for
a 32-bit displacement here Just In Case. */
#define USE_LONG_BRANCHES 0
#define TCG_CT_CONST_32 0x0100
-#define TCG_CT_CONST_NEG 0x0200
-#define TCG_CT_CONST_ADDI 0x0400
#define TCG_CT_CONST_MULI 0x0800
-#define TCG_CT_CONST_ANDI 0x1000
#define TCG_CT_CONST_ORI 0x2000
#define TCG_CT_CONST_XORI 0x4000
#define TCG_CT_CONST_CMPI 0x8000
typedef enum S390Opcode {
RIL_AFI = 0xc209,
RIL_AGFI = 0xc208,
+ RIL_ALFI = 0xc20b,
RIL_ALGFI = 0xc20a,
RIL_BRASL = 0xc005,
RIL_BRCL = 0xc004,
RIL_NILF = 0xc00b,
RIL_OIHF = 0xc00c,
RIL_OILF = 0xc00d,
+ RIL_SLFI = 0xc205,
+ RIL_SLGFI = 0xc204,
RIL_XIHF = 0xc006,
RIL_XILF = 0xc007,
RIE_CLGIJ = 0xec7d,
RIE_CLRJ = 0xec77,
RIE_CRJ = 0xec76,
+ RIE_RISBG = 0xec55,
RRE_AGR = 0xb908,
+ RRE_ALGR = 0xb90a,
+ RRE_ALCR = 0xb998,
+ RRE_ALCGR = 0xb988,
RRE_CGR = 0xb920,
RRE_CLGR = 0xb921,
RRE_DLGR = 0xb987,
RRE_LRVR = 0xb91f,
RRE_LRVGR = 0xb90f,
RRE_LTGR = 0xb902,
+ RRE_MLGR = 0xb986,
RRE_MSGR = 0xb90c,
RRE_MSR = 0xb252,
RRE_NGR = 0xb980,
RRE_OGR = 0xb981,
RRE_SGR = 0xb909,
+ RRE_SLGR = 0xb90b,
+ RRE_SLBR = 0xb999,
+ RRE_SLBGR = 0xb989,
RRE_XGR = 0xb982,
+ RRF_LOCR = 0xb9f2,
+ RRF_LOCGR = 0xb9e2,
+
RR_AR = 0x1a,
+ RR_ALR = 0x1e,
RR_BASR = 0x0d,
RR_BCR = 0x07,
RR_CLR = 0x15,
RR_NR = 0x14,
RR_OR = 0x16,
RR_SR = 0x1b,
+ RR_SLR = 0x1f,
RR_XR = 0x17,
RSY_RLL = 0xeb1d,
RXY_AY = 0xe35a,
RXY_CG = 0xe320,
RXY_CY = 0xe359,
+ RXY_LAY = 0xe371,
RXY_LB = 0xe376,
RXY_LG = 0xe304,
RXY_LGB = 0xe377,
RX_A = 0x5a,
RX_C = 0x59,
RX_L = 0x58,
+ RX_LA = 0x41,
RX_LH = 0x48,
RX_ST = 0x50,
RX_STC = 0x42,
static const int tcg_target_call_oarg_regs[] = {
TCG_REG_R2,
-#if TCG_TARGET_REG_BITS == 32
- TCG_REG_R3
-#endif
};
#define S390_CC_EQ 8
#define FACILITY_LONG_DISP (1ULL << (63 - 18))
#define FACILITY_EXT_IMM (1ULL << (63 - 21))
#define FACILITY_GEN_INST_EXT (1ULL << (63 - 34))
+#define FACILITY_LOAD_ON_COND (1ULL << (63 - 45))
static uint64_t facilities;
tcg_regset_set32(ct->u.regs, 0, 0xffff);
tcg_regset_reset_reg (ct->u.regs, TCG_REG_R2);
tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3);
+ tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4);
break;
case 'a': /* force R2 for division */
ct->ct |= TCG_CT_REG;
tcg_regset_clear(ct->u.regs);
tcg_regset_set_reg(ct->u.regs, TCG_REG_R3);
break;
- case 'N': /* force immediate negate */
- ct->ct |= TCG_CT_CONST_NEG;
- break;
case 'W': /* force 32-bit ("word") immediate */
ct->ct |= TCG_CT_CONST_32;
break;
- case 'I':
- ct->ct |= TCG_CT_CONST_ADDI;
- break;
case 'K':
ct->ct |= TCG_CT_CONST_MULI;
break;
- case 'A':
- ct->ct |= TCG_CT_CONST_ANDI;
- break;
case 'O':
ct->ct |= TCG_CT_CONST_ORI;
break;
return 0;
}
-/* Immediates to be used with logical AND. This is an optimization only,
- since a full 64-bit immediate AND can always be performed with 4 sequential
- NI[LH][LH] instructions. What we're looking for is immediates that we
- can load efficiently, and the immediate load plus the reg-reg AND is
- smaller than the sequential NI's. */
-
-static int tcg_match_andi(int ct, tcg_target_ulong val)
-{
- int i;
-
- if (facilities & FACILITY_EXT_IMM) {
- if (ct & TCG_CT_CONST_32) {
- /* All 32-bit ANDs can be performed with 1 48-bit insn. */
- return 1;
- }
-
- /* Zero-extensions. */
- if (val == 0xff || val == 0xffff || val == 0xffffffff) {
- return 1;
- }
- } else {
- if (ct & TCG_CT_CONST_32) {
- val = (uint32_t)val;
- } else if (val == 0xffffffff) {
- return 1;
- }
- }
-
- /* Try all 32-bit insns that can perform it in one go. */
- for (i = 0; i < 4; i++) {
- tcg_target_ulong mask = ~(0xffffull << i*16);
- if ((val & mask) == mask) {
- return 1;
- }
- }
-
- /* Look for 16-bit values performing the mask. These are better
- to load with LLI[LH][LH]. */
- for (i = 0; i < 4; i++) {
- tcg_target_ulong mask = 0xffffull << i*16;
- if ((val & mask) == val) {
- return 0;
- }
- }
-
- /* Look for 32-bit values performing the 64-bit mask. These
- are better to load with LLI[LH]F, or if extended immediates
- not available, with a pair of LLI insns. */
- if ((ct & TCG_CT_CONST_32) == 0) {
- if (val <= 0xffffffff || (val & 0xffffffff) == 0) {
- return 0;
- }
- }
-
- return 1;
-}
-
/* Immediates to be used with logical OR. This is an optimization only,
since a full 64-bit immediate OR can always be performed with 4 sequential
OI[LH][LH] instructions. What we're looking for is immediates that we
}
/* Handle the modifiers. */
- if (ct & TCG_CT_CONST_NEG) {
- val = -val;
- }
if (ct & TCG_CT_CONST_32) {
val = (int32_t)val;
}
/* The following are mutually exclusive. */
- if (ct & TCG_CT_CONST_ADDI) {
- /* Immediates that may be used with add. If we have the
- extended-immediates facility then we have ADD IMMEDIATE
- with signed and unsigned 32-bit, otherwise we have only
- ADD HALFWORD IMMEDIATE with a signed 16-bit. */
- if (facilities & FACILITY_EXT_IMM) {
- return val == (int32_t)val || val == (uint32_t)val;
- } else {
- return val == (int16_t)val;
- }
- } else if (ct & TCG_CT_CONST_MULI) {
+ if (ct & TCG_CT_CONST_MULI) {
/* Immediates that may be used with multiply. If we have the
general-instruction-extensions, then we have MULTIPLY SINGLE
IMMEDIATE with a signed 32-bit, otherwise we have only
} else {
return val == (int16_t)val;
}
- } else if (ct & TCG_CT_CONST_ANDI) {
- return tcg_match_andi(ct, val);
} else if (ct & TCG_CT_CONST_ORI) {
return tcg_match_ori(ct, val);
} else if (ct & TCG_CT_CONST_XORI) {
tcg_out32(s, (op << 16) | (r1 << 4) | r2);
}
+static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op,
+ TCGReg r1, TCGReg r2, int m3)
+{
+ tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
+}
+
static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
{
tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
/* If we get here, both the high and low parts have non-zero bits. */
/* Recurse to load the lower 32-bits. */
- tcg_out_movi(s, TCG_TYPE_I32, ret, sval);
+ tcg_out_movi(s, TCG_TYPE_I64, ret, uval & 0xffffffff);
/* Insert data into the high 32-bits. */
uval = uval >> 31 >> 1;
tcg_target_long ofs)
{
if (ofs < -0x80000 || ofs >= 0x80000) {
- /* Combine the low 16 bits of the offset with the actual load insn;
- the high 48 bits must come from an immediate load. */
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs & ~0xffff);
- ofs &= 0xffff;
+ /* Combine the low 20 bits of the offset with the actual load insn;
+ the high 44 bits must come from an immediate load. */
+ tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
+ ofs = low;
/* If we were already given an index register, add it in. */
if (index != TCG_REG_NONE) {
tcg_out_ld(s, type, dest, dest, addr & 0xffff);
}
+static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
+ int msb, int lsb, int ofs, int z)
+{
+ /* Format RIE-f */
+ tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src);
+ tcg_out16(s, (msb << 8) | (z << 7) | lsb);
+ tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff));
+}
+
static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
if (facilities & FACILITY_EXT_IMM) {
tcg_out_insn(s, RRE, LLGFR, dest, src);
}
-static inline void tgen32_addi(TCGContext *s, TCGReg dest, int32_t val)
+/* Accept bit patterns like these:
+ 0....01....1
+ 1....10....0
+ 1..10..01..1
+ 0..01..10..0
+ Copied from gcc sources. */
+static inline bool risbg_mask(uint64_t c)
{
- if (val == (int16_t)val) {
- tcg_out_insn(s, RI, AHI, dest, val);
- } else {
- tcg_out_insn(s, RIL, AFI, dest, val);
- }
-}
-
-static inline void tgen64_addi(TCGContext *s, TCGReg dest, int64_t val)
-{
- if (val == (int16_t)val) {
- tcg_out_insn(s, RI, AGHI, dest, val);
- } else if (val == (int32_t)val) {
- tcg_out_insn(s, RIL, AGFI, dest, val);
- } else if (val == (uint32_t)val) {
- tcg_out_insn(s, RIL, ALGFI, dest, val);
- } else {
- tcg_abort();
- }
-
+ uint64_t lsb;
+ /* We don't change the number of transitions by inverting,
+ so make sure we start with the LSB zero. */
+ if (c & 1) {
+ c = ~c;
+ }
+ /* Reject all zeros or all ones. */
+ if (c == 0) {
+ return false;
+ }
+ /* Find the first transition. */
+ lsb = c & -c;
+ /* Invert to look for a second transition. */
+ c = ~c;
+ /* Erase the first transition. */
+ c &= -lsb;
+ /* Find the second transition, if any. */
+ lsb = c & -c;
+ /* Match if all the bits are 1's, or if c is zero. */
+ return c == -lsb;
}
-static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
static const S390Opcode ni_insns[4] = {
RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
static const S390Opcode nif_insns[2] = {
RIL_NILF, RIL_NIHF
};
-
+ uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
int i;
- /* Look for no-op. */
- if (val == -1) {
- return;
- }
-
/* Look for the zero-extensions. */
- if (val == 0xffffffff) {
+ if ((val & valid) == 0xffffffff) {
tgen_ext32u(s, dest, dest);
return;
}
-
if (facilities & FACILITY_EXT_IMM) {
- if (val == 0xff) {
+ if ((val & valid) == 0xff) {
tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
return;
}
- if (val == 0xffff) {
+ if ((val & valid) == 0xffff) {
tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
return;
}
+ }
- /* Try all 32-bit insns that can perform it in one go. */
- for (i = 0; i < 4; i++) {
- tcg_target_ulong mask = ~(0xffffull << i*16);
- if ((val & mask) == mask) {
- tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
- return;
- }
+ /* Try all 32-bit insns that can perform it in one go. */
+ for (i = 0; i < 4; i++) {
+ tcg_target_ulong mask = ~(0xffffull << i*16);
+ if (((val | ~valid) & mask) == mask) {
+ tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+ return;
}
+ }
- /* Try all 48-bit insns that can perform it in one go. */
- if (facilities & FACILITY_EXT_IMM) {
- for (i = 0; i < 2; i++) {
- tcg_target_ulong mask = ~(0xffffffffull << i*32);
- if ((val & mask) == mask) {
- tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
- return;
- }
+ /* Try all 48-bit insns that can perform it in one go. */
+ if (facilities & FACILITY_EXT_IMM) {
+ for (i = 0; i < 2; i++) {
+ tcg_target_ulong mask = ~(0xffffffffull << i*32);
+ if (((val | ~valid) & mask) == mask) {
+ tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+ return;
}
}
+ }
+ if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) {
+ int msb, lsb;
+ if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
+ /* Achieve wraparound by swapping msb and lsb. */
+ msb = 63 - ctz64(~val);
+ lsb = clz64(~val) + 1;
+ } else {
+ msb = clz64(val);
+ lsb = 63 - ctz64(val);
+ }
+ tcg_out_risbg(s, dest, dest, msb, lsb, 0, 1);
+ return;
+ }
- /* Perform the AND via sequential modifications to the high and low
- parts. Do this via recursion to handle 16-bit vs 32-bit masks in
- each half. */
- tgen64_andi(s, dest, val | 0xffffffff00000000ull);
- tgen64_andi(s, dest, val | 0x00000000ffffffffull);
+ /* Fall back to loading the constant. */
+ tcg_out_movi(s, type, TCG_TMP0, val);
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
} else {
- /* With no extended-immediate facility, just emit the sequence. */
- for (i = 0; i < 4; i++) {
- tcg_target_ulong mask = 0xffffull << i*16;
- if ((val & mask) != mask) {
- tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
- }
- }
+ tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
}
}
}
static void tgen_setcond(TCGContext *s, TCGType type, TCGCond c,
- TCGReg dest, TCGReg r1, TCGArg c2, int c2const)
+ TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
{
- int cc = tgen_cmp(s, type, c, r1, c2, c2const);
+ int cc = tgen_cmp(s, type, c, c1, c2, c2const);
/* Emit: r1 = 1; if (cc) goto over; r1 = 0; over: */
tcg_out_movi(s, type, dest, 1);
tcg_out_movi(s, type, dest, 0);
}
+static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
+ TCGReg c1, TCGArg c2, int c2const, TCGReg r3)
+{
+ int cc;
+ if (facilities & FACILITY_LOAD_ON_COND) {
+ cc = tgen_cmp(s, type, c, c1, c2, c2const);
+ tcg_out_insn(s, RRF, LOCGR, dest, r3, cc);
+ } else {
+ c = tcg_invert_cond(c);
+ cc = tgen_cmp(s, type, c, c1, c2, c2const);
+
+ /* Emit: if (cc) goto over; dest = r3; over: */
+ tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
+ tcg_out_insn(s, RRE, LGR, dest, r3);
+ }
+}
+
+bool tcg_target_deposit_valid(int ofs, int len)
+{
+ return (facilities & FACILITY_GEN_INST_EXT) != 0;
+}
+
+static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
+ int ofs, int len)
+{
+ int lsb = (63 - ofs);
+ int msb = lsb - (len - 1);
+ tcg_out_risbg(s, dest, src, msb, lsb, ofs, 0);
+}
+
static void tgen_gotoi(TCGContext *s, int cc, tcg_target_long dest)
{
tcg_target_long off = (dest - (tcg_target_long)s->code_ptr) >> 1;
int cc;
if (facilities & FACILITY_GEN_INST_EXT) {
- bool is_unsigned = (c > TCG_COND_GT);
+ bool is_unsigned = is_unsigned_cond(c);
bool in_range;
S390Opcode opc;
}
#if defined(CONFIG_SOFTMMU)
-static void tgen64_andi_tmp(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
+ TCGReg addr_reg, int mem_index, int opc,
+ uint16_t **label2_ptr_p, int is_store)
{
- if (tcg_match_andi(0, val)) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
- tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
- } else {
- tgen64_andi(s, dest, val);
- }
-}
-
-static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
- TCGReg addr_reg, int mem_index, int opc,
- uint16_t **label2_ptr_p, int is_store)
-{
- const TCGReg arg0 = TCG_REG_R2;
- const TCGReg arg1 = TCG_REG_R3;
+ const TCGReg arg0 = tcg_target_call_iarg_regs[0];
+ const TCGReg arg1 = tcg_target_call_iarg_regs[1];
+ const TCGReg arg2 = tcg_target_call_iarg_regs[2];
+ const TCGReg arg3 = tcg_target_call_iarg_regs[3];
int s_bits = opc & 3;
uint16_t *label1_ptr;
tcg_target_long ofs;
if (TARGET_LONG_BITS == 32) {
- tgen_ext32u(s, arg0, addr_reg);
+ tgen_ext32u(s, arg1, addr_reg);
} else {
- tcg_out_mov(s, TCG_TYPE_I64, arg0, addr_reg);
+ tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg);
}
- tcg_out_sh64(s, RSY_SRLG, arg1, addr_reg, TCG_REG_NONE,
+ tcg_out_sh64(s, RSY_SRLG, arg2, addr_reg, TCG_REG_NONE,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
- tgen64_andi_tmp(s, arg0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
- tgen64_andi_tmp(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+ tgen_andi(s, TCG_TYPE_I64, arg1, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+ tgen_andi(s, TCG_TYPE_I64, arg2, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
if (is_store) {
ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
assert(ofs < 0x80000);
if (TARGET_LONG_BITS == 32) {
- tcg_out_mem(s, RX_C, RXY_CY, arg0, arg1, TCG_AREG0, ofs);
+ tcg_out_mem(s, RX_C, RXY_CY, arg1, arg2, TCG_AREG0, ofs);
} else {
- tcg_out_mem(s, 0, RXY_CG, arg0, arg1, TCG_AREG0, ofs);
+ tcg_out_mem(s, 0, RXY_CG, arg1, arg2, TCG_AREG0, ofs);
}
if (TARGET_LONG_BITS == 32) {
- tgen_ext32u(s, arg0, addr_reg);
+ tgen_ext32u(s, arg1, addr_reg);
} else {
- tcg_out_mov(s, TCG_TYPE_I64, arg0, addr_reg);
+ tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg);
}
label1_ptr = (uint16_t*)s->code_ptr;
for the calling convention. */
switch (opc) {
case LD_UINT8:
- tgen_ext8u(s, TCG_TYPE_I64, arg1, data_reg);
+ tgen_ext8u(s, TCG_TYPE_I64, arg2, data_reg);
break;
case LD_UINT16:
- tgen_ext16u(s, TCG_TYPE_I64, arg1, data_reg);
+ tgen_ext16u(s, TCG_TYPE_I64, arg2, data_reg);
break;
case LD_UINT32:
- tgen_ext32u(s, arg1, data_reg);
+ tgen_ext32u(s, arg2, data_reg);
break;
case LD_UINT64:
- tcg_out_mov(s, TCG_TYPE_I64, arg1, data_reg);
+ tcg_out_mov(s, TCG_TYPE_I64, arg2, data_reg);
break;
default:
tcg_abort();
}
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, mem_index);
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
- tcg_target_call_iarg_regs[2]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
+ tcg_out_movi(s, TCG_TYPE_I32, arg3, mem_index);
+ tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0);
tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]);
} else {
- tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
+ tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
+ tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0);
tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]);
/* sign extension */
switch (opc) {
case LD_INT8:
- tgen_ext8s(s, TCG_TYPE_I64, data_reg, arg0);
+ tgen_ext8s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
break;
case LD_INT16:
- tgen_ext16s(s, TCG_TYPE_I64, data_reg, arg0);
+ tgen_ext16s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
break;
case LD_INT32:
- tgen_ext32s(s, data_reg, arg0);
+ tgen_ext32s(s, data_reg, TCG_REG_R2);
break;
default:
/* unsigned -> just copy */
- tcg_out_mov(s, TCG_TYPE_I64, data_reg, arg0);
+ tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
break;
}
}
ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
assert(ofs < 0x80000);
- tcg_out_mem(s, 0, RXY_AG, arg0, arg1, TCG_AREG0, ofs);
+ tcg_out_mem(s, 0, RXY_AG, arg1, arg2, TCG_AREG0, ofs);
+
+ return arg1;
}
static void tcg_finish_qemu_ldst(TCGContext* s, uint16_t *label2_ptr)
#if defined(CONFIG_SOFTMMU)
mem_index = *args;
- tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
- opc, &label2_ptr, 0);
+ addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
+ opc, &label2_ptr, 0);
- tcg_out_qemu_ld_direct(s, opc, data_reg, TCG_REG_R2, TCG_REG_NONE, 0);
+ tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0);
tcg_finish_qemu_ldst(s, label2_ptr);
#else
#if defined(CONFIG_SOFTMMU)
mem_index = *args;
- tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
- opc, &label2_ptr, 1);
+ addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
+ opc, &label2_ptr, 1);
- tcg_out_qemu_st_direct(s, opc, data_reg, TCG_REG_R2, TCG_REG_NONE, 0);
+ tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0);
tcg_finish_qemu_ldst(s, label2_ptr);
#else
#endif
}
-#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
case glue(glue(INDEX_op_,x),_i32): \
case glue(glue(INDEX_op_,x),_i64)
-#else
-# define OP_32_64(x) \
- case glue(glue(INDEX_op_,x),_i32)
-#endif
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args, const int *const_args)
{
S390Opcode op;
+ TCGArg a0, a1, a2;
switch (opc) {
case INDEX_op_exit_tb:
break;
case INDEX_op_add_i32:
+ a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
if (const_args[2]) {
- tgen32_addi(s, args[0], args[2]);
+ do_addi_32:
+ if (a0 == a1) {
+ if (a2 == (int16_t)a2) {
+ tcg_out_insn(s, RI, AHI, a0, a2);
+ break;
+ }
+ if (facilities & FACILITY_EXT_IMM) {
+ tcg_out_insn(s, RIL, AFI, a0, a2);
+ break;
+ }
+ }
+ tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RR, AR, a0, a2);
} else {
- tcg_out_insn(s, RR, AR, args[0], args[2]);
+ tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
}
break;
case INDEX_op_sub_i32:
+ a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
if (const_args[2]) {
- tgen32_addi(s, args[0], -args[2]);
- } else {
- tcg_out_insn(s, RR, SR, args[0], args[2]);
+ a2 = -a2;
+ goto do_addi_32;
}
+ tcg_out_insn(s, RR, SR, args[0], args[2]);
break;
case INDEX_op_and_i32:
if (const_args[2]) {
- tgen64_andi(s, args[0], args[2] | 0xffffffff00000000ull);
+ tgen_andi(s, TCG_TYPE_I32, args[0], args[2]);
} else {
tcg_out_insn(s, RR, NR, args[0], args[2]);
}
tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
break;
+ case INDEX_op_add2_i32:
+ /* ??? Make use of ALFI. */
+ tcg_out_insn(s, RR, ALR, args[0], args[4]);
+ tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
+ break;
+ case INDEX_op_sub2_i32:
+ /* ??? Make use of SLFI. */
+ tcg_out_insn(s, RR, SLR, args[0], args[4]);
+ tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
+ break;
+
case INDEX_op_br:
tgen_branch(s, S390_CC_ALWAYS, args[0]);
break;
tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
args[2], const_args[2]);
break;
+ case INDEX_op_movcond_i32:
+ tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
+ args[2], const_args[2], args[3]);
+ break;
case INDEX_op_qemu_ld8u:
tcg_out_qemu_ld(s, args, LD_UINT8);
tcg_out_qemu_st(s, args, LD_UINT64);
break;
-#if TCG_TARGET_REG_BITS == 64
case INDEX_op_mov_i64:
tcg_out_mov(s, TCG_TYPE_I64, args[0], args[1]);
break;
break;
case INDEX_op_add_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
- tgen64_addi(s, args[0], args[2]);
+ do_addi_64:
+ if (a0 == a1) {
+ if (a2 == (int16_t)a2) {
+ tcg_out_insn(s, RI, AGHI, a0, a2);
+ break;
+ }
+ if (facilities & FACILITY_EXT_IMM) {
+ if (a2 == (int32_t)a2) {
+ tcg_out_insn(s, RIL, AGFI, a0, a2);
+ break;
+ } else if (a2 == (uint32_t)a2) {
+ tcg_out_insn(s, RIL, ALGFI, a0, a2);
+ break;
+ } else if (-a2 == (uint32_t)-a2) {
+ tcg_out_insn(s, RIL, SLGFI, a0, -a2);
+ break;
+ }
+ }
+ }
+ tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
+ } else if (a0 == a1) {
+ tcg_out_insn(s, RRE, AGR, a0, a2);
} else {
- tcg_out_insn(s, RRE, AGR, args[0], args[2]);
+ tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
}
break;
case INDEX_op_sub_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
- tgen64_addi(s, args[0], -args[2]);
+ a2 = -a2;
+ goto do_addi_64;
} else {
tcg_out_insn(s, RRE, SGR, args[0], args[2]);
}
case INDEX_op_and_i64:
if (const_args[2]) {
- tgen64_andi(s, args[0], args[2]);
+ tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
} else {
tcg_out_insn(s, RRE, NGR, args[0], args[2]);
}
case INDEX_op_divu2_i64:
tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
break;
+ case INDEX_op_mulu2_i64:
+ tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
+ break;
case INDEX_op_shl_i64:
op = RSY_SLLG;
tgen_ext32u(s, args[0], args[1]);
break;
+ case INDEX_op_add2_i64:
+ /* ??? Make use of ALGFI and SLGFI. */
+ tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
+ tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
+ break;
+ case INDEX_op_sub2_i64:
+ /* ??? Make use of ALGFI and SLGFI. */
+ tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
+ tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
+ break;
+
case INDEX_op_brcond_i64:
tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
args[1], const_args[1], args[3]);
tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
args[2], const_args[2]);
break;
+ case INDEX_op_movcond_i64:
+ tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
+ args[2], const_args[2], args[3]);
+ break;
case INDEX_op_qemu_ld32u:
tcg_out_qemu_ld(s, args, LD_UINT32);
case INDEX_op_qemu_ld32s:
tcg_out_qemu_ld(s, args, LD_INT32);
break;
-#endif /* TCG_TARGET_REG_BITS == 64 */
+
+ OP_32_64(deposit):
+ tgen_deposit(s, args[0], args[2], args[3], args[4]);
+ break;
default:
fprintf(stderr,"unimplemented opc 0x%x\n",opc);
{ INDEX_op_st16_i32, { "r", "r" } },
{ INDEX_op_st_i32, { "r", "r" } },
- { INDEX_op_add_i32, { "r", "0", "rWI" } },
- { INDEX_op_sub_i32, { "r", "0", "rWNI" } },
+ { INDEX_op_add_i32, { "r", "r", "ri" } },
+ { INDEX_op_sub_i32, { "r", "0", "ri" } },
{ INDEX_op_mul_i32, { "r", "0", "rK" } },
{ INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } },
{ INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } },
- { INDEX_op_and_i32, { "r", "0", "rWA" } },
+ { INDEX_op_and_i32, { "r", "0", "ri" } },
{ INDEX_op_or_i32, { "r", "0", "rWO" } },
{ INDEX_op_xor_i32, { "r", "0", "rWX" } },
{ INDEX_op_bswap16_i32, { "r", "r" } },
{ INDEX_op_bswap32_i32, { "r", "r" } },
+ { INDEX_op_add2_i32, { "r", "r", "0", "1", "r", "r" } },
+ { INDEX_op_sub2_i32, { "r", "r", "0", "1", "r", "r" } },
+
{ INDEX_op_brcond_i32, { "r", "rWC" } },
{ INDEX_op_setcond_i32, { "r", "r", "rWC" } },
+ { INDEX_op_movcond_i32, { "r", "r", "rWC", "r", "0" } },
+ { INDEX_op_deposit_i32, { "r", "0", "r" } },
{ INDEX_op_qemu_ld8u, { "r", "L" } },
{ INDEX_op_qemu_ld8s, { "r", "L" } },
{ INDEX_op_qemu_st32, { "L", "L" } },
{ INDEX_op_qemu_st64, { "L", "L" } },
-#if defined(__s390x__)
{ INDEX_op_mov_i64, { "r", "r" } },
{ INDEX_op_movi_i64, { "r" } },
{ INDEX_op_st32_i64, { "r", "r" } },
{ INDEX_op_st_i64, { "r", "r" } },
- { INDEX_op_add_i64, { "r", "0", "rI" } },
- { INDEX_op_sub_i64, { "r", "0", "rNI" } },
+ { INDEX_op_add_i64, { "r", "r", "ri" } },
+ { INDEX_op_sub_i64, { "r", "0", "ri" } },
{ INDEX_op_mul_i64, { "r", "0", "rK" } },
{ INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } },
{ INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } },
+ { INDEX_op_mulu2_i64, { "b", "a", "0", "r" } },
- { INDEX_op_and_i64, { "r", "0", "rA" } },
+ { INDEX_op_and_i64, { "r", "0", "ri" } },
{ INDEX_op_or_i64, { "r", "0", "rO" } },
{ INDEX_op_xor_i64, { "r", "0", "rX" } },
{ INDEX_op_bswap32_i64, { "r", "r" } },
{ INDEX_op_bswap64_i64, { "r", "r" } },
+ { INDEX_op_add2_i64, { "r", "r", "0", "1", "r", "r" } },
+ { INDEX_op_sub2_i64, { "r", "r", "0", "1", "r", "r" } },
+
{ INDEX_op_brcond_i64, { "r", "rC" } },
{ INDEX_op_setcond_i64, { "r", "r", "rC" } },
+ { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } },
+ { INDEX_op_deposit_i64, { "r", "0", "r" } },
{ INDEX_op_qemu_ld32u, { "r", "L" } },
{ INDEX_op_qemu_ld32s, { "r", "L" } },
-#endif
{ -1 },
};
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
tcg_add_target_add_op_defs(s390_op_defs);
- tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
- CPU_TEMP_BUF_NLONGS * sizeof(long));
}
static void tcg_target_qemu_prologue(TCGContext *s)
{
+ tcg_target_long frame_size;
+
/* stmg %r6,%r15,48(%r15) (save registers) */
tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
- /* aghi %r15,-160 (stack frame) */
- tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -160);
+ /* aghi %r15,-frame_size */
+ frame_size = TCG_TARGET_CALL_STACK_OFFSET;
+ frame_size += TCG_STATIC_CALL_ARGS_SIZE;
+ frame_size += CPU_TEMP_BUF_NLONGS * sizeof(long);
+ tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -frame_size);
+
+ tcg_set_frame(s, TCG_REG_CALL_STACK,
+ TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
if (GUEST_BASE >= 0x80000) {
tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
tb_ret_addr = s->code_ptr;
- /* lmg %r6,%r15,208(%r15) (restore registers) */
- tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 208);
+ /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
+ tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
+ frame_size + 48);
/* br %r14 (return) */
tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
-#define TCG_TARGET_HAS_deposit_i32 0
-#define TCG_TARGET_HAS_movcond_i32 0
-#define TCG_TARGET_HAS_add2_i32 0
-#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_add2_i32 1
+#define TCG_TARGET_HAS_sub2_i32 1
#define TCG_TARGET_HAS_mulu2_i32 0
#define TCG_TARGET_HAS_muls2_i32 0
-#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div2_i64 1
#define TCG_TARGET_HAS_rot_i64 1
#define TCG_TARGET_HAS_ext8s_i64 1
#define TCG_TARGET_HAS_eqv_i64 0
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
-#define TCG_TARGET_HAS_deposit_i64 0
-#define TCG_TARGET_HAS_movcond_i64 0
-#define TCG_TARGET_HAS_add2_i64 0
-#define TCG_TARGET_HAS_sub2_i64 0
-#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
+#define TCG_TARGET_HAS_mulu2_i64 1
#define TCG_TARGET_HAS_muls2_i64 0
-#endif
+
+extern bool tcg_target_deposit_valid(int ofs, int len);
+#define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid
+#define TCG_TARGET_deposit_i64_valid tcg_target_deposit_valid
/* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_R15
#define TCG_TARGET_STACK_ALIGN 8
-#define TCG_TARGET_CALL_STACK_OFFSET 0
+#define TCG_TARGET_CALL_STACK_OFFSET 160
#define TCG_TARGET_EXTEND_ARGS 1
/* Bitfield n...m (in 32 bit value). */
#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m)
-/* Used for function call generation. */
-#define TCG_REG_CALL_STACK TCG_REG_R4
-#define TCG_TARGET_STACK_ALIGN 16
-#define TCG_TARGET_CALL_STACK_OFFSET 0
-
-/* TODO: documentation. */
-static uint8_t *tb_ret_addr;
-
/* Macros used in tcg_target_op_defs. */
#define R "r"
#define RI "ri"
tcg_out_op_t(s, INDEX_op_ld_i64);
tcg_out_r(s, ret);
tcg_out_r(s, arg1);
- assert(arg2 == (uint32_t)arg2);
+ assert(arg2 == (int32_t)arg2);
tcg_out32(s, arg2);
#else
TODO();
case INDEX_op_st_i64:
tcg_out_r(s, args[0]);
tcg_out_r(s, args[1]);
- assert(args[2] == (uint32_t)args[2]);
+ assert(args[2] == (int32_t)args[2]);
tcg_out32(s, args[2]);
break;
case INDEX_op_add_i32:
/* TODO: Which registers should be set here? */
tcg_regset_set32(tcg_target_call_clobber_regs, 0,
BIT(TCG_TARGET_NB_REGS) - 1);
+
tcg_regset_clear(s->reserved_regs);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
tcg_add_target_add_op_defs(tcg_target_op_defs);
- tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
+
+ /* We use negative offsets from "sp" so that we can distinguish
+ stores that might pretend to be call arguments. */
+ tcg_set_frame(s, TCG_REG_CALL_STACK,
+ -CPU_TEMP_BUF_NLONGS * sizeof(long),
CPU_TEMP_BUF_NLONGS * sizeof(long));
}
/* Generate global QEMU prologue and epilogue code. */
-static void tcg_target_qemu_prologue(TCGContext *s)
+static inline void tcg_target_qemu_prologue(TCGContext *s)
{
- tb_ret_addr = s->code_ptr;
}
TCG_REG_R5,
TCG_REG_R6,
TCG_REG_R7,
- TCG_AREG0 = TCG_REG_R7,
#if TCG_TARGET_NB_REGS >= 16
TCG_REG_R8,
TCG_REG_R9,
TCG_CONST = UINT8_MAX
} TCGReg;
+#define TCG_AREG0 (TCG_TARGET_NB_REGS - 2)
+
+/* Used for function call generation. */
+#define TCG_REG_CALL_STACK (TCG_TARGET_NB_REGS - 1)
+#define TCG_TARGET_CALL_STACK_OFFSET 0
+#define TCG_TARGET_STACK_ALIGN 16
+
void tci_disas(uint8_t opc);
tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr);
tcg_target_ulong);
#endif
-/* TCI can optionally use a global register variable for env. */
-#if !defined(AREG0)
-CPUArchState *env;
-#endif
-
/* Targets which don't use GETPC also don't need tci_tb_ptr
which makes them a little faster. */
#if defined(GETPC)
{
assert(index < ARRAY_SIZE(tci_reg));
assert(index != TCG_AREG0);
+ assert(index != TCG_REG_CALL_STACK);
tci_reg[index] = value;
}
return value;
}
-/* Read constant (32 bit) from bytecode. */
+/* Read unsigned constant (32 bit) from bytecode. */
static uint32_t tci_read_i32(uint8_t **tb_ptr)
{
uint32_t value = *(uint32_t *)(*tb_ptr);
return value;
}
+/* Read signed constant (32 bit) from bytecode. */
+static int32_t tci_read_s32(uint8_t **tb_ptr)
+{
+ int32_t value = *(int32_t *)(*tb_ptr);
+ *tb_ptr += sizeof(value);
+ return value;
+}
+
#if TCG_TARGET_REG_BITS == 64
/* Read constant (64 bit) from bytecode. */
static uint64_t tci_read_i64(uint8_t **tb_ptr)
}
/* Interpret pseudo code in tb. */
-tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *cpustate, uint8_t *tb_ptr)
+tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
{
+ long tcg_temps[CPU_TEMP_BUF_NLONGS];
+ uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS);
tcg_target_ulong next_tb = 0;
- env = cpustate;
tci_reg[TCG_AREG0] = (tcg_target_ulong)env;
+ tci_reg[TCG_REG_CALL_STACK] = sp_value;
assert(tb_ptr);
for (;;) {
-#if defined(GETPC)
- tci_tb_ptr = (uintptr_t)tb_ptr;
-#endif
TCGOpcode opc = tb_ptr[0];
#if !defined(NDEBUG)
uint8_t op_size = tb_ptr[1];
uint64_t v64;
#endif
+#if defined(GETPC)
+ tci_tb_ptr = (uintptr_t)tb_ptr;
+#endif
+
/* Skip opcode and size entry. */
tb_ptr += 2;
case INDEX_op_ld8u_i32:
t0 = *tb_ptr++;
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
tci_write_reg8(t0, *(uint8_t *)(t1 + t2));
break;
case INDEX_op_ld8s_i32:
case INDEX_op_ld_i32:
t0 = *tb_ptr++;
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
tci_write_reg32(t0, *(uint32_t *)(t1 + t2));
break;
case INDEX_op_st8_i32:
t0 = tci_read_r8(&tb_ptr);
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
*(uint8_t *)(t1 + t2) = t0;
break;
case INDEX_op_st16_i32:
t0 = tci_read_r16(&tb_ptr);
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
*(uint16_t *)(t1 + t2) = t0;
break;
case INDEX_op_st_i32:
t0 = tci_read_r32(&tb_ptr);
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
+ assert(t1 != sp_value || (int32_t)t2 < 0);
*(uint32_t *)(t1 + t2) = t0;
break;
case INDEX_op_ld8u_i64:
t0 = *tb_ptr++;
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
tci_write_reg8(t0, *(uint8_t *)(t1 + t2));
break;
case INDEX_op_ld8s_i64:
case INDEX_op_ld32u_i64:
t0 = *tb_ptr++;
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
tci_write_reg32(t0, *(uint32_t *)(t1 + t2));
break;
case INDEX_op_ld32s_i64:
t0 = *tb_ptr++;
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
tci_write_reg32s(t0, *(int32_t *)(t1 + t2));
break;
case INDEX_op_ld_i64:
t0 = *tb_ptr++;
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
tci_write_reg64(t0, *(uint64_t *)(t1 + t2));
break;
case INDEX_op_st8_i64:
t0 = tci_read_r8(&tb_ptr);
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
*(uint8_t *)(t1 + t2) = t0;
break;
case INDEX_op_st16_i64:
t0 = tci_read_r16(&tb_ptr);
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
*(uint16_t *)(t1 + t2) = t0;
break;
case INDEX_op_st32_i64:
t0 = tci_read_r32(&tb_ptr);
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
*(uint32_t *)(t1 + t2) = t0;
break;
case INDEX_op_st_i64:
t0 = tci_read_r64(&tb_ptr);
t1 = tci_read_r(&tb_ptr);
- t2 = tci_read_i32(&tb_ptr);
+ t2 = tci_read_s32(&tb_ptr);
+ assert(t1 != sp_value || (int32_t)t2 < 0);
*(uint64_t *)(t1 + t2) = t0;
break;
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "qemu-common.h"
-#include "block/aes.h"
+#include "qemu/aes.h"
#ifndef NDEBUG
#define NDEBUG
# define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); }
/*
-Te0[x] = S [x].[02, 01, 01, 03];
-Te1[x] = S [x].[03, 02, 01, 01];
-Te2[x] = S [x].[01, 03, 02, 01];
-Te3[x] = S [x].[01, 01, 03, 02];
-Te4[x] = S [x].[01, 01, 01, 01];
+AES_Te0[x] = S [x].[02, 01, 01, 03];
+AES_Te1[x] = S [x].[03, 02, 01, 01];
+AES_Te2[x] = S [x].[01, 03, 02, 01];
+AES_Te3[x] = S [x].[01, 01, 03, 02];
+AES_Te4[x] = S [x].[01, 01, 01, 01];
-Td0[x] = Si[x].[0e, 09, 0d, 0b];
-Td1[x] = Si[x].[0b, 0e, 09, 0d];
-Td2[x] = Si[x].[0d, 0b, 0e, 09];
-Td3[x] = Si[x].[09, 0d, 0b, 0e];
-Td4[x] = Si[x].[01, 01, 01, 01];
+AES_Td0[x] = Si[x].[0e, 09, 0d, 0b];
+AES_Td1[x] = Si[x].[0b, 0e, 09, 0d];
+AES_Td2[x] = Si[x].[0d, 0b, 0e, 09];
+AES_Td3[x] = Si[x].[09, 0d, 0b, 0e];
+AES_Td4[x] = Si[x].[01, 01, 01, 01];
*/
-static const u32 Te0[256] = {
+const uint32_t AES_Te0[256] = {
0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
};
-static const u32 Te1[256] = {
+const uint32_t AES_Te1[256] = {
0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
};
-static const u32 Te2[256] = {
+const uint32_t AES_Te2[256] = {
0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
};
-static const u32 Te3[256] = {
+const uint32_t AES_Te3[256] = {
0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
};
-static const u32 Te4[256] = {
+const uint32_t AES_Te4[256] = {
0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
};
-static const u32 Td0[256] = {
+const uint32_t AES_Td0[256] = {
0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
};
-static const u32 Td1[256] = {
+const uint32_t AES_Td1[256] = {
0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
};
-static const u32 Td2[256] = {
+const uint32_t AES_Td2[256] = {
0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
};
-static const u32 Td3[256] = {
+const uint32_t AES_Td3[256] = {
0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
};
-static const u32 Td4[256] = {
+const uint32_t AES_Td4[256] = {
0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
while (1) {
temp = rk[3];
rk[4] = rk[0] ^
- (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
- (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
- (Te4[(temp ) & 0xff] & 0x0000ff00) ^
- (Te4[(temp >> 24) ] & 0x000000ff) ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (AES_Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp >> 24) ] & 0x000000ff) ^
rcon[i];
rk[5] = rk[1] ^ rk[4];
rk[6] = rk[2] ^ rk[5];
while (1) {
temp = rk[ 5];
rk[ 6] = rk[ 0] ^
- (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
- (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
- (Te4[(temp ) & 0xff] & 0x0000ff00) ^
- (Te4[(temp >> 24) ] & 0x000000ff) ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (AES_Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp >> 24) ] & 0x000000ff) ^
rcon[i];
rk[ 7] = rk[ 1] ^ rk[ 6];
rk[ 8] = rk[ 2] ^ rk[ 7];
while (1) {
temp = rk[ 7];
rk[ 8] = rk[ 0] ^
- (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
- (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
- (Te4[(temp ) & 0xff] & 0x0000ff00) ^
- (Te4[(temp >> 24) ] & 0x000000ff) ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (AES_Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp >> 24) ] & 0x000000ff) ^
rcon[i];
rk[ 9] = rk[ 1] ^ rk[ 8];
rk[10] = rk[ 2] ^ rk[ 9];
}
temp = rk[11];
rk[12] = rk[ 4] ^
- (Te4[(temp >> 24) ] & 0xff000000) ^
- (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(temp ) & 0xff] & 0x000000ff);
+ (AES_Te4[(temp >> 24) ] & 0xff000000) ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp ) & 0xff] & 0x000000ff);
rk[13] = rk[ 5] ^ rk[12];
rk[14] = rk[ 6] ^ rk[13];
rk[15] = rk[ 7] ^ rk[14];
for (i = 1; i < (key->rounds); i++) {
rk += 4;
rk[0] =
- Td0[Te4[(rk[0] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[0] ) & 0xff] & 0xff];
+ AES_Td0[AES_Te4[(rk[0] >> 24) ] & 0xff] ^
+ AES_Td1[AES_Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
+ AES_Td2[AES_Te4[(rk[0] >> 8) & 0xff] & 0xff] ^
+ AES_Td3[AES_Te4[(rk[0] ) & 0xff] & 0xff];
rk[1] =
- Td0[Te4[(rk[1] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[1] ) & 0xff] & 0xff];
+ AES_Td0[AES_Te4[(rk[1] >> 24) ] & 0xff] ^
+ AES_Td1[AES_Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
+ AES_Td2[AES_Te4[(rk[1] >> 8) & 0xff] & 0xff] ^
+ AES_Td3[AES_Te4[(rk[1] ) & 0xff] & 0xff];
rk[2] =
- Td0[Te4[(rk[2] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[2] ) & 0xff] & 0xff];
+ AES_Td0[AES_Te4[(rk[2] >> 24) ] & 0xff] ^
+ AES_Td1[AES_Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
+ AES_Td2[AES_Te4[(rk[2] >> 8) & 0xff] & 0xff] ^
+ AES_Td3[AES_Te4[(rk[2] ) & 0xff] & 0xff];
rk[3] =
- Td0[Te4[(rk[3] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[3] ) & 0xff] & 0xff];
+ AES_Td0[AES_Te4[(rk[3] >> 24) ] & 0xff] ^
+ AES_Td1[AES_Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
+ AES_Td2[AES_Te4[(rk[3] >> 8) & 0xff] & 0xff] ^
+ AES_Td3[AES_Te4[(rk[3] ) & 0xff] & 0xff];
}
return 0;
}
s3 = GETU32(in + 12) ^ rk[3];
#ifdef FULL_UNROLL
/* round 1: */
- t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
- t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
- t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
- t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[ 4];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[ 5];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[ 6];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[ 7];
/* round 2: */
- s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
- s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
- s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
- s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[ 8];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[ 9];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[10];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[11];
/* round 3: */
- t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
- t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
- t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
- t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[12];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[13];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[14];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[15];
/* round 4: */
- s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
- s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
- s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
- s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[16];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[17];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[18];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[19];
/* round 5: */
- t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
- t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
- t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
- t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[20];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[21];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[22];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[23];
/* round 6: */
- s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
- s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
- s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
- s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[24];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[25];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[26];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[27];
/* round 7: */
- t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
- t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
- t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
- t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[28];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[29];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[30];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[31];
/* round 8: */
- s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
- s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
- s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
- s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[32];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[33];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[34];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[35];
/* round 9: */
- t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
- t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
- t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
- t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[36];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[37];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[38];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[39];
if (key->rounds > 10) {
/* round 10: */
- s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
- s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
- s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
- s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[40];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[41];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[42];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[43];
/* round 11: */
- t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
- t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
- t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
- t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[44];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[45];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[46];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[47];
if (key->rounds > 12) {
/* round 12: */
- s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
- s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
- s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
- s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[48];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[49];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[50];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[51];
/* round 13: */
- t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
- t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
- t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
- t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[52];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[53];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[54];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[55];
}
}
rk += key->rounds << 2;
r = key->rounds >> 1;
for (;;) {
t0 =
- Te0[(s0 >> 24) ] ^
- Te1[(s1 >> 16) & 0xff] ^
- Te2[(s2 >> 8) & 0xff] ^
- Te3[(s3 ) & 0xff] ^
+ AES_Te0[(s0 >> 24) ] ^
+ AES_Te1[(s1 >> 16) & 0xff] ^
+ AES_Te2[(s2 >> 8) & 0xff] ^
+ AES_Te3[(s3 ) & 0xff] ^
rk[4];
t1 =
- Te0[(s1 >> 24) ] ^
- Te1[(s2 >> 16) & 0xff] ^
- Te2[(s3 >> 8) & 0xff] ^
- Te3[(s0 ) & 0xff] ^
+ AES_Te0[(s1 >> 24) ] ^
+ AES_Te1[(s2 >> 16) & 0xff] ^
+ AES_Te2[(s3 >> 8) & 0xff] ^
+ AES_Te3[(s0 ) & 0xff] ^
rk[5];
t2 =
- Te0[(s2 >> 24) ] ^
- Te1[(s3 >> 16) & 0xff] ^
- Te2[(s0 >> 8) & 0xff] ^
- Te3[(s1 ) & 0xff] ^
+ AES_Te0[(s2 >> 24) ] ^
+ AES_Te1[(s3 >> 16) & 0xff] ^
+ AES_Te2[(s0 >> 8) & 0xff] ^
+ AES_Te3[(s1 ) & 0xff] ^
rk[6];
t3 =
- Te0[(s3 >> 24) ] ^
- Te1[(s0 >> 16) & 0xff] ^
- Te2[(s1 >> 8) & 0xff] ^
- Te3[(s2 ) & 0xff] ^
+ AES_Te0[(s3 >> 24) ] ^
+ AES_Te1[(s0 >> 16) & 0xff] ^
+ AES_Te2[(s1 >> 8) & 0xff] ^
+ AES_Te3[(s2 ) & 0xff] ^
rk[7];
rk += 8;
}
s0 =
- Te0[(t0 >> 24) ] ^
- Te1[(t1 >> 16) & 0xff] ^
- Te2[(t2 >> 8) & 0xff] ^
- Te3[(t3 ) & 0xff] ^
+ AES_Te0[(t0 >> 24) ] ^
+ AES_Te1[(t1 >> 16) & 0xff] ^
+ AES_Te2[(t2 >> 8) & 0xff] ^
+ AES_Te3[(t3 ) & 0xff] ^
rk[0];
s1 =
- Te0[(t1 >> 24) ] ^
- Te1[(t2 >> 16) & 0xff] ^
- Te2[(t3 >> 8) & 0xff] ^
- Te3[(t0 ) & 0xff] ^
+ AES_Te0[(t1 >> 24) ] ^
+ AES_Te1[(t2 >> 16) & 0xff] ^
+ AES_Te2[(t3 >> 8) & 0xff] ^
+ AES_Te3[(t0 ) & 0xff] ^
rk[1];
s2 =
- Te0[(t2 >> 24) ] ^
- Te1[(t3 >> 16) & 0xff] ^
- Te2[(t0 >> 8) & 0xff] ^
- Te3[(t1 ) & 0xff] ^
+ AES_Te0[(t2 >> 24) ] ^
+ AES_Te1[(t3 >> 16) & 0xff] ^
+ AES_Te2[(t0 >> 8) & 0xff] ^
+ AES_Te3[(t1 ) & 0xff] ^
rk[2];
s3 =
- Te0[(t3 >> 24) ] ^
- Te1[(t0 >> 16) & 0xff] ^
- Te2[(t1 >> 8) & 0xff] ^
- Te3[(t2 ) & 0xff] ^
+ AES_Te0[(t3 >> 24) ] ^
+ AES_Te1[(t0 >> 16) & 0xff] ^
+ AES_Te2[(t1 >> 8) & 0xff] ^
+ AES_Te3[(t2 ) & 0xff] ^
rk[3];
}
#endif /* ?FULL_UNROLL */
* map cipher state to byte array block:
*/
s0 =
- (Te4[(t0 >> 24) ] & 0xff000000) ^
- (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t3 ) & 0xff] & 0x000000ff) ^
+ (AES_Te4[(t0 >> 24) ] & 0xff000000) ^
+ (AES_Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t3 ) & 0xff] & 0x000000ff) ^
rk[0];
PUTU32(out , s0);
s1 =
- (Te4[(t1 >> 24) ] & 0xff000000) ^
- (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t0 ) & 0xff] & 0x000000ff) ^
+ (AES_Te4[(t1 >> 24) ] & 0xff000000) ^
+ (AES_Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t0 ) & 0xff] & 0x000000ff) ^
rk[1];
PUTU32(out + 4, s1);
s2 =
- (Te4[(t2 >> 24) ] & 0xff000000) ^
- (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t1 ) & 0xff] & 0x000000ff) ^
+ (AES_Te4[(t2 >> 24) ] & 0xff000000) ^
+ (AES_Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t1 ) & 0xff] & 0x000000ff) ^
rk[2];
PUTU32(out + 8, s2);
s3 =
- (Te4[(t3 >> 24) ] & 0xff000000) ^
- (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t2 ) & 0xff] & 0x000000ff) ^
+ (AES_Te4[(t3 >> 24) ] & 0xff000000) ^
+ (AES_Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t2 ) & 0xff] & 0x000000ff) ^
rk[3];
PUTU32(out + 12, s3);
}
s3 = GETU32(in + 12) ^ rk[3];
#ifdef FULL_UNROLL
/* round 1: */
- t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
- t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
- t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
- t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[ 4];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[ 5];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[ 6];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[ 7];
/* round 2: */
- s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
- s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
- s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
- s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[ 8];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[ 9];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[10];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[11];
/* round 3: */
- t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
- t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
- t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
- t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[12];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[13];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[14];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[15];
/* round 4: */
- s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
- s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
- s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
- s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[16];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[17];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[18];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[19];
/* round 5: */
- t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
- t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
- t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
- t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[20];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[21];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[22];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[23];
/* round 6: */
- s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
- s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
- s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
- s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[24];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[25];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[26];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[27];
/* round 7: */
- t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
- t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
- t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
- t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[28];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[29];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[30];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[31];
/* round 8: */
- s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
- s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
- s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
- s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[32];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[33];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[34];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[35];
/* round 9: */
- t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
- t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
- t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
- t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[36];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[37];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[38];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[39];
if (key->rounds > 10) {
/* round 10: */
- s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
- s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
- s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
- s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[40];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[41];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[42];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[43];
/* round 11: */
- t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
- t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
- t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
- t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[44];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[45];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[46];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[47];
if (key->rounds > 12) {
/* round 12: */
- s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
- s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
- s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
- s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[48];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[49];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[50];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[51];
/* round 13: */
- t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
- t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
- t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
- t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[52];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[53];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[54];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[55];
}
}
rk += key->rounds << 2;
r = key->rounds >> 1;
for (;;) {
t0 =
- Td0[(s0 >> 24) ] ^
- Td1[(s3 >> 16) & 0xff] ^
- Td2[(s2 >> 8) & 0xff] ^
- Td3[(s1 ) & 0xff] ^
+ AES_Td0[(s0 >> 24) ] ^
+ AES_Td1[(s3 >> 16) & 0xff] ^
+ AES_Td2[(s2 >> 8) & 0xff] ^
+ AES_Td3[(s1 ) & 0xff] ^
rk[4];
t1 =
- Td0[(s1 >> 24) ] ^
- Td1[(s0 >> 16) & 0xff] ^
- Td2[(s3 >> 8) & 0xff] ^
- Td3[(s2 ) & 0xff] ^
+ AES_Td0[(s1 >> 24) ] ^
+ AES_Td1[(s0 >> 16) & 0xff] ^
+ AES_Td2[(s3 >> 8) & 0xff] ^
+ AES_Td3[(s2 ) & 0xff] ^
rk[5];
t2 =
- Td0[(s2 >> 24) ] ^
- Td1[(s1 >> 16) & 0xff] ^
- Td2[(s0 >> 8) & 0xff] ^
- Td3[(s3 ) & 0xff] ^
+ AES_Td0[(s2 >> 24) ] ^
+ AES_Td1[(s1 >> 16) & 0xff] ^
+ AES_Td2[(s0 >> 8) & 0xff] ^
+ AES_Td3[(s3 ) & 0xff] ^
rk[6];
t3 =
- Td0[(s3 >> 24) ] ^
- Td1[(s2 >> 16) & 0xff] ^
- Td2[(s1 >> 8) & 0xff] ^
- Td3[(s0 ) & 0xff] ^
+ AES_Td0[(s3 >> 24) ] ^
+ AES_Td1[(s2 >> 16) & 0xff] ^
+ AES_Td2[(s1 >> 8) & 0xff] ^
+ AES_Td3[(s0 ) & 0xff] ^
rk[7];
rk += 8;
}
s0 =
- Td0[(t0 >> 24) ] ^
- Td1[(t3 >> 16) & 0xff] ^
- Td2[(t2 >> 8) & 0xff] ^
- Td3[(t1 ) & 0xff] ^
+ AES_Td0[(t0 >> 24) ] ^
+ AES_Td1[(t3 >> 16) & 0xff] ^
+ AES_Td2[(t2 >> 8) & 0xff] ^
+ AES_Td3[(t1 ) & 0xff] ^
rk[0];
s1 =
- Td0[(t1 >> 24) ] ^
- Td1[(t0 >> 16) & 0xff] ^
- Td2[(t3 >> 8) & 0xff] ^
- Td3[(t2 ) & 0xff] ^
+ AES_Td0[(t1 >> 24) ] ^
+ AES_Td1[(t0 >> 16) & 0xff] ^
+ AES_Td2[(t3 >> 8) & 0xff] ^
+ AES_Td3[(t2 ) & 0xff] ^
rk[1];
s2 =
- Td0[(t2 >> 24) ] ^
- Td1[(t1 >> 16) & 0xff] ^
- Td2[(t0 >> 8) & 0xff] ^
- Td3[(t3 ) & 0xff] ^
+ AES_Td0[(t2 >> 24) ] ^
+ AES_Td1[(t1 >> 16) & 0xff] ^
+ AES_Td2[(t0 >> 8) & 0xff] ^
+ AES_Td3[(t3 ) & 0xff] ^
rk[2];
s3 =
- Td0[(t3 >> 24) ] ^
- Td1[(t2 >> 16) & 0xff] ^
- Td2[(t1 >> 8) & 0xff] ^
- Td3[(t0 ) & 0xff] ^
+ AES_Td0[(t3 >> 24) ] ^
+ AES_Td1[(t2 >> 16) & 0xff] ^
+ AES_Td2[(t1 >> 8) & 0xff] ^
+ AES_Td3[(t0 ) & 0xff] ^
rk[3];
}
#endif /* ?FULL_UNROLL */
* map cipher state to byte array block:
*/
s0 =
- (Td4[(t0 >> 24) ] & 0xff000000) ^
- (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t1 ) & 0xff] & 0x000000ff) ^
+ (AES_Td4[(t0 >> 24) ] & 0xff000000) ^
+ (AES_Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t1 ) & 0xff] & 0x000000ff) ^
rk[0];
PUTU32(out , s0);
s1 =
- (Td4[(t1 >> 24) ] & 0xff000000) ^
- (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t2 ) & 0xff] & 0x000000ff) ^
+ (AES_Td4[(t1 >> 24) ] & 0xff000000) ^
+ (AES_Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t2 ) & 0xff] & 0x000000ff) ^
rk[1];
PUTU32(out + 4, s1);
s2 =
- (Td4[(t2 >> 24) ] & 0xff000000) ^
- (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t3 ) & 0xff] & 0x000000ff) ^
+ (AES_Td4[(t2 >> 24) ] & 0xff000000) ^
+ (AES_Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t3 ) & 0xff] & 0x000000ff) ^
rk[2];
PUTU32(out + 8, s2);
s3 =
- (Td4[(t3 >> 24) ] & 0xff000000) ^
- (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t0 ) & 0xff] & 0x000000ff) ^
+ (AES_Td4[(t3 >> 24) ] & 0xff000000) ^
+ (AES_Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t0 ) & 0xff] & 0x000000ff) ^
rk[3];
PUTU32(out + 12, s3);
}