/* ----------------------------------------------------------------------- *
*
- * Copyright 1996-2012 The NASM Authors - All Rights Reserved
+ * Copyright 1996-2014 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
* \7 - add 4 to both the primary and the secondary operand number
* \10..\13 - a literal byte follows in the code stream, to be added
* to the register value of operand 0..3
+ * \14..\17 - the position of index register operand in MIB (BND insns)
* \20..\23 - a byte immediate operand, from operand 0..3
* \24..\27 - a zero-extended byte immediate operand, from operand 0..3
* \30..\33 - a word immediate operand, from operand 0..3
* an arbitrary value in bits 3..0 (assembled as zero.)
* \2ab - a ModRM, calculated on EA in operand a, with the spare
* field equal to digit b.
+ *
+ * \240..\243 - this instruction uses EVEX rather than REX or VEX/XOP, with the
+ * V field taken from operand 0..3.
+ * \250 - this instruction uses EVEX rather than REX or VEX/XOP, with the
+ * V field set to 1111b.
+ * EVEX prefixes are followed by the sequence:
+ * \cm\wlp\tup where cm is:
+ * cc 000 0mm
+ * c = 2 for EVEX and m is the legacy escape (0f, 0f38, 0f3a)
+ * and wlp is:
+ * 00 wwl lpp
+ * [l0] ll = 0 (.128, .lz)
+ * [l1] ll = 1 (.256)
+ * [l2] ll = 2 (.512)
+ * [lig] ll = 3 for EVEX.L'L don't care (always assembled as 0)
+ *
+ * [w0] ww = 0 for W = 0
+ * [w1] ww = 1 for W = 1
+ * [wig] ww = 2 for W don't care (always assembled as 0)
+ * [ww] ww = 3 for W used as REX.W
+ *
+ * [p0] pp = 0 for no prefix
+ * [60] pp = 1 for legacy prefix 60
+ * [f3] pp = 2
+ * [f2] pp = 3
+ *
+ * tup is tuple type for Disp8*N from %tuple_codes in insns.pl
+ * (compressed displacement encoding)
+ *
* \254..\257 - a signed 32-bit operand to be extended to 64 bits.
* \260..\263 - this instruction uses VEX/XOP rather than REX, with the
* V field taken from operand 0..3.
* VEX/XOP prefixes are followed by the sequence:
* \tmm\wlp where mm is the M field; and wlp is:
* 00 wwl lpp
- * [l0] ll = 0 for L = 0 (.128, .lz)
- * [l1] ll = 1 for L = 1 (.256)
- * [lig] ll = 2 for L don't care (always assembled as 0)
+ * [l0] ll = 0 for L = 0 (.128, .lz)
+ * [l1] ll = 1 for L = 1 (.256)
+ * [lig] ll = 2 for L don't care (always assembled as 0)
*
* [w0] ww = 0 for W = 0
* [w1 ] ww = 1 for W = 1
* \341 - this instruction needs a WAIT "prefix"
* \360 - no SSE prefix (== \364\331)
* \361 - 66 SSE prefix (== \366\331)
- * \362 - F2 SSE prefix (== \364\332)
- * \363 - F3 SSE prefix (== \364\333)
* \364 - operand-size prefix (0x66) not permitted
* \365 - address-size prefix (0x67) not permitted
* \366 - operand-size prefix (0x66) used as opcode extension
* used for conditional jump over longer jump
* \374 - this instruction takes an XMM VSIB memory EA
* \375 - this instruction takes an YMM VSIB memory EA
+ * \376 - this instruction takes an ZMM VSIB memory EA
*/
#include "compiler.h"
#include "assemble.h"
#include "insns.h"
#include "tables.h"
+#include "disp8.h"
enum match_result {
/*
MERR_INVALOP,
MERR_OPSIZEMISSING,
MERR_OPSIZEMISMATCH,
+ MERR_BRNUMMISMATCH,
MERR_BADCPU,
MERR_BADMODE,
MERR_BADHLE,
+ MERR_ENCMISMATCH,
+ MERR_BADBND,
+ MERR_BADREPNE,
/*
* Matching success; the conditional ones first
*/
int bytes; /* # of bytes of offset needed */
int size; /* lazy - this is sib+bytes+1 */
uint8_t modrm, sib, rex, rip; /* the bytes themselves */
+ int8_t disp8; /* compressed displacement for EVEX */
} ea;
#define GEN_SIB(scale, index, base) \
#define GEN_MODRM(mod, reg, rm) \
(((mod) << 6) | (((reg) & 7) << 3) | ((rm) & 7))
-static uint32_t cpu; /* cpu level received from nasm.c */
+static iflag_t cpu; /* cpu level received from nasm.c */
static efunc errfunc;
static struct ofmt *outfmt;
static ListGen *list;
static int32_t regval(const operand *);
static int rexflags(int, opflags_t, int);
static int op_rexflags(const operand *, int);
+static int op_evexflags(const operand *, int, uint8_t);
static void add_asp(insn *, int);
-static enum ea_type process_ea(operand *, ea *, int, int, int, opflags_t);
+static enum ea_type process_ea(operand *, ea *, int, int, opflags_t, insn *);
static int has_prefix(insn * ins, enum prefix_pos pos, int prefix)
{
return "oword";
case 32:
return "yword";
+ case 64:
+ return "zword";
default:
return "???";
}
int64_t isize;
const uint8_t *code = temp->code;
uint8_t c = code[0];
+ bool is_byte;
if (((c & ~1) != 0370) || (ins->oprs[0].type & STRICT))
return false;
return false;
isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
- return (isize >= -128 && isize <= 127); /* is it byte size? */
+ is_byte = (isize >= -128 && isize <= 127); /* is it byte size? */
+
+ if (is_byte && c == 0371 && ins->prefixes[PPS_REP] == P_BND) {
+ /* jmp short (opcode eb) cannot be used with bnd prefix. */
+ ins->prefixes[PPS_REP] = P_none;
+ errfunc(ERR_WARNING | ERR_WARN_BND | ERR_PASS2 ,
+ "jmp short does not init bnd regs - bnd prefix dropped.");
+ }
+
+ return is_byte;
}
-int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
+int64_t assemble(int32_t segment, int64_t offset, int bits, iflag_t cp,
insn * instruction, struct ofmt *output, efunc error,
ListGen * listgen)
{
} else if (fseek(fp, 0L, SEEK_END) < 0) {
error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
fname);
+ fclose(fp);
} else {
static char buf[4096];
size_t t = instruction->times;
case P_REPNE:
case P_REPNZ:
case P_XACQUIRE:
+ case P_BND:
c = 0xF2;
break;
case P_REPE:
case P_OSP:
c = 0x66;
break;
+ case P_EVEX:
+ case P_VEX3:
+ case P_VEX2:
+ case P_NOBND:
case P_none:
break;
default:
case MERR_OPSIZEMISMATCH:
error(ERR_NONFATAL, "mismatch in operand sizes");
break;
+ case MERR_BRNUMMISMATCH:
+ error(ERR_NONFATAL,
+ "mismatch in the number of broadcasting elements");
+ break;
case MERR_BADCPU:
error(ERR_NONFATAL, "no instruction for this cpu level");
break;
error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
bits);
break;
+ case MERR_ENCMISMATCH:
+ error(ERR_NONFATAL, "specific encoding scheme not available");
+ break;
+ case MERR_BADBND:
+ error(ERR_NONFATAL, "bnd prefix is not allowed");
+ break;
+ case MERR_BADREPNE:
+ error(ERR_NONFATAL, "%s prefix is not allowed",
+ (has_prefix(instruction, PPS_REP, P_REPNE) ?
+ "repne" : "repnz"));
+ break;
default:
error(ERR_NONFATAL,
"invalid combination of opcode and operands");
return 0;
}
-int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
+int64_t insn_size(int32_t segment, int64_t offset, int bits, iflag_t cp,
insn * instruction, efunc error)
{
const struct itemplate *temp;
break;
case P_A64:
case P_O64:
+ case P_EVEX:
+ case P_VEX3:
+ case P_VEX2:
+ case P_NOBND:
case P_none:
break;
default:
enum ea_type eat;
uint8_t hleok = 0;
bool lockcheck = true;
+ enum reg_enum mib_index = R_none; /* For a separate index MIB reg form */
ins->rex = 0; /* Ensure REX is reset */
eat = EA_SCALAR; /* Expect a scalar EA */
+ memset(ins->evex_p, 0, 3); /* Ensure EVEX is reset */
if (ins->prefixes[PPS_OSIZE] == P_O64)
ins->rex |= REX_W;
codes++, length++;
break;
+ case4(014):
+ /* this is an index reg of MIB operand */
+ mib_index = opx->basereg;
+ break;
+
case4(020):
case4(024):
length++;
length++;
break;
+ case4(0240):
+ ins->rex |= REX_EV;
+ ins->vexreg = regval(opx);
+ ins->evex_p[2] |= op_evexflags(opx, EVEX_P2VP, 2); /* High-16 NDS */
+ ins->vex_cm = *codes++;
+ ins->vex_wlp = *codes++;
+ ins->evex_tuple = (*codes++ - 0300);
+ break;
+
+ case 0250:
+ ins->rex |= REX_EV;
+ ins->vexreg = 0;
+ ins->vex_cm = *codes++;
+ ins->vex_wlp = *codes++;
+ ins->evex_tuple = (*codes++ - 0300);
+ break;
+
case4(0254):
length += 4;
break;
case 0360:
break;
- case3(0361):
+ case 0361:
length++;
break;
length++;
break;
- case3(0370):
+ case 0370:
+ case 0371:
break;
case 0373:
eat = EA_YMMVSIB;
break;
+ case 0376:
+ eat = EA_ZMMVSIB;
+ break;
+
case4(0100):
case4(0110):
case4(0120):
int rfield;
opflags_t rflags;
struct operand *opy = &ins->oprs[op2];
+ struct operand *op_er_sae;
ea_data.rex = 0; /* Ensure ea.REX is initially 0 */
rflags = 0;
rfield = c & 7;
}
- if (process_ea(opy, &ea_data, bits,ins->addr_size,
- rfield, rflags) != eat) {
+
+ /* EVEX.b1 : evex_brerop contains the operand position */
+ op_er_sae = (ins->evex_brerop >= 0 ?
+ &ins->oprs[ins->evex_brerop] : NULL);
+
+ if (op_er_sae && (op_er_sae->decoflags & (ER | SAE))) {
+ /* set EVEX.b */
+ ins->evex_p[2] |= EVEX_P2B;
+ if (op_er_sae->decoflags & ER) {
+ /* set EVEX.RC (rounding control) */
+ ins->evex_p[2] |= ((ins->evex_rm - BRC_RN) << 5)
+ & EVEX_P2RC;
+ }
+ } else {
+ /* set EVEX.L'L (vector length) */
+ ins->evex_p[2] |= ((ins->vex_wlp << (5 - 2)) & EVEX_P2LL);
+ ins->evex_p[1] |= ((ins->vex_wlp << (7 - 4)) & EVEX_P1W);
+ if (opy->decoflags & BRDCAST_MASK) {
+ /* set EVEX.b */
+ ins->evex_p[2] |= EVEX_P2B;
+ }
+ }
+
+ if (itemp_has(temp, IF_MIB)) {
+ opy->eaflags |= EAF_MIB;
+ /*
+ * if a separate form of MIB (ICC style) is used,
+ * the index reg info is merged into mem operand
+ */
+ if (mib_index != R_none) {
+ opy->indexreg = mib_index;
+ opy->scale = 1;
+ opy->hintbase = mib_index;
+ opy->hinttype = EAH_NOTBASE;
+ }
+ }
+
+ if (process_ea(opy, &ea_data, bits,
+ rfield, rflags, ins) != eat) {
errfunc(ERR_NONFATAL, "invalid effective address");
return -1;
} else {
ins->rex &= ~REX_P; /* Don't force REX prefix due to high reg */
}
- if (ins->rex & REX_V) {
+ switch (ins->prefixes[PPS_VEX]) {
+ case P_EVEX:
+ if (!(ins->rex & REX_EV))
+ return -1;
+ break;
+ case P_VEX3:
+ case P_VEX2:
+ if (!(ins->rex & REX_V))
+ return -1;
+ break;
+ default:
+ break;
+ }
+
+ if (ins->rex & (REX_V | REX_EV)) {
int bad32 = REX_R|REX_W|REX_X|REX_B;
if (ins->rex & REX_H) {
- errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
+ errfunc(ERR_NONFATAL, "cannot use high register in AVX instruction");
return -1;
}
switch (ins->vex_wlp & 060) {
if (bits != 64 && ((ins->rex & bad32) || ins->vexreg > 7)) {
errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
return -1;
+ } else if (!(ins->rex & REX_EV) &&
+ ((ins->vexreg > 15) || (ins->evex_p[0] & 0xf0))) {
+ errfunc(ERR_NONFATAL, "invalid high-16 register in non-AVX-512");
+ return -1;
}
- if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
+ if (ins->rex & REX_EV)
+ length += 4;
+ else if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)) ||
+ ins->prefixes[PPS_VEX] == P_VEX3)
length += 3;
else
length += 2;
length++;
} else if ((ins->rex & REX_L) &&
!(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
- cpu >= IF_X86_64) {
+ iflag_ffs(&cpu) >= IF_X86_64) {
/* LOCK-as-REX.R */
assert_no_prefix(ins, PPS_LOCK);
lockcheck = false; /* Already errored, no need for warning */
}
if (has_prefix(ins, PPS_LOCK, P_LOCK) && lockcheck &&
- (!(temp->flags & IF_LOCK) || !is_class(MEMORY, ins->oprs[0].type))) {
+ (!itemp_has(temp,IF_LOCK) || !is_class(MEMORY, ins->oprs[0].type))) {
errfunc(ERR_WARNING | ERR_WARN_LOCK | ERR_PASS2 ,
"instruction is not lockable");
}
bad_hle_warn(ins, hleok);
+ /*
+ * when BND prefix is set by DEFAULT directive,
+ * BND prefix is added to every appropriate instruction line
+ * unless it is overridden by NOBND prefix.
+ */
+ if (globalbnd &&
+ (itemp_has(temp, IF_BND) && !has_prefix(ins, PPS_REP, P_NOBND)))
+ ins->prefixes[PPS_REP] = P_BND;
+
return length;
}
static inline unsigned int emit_rex(insn *ins, int32_t segment, int64_t offset, int bits)
{
if (bits == 64) {
- if ((ins->rex & REX_REAL) && !(ins->rex & REX_V)) {
- ins->rex = (ins->rex & REX_REAL) | REX_P;
- out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
- ins->rex = 0;
+ if ((ins->rex & REX_REAL) &&
+ !(ins->rex & (REX_V | REX_EV)) &&
+ !ins->rex_done) {
+ int rex = (ins->rex & REX_REAL) | REX_P;
+ out(offset, segment, &rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
+ ins->rex_done = true;
return 1;
}
}
uint8_t opex = 0;
enum ea_type eat = EA_SCALAR;
+ ins->rex_done = false;
+
while (*codes) {
c = *codes++;
op1 = (c & 3) + ((opex & 1) << 2);
offset += 1;
break;
+ case4(014):
+ break;
+
case4(020):
if (opx->offset < -256 || opx->offset > 255) {
errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
offset += 4;
break;
+ case4(0240):
+ case 0250:
+ codes += 3;
+ ins->evex_p[2] |= op_evexflags(&ins->oprs[0],
+ EVEX_P2Z | EVEX_P2AAA, 2);
+ ins->evex_p[2] ^= EVEX_P2VP; /* 1's complement */
+ bytes[0] = 0x62;
+ /* EVEX.X can be set by either REX or EVEX for different reasons */
+ bytes[1] = ((((ins->rex & 7) << 5) |
+ (ins->evex_p[0] & (EVEX_P0X | EVEX_P0RP))) ^ 0xf0) |
+ (ins->vex_cm & 3);
+ bytes[2] = ((ins->rex & REX_W) << (7 - 3)) |
+ ((~ins->vexreg & 15) << 3) |
+ (1 << 2) | (ins->vex_wlp & 3);
+ bytes[3] = ins->evex_p[2];
+ out(offset, segment, &bytes, OUT_RAWDATA, 4, NO_SEG, NO_SEG);
+ offset += 4;
+ break;
+
case4(0260):
case 0270:
codes += 2;
- if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
+ if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)) ||
+ ins->prefixes[PPS_VEX] == P_VEX3) {
bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
bytes[2] = ((ins->rex & REX_W) << (7-3)) |
offset += 1;
break;
- case 0362:
- case 0363:
- bytes[0] = c - 0362 + 0xf2;
- out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
- offset += 1;
- break;
-
case 0364:
case 0365:
break;
offset += 1;
break;
- case 0370:
- case 0371:
+ case3(0370):
break;
case 0373:
eat = EA_YMMVSIB;
break;
+ case 0376:
+ eat = EA_ZMMVSIB;
+ break;
+
case4(0100):
case4(0110):
case4(0120):
rfield = c & 7;
}
- if (process_ea(opy, &ea_data, bits, ins->addr_size,
- rfield, rflags) != eat)
+ if (process_ea(opy, &ea_data, bits,
+ rfield, rflags, ins) != eat)
errfunc(ERR_NONFATAL, "invalid effective address");
p = bytes;
case 2:
case 4:
case 8:
- data = opy->offset;
+ /* use compressed displacement, if available */
+ data = ea_data.disp8 ? ea_data.disp8 : opy->offset;
s += ea_data.bytes;
if (ea_data.rip) {
if (opy->segment == segment) {
insn_end - offset, opy->segment, opy->wrt);
}
} else {
- if (overflow_general(opy->offset, ins->addr_size >> 3) ||
- signed_bits(opy->offset, ins->addr_size) !=
- signed_bits(opy->offset, ea_data.bytes * 8))
+ if (overflow_general(data, ins->addr_size >> 3) ||
+ signed_bits(data, ins->addr_size) !=
+ signed_bits(data, ea_data.bytes * 8))
warn_overflow(ERR_PASS2, ea_data.bytes);
out(offset, segment, &data, OUT_ADDRESS,
{
int rex = 0;
- if (val >= 8)
+ if (val >= 0 && (val & 8))
rex |= REX_B|REX_X|REX_R;
if (flags & BITS64)
rex |= REX_W;
return rex & mask;
}
+static int evexflags(int val, decoflags_t deco,
+ int mask, uint8_t byte)
+{
+ int evex = 0;
+
+ switch (byte) {
+ case 0:
+ if (val >= 0 && (val & 16))
+ evex |= (EVEX_P0RP | EVEX_P0X);
+ break;
+ case 2:
+ if (val >= 0 && (val & 16))
+ evex |= EVEX_P2VP;
+ if (deco & Z)
+ evex |= EVEX_P2Z;
+ if (deco & OPMASK_MASK)
+ evex |= deco & EVEX_P2AAA;
+ break;
+ }
+ return evex & mask;
+}
+
+static int op_evexflags(const operand * o, int mask, uint8_t byte)
+{
+ int val;
+
+ val = nasm_regvals[o->basereg];
+
+ return evexflags(val, o->decoflags, mask, byte);
+}
+
static enum match_result find_match(const struct itemplate **tempp,
insn *instruction,
int32_t segment, int64_t offset, int bits)
enum match_result m, merr;
opflags_t xsizeflags[MAX_OPERANDS];
bool opsizemissing = false;
+ int8_t broadcast = instruction->evex_brerop;
int i;
+ /* broadcasting uses a different data element size */
for (i = 0; i < instruction->operands; i++)
- xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
+ if (i == broadcast)
+ xsizeflags[i] = instruction->oprs[i].decoflags & BRSIZE_MASK;
+ else
+ xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
merr = MERR_INVALOP;
m = MOK_GOOD;
else
m = MERR_INVALOP;
- } else if (m == MERR_OPSIZEMISSING &&
- (temp->flags & IF_SMASK) != IF_SX) {
+ } else if (m == MERR_OPSIZEMISSING && !itemp_has(temp, IF_SX)) {
/*
* Missing operand size and a candidate for fuzzy matching...
*/
for (i = 0; i < temp->operands; i++)
- xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
+ if (i == broadcast)
+ xsizeflags[i] |= temp->deco[i] & BRSIZE_MASK;
+ else
+ xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
opsizemissing = true;
}
if (m > merr)
if ((xsizeflags[i] & (xsizeflags[i]-1)))
goto done; /* No luck */
- instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
+ if (i == broadcast) {
+ instruction->oprs[i].decoflags |= xsizeflags[i];
+ instruction->oprs[i].type |= (xsizeflags[i] == BR_BITS32 ?
+ BITS32 : BITS64);
+ } else {
+ instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
+ }
}
/* Try matching again... */
/*
* Is it legal?
*/
- if (!(optimizing > 0) && (itemp->flags & IF_OPT))
+ if (!(optimizing > 0) && itemp_has(itemp, IF_OPT))
return MERR_INVALOP;
/*
+ * {evex} available?
+ */
+ switch (instruction->prefixes[PPS_VEX]) {
+ case P_EVEX:
+ if (!itemp_has(itemp, IF_EVEX))
+ return MERR_ENCMISMATCH;
+ break;
+ case P_VEX3:
+ case P_VEX2:
+ if (!itemp_has(itemp, IF_VEX))
+ return MERR_ENCMISMATCH;
+ break;
+ default:
+ break;
+ }
+
+ /*
* Check that no spurious colons or TOs are present
*/
for (i = 0; i < itemp->operands; i++)
/*
* Process size flags
*/
- switch (itemp->flags & IF_SMASK) {
- case IF_SB:
+ switch (itemp_smask(itemp)) {
+ case IF_GENBIT(IF_SB):
asize = BITS8;
break;
- case IF_SW:
+ case IF_GENBIT(IF_SW):
asize = BITS16;
break;
- case IF_SD:
+ case IF_GENBIT(IF_SD):
asize = BITS32;
break;
- case IF_SQ:
+ case IF_GENBIT(IF_SQ):
asize = BITS64;
break;
- case IF_SO:
+ case IF_GENBIT(IF_SO):
asize = BITS128;
break;
- case IF_SY:
+ case IF_GENBIT(IF_SY):
asize = BITS256;
break;
- case IF_SZ:
+ case IF_GENBIT(IF_SZ):
+ asize = BITS512;
+ break;
+ case IF_GENBIT(IF_SIZE):
switch (bits) {
case 16:
asize = BITS16;
break;
}
- if (itemp->flags & IF_ARMASK) {
+ if (itemp_armask(itemp)) {
/* S- flags only apply to a specific operand */
- i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
+ i = itemp_arg(itemp);
memset(size, 0, sizeof size);
size[i] = asize;
} else {
*/
for (i = 0; i < itemp->operands; i++) {
opflags_t type = instruction->oprs[i].type;
+ decoflags_t deco = instruction->oprs[i].decoflags;
+ bool is_broadcast = deco & BRDCAST_MASK;
+ uint8_t brcast_num = 0;
+ opflags_t template_opsize, insn_opsize;
+
if (!(type & SIZE_MASK))
type |= size[i];
- if (itemp->opd[i] & ~type & ~SIZE_MASK) {
+ insn_opsize = type & SIZE_MASK;
+ if (!is_broadcast) {
+ template_opsize = itemp->opd[i] & SIZE_MASK;
+ } else {
+ decoflags_t deco_brsize = itemp->deco[i] & BRSIZE_MASK;
+ /*
+ * when broadcasting, the element size depends on
+ * the instruction type. decorator flag should match.
+ */
+
+ if (deco_brsize) {
+ template_opsize = (deco_brsize == BR_BITS32 ? BITS32 : BITS64);
+ /* calculate the proper number : {1to<brcast_num>} */
+ brcast_num = (itemp->opd[i] & SIZE_MASK) / BITS128 *
+ BITS64 / template_opsize * 2;
+ } else {
+ template_opsize = 0;
+ }
+ }
+
+ if ((itemp->opd[i] & ~type & ~SIZE_MASK) ||
+ (deco & ~itemp->deco[i] & ~BRNUM_MASK)) {
return MERR_INVALOP;
- } else if ((itemp->opd[i] & SIZE_MASK) &&
- (itemp->opd[i] & SIZE_MASK) != (type & SIZE_MASK)) {
- if (type & SIZE_MASK) {
- return MERR_INVALOP;
- } else if (!is_class(REGISTER, type)) {
+ } else if (template_opsize) {
+ if (template_opsize != insn_opsize) {
+ if (insn_opsize) {
+ return MERR_INVALOP;
+ } else if (!is_class(REGISTER, type)) {
+ /*
+ * Note: we don't honor extrinsic operand sizes for registers,
+ * so "missing operand size" for a register should be
+ * considered a wildcard match rather than an error.
+ */
+ opsizemissing = true;
+ }
+ } else if (is_broadcast &&
+ (brcast_num !=
+ (8U << ((deco & BRNUM_MASK) >> BRNUM_SHIFT)))) {
/*
- * Note: we don't honor extrinsic operand sizes for registers,
- * so "missing operand size" for a register should be
- * considered a wildcard match rather than an error.
+ * broadcasting opsize matches but the number of repeated memory
+ * element does not match.
+ * if 64b double precision float is broadcasted to zmm (512b),
+ * broadcasting decorator must be {1to8}.
*/
- opsizemissing = true;
+ return MERR_BRNUMMISMATCH;
}
}
}
/*
* Check operand sizes
*/
- if (itemp->flags & (IF_SM | IF_SM2)) {
- oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
+ if (itemp_has(itemp, IF_SM) || itemp_has(itemp, IF_SM2)) {
+ oprs = (itemp_has(itemp, IF_SM2) ? 2 : itemp->operands);
for (i = 0; i < oprs; i++) {
asize = itemp->opd[i] & SIZE_MASK;
if (asize) {
/*
* Check template is okay at the set cpu level
*/
- if (((itemp->flags & IF_PLEVEL) > cpu))
+ if (iflag_cmp_cpu_level(&insns_flags[itemp->iflag_idx], &cpu) > 0)
return MERR_BADCPU;
/*
* Verify the appropriate long mode flag.
*/
- if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
+ if (itemp_has(itemp, (bits == 64 ? IF_NOLONG : IF_LONG)))
return MERR_BADMODE;
/*
* If we have a HLE prefix, look for the NOHLE flag
*/
- if ((itemp->flags & IF_NOHLE) &&
+ if (itemp_has(itemp, IF_NOHLE) &&
(has_prefix(instruction, PPS_REP, P_XACQUIRE) ||
has_prefix(instruction, PPS_REP, P_XRELEASE)))
return MERR_BADHLE;
if ((itemp->code[0] & ~1) == 0370)
return MOK_JUMP;
+ /*
+ * Check if BND prefix is allowed.
+ * Other 0xF2 (REPNE/REPNZ) prefix is prohibited.
+ */
+ if (!itemp_has(itemp, IF_BND) &&
+ (has_prefix(instruction, PPS_REP, P_BND) ||
+ has_prefix(instruction, PPS_REP, P_NOBND)))
+ return MERR_BADBND;
+ else if (itemp_has(itemp, IF_BND) &&
+ (has_prefix(instruction, PPS_REP, P_REPNE) ||
+ has_prefix(instruction, PPS_REP, P_REPNZ)))
+ return MERR_BADREPNE;
+
return MOK_GOOD;
}
+/*
+ * Check if ModR/M.mod should/can be 01.
+ * - EAF_BYTEOFFS is set
+ * - offset can fit in a byte when EVEX is not used
+ * - offset can be compressed when EVEX is used
+ */
+#define IS_MOD_01() (input->eaflags & EAF_BYTEOFFS || \
+ (o >= -128 && o <= 127 && \
+ seg == NO_SEG && !forw_ref && \
+ !(input->eaflags & EAF_WORDOFFS) && \
+ !(ins->rex & REX_EV)) || \
+ (ins->rex & REX_EV && \
+ is_disp8n(input, ins, &output->disp8)))
+
static enum ea_type process_ea(operand *input, ea *output, int bits,
- int addrbits, int rfield, opflags_t rflags)
+ int rfield, opflags_t rflags, insn *ins)
{
bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
+ int addrbits = ins->addr_size;
+ int eaflags = input->eaflags;
output->type = EA_SCALAR;
output->rip = false;
+ output->disp8 = 0;
/* REX flags for the rfield operand */
output->rex |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
+ /* EVEX.R' flag for the REG operand */
+ ins->evex_p[0] |= evexflags(rfield, 0, EVEX_P0RP, 0);
if (is_class(REGISTER, input->type)) {
/*
if (!is_register(input->basereg))
goto err;
- if (!is_class(REG_EA, regflag(input)))
+ if (!is_reg_class(REG_EA, input->basereg))
goto err;
+ /* broadcasting is not available with a direct register operand. */
+ if (input->decoflags & BRDCAST_MASK) {
+ nasm_error(ERR_NONFATAL, "Broadcasting not allowed from a register");
+ goto err;
+ }
+
output->rex |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
+ ins->evex_p[0] |= op_evexflags(input, EVEX_P0X, 0);
output->sib_present = false; /* no SIB necessary */
output->bytes = 0; /* no offset necessary either */
output->modrm = GEN_MODRM(3, rfield, nasm_regvals[input->basereg]);
/*
* It's a memory reference.
*/
+
+ /* Embedded rounding or SAE is not available with a mem ref operand. */
+ if (input->decoflags & (ER | SAE)) {
+ nasm_error(ERR_NONFATAL,
+ "Embedded rounding is available only with reg-reg op.");
+ return -1;
+ }
+
if (input->basereg == -1 &&
(input->indexreg == -1 || input->scale == 0)) {
/*
input->type |= MEMORY;
}
- if (input->eaflags & EAF_BYTEOFFS ||
- (input->eaflags & EAF_WORDOFFS &&
+ if (bits == 64 &&
+ !(IP_REL & ~input->type) && (eaflags & EAF_MIB)) {
+ nasm_error(ERR_NONFATAL, "RIP-relative addressing is prohibited for mib.");
+ return -1;
+ }
+
+ if (eaflags & EAF_BYTEOFFS ||
+ (eaflags & EAF_WORDOFFS &&
input->disp_size != (addrbits != 16 ? 32 : 16))) {
nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
}
}
/* if either one are a vector register... */
- if ((ix|bx) & (XMMREG|YMMREG) & ~REG_EA) {
+ if ((ix|bx) & (XMMREG|YMMREG|ZMMREG) & ~REG_EA) {
opflags_t sok = BITS32 | BITS64;
int32_t o = input->offset;
int mod, scale, index, base;
* For a vector SIB, one has to be a vector and the other,
* if present, a GPR. The vector must be the index operand.
*/
- if (it == -1 || (bx & (XMMREG|YMMREG) & ~REG_EA)) {
+ if (it == -1 || (bx & (XMMREG|YMMREG|ZMMREG) & ~REG_EA)) {
if (s == 0)
s = 1;
else if (s != 1)
(addrbits == 64 && !(sok & BITS64)))
goto err;
- output->type = (ix & YMMREG & ~REG_EA)
- ? EA_YMMVSIB : EA_XMMVSIB;
+ output->type = ((ix & ZMMREG & ~REG_EA) ? EA_ZMMVSIB
+ : ((ix & YMMREG & ~REG_EA)
+ ? EA_YMMVSIB : EA_XMMVSIB));
- output->rex |= rexflags(it, ix, REX_X);
- output->rex |= rexflags(bt, bx, REX_B);
+ output->rex |= rexflags(it, ix, REX_X);
+ output->rex |= rexflags(bt, bx, REX_B);
+ ins->evex_p[2] |= evexflags(it, 0, EVEX_P2VP, 2);
index = it & 7; /* it is known to be != -1 */
base = (bt & 7);
if (base != REG_NUM_EBP && o == 0 &&
seg == NO_SEG && !forw_ref &&
- !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
+ !(eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
mod = 0;
- else if (input->eaflags & EAF_BYTEOFFS ||
- (o >= -128 && o <= 127 &&
- seg == NO_SEG && !forw_ref &&
- !(input->eaflags & EAF_WORDOFFS)))
+ else if (IS_MOD_01())
mod = 1;
else
mod = 2;
t = bt, bt = it, it = t;
x = bx, bx = ix, ix = x;
}
- if (bt == it) /* convert EAX+2*EAX to 3*EAX */
- bt = -1, bx = 0, s++;
- if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
+
+ if (bt == -1 && s == 1 && !(hb == i && ht == EAH_NOTBASE)) {
/* make single reg base, unless hint */
bt = it, bx = ix, it = -1, ix = 0;
}
- if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
- s == 3 || s == 5 || s == 9) && bt == -1)
- bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
- if (it == -1 && (bt & 7) != REG_NUM_ESP &&
- (input->eaflags & EAF_TIMESTWO))
- it = bt, ix = bx, bt = -1, bx = 0, s = 1;
- /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
+ if (eaflags & EAF_MIB) {
+ /* only for mib operands */
+ if (it == -1 && (hb == b && ht == EAH_NOTBASE)) {
+ /*
+ * make a single reg index [reg*1].
+ * gas uses this form for an explicit index register.
+ */
+ it = bt, ix = bx, bt = -1, bx = 0, s = 1;
+ }
+ if ((ht == EAH_SUMMED) && bt == -1) {
+ /* separate once summed index into [base, index] */
+ bt = it, bx = ix, s--;
+ }
+ } else {
+ if (((s == 2 && it != REG_NUM_ESP &&
+ (!(eaflags & EAF_TIMESTWO) || (ht == EAH_SUMMED))) ||
+ s == 3 || s == 5 || s == 9) && bt == -1) {
+ /* convert 3*EAX to EAX+2*EAX */
+ bt = it, bx = ix, s--;
+ }
+ if (it == -1 && (bt & 7) != REG_NUM_ESP &&
+ (eaflags & EAF_TIMESTWO) &&
+ (hb == b && ht == EAH_NOTBASE)) {
+ /*
+ * convert [NOSPLIT EAX*1]
+ * to sib format with 0x0 displacement - [EAX*1+0].
+ */
+ it = bt, ix = bx, bt = -1, bx = 0, s = 1;
+ }
+ }
if (s == 1 && it == REG_NUM_ESP) {
/* swap ESP into base if scale is 1 */
t = it, it = bt, bt = t;
rm = (bt & 7);
if (rm != REG_NUM_EBP && o == 0 &&
seg == NO_SEG && !forw_ref &&
- !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
+ !(eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
mod = 0;
- else if (input->eaflags & EAF_BYTEOFFS ||
- (o >= -128 && o <= 127 &&
- seg == NO_SEG && !forw_ref &&
- !(input->eaflags & EAF_WORDOFFS)))
+ else if (IS_MOD_01())
mod = 1;
else
mod = 2;
base = (bt & 7);
if (base != REG_NUM_EBP && o == 0 &&
seg == NO_SEG && !forw_ref &&
- !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
+ !(eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
mod = 0;
- else if (input->eaflags & EAF_BYTEOFFS ||
- (o >= -128 && o <= 127 &&
- seg == NO_SEG && !forw_ref &&
- !(input->eaflags & EAF_WORDOFFS)))
+ else if (IS_MOD_01())
mod = 1;
else
mod = 2;
goto err; /* so panic if it does */
if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
- !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
+ !(eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
mod = 0;
- else if (input->eaflags & EAF_BYTEOFFS ||
- (o >= -128 && o <= 127 && seg == NO_SEG &&
- !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
+ else if (IS_MOD_01())
mod = 1;
else
mod = 2;