assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2012 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \172\ab       - the register number from operand a in bits 7..4, with
  70  *                 the 4-bit immediate from operand b in bits 3..0.
  71  * \173\xab      - the register number from operand a in bits 7..4, with
  72  *                 the value b in bits 3..0.
  73  * \174..\177    - the register number from operand 0..3 in bits 7..4, and
  74  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  75  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  76  *                 field equal to digit b.
  77  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  78  *                 is not equal to the truncated and sign-extended 32-bit
  79  *                 operand; used for 32-bit immediates in 64-bit mode.
  80  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  81  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  82  *                 V field taken from operand 0..3.
  83  * \270          - this instruction uses VEX/XOP rather than REX, with the
  84  *                 V field set to 1111b.
  85  *
  86  * VEX/XOP prefixes are followed by the sequence:
  87  * \tmm\wlp        where mm is the M field; and wlp is:
  88  *                 00 wwl lpp
  89  *                 [l0]  ll = 0 for L = 0 (.128, .lz)
  90  *                 [l1]  ll = 1 for L = 1 (.256)
  91  *                 [lig] ll = 2 for L don't care (always assembled as 0)
  92  *
  93  *                 [w0]  ww = 0 for W = 0
   94  *                 [w1]  ww = 1 for W = 1
  95  *                 [wig] ww = 2 for W don't care (always assembled as 0)
  96  *                 [ww]  ww = 3 for W used as REX.W
  97  *
  98  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
  99  *
 100  * \271          - instruction takes XRELEASE (F3) with or without lock
 101  * \272          - instruction takes XACQUIRE/XRELEASE with or without lock
 102  * \273          - instruction takes XACQUIRE/XRELEASE with lock only
 103  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 104  *                 which is to be extended to the operand size.
 105  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 106  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 107  * \312          - (disassembler only) invalid with non-default address size.
 108  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 109  * \314          - (disassembler only) invalid with REX.B
 110  * \315          - (disassembler only) invalid with REX.X
 111  * \316          - (disassembler only) invalid with REX.R
 112  * \317          - (disassembler only) invalid with REX.W
 113  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 114  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 115  * \322          - indicates that this instruction is only valid when the
 116  *                 operand size is the default (instruction to disassembler,
 117  *                 generates no code in the assembler)
 118  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 119  * \324          - indicates 64-bit operand size requiring REX prefix.
 120  * \325          - instruction which always uses spl/bpl/sil/dil
 121  * \326          - instruction not valid with 0xF3 REP prefix.  Hint for
  122  *                 disassembler only; for SSE instructions.
 123  * \330          - a literal byte follows in the code stream, to be added
 124  *                 to the condition code value of the instruction.
 125  * \331          - instruction not valid with REP prefix.  Hint for
 126  *                 disassembler only; for SSE instructions.
 127  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 128  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 129  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 130  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
 131  * \336          - force a REP(E) prefix (0xF3) even if not specified.
 132  * \337          - force a REPNE prefix (0xF2) even if not specified.
 133  *                 \336-\337 are still listed as prefixes in the disassembler.
 134  * \340          - reserve <operand 0> bytes of uninitialized storage.
 135  *                 Operand 0 had better be a segmentless constant.
 136  * \341          - this instruction needs a WAIT "prefix"
 137  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 138  *                 (POP is never used for CS) depending on operand 0
 139  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 140  *                 on operand 0
 141  * \360          - no SSE prefix (== \364\331)
 142  * \361          - 66 SSE prefix (== \366\331)
 143  * \362          - F2 SSE prefix (== \364\332)
 144  * \363          - F3 SSE prefix (== \364\333)
 145  * \364          - operand-size prefix (0x66) not permitted
 146  * \365          - address-size prefix (0x67) not permitted
 147  * \366          - operand-size prefix (0x66) used as opcode extension
 148  * \367          - address-size prefix (0x67) used as opcode extension
 149  * \370,\371     - match only if operand 0 meets byte jump criteria.
 150  *                 370 is used for Jcc, 371 is used for JMP.
 151  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 152  *                 used for conditional jump over longer jump
 153  * \374          - this instruction takes an XMM VSIB memory EA
 154  * \375          - this instruction takes an YMM VSIB memory EA
 155  */
 156
 157 #include "compiler.h"
 158
 159 #include <stdio.h>
 160 #include <string.h>
 161 #include <inttypes.h>
 162
 163 #include "nasm.h"
 164 #include "nasmlib.h"
 165 #include "assemble.h"
 166 #include "insns.h"
 167 #include "tables.h"
 168
 169 enum match_result {
 170     /*
 171      * Matching errors.  These should be sorted so that more specific
 172      * errors come later in the sequence.
 173      */
 174     MERR_INVALOP,
 175     MERR_OPSIZEMISSING,
 176     MERR_OPSIZEMISMATCH,
 177     MERR_BADCPU,
 178     MERR_BADMODE,
 179     MERR_BADHLE,
 180     /*
 181      * Matching success; the conditional ones first
 182      */
 183     MOK_JUMP,   /* Matching OK but needs jmp_match() */
 184     MOK_GOOD    /* Matching unconditionally OK */
 185 };
 186
 187 typedef struct {
 188     enum ea_type type;            /* what kind of EA is this? */
 189     int sib_present;              /* is a SIB byte necessary? */
 190     int bytes;                    /* # of bytes of offset needed */
 191     int size;                     /* lazy - this is sib+bytes+1 */
 192     uint8_t modrm, sib, rex, rip; /* the bytes themselves */
 193 } ea;
 194
 195 #define GEN_SIB(scale, index, base)                 \
 196         (((scale) << 6) | ((index) << 3) | ((base)))
 197
 198 #define GEN_MODRM(mod, reg, rm)                     \
 199         (((mod) << 6) | (((reg) & 7) << 3) | ((rm) & 7))
 200
 201 static uint32_t cpu;            /* cpu level received from nasm.c */
 202 static efunc errfunc;
 203 static struct ofmt *outfmt;
 204 static ListGen *list;
 205
 206 static int64_t calcsize(int32_t, int64_t, int, insn *,
 207                         const struct itemplate *);
 208 static void gencode(int32_t segment, int64_t offset, int bits,
 209                     insn * ins, const struct itemplate *temp,
 210                     int64_t insn_end);
 211 static enum match_result find_match(const struct itemplate **tempp,
 212                                     insn *instruction,
 213                                     int32_t segment, int64_t offset, int bits);
 214 static enum match_result matches(const struct itemplate *, insn *, int bits);
 215 static opflags_t regflag(const operand *);
 216 static int32_t regval(const operand *);
 217 static int rexflags(int, opflags_t, int);
 218 static int op_rexflags(const operand *, int);
 219 static void add_asp(insn *, int);
 220
 221 static enum ea_type process_ea(operand *, ea *, int, int, int, opflags_t);
 222
 223 static int has_prefix(insn * ins, enum prefix_pos pos, int prefix)
 224 {
 225     return ins->prefixes[pos] == prefix;
 226 }
 227
 228 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 229 {
 230     if (ins->prefixes[pos])
 231         errfunc(ERR_NONFATAL, "invalid %s prefix",
 232                 prefix_name(ins->prefixes[pos]));
 233 }
 234
 235 static const char *size_name(int size)
 236 {
 237     switch (size) {
 238     case 1:
 239         return "byte";
 240     case 2:
 241         return "word";
 242     case 4:
 243         return "dword";
 244     case 8:
 245         return "qword";
 246     case 10:
 247         return "tword";
 248     case 16:
 249         return "oword";
 250     case 32:
 251         return "yword";
 252     default:
 253         return "???";
 254     }
 255 }
 256
 257 static void warn_overflow(int pass, int size)
 258 {
 259     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 260             "%s data exceeds bounds", size_name(size));
 261 }
 262
 263 static void warn_overflow_const(int64_t data, int size)
 264 {
 265     if (overflow_general(data, size))
 266         warn_overflow(ERR_PASS1, size);
 267 }
 268
 269 static void warn_overflow_opd(const struct operand *o, int size)
 270 {
 271     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 272         if (overflow_general(o->offset, size))
 273             warn_overflow(ERR_PASS2, size);
 274     }
 275 }
 276
 277 /*
 278  * This routine wrappers the real output format's output routine,
 279  * in order to pass a copy of the data off to the listing file
 280  * generator at the same time.
 281  */
 282 static void out(int64_t offset, int32_t segto, const void *data,
 283                 enum out_type type, uint64_t size,
 284                 int32_t segment, int32_t wrt)
 285 {
 286     static int32_t lineno = 0;     /* static!!! */
 287     static char *lnfname = NULL;
 288     uint8_t p[8];
 289
 290     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 291         /*
 292          * This is a non-relocated address, and we're going to
 293          * convert it into RAWDATA format.
 294          */
 295         uint8_t *q = p;
 296
 297         if (size > 8) {
 298             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 299             return;
 300         }
 301
 302         WRITEADDR(q, *(int64_t *)data, size);
 303         data = p;
 304         type = OUT_RAWDATA;
 305     }
 306
 307     list->output(offset, data, type, size);
 308
 309     /*
 310      * this call to src_get determines when we call the
 311      * debug-format-specific "linenum" function
 312      * it updates lineno and lnfname to the current values
 313      * returning 0 if "same as last time", -2 if lnfname
 314      * changed, and the amount by which lineno changed,
 315      * if it did. thus, these variables must be static
 316      */
 317
 318     if (src_get(&lineno, &lnfname))
 319         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 320
 321     outfmt->output(segto, data, type, size, segment, wrt);
 322 }
 323
 324 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 325                       insn * ins, const struct itemplate *temp)
 326 {
 327     int64_t isize;
 328     const uint8_t *code = temp->code;
 329     uint8_t c = code[0];
 330
 331     if (((c & ~1) != 0370) || (ins->oprs[0].type & STRICT))
 332         return false;
 333     if (!optimizing)
 334         return false;
 335     if (optimizing < 0 && c == 0371)
 336         return false;
 337
 338     isize = calcsize(segment, offset, bits, ins, temp);
 339
 340     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 341         /* Be optimistic in pass 1 */
 342         return true;
 343
 344     if (ins->oprs[0].segment != segment)
 345         return false;
 346
 347     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 348     return (isize >= -128 && isize <= 127); /* is it byte size? */
 349 }
 350
 351 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 352                  insn * instruction, struct ofmt *output, efunc error,
 353                  ListGen * listgen)
 354 {
 355     const struct itemplate *temp;
 356     int j;
 357     enum match_result m;
 358     int64_t insn_end;
 359     int32_t itimes;
 360     int64_t start = offset;
 361     int64_t wsize;              /* size for DB etc. */
 362
 363     errfunc = error;            /* to pass to other functions */
 364     cpu = cp;
 365     outfmt = output;            /* likewise */
 366     list = listgen;             /* and again */
 367
 368     wsize = idata_bytes(instruction->opcode);
 369     if (wsize == -1)
 370         return 0;
 371
 372     if (wsize) {
 373         extop *e;
 374         int32_t t = instruction->times;
 375         if (t < 0)
 376             errfunc(ERR_PANIC,
 377                     "instruction->times < 0 (%ld) in assemble()", t);
 378
 379         while (t--) {           /* repeat TIMES times */
 380             list_for_each(e, instruction->eops) {
 381                 if (e->type == EOT_DB_NUMBER) {
 382                     if (wsize > 8) {
 383                         errfunc(ERR_NONFATAL,
 384                                 "integer supplied to a DT, DO or DY"
 385                                 " instruction");
 386                     } else {
 387                         out(offset, segment, &e->offset,
 388                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 389                         offset += wsize;
 390                     }
 391                 } else if (e->type == EOT_DB_STRING ||
 392                            e->type == EOT_DB_STRING_FREE) {
 393                     int align;
 394
 395                     out(offset, segment, e->stringval,
 396                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 397                     align = e->stringlen % wsize;
 398
 399                     if (align) {
 400                         align = wsize - align;
 401                         out(offset, segment, zero_buffer,
 402                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 403                     }
 404                     offset += e->stringlen + align;
 405                 }
 406             }
 407             if (t > 0 && t == instruction->times - 1) {
 408                 /*
 409                  * Dummy call to list->output to give the offset to the
 410                  * listing module.
 411                  */
 412                 list->output(offset, NULL, OUT_RAWDATA, 0);
 413                 list->uplevel(LIST_TIMES);
 414             }
 415         }
 416         if (instruction->times > 1)
 417             list->downlevel(LIST_TIMES);
 418         return offset - start;
 419     }
 420
 421     if (instruction->opcode == I_INCBIN) {
 422         const char *fname = instruction->eops->stringval;
 423         FILE *fp;
 424
 425         fp = fopen(fname, "rb");
 426         if (!fp) {
 427             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 428                   fname);
 429         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 430             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 431                   fname);
 432         } else {
 433             static char buf[4096];
 434             size_t t = instruction->times;
 435             size_t base = 0;
 436             size_t len;
 437
 438             len = ftell(fp);
 439             if (instruction->eops->next) {
 440                 base = instruction->eops->next->offset;
 441                 len -= base;
 442                 if (instruction->eops->next->next &&
 443                     len > (size_t)instruction->eops->next->next->offset)
 444                     len = (size_t)instruction->eops->next->next->offset;
 445             }
 446             /*
 447              * Dummy call to list->output to give the offset to the
 448              * listing module.
 449              */
 450             list->output(offset, NULL, OUT_RAWDATA, 0);
 451             list->uplevel(LIST_INCBIN);
 452             while (t--) {
 453                 size_t l;
 454
 455                 fseek(fp, base, SEEK_SET);
 456                 l = len;
 457                 while (l > 0) {
 458                     int32_t m;
 459                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 460                     if (!m) {
 461                         /*
 462                          * This shouldn't happen unless the file
 463                          * actually changes while we are reading
 464                          * it.
 465                          */
 466                         error(ERR_NONFATAL,
 467                               "`incbin': unexpected EOF while"
 468                               " reading file `%s'", fname);
 469                         t = 0;  /* Try to exit cleanly */
 470                         break;
 471                     }
 472                     out(offset, segment, buf, OUT_RAWDATA, m,
 473                         NO_SEG, NO_SEG);
 474                     l -= m;
 475                 }
 476             }
 477             list->downlevel(LIST_INCBIN);
 478             if (instruction->times > 1) {
 479                 /*
 480                  * Dummy call to list->output to give the offset to the
 481                  * listing module.
 482                  */
 483                 list->output(offset, NULL, OUT_RAWDATA, 0);
 484                 list->uplevel(LIST_TIMES);
 485                 list->downlevel(LIST_TIMES);
 486             }
 487             fclose(fp);
 488             return instruction->times * len;
 489         }
 490         return 0;               /* if we're here, there's an error */
 491     }
 492
 493     /* Check to see if we need an address-size prefix */
 494     add_asp(instruction, bits);
 495
 496     m = find_match(&temp, instruction, segment, offset, bits);
 497
 498     if (m == MOK_GOOD) {
 499         /* Matches! */
 500         int64_t insn_size = calcsize(segment, offset, bits, instruction, temp);
 501         itimes = instruction->times;
 502         if (insn_size < 0)  /* shouldn't be, on pass two */
 503             error(ERR_PANIC, "errors made it through from pass one");
 504         else
 505             while (itimes--) {
 506                 for (j = 0; j < MAXPREFIX; j++) {
 507                     uint8_t c = 0;
 508                     switch (instruction->prefixes[j]) {
 509                     case P_WAIT:
 510                         c = 0x9B;
 511                         break;
 512                     case P_LOCK:
 513                         c = 0xF0;
 514                         break;
 515                     case P_REPNE:
 516                     case P_REPNZ:
 517                     case P_XACQUIRE:
 518                         c = 0xF2;
 519                         break;
 520                     case P_REPE:
 521                     case P_REPZ:
 522                     case P_REP:
 523                     case P_XRELEASE:
 524                         c = 0xF3;
 525                         break;
 526                     case R_CS:
 527                         if (bits == 64) {
 528                             error(ERR_WARNING | ERR_PASS2,
 529                                   "cs segment base generated, but will be ignored in 64-bit mode");
 530                         }
 531                         c = 0x2E;
 532                         break;
 533                     case R_DS:
 534                         if (bits == 64) {
 535                             error(ERR_WARNING | ERR_PASS2,
 536                                   "ds segment base generated, but will be ignored in 64-bit mode");
 537                         }
 538                         c = 0x3E;
 539                         break;
 540                     case R_ES:
 541                         if (bits == 64) {
 542                             error(ERR_WARNING | ERR_PASS2,
 543                                   "es segment base generated, but will be ignored in 64-bit mode");
 544                         }
 545                         c = 0x26;
 546                         break;
 547                     case R_FS:
 548                         c = 0x64;
 549                         break;
 550                     case R_GS:
 551                         c = 0x65;
 552                         break;
 553                     case R_SS:
 554                         if (bits == 64) {
 555                             error(ERR_WARNING | ERR_PASS2,
 556                                   "ss segment base generated, but will be ignored in 64-bit mode");
 557                         }
 558                         c = 0x36;
 559                         break;
 560                     case R_SEGR6:
 561                     case R_SEGR7:
 562                         error(ERR_NONFATAL,
 563                               "segr6 and segr7 cannot be used as prefixes");
 564                         break;
 565                     case P_A16:
 566                         if (bits == 64) {
 567                             error(ERR_NONFATAL,
 568                                   "16-bit addressing is not supported "
 569                                   "in 64-bit mode");
 570                         } else if (bits != 16)
 571                             c = 0x67;
 572                         break;
 573                     case P_A32:
 574                         if (bits != 32)
 575                             c = 0x67;
 576                         break;
 577                     case P_A64:
 578                         if (bits != 64) {
 579                             error(ERR_NONFATAL,
 580                                   "64-bit addressing is only supported "
 581                                   "in 64-bit mode");
 582                         }
 583                         break;
 584                     case P_ASP:
 585                         c = 0x67;
 586                         break;
 587                     case P_O16:
 588                         if (bits != 16)
 589                             c = 0x66;
 590                         break;
 591                     case P_O32:
 592                         if (bits == 16)
 593                             c = 0x66;
 594                         break;
 595                     case P_O64:
 596                         /* REX.W */
 597                         break;
 598                     case P_OSP:
 599                         c = 0x66;
 600                         break;
 601                     case P_none:
 602                         break;
 603                     default:
 604                         error(ERR_PANIC, "invalid instruction prefix");
 605                     }
 606                     if (c != 0) {
 607                         out(offset, segment, &c, OUT_RAWDATA, 1,
 608                             NO_SEG, NO_SEG);
 609                         offset++;
 610                     }
 611                 }
 612                 insn_end = offset + insn_size;
 613                 gencode(segment, offset, bits, instruction,
 614                         temp, insn_end);
 615                 offset += insn_size;
 616                 if (itimes > 0 && itimes == instruction->times - 1) {
 617                     /*
 618                      * Dummy call to list->output to give the offset to the
 619                      * listing module.
 620                      */
 621                     list->output(offset, NULL, OUT_RAWDATA, 0);
 622                     list->uplevel(LIST_TIMES);
 623                 }
 624             }
 625         if (instruction->times > 1)
 626             list->downlevel(LIST_TIMES);
 627         return offset - start;
 628     } else {
 629         /* No match */
 630         switch (m) {
 631         case MERR_OPSIZEMISSING:
 632             error(ERR_NONFATAL, "operation size not specified");
 633             break;
 634         case MERR_OPSIZEMISMATCH:
 635             error(ERR_NONFATAL, "mismatch in operand sizes");
 636             break;
 637         case MERR_BADCPU:
 638             error(ERR_NONFATAL, "no instruction for this cpu level");
 639             break;
 640         case MERR_BADMODE:
 641             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 642                   bits);
 643             break;
 644         default:
 645             error(ERR_NONFATAL,
 646                   "invalid combination of opcode and operands");
 647             break;
 648         }
 649     }
 650     return 0;
 651 }
 652
 653 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 654                   insn * instruction, efunc error)
 655 {
 656     const struct itemplate *temp;
 657     enum match_result m;
 658
 659     errfunc = error;            /* to pass to other functions */
 660     cpu = cp;
 661
 662     if (instruction->opcode == I_none)
 663         return 0;
 664
 665     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 666         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 667         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 668         instruction->opcode == I_DY) {
 669         extop *e;
 670         int32_t isize, osize, wsize;
 671
 672         isize = 0;
 673         wsize = idata_bytes(instruction->opcode);
 674
 675         list_for_each(e, instruction->eops) {
 676             int32_t align;
 677
 678             osize = 0;
 679             if (e->type == EOT_DB_NUMBER) {
 680                 osize = 1;
 681                 warn_overflow_const(e->offset, wsize);
 682             } else if (e->type == EOT_DB_STRING ||
 683                        e->type == EOT_DB_STRING_FREE)
 684                 osize = e->stringlen;
 685
 686             align = (-osize) % wsize;
 687             if (align < 0)
 688                 align += wsize;
 689             isize += osize + align;
 690         }
 691         return isize * instruction->times;
 692     }
 693
 694     if (instruction->opcode == I_INCBIN) {
 695         const char *fname = instruction->eops->stringval;
 696         FILE *fp;
 697         int64_t val = 0;
 698         size_t len;
 699
 700         fp = fopen(fname, "rb");
 701         if (!fp)
 702             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 703                   fname);
 704         else if (fseek(fp, 0L, SEEK_END) < 0)
 705             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 706                   fname);
 707         else {
 708             len = ftell(fp);
 709             if (instruction->eops->next) {
 710                 len -= instruction->eops->next->offset;
 711                 if (instruction->eops->next->next &&
 712                     len > (size_t)instruction->eops->next->next->offset) {
 713                     len = (size_t)instruction->eops->next->next->offset;
 714                 }
 715             }
 716             val = instruction->times * len;
 717         }
 718         if (fp)
 719             fclose(fp);
 720         return val;
 721     }
 722
 723     /* Check to see if we need an address-size prefix */
 724     add_asp(instruction, bits);
 725
 726     m = find_match(&temp, instruction, segment, offset, bits);
 727     if (m == MOK_GOOD) {
 728         /* we've matched an instruction. */
 729         int64_t isize;
 730         int j;
 731
 732         isize = calcsize(segment, offset, bits, instruction, temp);
 733         if (isize < 0)
 734             return -1;
 735         for (j = 0; j < MAXPREFIX; j++) {
 736             switch (instruction->prefixes[j]) {
 737             case P_A16:
 738                 if (bits != 16)
 739                     isize++;
 740                 break;
 741             case P_A32:
 742                 if (bits != 32)
 743                     isize++;
 744                 break;
 745             case P_O16:
 746                 if (bits != 16)
 747                     isize++;
 748                 break;
 749             case P_O32:
 750                 if (bits == 16)
 751                     isize++;
 752                 break;
 753             case P_A64:
 754             case P_O64:
 755             case P_none:
 756                 break;
 757             default:
 758                 isize++;
 759                 break;
 760             }
 761         }
 762         return isize * instruction->times;
 763     } else {
 764         return -1;                  /* didn't match any instruction */
 765     }
 766 }
 767
 768 static bool possible_sbyte(operand *o)
 769 {
 770     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 771         !(o->opflags & OPFLAG_UNKNOWN) &&
 772         optimizing >= 0 && !(o->type & STRICT);
 773 }
 774
 775 /* check that opn[op]  is a signed byte of size 16 or 32 */
 776 static bool is_sbyte16(operand *o)
 777 {
 778     int16_t v;
 779
 780     if (!possible_sbyte(o))
 781         return false;
 782
 783     v = o->offset;
 784     return v >= -128 && v <= 127;
 785 }
 786
 787 static bool is_sbyte32(operand *o)
 788 {
 789     int32_t v;
 790
 791     if (!possible_sbyte(o))
 792         return false;
 793
 794     v = o->offset;
 795     return v >= -128 && v <= 127;
 796 }
 797
 798 static void bad_hle_warn(const insn * ins, uint8_t hleok)
 799 {
 800     enum prefixes rep_pfx = ins->prefixes[PPS_REP];
 801     enum whatwarn { w_none, w_lock, w_inval } ww;
 802     static const enum whatwarn warn[2][4] =
 803     {
 804         { w_inval, w_inval, w_none, w_lock }, /* XACQUIRE */
 805         { w_inval, w_none,  w_none, w_lock }, /* XRELEASE */
 806     };
 807     unsigned int n;
 808
 809     n = (unsigned int)rep_pfx - P_XACQUIRE;
 810     if (n > 1)
 811         return;                 /* Not XACQUIRE/XRELEASE */
 812
 813     ww = warn[n][hleok];
 814     if (!is_class(MEMORY, ins->oprs[0].type))
 815         ww = w_inval;           /* HLE requires operand 0 to be memory */
 816
 817     switch (ww) {
 818     case w_none:
 819         break;
 820
 821     case w_lock:
 822         if (ins->prefixes[PPS_LOCK] != P_LOCK) {
 823             errfunc(ERR_WARNING | ERR_WARN_HLE | ERR_PASS2,
 824                     "%s with this instruction requires lock",
 825                     prefix_name(rep_pfx));
 826         }
 827         break;
 828
 829     case w_inval:
 830         errfunc(ERR_WARNING | ERR_WARN_HLE | ERR_PASS2,
 831                 "%s invalid with this instruction",
 832                 prefix_name(rep_pfx));
 833         break;
 834     }
 835 }
 836
 837 /* Common construct: four consecutive case labels, (x) through (x)+3 */
 838 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 839
 /*
  * calcsize: walk the byte-code string of the matched template `temp'
  * and add up the number of bytes instruction `ins' occupies when
  * assembled for a `bits'-bit target.
  *
  * segment, offset  accepted but unused (explicitly cast to void)
  * bits             current mode, compared against 16, 32 and 64 below
  * ins              the instruction; modified as described next
  * temp             the template whose code[] string is interpreted
  *
  * Side effects on `ins':
  *   - ins->rex is cleared and then rebuilt from the operands and codes;
  *   - ins->vexreg, ins->vex_cm and ins->vex_wlp are filled in by codes
  *     \260..\263 and \270;
  *   - the PPS_OSIZE slot is set by \320 / \321 when it is P_none, and
  *     the PPS_REP (\336, \337) and PPS_WAIT (\341) slots are set when
  *     they are empty.
  *
  * Returns the accumulated length, or -1 when the template cannot be
  * used (\310 in 64-bit mode, the \313 check failing) or after an
  * error has been reported through errfunc() (invalid effective
  * address, high-register conflicts, operands invalid outside 64-bit
  * mode).  Warnings about LOCK and HLE prefixes are issued at the end.
  * An unrecognised code byte is reported with ERR_PANIC.
  */
 840 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 841                         insn * ins, const struct itemplate *temp)
 842 {
 843     const uint8_t *codes = temp->code;
 844     int64_t length = 0;
 845     uint8_t c;
 846     int rex_mask = ~0;
 847     int op1, op2;
 848     struct operand *opx;
 849     uint8_t opex = 0;
 850     enum ea_type eat;
 851     uint8_t hleok = 0;
 852     bool lockcheck = true;
 853
 854     ins->rex = 0;               /* Ensure REX is reset */
 855     eat = EA_SCALAR;            /* Expect a scalar EA */
 856
 857     if (ins->prefixes[PPS_OSIZE] == P_O64)
 858         ins->rex |= REX_W;
 859
 860     (void)segment;              /* Don't warn that this parameter is unused */
 861     (void)offset;               /* Don't warn that this parameter is unused */
 862
         /* Interpret code bytes until the terminating zero byte */
 863     while (*codes) {
 864         c = *codes++;
             /*
              * Operand indices come from bits 0-1 (op1) and bits 3-4 (op2)
              * of the code byte; opex, set by a preceding \5..\7 code,
              * supplies one extra high bit for each.
              */
 865         op1 = (c & 3) + ((opex & 1) << 2);
 866         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 867         opx = &ins->oprs[op1];
 868         opex = 0;               /* For the next iteration */
 869
 870         switch (c) {
             /* c literal bytes follow in the code string: skip and count them */
 871         case 01:
 872         case 02:
 873         case 03:
 874         case 04:
 875             codes += c, length += c;
 876             break;
 877
             /* Operand-index extension applied to the next code byte */
 878         case 05:
 879         case 06:
 880         case 07:
 881             opex = c;
 882             break;
 883
             /* One code byte is consumed, one byte counted; operand may add REX flags */
 884         case4(010):
 885             ins->rex |=
 886                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 887             codes++, length++;
 888             break;
 889
 890         case4(014):
 891         case4(020):
 892         case4(024):
 893             length++;
 894             break;
 895
 896         case4(030):
 897             length += 2;
 898             break;
 899
             /* 2 or 4 bytes: taken from the operand's size flags if present, else from `bits' */
 900         case4(034):
 901             if (opx->type & (BITS16 | BITS32 | BITS64))
 902                 length += (opx->type & BITS16) ? 2 : 4;
 903             else
 904                 length += (bits == 16) ? 2 : 4;
 905             break;
 906
 907         case4(040):
 908             length += 4;
 909             break;
 910
             /* ins->addr_size is divided by 8 here, i.e. treated as a bit count */
 911         case4(044):
 912             length += ins->addr_size >> 3;
 913             break;
 914
 915         case4(050):
 916             length++;
 917             break;
 918
 919         case4(054):
 920             length += 8; /* MOV reg64/imm */
 921             break;
 922
 923         case4(060):
 924             length += 2;
 925             break;
 926
             /* Same 2-or-4 byte selection as \34..\37 above */
 927         case4(064):
 928             if (opx->type & (BITS16 | BITS32 | BITS64))
 929                 length += (opx->type & BITS16) ? 2 : 4;
 930             else
 931                 length += (bits == 16) ? 2 : 4;
 932             break;
 933
 934         case4(070):
 935             length += 4;
 936             break;
 937
 938         case4(074):
 939             length += 2;
 940             break;
 941
             /* One byte when the operand qualifies as a signed byte, else two */
 942         case4(0140):
 943             length += is_sbyte16(opx) ? 1 : 2;
 944             break;
 945
 946         case4(0144):
 947             codes++;
 948             length++;
 949             break;
 950
             /* One byte when the operand qualifies as a signed byte, else four */
 951         case4(0150):
 952             length += is_sbyte32(opx) ? 1 : 4;
 953             break;
 954
 955         case4(0154):
 956             codes++;
 957             length++;
 958             break;
 959
 960         case 0172:
 961         case 0173:
 962             codes++;
 963             length++;
 964             break;
 965
 966         case4(0174):
 967             length++;
 968             break;
 969
 970         case4(0250):
 971             length += is_sbyte32(opx) ? 1 : 4;
 972             break;
 973
 974         case4(0254):
 975             length += 4;
 976             break;
 977
             /*
              * REX_V instructions: two descriptor bytes follow in the code
              * string.  Nothing is counted here; the prefix length is added
              * after the loop, once ins->rex is complete.
              */
 978         case4(0260):
 979             ins->rex |= REX_V;
 980             ins->vexreg = regval(opx);
 981             ins->vex_cm = *codes++;
 982             ins->vex_wlp = *codes++;
 983             break;
 984
             /* As above, but with vexreg fixed at 0 instead of taken from an operand */
 985         case 0270:
 986             ins->rex |= REX_V;
 987             ins->vexreg = 0;
 988             ins->vex_cm = *codes++;
 989             ins->vex_wlp = *codes++;
 990             break;
 991
             /* Remember the low two bits; they index the table in bad_hle_warn() */
 992         case 0271:
 993         case 0272:
 994         case 0273:
 995             hleok = c & 3;
 996             break;
 997
 998         case4(0274):
 999             length++;
1000             break;
1001
1002         case4(0300):
1003             break;
1004
             /* Not usable in 64-bit mode; otherwise may cost one byte outside 16-bit mode */
1005         case 0310:
1006             if (bits == 64)
1007                 return -1;
1008             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
1009             break;
1010
1011         case 0311:
1012             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
1013             break;
1014
1015         case 0312:
1016             break;
1017
             /* Only usable in 64-bit mode and when both has_prefix() checks are false */
1018         case 0313:
1019             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
1020                 has_prefix(ins, PPS_ASIZE, P_A32))
1021                 return -1;
1022             break;
1023
1024         case4(0314):
1025             break;
1026
             /* Set the operand-size slot to P_O16 if empty; warn if it holds something else */
1027         case 0320:
1028         {
1029             enum prefixes pfx = ins->prefixes[PPS_OSIZE];
1030             if (pfx == P_O16)
1031                 break;
1032             if (pfx != P_none)
1033                 errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
1034             else
1035                 ins->prefixes[PPS_OSIZE] = P_O16;
1036             break;
1037         }
1038
             /* Same as \320, for P_O32 */
1039         case 0321:
1040         {
1041             enum prefixes pfx = ins->prefixes[PPS_OSIZE];
1042             if (pfx == P_O32)
1043                 break;
1044             if (pfx != P_none)
1045                 errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
1046             else
1047                 ins->prefixes[PPS_OSIZE] = P_O32;
1048             break;
1049         }
1050
1051         case 0322:
1052             break;
1053
             /* REX_W will be masked out of ins->rex after the loop */
1054         case 0323:
1055             rex_mask &= ~REX_W;
1056             break;
1057
1058         case 0324:
1059             ins->rex |= REX_W;
1060             break;
1061
1062         case 0325:
1063             ins->rex |= REX_NH;
1064             break;
1065
1066         case 0326:
1067             break;
1068
1069         case 0330:
1070             codes++, length++;
1071             break;
1072
1073         case 0331:
1074             break;
1075
1076         case 0332:
1077         case 0333:
1078             length++;
1079             break;
1080
1081         case 0334:
1082             ins->rex |= REX_L;
1083             break;
1084
1085         case 0335:
1086             break;
1087
             /* Fill the REP slot with P_REP only if no prefix is already there */
1088         case 0336:
1089             if (!ins->prefixes[PPS_REP])
1090                 ins->prefixes[PPS_REP] = P_REP;
1091             break;
1092
1093         case 0337:
1094             if (!ins->prefixes[PPS_REP])
1095                 ins->prefixes[PPS_REP] = P_REPNE;
1096             break;
1097
             /* Length comes from operand 0's value, which must carry no segment */
1098         case 0340:
1099             if (ins->oprs[0].segment != NO_SEG)
1100                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1101                         " quantity of BSS space");
1102             else
1103                 length += ins->oprs[0].offset;
1104             break;
1105
1106         case 0341:
1107             if (!ins->prefixes[PPS_WAIT])
1108                 ins->prefixes[PPS_WAIT] = P_WAIT;
1109             break;
1110
1111         case4(0344):
1112             length++;
1113             break;
1114
1115         case 0360:
1116             break;
1117
1118         case 0361:
1119         case 0362:
1120         case 0363:
1121             length++;
1122             break;
1123
1124         case 0364:
1125         case 0365:
1126             break;
1127
1128         case 0366:
1129         case 0367:
1130             length++;
1131             break;
1132
1133         case 0370:
1134         case 0371:
1135         case 0372:
1136             break;
1137
1138         case 0373:
1139             length++;
1140             break;
1141
             /* Change the EA type that process_ea() must return for later EA codes */
1142         case 0374:
1143             eat = EA_XMMVSIB;
1144             break;
1145
1146         case 0375:
1147             eat = EA_YMMVSIB;
1148             break;
1149
             /* Effective-address codes: size and REX bits come from process_ea() */
1150         case4(0100):
1151         case4(0110):
1152         case4(0120):
1153         case4(0130):
1154         case4(0200):
1155         case4(0204):
1156         case4(0210):
1157         case4(0214):
1158         case4(0220):
1159         case4(0224):
1160         case4(0230):
1161         case4(0234):
1162             {
1163                 ea ea_data;
1164                 int rfield;
1165                 opflags_t rflags;
1166                 struct operand *opy = &ins->oprs[op2];
1167
1168                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1169
                     /*
                      * Codes up to \177 take the register field from operand
                      * opx; higher codes carry it as a constant in their low
                      * three bits.
                      */
1170                 if (c <= 0177) {
1171                     /* pick rfield from operand b (opx) */
1172                     rflags = regflag(opx);
1173                     rfield = nasm_regvals[opx->basereg];
1174                 } else {
1175                     rflags = 0;
1176                     rfield = c & 7;
1177                 }
                     /* The EA type returned must match the one currently expected */
1178                 if (process_ea(opy, &ea_data, bits,ins->addr_size,
1179                                rfield, rflags) != eat) {
1180                     errfunc(ERR_NONFATAL, "invalid effective address");
1181                     return -1;
1182                 } else {
1183                     ins->rex |= ea_data.rex;
1184                     length += ea_data.size;
1185                 }
1186             }
1187             break;
1188
1189         default:
1190             errfunc(ERR_PANIC, "internal instruction table corrupt"
1191                     ": instruction code \\%o (0x%02X) given", c, c);
1192             break;
1193         }
1194     }
1195
         /* Drop any REX bits the template asked to suppress (see \323) */
1196     ins->rex &= rex_mask;
1197
1198     if (ins->rex & REX_NH) {
1199         if (ins->rex & REX_H) {
1200             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1201             return -1;
1202         }
1203         ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
1204     }
1205
         /* Account for the prefix implied by the collected REX flags */
1206     if (ins->rex & REX_V) {
             /* REX bits that are rejected below when not in 64-bit mode */
1207         int bad32 = REX_R|REX_W|REX_X|REX_B;
1208
1209         if (ins->rex & REX_H) {
1210             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1211             return -1;
1212         }
             /* Bits 4-5 of vex_wlp choose how REX_W is treated */
1213         switch (ins->vex_wlp & 060) {
1214         case 000:
1215         case 040:
1216             ins->rex &= ~REX_W;
1217             break;
1218         case 020:
1219             ins->rex |= REX_W;
1220             bad32 &= ~REX_W;
1221             break;
1222         case 060:
1223             /* Follow REX_W */
1224             break;
1225         }
1226
1227         if (bits != 64 && ((ins->rex & bad32) || ins->vexreg > 7)) {
1228             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1229             return -1;
1230         }
             /* Two bytes suffice only when vex_cm is 1 and none of W/X/B is set */
1231         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
1232             length += 3;
1233         else
1234             length += 2;
1235     } else if (ins->rex & REX_REAL) {
1236         if (ins->rex & REX_H) {
1237             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1238             return -1;
1239         } else if (bits == 64) {
1240             length++;
1241         } else if ((ins->rex & REX_L) &&
1242                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1243                    cpu >= IF_X86_64) {
1244             /* LOCK-as-REX.R */
1245             assert_no_prefix(ins, PPS_LOCK);
1246             lockcheck = false;  /* Already errored, no need for warning */
1247             length++;
1248         } else {
1249             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1250             return -1;
1251         }
1252     }
1253
         /* Warn if LOCK is given but the template lacks IF_LOCK or operand 0 is not memory */
1254     if (has_prefix(ins, PPS_LOCK, P_LOCK) && lockcheck &&
1255         (!(temp->flags & IF_LOCK) || !is_class(MEMORY, ins->oprs[0].type))) {
1256         errfunc(ERR_WARNING | ERR_WARN_LOCK | ERR_PASS2 ,
1257                 "instruction is not lockable");
1258     }
1259
1260     bad_hle_warn(ins, hleok);
1261
1262     return length;
1263 }
1264
/*
 * EMIT_REX(): emit the pending REX prefix byte, if there is one.
 *
 * Must be expanded where `ins', `bits', `offset', `segment' and out()
 * are in scope.  When the instruction is not flagged REX_V, has at
 * least one REX_REAL bit set and the target is 64-bit, the single byte
 * (ins->rex & REX_REAL) | REX_P is written, ins->rex is cleared so the
 * prefix cannot be emitted a second time, and `offset' advances by one.
 * Otherwise nothing happens.
 *
 * The byte is staged in a local uint8_t instead of being written
 * straight from &ins->rex, so the output does not depend on host byte
 * order should ins->rex be wider than one byte (NOTE(review): the
 * declared type of ins->rex is not visible from here - confirm).
 *
 * The do { } while (0) wrapper makes the expansion a single statement,
 * so `EMIT_REX();' is safe inside an unbraced if/else.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & REX_V) && (ins->rex & REX_REAL) &&             \
            (bits == 64)) {                                             \
            uint8_t emit_rex_byte_ = (ins->rex & REX_REAL) | REX_P;     \
            out(offset, segment, &emit_rex_byte_, OUT_RAWDATA, 1,       \
                NO_SEG, NO_SEG);                                        \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1272
1273 static void gencode(int32_t segment, int64_t offset, int bits,
1274                     insn * ins, const struct itemplate *temp,
1275                     int64_t insn_end)
1276 {
1277     static const char condval[] = {   /* conditional opcodes */
1278         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1279         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1280         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1281     };
1282     uint8_t c;
1283     uint8_t bytes[4];
1284     int64_t size;
1285     int64_t data;
1286     int op1, op2;
1287     struct operand *opx;
1288     const uint8_t *codes = temp->code;
1289     uint8_t opex = 0;
1290     enum ea_type eat = EA_SCALAR;
1291
1292     while (*codes) {
1293         c = *codes++;
1294         op1 = (c & 3) + ((opex & 1) << 2);
1295         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1296         opx = &ins->oprs[op1];
1297         opex = 0;                /* For the next iteration */
1298
1299         switch (c) {
1300         case 01:
1301         case 02:
1302         case 03:
1303         case 04:
1304             EMIT_REX();
1305             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1306             codes += c;
1307             offset += c;
1308             break;
1309
1310         case 05:
1311         case 06:
1312         case 07:
1313             opex = c;
1314             break;
1315
1316         case4(010):
1317             EMIT_REX();
1318             bytes[0] = *codes++ + (regval(opx) & 7);
1319             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1320             offset += 1;
1321             break;
1322
1323         case4(014):
1324             /*
1325              * The test for BITS8 and SBYTE here is intended to avoid
1326              * warning on optimizer actions due to SBYTE, while still
1327              * warn on explicit BYTE directives.  Also warn, obviously,
1328              * if the optimizer isn't enabled.
1329              */
1330             if (((opx->type & BITS8) ||
1331                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1332                 (opx->offset < -128 || opx->offset > 127)) {
1333                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1334                         "signed byte value exceeds bounds");
1335             }
1336             if (opx->segment != NO_SEG) {
1337                 data = opx->offset;
1338                 out(offset, segment, &data, OUT_ADDRESS, 1,
1339                     opx->segment, opx->wrt);
1340             } else {
1341                 bytes[0] = opx->offset;
1342                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1343                     NO_SEG);
1344             }
1345             offset += 1;
1346             break;
1347
1348         case4(020):
1349             if (opx->offset < -256 || opx->offset > 255) {
1350                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1351                         "byte value exceeds bounds");
1352             }
1353             if (opx->segment != NO_SEG) {
1354                 data = opx->offset;
1355                 out(offset, segment, &data, OUT_ADDRESS, 1,
1356                     opx->segment, opx->wrt);
1357             } else {
1358                 bytes[0] = opx->offset;
1359                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1360                     NO_SEG);
1361             }
1362             offset += 1;
1363             break;
1364
1365         case4(024):
1366             if (opx->offset < 0 || opx->offset > 255)
1367                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1368                         "unsigned byte value exceeds bounds");
1369             if (opx->segment != NO_SEG) {
1370                 data = opx->offset;
1371                 out(offset, segment, &data, OUT_ADDRESS, 1,
1372                     opx->segment, opx->wrt);
1373             } else {
1374                 bytes[0] = opx->offset;
1375                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1376                     NO_SEG);
1377             }
1378             offset += 1;
1379             break;
1380
1381         case4(030):
1382             warn_overflow_opd(opx, 2);
1383             data = opx->offset;
1384             out(offset, segment, &data, OUT_ADDRESS, 2,
1385                 opx->segment, opx->wrt);
1386             offset += 2;
1387             break;
1388
1389         case4(034):
1390             if (opx->type & (BITS16 | BITS32))
1391                 size = (opx->type & BITS16) ? 2 : 4;
1392             else
1393                 size = (bits == 16) ? 2 : 4;
1394             warn_overflow_opd(opx, size);
1395             data = opx->offset;
1396             out(offset, segment, &data, OUT_ADDRESS, size,
1397                 opx->segment, opx->wrt);
1398             offset += size;
1399             break;
1400
1401         case4(040):
1402             warn_overflow_opd(opx, 4);
1403             data = opx->offset;
1404             out(offset, segment, &data, OUT_ADDRESS, 4,
1405                 opx->segment, opx->wrt);
1406             offset += 4;
1407             break;
1408
1409         case4(044):
1410             data = opx->offset;
1411             size = ins->addr_size >> 3;
1412             warn_overflow_opd(opx, size);
1413             out(offset, segment, &data, OUT_ADDRESS, size,
1414                 opx->segment, opx->wrt);
1415             offset += size;
1416             break;
1417
1418         case4(050):
1419             if (opx->segment != segment) {
1420                 data = opx->offset;
1421                 out(offset, segment, &data,
1422                     OUT_REL1ADR, insn_end - offset,
1423                     opx->segment, opx->wrt);
1424             } else {
1425                 data = opx->offset - insn_end;
1426                 if (data > 127 || data < -128)
1427                     errfunc(ERR_NONFATAL, "short jump is out of range");
1428                 out(offset, segment, &data,
1429                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1430             }
1431             offset += 1;
1432             break;
1433
1434         case4(054):
1435             data = (int64_t)opx->offset;
1436             out(offset, segment, &data, OUT_ADDRESS, 8,
1437                 opx->segment, opx->wrt);
1438             offset += 8;
1439             break;
1440
1441         case4(060):
1442             if (opx->segment != segment) {
1443                 data = opx->offset;
1444                 out(offset, segment, &data,
1445                     OUT_REL2ADR, insn_end - offset,
1446                     opx->segment, opx->wrt);
1447             } else {
1448                 data = opx->offset - insn_end;
1449                 out(offset, segment, &data,
1450                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1451             }
1452             offset += 2;
1453             break;
1454
1455         case4(064):
1456             if (opx->type & (BITS16 | BITS32 | BITS64))
1457                 size = (opx->type & BITS16) ? 2 : 4;
1458             else
1459                 size = (bits == 16) ? 2 : 4;
1460             if (opx->segment != segment) {
1461                 data = opx->offset;
1462                 out(offset, segment, &data,
1463                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1464                     insn_end - offset, opx->segment, opx->wrt);
1465             } else {
1466                 data = opx->offset - insn_end;
1467                 out(offset, segment, &data,
1468                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1469             }
1470             offset += size;
1471             break;
1472
1473         case4(070):
1474             if (opx->segment != segment) {
1475                 data = opx->offset;
1476                 out(offset, segment, &data,
1477                     OUT_REL4ADR, insn_end - offset,
1478                     opx->segment, opx->wrt);
1479             } else {
1480                 data = opx->offset - insn_end;
1481                 out(offset, segment, &data,
1482                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1483             }
1484             offset += 4;
1485             break;
1486
1487         case4(074):
1488             if (opx->segment == NO_SEG)
1489                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1490                         " relocatable");
1491             data = 0;
1492             out(offset, segment, &data, OUT_ADDRESS, 2,
1493                 outfmt->segbase(1 + opx->segment),
1494                 opx->wrt);
1495             offset += 2;
1496             break;
1497
1498         case4(0140):
1499             data = opx->offset;
1500             warn_overflow_opd(opx, 2);
1501             if (is_sbyte16(opx)) {
1502                 bytes[0] = data;
1503                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1504                     NO_SEG);
1505                 offset++;
1506             } else {
1507                 out(offset, segment, &data, OUT_ADDRESS, 2,
1508                     opx->segment, opx->wrt);
1509                 offset += 2;
1510             }
1511             break;
1512
1513         case4(0144):
1514             EMIT_REX();
1515             bytes[0] = *codes++;
1516             if (is_sbyte16(opx))
1517                 bytes[0] |= 2;  /* s-bit */
1518             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1519             offset++;
1520             break;
1521
1522         case4(0150):
1523             data = opx->offset;
1524             warn_overflow_opd(opx, 4);
1525             if (is_sbyte32(opx)) {
1526                 bytes[0] = data;
1527                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1528                     NO_SEG);
1529                 offset++;
1530             } else {
1531                 out(offset, segment, &data, OUT_ADDRESS, 4,
1532                     opx->segment, opx->wrt);
1533                 offset += 4;
1534             }
1535             break;
1536
1537         case4(0154):
1538             EMIT_REX();
1539             bytes[0] = *codes++;
1540             if (is_sbyte32(opx))
1541                 bytes[0] |= 2;  /* s-bit */
1542             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1543             offset++;
1544             break;
1545
1546         case 0172:
1547             c = *codes++;
1548             opx = &ins->oprs[c >> 3];
1549             bytes[0] = nasm_regvals[opx->basereg] << 4;
1550             opx = &ins->oprs[c & 7];
1551             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1552                 errfunc(ERR_NONFATAL,
1553                         "non-absolute expression not permitted as argument %d",
1554                         c & 7);
1555             } else {
1556                 if (opx->offset & ~15) {
1557                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1558                             "four-bit argument exceeds bounds");
1559                 }
1560                 bytes[0] |= opx->offset & 15;
1561             }
1562             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1563             offset++;
1564             break;
1565
1566         case 0173:
1567             c = *codes++;
1568             opx = &ins->oprs[c >> 4];
1569             bytes[0] = nasm_regvals[opx->basereg] << 4;
1570             bytes[0] |= c & 15;
1571             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1572             offset++;
1573             break;
1574
1575         case4(0174):
1576             bytes[0] = nasm_regvals[opx->basereg] << 4;
1577             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1578             offset++;
1579             break;
1580
1581         case4(0250):
1582             data = opx->offset;
1583             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1584                 (int32_t)data != (int64_t)data) {
1585                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1586                         "signed dword immediate exceeds bounds");
1587             }
1588             if (is_sbyte32(opx)) {
1589                 bytes[0] = data;
1590                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1591                     NO_SEG);
1592                 offset++;
1593             } else {
1594                 out(offset, segment, &data, OUT_ADDRESS, 4,
1595                     opx->segment, opx->wrt);
1596                 offset += 4;
1597             }
1598             break;
1599
1600         case4(0254):
1601             data = opx->offset;
1602             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1603                 (int32_t)data != (int64_t)data) {
1604                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1605                         "signed dword immediate exceeds bounds");
1606             }
1607             out(offset, segment, &data, OUT_ADDRESS, 4,
1608                 opx->segment, opx->wrt);
1609             offset += 4;
1610             break;
1611
1612         case4(0260):
1613         case 0270:
1614             codes += 2;
1615             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1616                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1617                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1618                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1619                     ((~ins->vexreg & 15)<< 3) | (ins->vex_wlp & 07);
1620                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1621                 offset += 3;
1622             } else {
1623                 bytes[0] = 0xc5;
1624                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1625                     ((~ins->vexreg & 15) << 3) | (ins->vex_wlp & 07);
1626                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1627                 offset += 2;
1628             }
1629             break;
1630
1631         case 0271:
1632         case 0272:
1633         case 0273:
1634             break;
1635
1636         case4(0274):
1637         {
1638             uint64_t uv, um;
1639             int s;
1640
1641             if (ins->rex & REX_W)
1642                 s = 64;
1643             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1644                 s = 16;
1645             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1646                 s = 32;
1647             else
1648                 s = bits;
1649
1650             um = (uint64_t)2 << (s-1);
1651             uv = opx->offset;
1652
1653             if (uv > 127 && uv < (uint64_t)-128 &&
1654                 (uv < um-128 || uv > um-1)) {
1655                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1656                         "signed byte value exceeds bounds");
1657             }
1658             if (opx->segment != NO_SEG) {
1659                 data = uv;
1660                 out(offset, segment, &data, OUT_ADDRESS, 1,
1661                     opx->segment, opx->wrt);
1662             } else {
1663                 bytes[0] = uv;
1664                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1665                     NO_SEG);
1666             }
1667             offset += 1;
1668             break;
1669         }
1670
1671         case4(0300):
1672             break;
1673
1674         case 0310:
1675             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1676                 *bytes = 0x67;
1677                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1678                 offset += 1;
1679             } else
1680                 offset += 0;
1681             break;
1682
1683         case 0311:
1684             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1685                 *bytes = 0x67;
1686                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1687                 offset += 1;
1688             } else
1689                 offset += 0;
1690             break;
1691
1692         case 0312:
1693             break;
1694
1695         case 0313:
1696             ins->rex = 0;
1697             break;
1698
1699         case4(0314):
1700             break;
1701
1702         case 0320:
1703         case 0321:
1704             break;
1705
1706         case 0322:
1707         case 0323:
1708             break;
1709
1710         case 0324:
1711             ins->rex |= REX_W;
1712             break;
1713
1714         case 0325:
1715             break;
1716
1717         case 0326:
1718             break;
1719
1720         case 0330:
1721             *bytes = *codes++ ^ condval[ins->condition];
1722             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1723             offset += 1;
1724             break;
1725
1726         case 0331:
1727             break;
1728
1729         case 0332:
1730         case 0333:
1731             *bytes = c - 0332 + 0xF2;
1732             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1733             offset += 1;
1734             break;
1735
1736         case 0334:
1737             if (ins->rex & REX_R) {
1738                 *bytes = 0xF0;
1739                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1740                 offset += 1;
1741             }
1742             ins->rex &= ~(REX_L|REX_R);
1743             break;
1744
1745         case 0335:
1746             break;
1747
1748         case 0336:
1749         case 0337:
1750             break;
1751
1752         case 0340:
1753             if (ins->oprs[0].segment != NO_SEG)
1754                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1755             else {
1756                 int64_t size = ins->oprs[0].offset;
1757                 if (size > 0)
1758                     out(offset, segment, NULL,
1759                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1760                 offset += size;
1761             }
1762             break;
1763
1764         case 0341:
1765             break;
1766
1767         case 0344:
1768         case 0345:
1769             bytes[0] = c & 1;
1770             switch (ins->oprs[0].basereg) {
1771             case R_CS:
1772                 bytes[0] += 0x0E;
1773                 break;
1774             case R_DS:
1775                 bytes[0] += 0x1E;
1776                 break;
1777             case R_ES:
1778                 bytes[0] += 0x06;
1779                 break;
1780             case R_SS:
1781                 bytes[0] += 0x16;
1782                 break;
1783             default:
1784                 errfunc(ERR_PANIC,
1785                         "bizarre 8086 segment register received");
1786             }
1787             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1788             offset++;
1789             break;
1790
1791         case 0346:
1792         case 0347:
1793             bytes[0] = c & 1;
1794             switch (ins->oprs[0].basereg) {
1795             case R_FS:
1796                 bytes[0] += 0xA0;
1797                 break;
1798             case R_GS:
1799                 bytes[0] += 0xA8;
1800                 break;
1801             default:
1802                 errfunc(ERR_PANIC,
1803                         "bizarre 386 segment register received");
1804             }
1805             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1806             offset++;
1807             break;
1808
1809         case 0360:
1810             break;
1811
1812         case 0361:
1813             bytes[0] = 0x66;
1814             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1815             offset += 1;
1816             break;
1817
1818         case 0362:
1819         case 0363:
1820             bytes[0] = c - 0362 + 0xf2;
1821             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1822             offset += 1;
1823             break;
1824
1825         case 0364:
1826         case 0365:
1827             break;
1828
1829         case 0366:
1830         case 0367:
1831             *bytes = c - 0366 + 0x66;
1832             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1833             offset += 1;
1834             break;
1835
1836         case 0370:
1837         case 0371:
1838             break;
1839
1840         case 0373:
1841             *bytes = bits == 16 ? 3 : 5;
1842             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1843             offset += 1;
1844             break;
1845
1846         case 0374:
1847             eat = EA_XMMVSIB;
1848             break;
1849
1850         case 0375:
1851             eat = EA_YMMVSIB;
1852             break;
1853
1854         case4(0100):
1855         case4(0110):
1856         case4(0120):
1857         case4(0130):
1858         case4(0200):
1859         case4(0204):
1860         case4(0210):
1861         case4(0214):
1862         case4(0220):
1863         case4(0224):
1864         case4(0230):
1865         case4(0234):
1866             {
1867                 ea ea_data;
1868                 int rfield;
1869                 opflags_t rflags;
1870                 uint8_t *p;
1871                 int32_t s;
1872                 struct operand *opy = &ins->oprs[op2];
1873
1874                 if (c <= 0177) {
1875                     /* pick rfield from operand b (opx) */
1876                     rflags = regflag(opx);
1877                     rfield = nasm_regvals[opx->basereg];
1878                 } else {
1879                     /* rfield is constant */
1880                     rflags = 0;
1881                     rfield = c & 7;
1882                 }
1883
1884                 if (process_ea(opy, &ea_data, bits, ins->addr_size,
1885                                rfield, rflags) != eat)
1886                     errfunc(ERR_NONFATAL, "invalid effective address");
1887
1888                 p = bytes;
1889                 *p++ = ea_data.modrm;
1890                 if (ea_data.sib_present)
1891                     *p++ = ea_data.sib;
1892
1893                 s = p - bytes;
1894                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1895
1896                 /*
1897                  * Make sure the address gets the right offset in case
1898                  * the line breaks in the .lst file (BR 1197827)
1899                  */
1900                 offset += s;
1901                 s = 0;
1902
1903                 switch (ea_data.bytes) {
1904                 case 0:
1905                     break;
1906                 case 1:
1907                 case 2:
1908                 case 4:
1909                 case 8:
1910                     data = opy->offset;
1911                     s += ea_data.bytes;
1912                     if (ea_data.rip) {
1913                         if (opy->segment == segment) {
1914                             data -= insn_end;
1915                             if (overflow_signed(data, ea_data.bytes))
1916                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1917                             out(offset, segment, &data, OUT_ADDRESS,
1918                                 ea_data.bytes, NO_SEG, NO_SEG);
1919                         } else {
1920                             /* overflow check in output/linker? */
1921                             out(offset, segment, &data,        OUT_REL4ADR,
1922                                 insn_end - offset, opy->segment, opy->wrt);
1923                         }
1924                     } else {
1925                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1926                             signed_bits(opy->offset, ins->addr_size) !=
1927                             signed_bits(opy->offset, ea_data.bytes * 8))
1928                             warn_overflow(ERR_PASS2, ea_data.bytes);
1929
1930                         out(offset, segment, &data, OUT_ADDRESS,
1931                             ea_data.bytes, opy->segment, opy->wrt);
1932                     }
1933                     break;
1934                 default:
1935                     /* Impossible! */
1936                     errfunc(ERR_PANIC,
1937                             "Invalid amount of bytes (%d) for offset?!",
1938                             ea_data.bytes);
1939                     break;
1940                 }
1941                 offset += s;
1942             }
1943             break;
1944
1945         default:
1946             errfunc(ERR_PANIC, "internal instruction table corrupt"
1947                     ": instruction code \\%o (0x%02X) given", c, c);
1948             break;
1949         }
1950     }
1951 }
1952
1953 static opflags_t regflag(const operand * o)
1954 {
1955     if (!is_register(o->basereg))
1956         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1957     return nasm_reg_flags[o->basereg];
1958 }
1959
1960 static int32_t regval(const operand * o)
1961 {
1962     if (!is_register(o->basereg))
1963         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1964     return nasm_regvals[o->basereg];
1965 }
1966
1967 static int op_rexflags(const operand * o, int mask)
1968 {
1969     opflags_t flags;
1970     int val;
1971
1972     if (!is_register(o->basereg))
1973         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1974
1975     flags = nasm_reg_flags[o->basereg];
1976     val = nasm_regvals[o->basereg];
1977
1978     return rexflags(val, flags, mask);
1979 }
1980
1981 static int rexflags(int val, opflags_t flags, int mask)
1982 {
1983     int rex = 0;
1984
1985     if (val >= 8)
1986         rex |= REX_B|REX_X|REX_R;
1987     if (flags & BITS64)
1988         rex |= REX_W;
1989     if (!(REG_HIGH & ~flags))                   /* AH, CH, DH, BH */
1990         rex |= REX_H;
1991     else if (!(REG8 & ~flags) && val >= 4)      /* SPL, BPL, SIL, DIL */
1992         rex |= REX_P;
1993
1994     return rex & mask;
1995 }
1996
1997 static enum match_result find_match(const struct itemplate **tempp,
1998                                     insn *instruction,
1999                                     int32_t segment, int64_t offset, int bits)
2000 {
2001     const struct itemplate *temp;
2002     enum match_result m, merr;
2003     opflags_t xsizeflags[MAX_OPERANDS];
2004     bool opsizemissing = false;
2005     int i;
2006
2007     for (i = 0; i < instruction->operands; i++)
2008         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
2009
2010     merr = MERR_INVALOP;
2011
2012     for (temp = nasm_instructions[instruction->opcode];
2013          temp->opcode != I_none; temp++) {
2014         m = matches(temp, instruction, bits);
2015         if (m == MOK_JUMP) {
2016             if (jmp_match(segment, offset, bits, instruction, temp))
2017                 m = MOK_GOOD;
2018             else
2019                 m = MERR_INVALOP;
2020         } else if (m == MERR_OPSIZEMISSING &&
2021                    (temp->flags & IF_SMASK) != IF_SX) {
2022             /*
2023              * Missing operand size and a candidate for fuzzy matching...
2024              */
2025             for (i = 0; i < temp->operands; i++) {
2026                 if ((temp->opd[i] & SAME_AS) == 0)
2027                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
2028             }
2029             opsizemissing = true;
2030         }
2031         if (m > merr)
2032             merr = m;
2033         if (merr == MOK_GOOD)
2034             goto done;
2035     }
2036
2037     /* No match, but see if we can get a fuzzy operand size match... */
2038     if (!opsizemissing)
2039         goto done;
2040
2041     for (i = 0; i < instruction->operands; i++) {
2042         /*
2043          * We ignore extrinsic operand sizes on registers, so we should
2044          * never try to fuzzy-match on them.  This also resolves the case
2045          * when we have e.g. "xmmrm128" in two different positions.
2046          */
2047         if (is_class(REGISTER, instruction->oprs[i].type))
2048             continue;
2049
2050         /* This tests if xsizeflags[i] has more than one bit set */
2051         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2052             goto done;                /* No luck */
2053
2054         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2055     }
2056
2057     /* Try matching again... */
2058     for (temp = nasm_instructions[instruction->opcode];
2059          temp->opcode != I_none; temp++) {
2060         m = matches(temp, instruction, bits);
2061         if (m == MOK_JUMP) {
2062             if (jmp_match(segment, offset, bits, instruction, temp))
2063                 m = MOK_GOOD;
2064             else
2065                 m = MERR_INVALOP;
2066         }
2067         if (m > merr)
2068             merr = m;
2069         if (merr == MOK_GOOD)
2070             goto done;
2071     }
2072
2073 done:
2074     *tempp = temp;
2075     return merr;
2076 }
2077
2078 static enum match_result matches(const struct itemplate *itemp,
2079                                  insn *instruction, int bits)
2080 {
2081     opflags_t size[MAX_OPERANDS], asize;
2082     bool opsizemissing = false;
2083     int i, oprs;
2084
2085     /*
2086      * Check the opcode
2087      */
2088     if (itemp->opcode != instruction->opcode)
2089         return MERR_INVALOP;
2090
2091     /*
2092      * Count the operands
2093      */
2094     if (itemp->operands != instruction->operands)
2095         return MERR_INVALOP;
2096
2097     /*
2098      * Is it legal?
2099      */
2100     if (!(optimizing > 0) && (itemp->flags & IF_OPT))
2101         return MERR_INVALOP;
2102
2103     /*
2104      * Check that no spurious colons or TOs are present
2105      */
2106     for (i = 0; i < itemp->operands; i++)
2107         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2108             return MERR_INVALOP;
2109
2110     /*
2111      * Process size flags
2112      */
2113     switch (itemp->flags & IF_SMASK) {
2114     case IF_SB:
2115         asize = BITS8;
2116         break;
2117     case IF_SW:
2118         asize = BITS16;
2119         break;
2120     case IF_SD:
2121         asize = BITS32;
2122         break;
2123     case IF_SQ:
2124         asize = BITS64;
2125         break;
2126     case IF_SO:
2127         asize = BITS128;
2128         break;
2129     case IF_SY:
2130         asize = BITS256;
2131         break;
2132     case IF_SZ:
2133         switch (bits) {
2134         case 16:
2135             asize = BITS16;
2136             break;
2137         case 32:
2138             asize = BITS32;
2139             break;
2140         case 64:
2141             asize = BITS64;
2142             break;
2143         default:
2144             asize = 0;
2145             break;
2146         }
2147         break;
2148     default:
2149         asize = 0;
2150         break;
2151     }
2152
2153     if (itemp->flags & IF_ARMASK) {
2154         /* S- flags only apply to a specific operand */
2155         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2156         memset(size, 0, sizeof size);
2157         size[i] = asize;
2158     } else {
2159         /* S- flags apply to all operands */
2160         for (i = 0; i < MAX_OPERANDS; i++)
2161             size[i] = asize;
2162     }
2163
2164     /*
2165      * Check that the operand flags all match up,
2166      * it's a bit tricky so lets be verbose:
2167      *
2168      * 1) Find out the size of operand. If instruction
2169      *    doesn't have one specified -- we're trying to
2170      *    guess it either from template (IF_S* flag) or
2171      *    from code bits.
2172      *
2173      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2174      *    (ie the same operand as was specified somewhere in template, and
2175      *    this referred operand index is being achieved via ~SAME_AS)
2176      *    we are to be sure that both registers (in template and instruction)
2177      *    do exactly match.
2178      *
2179      * 3) If template operand do not match the instruction OR
2180      *    template has an operand size specified AND this size differ
2181      *    from which instruction has (perhaps we got it from code bits)
2182      *    we are:
2183      *      a)  Check that only size of instruction and operand is differ
2184      *          other characteristics do match
2185      *      b)  Perhaps it's a register specified in instruction so
2186      *          for such a case we just mark that operand as "size
2187      *          missing" and this will turn on fuzzy operand size
2188      *          logic facility (handled by a caller)
2189      */
2190     for (i = 0; i < itemp->operands; i++) {
2191         opflags_t type = instruction->oprs[i].type;
2192         if (!(type & SIZE_MASK))
2193             type |= size[i];
2194
2195         if (itemp->opd[i] & SAME_AS) {
2196             int j = itemp->opd[i] & ~SAME_AS;
2197             if (type != instruction->oprs[j].type ||
2198                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2199                 return MERR_INVALOP;
2200         } else if (itemp->opd[i] & ~type ||
2201             ((itemp->opd[i] & SIZE_MASK) &&
2202              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2203             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2204                 return MERR_INVALOP;
2205             } else if (!is_class(REGISTER, type)) {
2206                 /*
2207                  * Note: we don't honor extrinsic operand sizes for registers,
2208                  * so "missing operand size" for a register should be
2209                  * considered a wildcard match rather than an error.
2210                  */
2211                 opsizemissing = true;
2212             }
2213         }
2214     }
2215
2216     if (opsizemissing)
2217         return MERR_OPSIZEMISSING;
2218
2219     /*
2220      * Check operand sizes
2221      */
2222     if (itemp->flags & (IF_SM | IF_SM2)) {
2223         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2224         for (i = 0; i < oprs; i++) {
2225             asize = itemp->opd[i] & SIZE_MASK;
2226             if (asize) {
2227                 for (i = 0; i < oprs; i++)
2228                     size[i] = asize;
2229                 break;
2230             }
2231         }
2232     } else {
2233         oprs = itemp->operands;
2234     }
2235
2236     for (i = 0; i < itemp->operands; i++) {
2237         if (!(itemp->opd[i] & SIZE_MASK) &&
2238             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2239             return MERR_OPSIZEMISMATCH;
2240     }
2241
2242     /*
2243      * Check template is okay at the set cpu level
2244      */
2245     if (((itemp->flags & IF_PLEVEL) > cpu))
2246         return MERR_BADCPU;
2247
2248     /*
2249      * Verify the appropriate long mode flag.
2250      */
2251     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2252         return MERR_BADMODE;
2253
2254     /*
2255      * If we have a HLE prefix, look for the NOHLE flag
2256      */
2257     if ((itemp->flags & IF_NOHLE) &&
2258         (has_prefix(instruction, PPS_REP, P_XACQUIRE) ||
2259          has_prefix(instruction, PPS_REP, P_XRELEASE)))
2260         return MERR_BADHLE;
2261
2262     /*
2263      * Check if special handling needed for Jumps
2264      */
2265     if ((itemp->code[0] & ~1) == 0370)
2266         return MOK_JUMP;
2267
2268     return MOK_GOOD;
2269 }
2270
2271 static enum ea_type process_ea(operand *input, ea *output, int bits,
2272                                int addrbits, int rfield, opflags_t rflags)
2273 {
2274     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2275
2276     output->type    = EA_SCALAR;
2277     output->rip     = false;
2278
2279     /* REX flags for the rfield operand */
2280     output->rex     |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
2281
2282     if (is_class(REGISTER, input->type)) {
2283         /*
2284          * It's a direct register.
2285          */
2286         opflags_t f;
2287
2288         if (!is_register(input->basereg))
2289             goto err;
2290
2291         f = regflag(input);
2292
2293         if (!is_class(REG_EA, f))
2294             goto err;
2295
2296         output->rex         |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
2297         output->sib_present = false;    /* no SIB necessary */
2298         output->bytes       = 0;        /* no offset necessary either */
2299         output->modrm       = GEN_MODRM(3, rfield, nasm_regvals[input->basereg]);
2300     } else {
2301         /*
2302          * It's a memory reference.
2303          */
2304         if (input->basereg == -1 &&
2305             (input->indexreg == -1 || input->scale == 0)) {
2306             /*
2307              * It's a pure offset.
2308              */
2309             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2310                 input->segment == NO_SEG) {
2311                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2312                 input->type &= ~IP_REL;
2313                 input->type |= MEMORY;
2314             }
2315
2316             if (input->eaflags & EAF_BYTEOFFS ||
2317                 (input->eaflags & EAF_WORDOFFS &&
2318                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2319                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2320             }
2321
2322             if (bits == 64 && (~input->type & IP_REL)) {
2323                 output->sib_present = true;
2324                 output->sib         = GEN_SIB(0, 4, 5);
2325                 output->bytes       = 4;
2326                 output->modrm       = GEN_MODRM(0, rfield, 4);
2327                 output->rip         = false;
2328             } else {
2329                 output->sib_present = false;
2330                 output->bytes       = (addrbits != 16 ? 4 : 2);
2331                 output->modrm       = GEN_MODRM(0, rfield, (addrbits != 16 ? 5 : 6));
2332                 output->rip         = bits == 64;
2333             }
2334         } else {
2335             /*
2336              * It's an indirection.
2337              */
2338             int i = input->indexreg, b = input->basereg, s = input->scale;
2339             int32_t seg = input->segment;
2340             int hb = input->hintbase, ht = input->hinttype;
2341             int t, it, bt;              /* register numbers */
2342             opflags_t x, ix, bx;        /* register flags */
2343
2344             if (s == 0)
2345                 i = -1;         /* make this easy, at least */
2346
2347             if (is_register(i)) {
2348                 it = nasm_regvals[i];
2349                 ix = nasm_reg_flags[i];
2350             } else {
2351                 it = -1;
2352                 ix = 0;
2353             }
2354
2355             if (is_register(b)) {
2356                 bt = nasm_regvals[b];
2357                 bx = nasm_reg_flags[b];
2358             } else {
2359                 bt = -1;
2360                 bx = 0;
2361             }
2362
2363             /* if either one are a vector register... */
2364             if ((ix|bx) & (XMMREG|YMMREG) & ~REG_EA) {
2365                 opflags_t sok = BITS32 | BITS64;
2366                 int32_t o = input->offset;
2367                 int mod, scale, index, base;
2368
2369                 /*
2370                  * For a vector SIB, one has to be a vector and the other,
2371                  * if present, a GPR.  The vector must be the index operand.
2372                  */
2373                 if (it == -1 || (bx & (XMMREG|YMMREG) & ~REG_EA)) {
2374                     if (s == 0)
2375                         s = 1;
2376                     else if (s != 1)
2377                         goto err;
2378
2379                     t = bt, bt = it, it = t;
2380                     x = bx, bx = ix, ix = x;
2381                 }
2382
2383                 if (bt != -1) {
2384                     if (REG_GPR & ~bx)
2385                         goto err;
2386                     if (!(REG64 & ~bx) || !(REG32 & ~bx))
2387                         sok &= bx;
2388                     else
2389                         goto err;
2390                 }
2391
2392                 /*
2393                  * While we're here, ensure the user didn't specify
2394                  * WORD or QWORD
2395                  */
2396                 if (input->disp_size == 16 || input->disp_size == 64)
2397                     goto err;
2398
2399                 if (addrbits == 16 ||
2400                     (addrbits == 32 && !(sok & BITS32)) ||
2401                     (addrbits == 64 && !(sok & BITS64)))
2402                     goto err;
2403
2404                 output->type = (ix & YMMREG & ~REG_EA)
2405                     ? EA_YMMVSIB : EA_XMMVSIB;
2406
2407                 output->rex |= rexflags(it, ix, REX_X);
2408                 output->rex |= rexflags(bt, bx, REX_B);
2409
2410                 index = it & 7; /* it is known to be != -1 */
2411
2412                 switch (s) {
2413                 case 1:
2414                     scale = 0;
2415                     break;
2416                 case 2:
2417                     scale = 1;
2418                     break;
2419                 case 4:
2420                     scale = 2;
2421                     break;
2422                 case 8:
2423                     scale = 3;
2424                     break;
2425                 default:   /* then what the smeg is it? */
2426                     goto err;    /* panic */
2427                 }
2428
2429                 if (bt == -1) {
2430                     base = 5;
2431                     mod = 0;
2432                 } else {
2433                     base = (bt & 7);
2434                     if (base != REG_NUM_EBP && o == 0 &&
2435                         seg == NO_SEG && !forw_ref &&
2436                         !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2437                         mod = 0;
2438                     else if (input->eaflags & EAF_BYTEOFFS ||
2439                              (o >= -128 && o <= 127 &&
2440                               seg == NO_SEG && !forw_ref &&
2441                               !(input->eaflags & EAF_WORDOFFS)))
2442                         mod = 1;
2443                     else
2444                         mod = 2;
2445                 }
2446
2447                 output->sib_present = true;
2448                 output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2449                 output->modrm       = GEN_MODRM(mod, rfield, 4);
2450                 output->sib         = GEN_SIB(scale, index, base);
2451             } else if ((ix|bx) & (BITS32|BITS64)) {
2452                 /*
2453                  * it must be a 32/64-bit memory reference. Firstly we have
2454                  * to check that all registers involved are type E/Rxx.
2455                  */
2456                 opflags_t sok = BITS32 | BITS64;
2457                 int32_t o = input->offset;
2458
2459                 if (it != -1) {
2460                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2461                         sok &= ix;
2462                     else
2463                         goto err;
2464                 }
2465
2466                 if (bt != -1) {
2467                     if (REG_GPR & ~bx)
2468                         goto err; /* Invalid register */
2469                     if (~sok & bx & SIZE_MASK)
2470                         goto err; /* Invalid size */
2471                     sok &= bx;
2472                 }
2473
2474                 /*
2475                  * While we're here, ensure the user didn't specify
2476                  * WORD or QWORD
2477                  */
2478                 if (input->disp_size == 16 || input->disp_size == 64)
2479                     goto err;
2480
2481                 if (addrbits == 16 ||
2482                     (addrbits == 32 && !(sok & BITS32)) ||
2483                     (addrbits == 64 && !(sok & BITS64)))
2484                     goto err;
2485
2486                 /* now reorganize base/index */
2487                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2488                     ((hb == b && ht == EAH_NOTBASE) ||
2489                      (hb == i && ht == EAH_MAKEBASE))) {
2490                     /* swap if hints say so */
2491                     t = bt, bt = it, it = t;
2492                     x = bx, bx = ix, ix = x;
2493                 }
2494                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2495                     bt = -1, bx = 0, s++;
2496                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2497                     /* make single reg base, unless hint */
2498                     bt = it, bx = ix, it = -1, ix = 0;
2499                 }
2500                 if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
2501                       s == 3 || s == 5 || s == 9) && bt == -1)
2502                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2503                 if (it == -1 && (bt & 7) != REG_NUM_ESP &&
2504                     (input->eaflags & EAF_TIMESTWO))
2505                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2506                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2507                 if (s == 1 && it == REG_NUM_ESP) {
2508                     /* swap ESP into base if scale is 1 */
2509                     t = it, it = bt, bt = t;
2510                     x = ix, ix = bx, bx = x;
2511                 }
2512                 if (it == REG_NUM_ESP ||
2513                     (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2514                     goto err;        /* wrong, for various reasons */
2515
2516                 output->rex |= rexflags(it, ix, REX_X);
2517                 output->rex |= rexflags(bt, bx, REX_B);
2518
2519                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2520                     /* no SIB needed */
2521                     int mod, rm;
2522
2523                     if (bt == -1) {
2524                         rm = 5;
2525                         mod = 0;
2526                     } else {
2527                         rm = (bt & 7);
2528                         if (rm != REG_NUM_EBP && o == 0 &&
2529                             seg == NO_SEG && !forw_ref &&
2530                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2531                             mod = 0;
2532                         else if (input->eaflags & EAF_BYTEOFFS ||
2533                                  (o >= -128 && o <= 127 &&
2534                                   seg == NO_SEG && !forw_ref &&
2535                                   !(input->eaflags & EAF_WORDOFFS)))
2536                             mod = 1;
2537                         else
2538                             mod = 2;
2539                     }
2540
2541                     output->sib_present = false;
2542                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2543                     output->modrm       = GEN_MODRM(mod, rfield, rm);
2544                 } else {
2545                     /* we need a SIB */
2546                     int mod, scale, index, base;
2547
2548                     if (it == -1)
2549                         index = 4, s = 1;
2550                     else
2551                         index = (it & 7);
2552
2553                     switch (s) {
2554                     case 1:
2555                         scale = 0;
2556                         break;
2557                     case 2:
2558                         scale = 1;
2559                         break;
2560                     case 4:
2561                         scale = 2;
2562                         break;
2563                     case 8:
2564                         scale = 3;
2565                         break;
2566                     default:   /* then what the smeg is it? */
2567                         goto err;    /* panic */
2568                     }
2569
2570                     if (bt == -1) {
2571                         base = 5;
2572                         mod = 0;
2573                     } else {
2574                         base = (bt & 7);
2575                         if (base != REG_NUM_EBP && o == 0 &&
2576                             seg == NO_SEG && !forw_ref &&
2577                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2578                             mod = 0;
2579                         else if (input->eaflags & EAF_BYTEOFFS ||
2580                                  (o >= -128 && o <= 127 &&
2581                                   seg == NO_SEG && !forw_ref &&
2582                                   !(input->eaflags & EAF_WORDOFFS)))
2583                             mod = 1;
2584                         else
2585                             mod = 2;
2586                     }
2587
2588                     output->sib_present = true;
2589                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2590                     output->modrm       = GEN_MODRM(mod, rfield, 4);
2591                     output->sib         = GEN_SIB(scale, index, base);
2592                 }
2593             } else {            /* it's 16-bit */
2594                 int mod, rm;
2595                 int16_t o = input->offset;
2596
2597                 /* check for 64-bit long mode */
2598                 if (addrbits == 64)
2599                     goto err;
2600
2601                 /* check all registers are BX, BP, SI or DI */
2602                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
2603                     (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
2604                     goto err;
2605
2606                 /* ensure the user didn't specify DWORD/QWORD */
2607                 if (input->disp_size == 32 || input->disp_size == 64)
2608                     goto err;
2609
2610                 if (s != 1 && i != -1)
2611                     goto err;        /* no can do, in 16-bit EA */
2612                 if (b == -1 && i != -1) {
2613                     int tmp = b;
2614                     b = i;
2615                     i = tmp;
2616                 }               /* swap */
2617                 if ((b == R_SI || b == R_DI) && i != -1) {
2618                     int tmp = b;
2619                     b = i;
2620                     i = tmp;
2621                 }
2622                 /* have BX/BP as base, SI/DI index */
2623                 if (b == i)
2624                     goto err;        /* shouldn't ever happen, in theory */
2625                 if (i != -1 && b != -1 &&
2626                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2627                     goto err;        /* invalid combinations */
2628                 if (b == -1)            /* pure offset: handled above */
2629                     goto err;        /* so if it gets to here, panic! */
2630
2631                 rm = -1;
2632                 if (i != -1)
2633                     switch (i * 256 + b) {
2634                     case R_SI * 256 + R_BX:
2635                         rm = 0;
2636                         break;
2637                     case R_DI * 256 + R_BX:
2638                         rm = 1;
2639                         break;
2640                     case R_SI * 256 + R_BP:
2641                         rm = 2;
2642                         break;
2643                     case R_DI * 256 + R_BP:
2644                         rm = 3;
2645                         break;
2646                 } else
2647                     switch (b) {
2648                     case R_SI:
2649                         rm = 4;
2650                         break;
2651                     case R_DI:
2652                         rm = 5;
2653                         break;
2654                     case R_BP:
2655                         rm = 6;
2656                         break;
2657                     case R_BX:
2658                         rm = 7;
2659                         break;
2660                     }
2661                 if (rm == -1)           /* can't happen, in theory */
2662                     goto err;        /* so panic if it does */
2663
2664                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2665                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2666                     mod = 0;
2667                 else if (input->eaflags & EAF_BYTEOFFS ||
2668                          (o >= -128 && o <= 127 && seg == NO_SEG &&
2669                           !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
2670                     mod = 1;
2671                 else
2672                     mod = 2;
2673
2674                 output->sib_present = false;    /* no SIB - it's 16-bit */
2675                 output->bytes       = mod;      /* bytes of offset needed */
2676                 output->modrm       = GEN_MODRM(mod, rfield, rm);
2677             }
2678         }
2679     }
2680
2681     output->size = 1 + output->sib_present + output->bytes;
2682     return output->type;
2683
2684 err:
2685     return output->type = EA_INVALID;
2686 }
2687
2688 static void add_asp(insn *ins, int addrbits)
2689 {
2690     int j, valid;
2691     int defdisp;
2692
2693     valid = (addrbits == 64) ? 64|32 : 32|16;
2694
2695     switch (ins->prefixes[PPS_ASIZE]) {
2696     case P_A16:
2697         valid &= 16;
2698         break;
2699     case P_A32:
2700         valid &= 32;
2701         break;
2702     case P_A64:
2703         valid &= 64;
2704         break;
2705     case P_ASP:
2706         valid &= (addrbits == 32) ? 16 : 32;
2707         break;
2708     default:
2709         break;
2710     }
2711
2712     for (j = 0; j < ins->operands; j++) {
2713         if (is_class(MEMORY, ins->oprs[j].type)) {
2714             opflags_t i, b;
2715
2716             /* Verify as Register */
2717             if (!is_register(ins->oprs[j].indexreg))
2718                 i = 0;
2719             else
2720                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2721
2722             /* Verify as Register */
2723             if (!is_register(ins->oprs[j].basereg))
2724                 b = 0;
2725             else
2726                 b = nasm_reg_flags[ins->oprs[j].basereg];
2727
2728             if (ins->oprs[j].scale == 0)
2729                 i = 0;
2730
2731             if (!i && !b) {
2732                 int ds = ins->oprs[j].disp_size;
2733                 if ((addrbits != 64 && ds > 8) ||
2734                     (addrbits == 64 && ds == 16))
2735                     valid &= ds;
2736             } else {
2737                 if (!(REG16 & ~b))
2738                     valid &= 16;
2739                 if (!(REG32 & ~b))
2740                     valid &= 32;
2741                 if (!(REG64 & ~b))
2742                     valid &= 64;
2743
2744                 if (!(REG16 & ~i))
2745                     valid &= 16;
2746                 if (!(REG32 & ~i))
2747                     valid &= 32;
2748                 if (!(REG64 & ~i))
2749                     valid &= 64;
2750             }
2751         }
2752     }
2753
2754     if (valid & addrbits) {
2755         ins->addr_size = addrbits;
2756     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2757         /* Add an address size prefix */
2758         ins->prefixes[PPS_ASIZE] = (addrbits == 32) ? P_A16 : P_A32;;
2759         ins->addr_size = (addrbits == 32) ? 16 : 32;
2760     } else {
2761         /* Impossible... */
2762         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2763         ins->addr_size = addrbits; /* Error recovery */
2764     }
2765
2766     defdisp = ins->addr_size == 16 ? 16 : 32;
2767
2768     for (j = 0; j < ins->operands; j++) {
2769         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2770             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
2771             /*
2772              * mem_offs sizes must match the address size; if not,
2773              * strip the MEM_OFFS bit and match only EA instructions
2774              */
2775             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2776         }
2777     }
2778 }