assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2011 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \172\ab       - the register number from operand a in bits 7..4, with
  70  *                 the 4-bit immediate from operand b in bits 3..0.
  71  * \173\xab      - the register number from operand a in bits 7..4, with
  72  *                 the value b in bits 3..0.
  73  * \174\a        - the register number from operand a in bits 7..4, and
  74  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  75  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  76  *                 field equal to digit b.
  77  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  78  *                 is not equal to the truncated and sign-extended 32-bit
  79  *                 operand; used for 32-bit immediates in 64-bit mode.
  80  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  81  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  82  *                 V field taken from operand 0..3.
  83  * \270          - this instruction uses VEX/XOP rather than REX, with the
  84  *                 V field set to 1111b.
  85  *
  86  * VEX/XOP prefixes are followed by the sequence:
  87  * \tmm\wlp        where mm is the M field; and wlp is:
  88  *                 00 wwl lpp
  89  *                 [l0]  ll = 0 for L = 0 (.128, .lz)
  90  *                 [l1]  ll = 1 for L = 1 (.256)
  91  *                 [lig] ll = 2 for L don't care (always assembled as 0)
  92  *
  93  *                 [w0]  ww = 0 for W = 0
   94  *                 [w1]  ww = 1 for W = 1
  95  *                 [wig] ww = 2 for W don't care (always assembled as 0)
  96  *                 [ww]  ww = 3 for W used as REX.W
  97  *
  98  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
  99  *
 100  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 101  *                 which is to be extended to the operand size.
 102  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 103  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 104  * \312          - (disassembler only) invalid with non-default address size.
 105  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 106  * \314          - (disassembler only) invalid with REX.B
 107  * \315          - (disassembler only) invalid with REX.X
 108  * \316          - (disassembler only) invalid with REX.R
 109  * \317          - (disassembler only) invalid with REX.W
 110  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 111  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 112  * \322          - indicates that this instruction is only valid when the
 113  *                 operand size is the default (instruction to disassembler,
 114  *                 generates no code in the assembler)
 115  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 116  * \324          - indicates 64-bit operand size requiring REX prefix.
 117  * \325          - instruction which always uses spl/bpl/sil/dil
 118  * \330          - a literal byte follows in the code stream, to be added
 119  *                 to the condition code value of the instruction.
 120  * \331          - instruction not valid with REP prefix.  Hint for
 121  *                 disassembler only; for SSE instructions.
 122  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 123  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 124  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 125  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
  126  * \336          - force a REP(E) prefix (0xF3) even if not specified.
  127  * \337          - force a REPNE prefix (0xF2) even if not specified.
 128  *                 \336-\337 are still listed as prefixes in the disassembler.
 129  * \340          - reserve <operand 0> bytes of uninitialized storage.
 130  *                 Operand 0 had better be a segmentless constant.
 131  * \341          - this instruction needs a WAIT "prefix"
 132  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 133  *                 (POP is never used for CS) depending on operand 0
 134  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 135  *                 on operand 0
 136  * \360          - no SSE prefix (== \364\331)
 137  * \361          - 66 SSE prefix (== \366\331)
 138  * \362          - F2 SSE prefix (== \364\332)
 139  * \363          - F3 SSE prefix (== \364\333)
 140  * \364          - operand-size prefix (0x66) not permitted
 141  * \365          - address-size prefix (0x67) not permitted
 142  * \366          - operand-size prefix (0x66) used as opcode extension
 143  * \367          - address-size prefix (0x67) used as opcode extension
 144  * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
 145  *                 370 is used for Jcc, 371 is used for JMP.
 146  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 147  *                 used for conditional jump over longer jump
 148  * \374          - this instruction takes an XMM VSIB memory EA
 149  * \375          - this instruction takes an YMM VSIB memory EA
 150  */
 151
 152 #include "compiler.h"
 153
 154 #include <stdio.h>
 155 #include <string.h>
 156 #include <inttypes.h>
 157
 158 #include "nasm.h"
 159 #include "nasmlib.h"
 160 #include "assemble.h"
 161 #include "insns.h"
 162 #include "tables.h"
 163
  164 enum match_result {
  165     /*
  166      * Result codes of matches()/find_match().  Matching errors first,
  167      * sorted so that more specific errors come later in the sequence.
  168      */
  169     MERR_INVALOP,         /* "invalid combination of opcode and operands" */
  170     MERR_OPSIZEMISSING,   /* "operation size not specified" */
  171     MERR_OPSIZEMISMATCH,  /* "mismatch in operand sizes" */
  172     MERR_BADCPU,          /* "no instruction for this cpu level" */
  173     MERR_BADMODE,         /* "instruction not supported in N-bit mode" */
  174     /*
  175      * Matching success; the conditional ones first
  176      */
  177     MOK_JUMP,   /* Matching OK but needs jmp_match() */
  178     MOK_GOOD    /* Matching unconditionally OK */
  179 };
 180
  181 typedef struct {                  /* effective-address description handed to process_ea() */
  182     enum ea_type type;            /* what kind of EA is this? */
  183     int sib_present;              /* is a SIB byte necessary? */
  184     int bytes;                    /* # of bytes of offset needed */
  185     int size;                     /* precomputed total: sib_present + bytes + 1 */
  186     uint8_t modrm, sib, rex, rip; /* the bytes themselves */
  187 } ea;
 188
  189 static uint32_t cpu;            /* cpu level received from nasm.c */
  190 static efunc errfunc;           /* error callback; set by assemble() and insn_size() */
  191 static struct ofmt *outfmt;     /* output format driver; set by assemble() */
  192 static ListGen *list;           /* listing generator; set by assemble() */
  193 /* Forward declarations of file-local (static) helpers. */
  194 static int64_t calcsize(int32_t, int64_t, int, insn *, const uint8_t *);
  195 static void gencode(int32_t segment, int64_t offset, int bits,
  196                     insn * ins, const struct itemplate *temp,
  197                     int64_t insn_end);
  198 static enum match_result find_match(const struct itemplate **tempp,
  199                                     insn *instruction,
  200                                     int32_t segment, int64_t offset, int bits);
  201 static enum match_result matches(const struct itemplate *, insn *, int bits);
  202 static opflags_t regflag(const operand *);
  203 static int32_t regval(const operand *);
  204 static int rexflags(int, opflags_t, int);
  205 static int op_rexflags(const operand *, int);
  206 static void add_asp(insn *, int);
  207
  208 static enum ea_type process_ea(operand *, ea *, int, int, int, opflags_t);
 209
 210 static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
 211 {
 212     return ins->prefixes[pos] == prefix;
 213 }
 214
 215 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 216 {
 217     if (ins->prefixes[pos])
 218         errfunc(ERR_NONFATAL, "invalid %s prefix",
 219                 prefix_name(ins->prefixes[pos]));
 220 }
 221
 222 static const char *size_name(int size)
 223 {
 224     switch (size) {
 225     case 1:
 226         return "byte";
 227     case 2:
 228         return "word";
 229     case 4:
 230         return "dword";
 231     case 8:
 232         return "qword";
 233     case 10:
 234         return "tword";
 235     case 16:
 236         return "oword";
 237     case 32:
 238         return "yword";
 239     default:
 240         return "???";
 241     }
 242 }
 243
 244 static void warn_overflow(int pass, int size)
 245 {
 246     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 247             "%s data exceeds bounds", size_name(size));
 248 }
 249
 250 static void warn_overflow_const(int64_t data, int size)
 251 {
 252     if (overflow_general(data, size))
 253         warn_overflow(ERR_PASS1, size);
 254 }
 255
 256 static void warn_overflow_opd(const struct operand *o, int size)
 257 {
 258     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 259         if (overflow_general(o->offset, size))
 260             warn_overflow(ERR_PASS2, size);
 261     }
 262 }
 263
 264 /*
 265  * This routine wrappers the real output format's output routine,
 266  * in order to pass a copy of the data off to the listing file
 267  * generator at the same time.
 268  */
 269 static void out(int64_t offset, int32_t segto, const void *data,
 270                 enum out_type type, uint64_t size,
 271                 int32_t segment, int32_t wrt)
 272 {
 273     static int32_t lineno = 0;     /* static!!! */
 274     static char *lnfname = NULL;
 275     uint8_t p[8];
 276
 277     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 278         /*
 279          * This is a non-relocated address, and we're going to
 280          * convert it into RAWDATA format.
 281          */
 282         uint8_t *q = p;
 283
 284         if (size > 8) {
 285             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 286             return;
 287         }
 288
 289         WRITEADDR(q, *(int64_t *)data, size);
 290         data = p;
 291         type = OUT_RAWDATA;
 292     }
 293
 294     list->output(offset, data, type, size);
 295
 296     /*
 297      * this call to src_get determines when we call the
 298      * debug-format-specific "linenum" function
 299      * it updates lineno and lnfname to the current values
 300      * returning 0 if "same as last time", -2 if lnfname
 301      * changed, and the amount by which lineno changed,
 302      * if it did. thus, these variables must be static
 303      */
 304
 305     if (src_get(&lineno, &lnfname))
 306         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 307
 308     outfmt->output(segto, data, type, size, segment, wrt);
 309 }
 310
 311 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 312                      insn * ins, const uint8_t *code)
 313 {
 314     int64_t isize;
 315     uint8_t c = code[0];
 316
 317     if ((c != 0370 && c != 0371) || (ins->oprs[0].type & STRICT))
 318         return false;
 319     if (!optimizing)
 320         return false;
 321     if (optimizing < 0 && c == 0371)
 322         return false;
 323
 324     isize = calcsize(segment, offset, bits, ins, code);
 325
 326     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 327         /* Be optimistic in pass 1 */
 328         return true;
 329
 330     if (ins->oprs[0].segment != segment)
 331         return false;
 332
 333     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 334     return (isize >= -128 && isize <= 127); /* is it byte size? */
 335 }
 336
 337 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 338                  insn * instruction, struct ofmt *output, efunc error,
 339                  ListGen * listgen)
 340 {
 341     const struct itemplate *temp;
 342     int j;
 343     enum match_result m;
 344     int64_t insn_end;
 345     int32_t itimes;
 346     int64_t start = offset;
 347     int64_t wsize;              /* size for DB etc. */
 348
 349     errfunc = error;            /* to pass to other functions */
 350     cpu = cp;
 351     outfmt = output;            /* likewise */
 352     list = listgen;             /* and again */
 353
 354     wsize = idata_bytes(instruction->opcode);
 355     if (wsize == -1)
 356         return 0;
 357
 358     if (wsize) {
 359         extop *e;
 360         int32_t t = instruction->times;
 361         if (t < 0)
 362             errfunc(ERR_PANIC,
 363                     "instruction->times < 0 (%ld) in assemble()", t);
 364
 365         while (t--) {           /* repeat TIMES times */
 366             list_for_each(e, instruction->eops) {
 367                 if (e->type == EOT_DB_NUMBER) {
 368                     if (wsize > 8) {
 369                         errfunc(ERR_NONFATAL,
 370                                 "integer supplied to a DT, DO or DY"
 371                                 " instruction");
 372                     } else {
 373                         out(offset, segment, &e->offset,
 374                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 375                         offset += wsize;
 376                     }
 377                 } else if (e->type == EOT_DB_STRING ||
 378                            e->type == EOT_DB_STRING_FREE) {
 379                     int align;
 380
 381                     out(offset, segment, e->stringval,
 382                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 383                     align = e->stringlen % wsize;
 384
 385                     if (align) {
 386                         align = wsize - align;
 387                         out(offset, segment, zero_buffer,
 388                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 389                     }
 390                     offset += e->stringlen + align;
 391                 }
 392             }
 393             if (t > 0 && t == instruction->times - 1) {
 394                 /*
 395                  * Dummy call to list->output to give the offset to the
 396                  * listing module.
 397                  */
 398                 list->output(offset, NULL, OUT_RAWDATA, 0);
 399                 list->uplevel(LIST_TIMES);
 400             }
 401         }
 402         if (instruction->times > 1)
 403             list->downlevel(LIST_TIMES);
 404         return offset - start;
 405     }
 406
 407     if (instruction->opcode == I_INCBIN) {
 408         const char *fname = instruction->eops->stringval;
 409         FILE *fp;
 410
 411         fp = fopen(fname, "rb");
 412         if (!fp) {
 413             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 414                   fname);
 415         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 416             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 417                   fname);
 418         } else {
 419             static char buf[4096];
 420             size_t t = instruction->times;
 421             size_t base = 0;
 422             size_t len;
 423
 424             len = ftell(fp);
 425             if (instruction->eops->next) {
 426                 base = instruction->eops->next->offset;
 427                 len -= base;
 428                 if (instruction->eops->next->next &&
 429                     len > (size_t)instruction->eops->next->next->offset)
 430                     len = (size_t)instruction->eops->next->next->offset;
 431             }
 432             /*
 433              * Dummy call to list->output to give the offset to the
 434              * listing module.
 435              */
 436             list->output(offset, NULL, OUT_RAWDATA, 0);
 437             list->uplevel(LIST_INCBIN);
 438             while (t--) {
 439                 size_t l;
 440
 441                 fseek(fp, base, SEEK_SET);
 442                 l = len;
 443                 while (l > 0) {
 444                     int32_t m;
 445                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 446                     if (!m) {
 447                         /*
 448                          * This shouldn't happen unless the file
 449                          * actually changes while we are reading
 450                          * it.
 451                          */
 452                         error(ERR_NONFATAL,
 453                               "`incbin': unexpected EOF while"
 454                               " reading file `%s'", fname);
 455                         t = 0;  /* Try to exit cleanly */
 456                         break;
 457                     }
 458                     out(offset, segment, buf, OUT_RAWDATA, m,
 459                         NO_SEG, NO_SEG);
 460                     l -= m;
 461                 }
 462             }
 463             list->downlevel(LIST_INCBIN);
 464             if (instruction->times > 1) {
 465                 /*
 466                  * Dummy call to list->output to give the offset to the
 467                  * listing module.
 468                  */
 469                 list->output(offset, NULL, OUT_RAWDATA, 0);
 470                 list->uplevel(LIST_TIMES);
 471                 list->downlevel(LIST_TIMES);
 472             }
 473             fclose(fp);
 474             return instruction->times * len;
 475         }
 476         return 0;               /* if we're here, there's an error */
 477     }
 478
 479     /* Check to see if we need an address-size prefix */
 480     add_asp(instruction, bits);
 481
 482     m = find_match(&temp, instruction, segment, offset, bits);
 483
 484     if (m == MOK_GOOD) {
 485         /* Matches! */
 486         int64_t insn_size = calcsize(segment, offset, bits,
 487                                      instruction, temp->code);
 488         itimes = instruction->times;
 489         if (insn_size < 0)  /* shouldn't be, on pass two */
 490             error(ERR_PANIC, "errors made it through from pass one");
 491         else
 492             while (itimes--) {
 493                 for (j = 0; j < MAXPREFIX; j++) {
 494                     uint8_t c = 0;
 495                     switch (instruction->prefixes[j]) {
 496                     case P_WAIT:
 497                         c = 0x9B;
 498                         break;
 499                     case P_LOCK:
 500                         c = 0xF0;
 501                         break;
 502                     case P_REPNE:
 503                     case P_REPNZ:
 504                         c = 0xF2;
 505                         break;
 506                     case P_REPE:
 507                     case P_REPZ:
 508                     case P_REP:
 509                         c = 0xF3;
 510                         break;
 511                     case R_CS:
 512                         if (bits == 64) {
 513                             error(ERR_WARNING | ERR_PASS2,
 514                                   "cs segment base generated, but will be ignored in 64-bit mode");
 515                         }
 516                         c = 0x2E;
 517                         break;
 518                     case R_DS:
 519                         if (bits == 64) {
 520                             error(ERR_WARNING | ERR_PASS2,
 521                                   "ds segment base generated, but will be ignored in 64-bit mode");
 522                         }
 523                         c = 0x3E;
 524                         break;
 525                     case R_ES:
 526                         if (bits == 64) {
 527                             error(ERR_WARNING | ERR_PASS2,
 528                                   "es segment base generated, but will be ignored in 64-bit mode");
 529                         }
 530                         c = 0x26;
 531                         break;
 532                     case R_FS:
 533                         c = 0x64;
 534                         break;
 535                     case R_GS:
 536                         c = 0x65;
 537                         break;
 538                     case R_SS:
 539                         if (bits == 64) {
 540                             error(ERR_WARNING | ERR_PASS2,
 541                                   "ss segment base generated, but will be ignored in 64-bit mode");
 542                         }
 543                         c = 0x36;
 544                         break;
 545                     case R_SEGR6:
 546                     case R_SEGR7:
 547                         error(ERR_NONFATAL,
 548                               "segr6 and segr7 cannot be used as prefixes");
 549                         break;
 550                     case P_A16:
 551                         if (bits == 64) {
 552                             error(ERR_NONFATAL,
 553                                   "16-bit addressing is not supported "
 554                                   "in 64-bit mode");
 555                         } else if (bits != 16)
 556                             c = 0x67;
 557                         break;
 558                     case P_A32:
 559                         if (bits != 32)
 560                             c = 0x67;
 561                         break;
 562                     case P_A64:
 563                         if (bits != 64) {
 564                             error(ERR_NONFATAL,
 565                                   "64-bit addressing is only supported "
 566                                   "in 64-bit mode");
 567                         }
 568                         break;
 569                     case P_ASP:
 570                         c = 0x67;
 571                         break;
 572                     case P_O16:
 573                         if (bits != 16)
 574                             c = 0x66;
 575                         break;
 576                     case P_O32:
 577                         if (bits == 16)
 578                             c = 0x66;
 579                         break;
 580                     case P_O64:
 581                         /* REX.W */
 582                         break;
 583                     case P_OSP:
 584                         c = 0x66;
 585                         break;
 586                     case P_none:
 587                         break;
 588                     default:
 589                         error(ERR_PANIC, "invalid instruction prefix");
 590                     }
 591                     if (c != 0) {
 592                         out(offset, segment, &c, OUT_RAWDATA, 1,
 593                             NO_SEG, NO_SEG);
 594                         offset++;
 595                     }
 596                 }
 597                 insn_end = offset + insn_size;
 598                 gencode(segment, offset, bits, instruction,
 599                         temp, insn_end);
 600                 offset += insn_size;
 601                 if (itimes > 0 && itimes == instruction->times - 1) {
 602                     /*
 603                      * Dummy call to list->output to give the offset to the
 604                      * listing module.
 605                      */
 606                     list->output(offset, NULL, OUT_RAWDATA, 0);
 607                     list->uplevel(LIST_TIMES);
 608                 }
 609             }
 610         if (instruction->times > 1)
 611             list->downlevel(LIST_TIMES);
 612         return offset - start;
 613     } else {
 614         /* No match */
 615         switch (m) {
 616         case MERR_OPSIZEMISSING:
 617             error(ERR_NONFATAL, "operation size not specified");
 618             break;
 619         case MERR_OPSIZEMISMATCH:
 620             error(ERR_NONFATAL, "mismatch in operand sizes");
 621             break;
 622         case MERR_BADCPU:
 623             error(ERR_NONFATAL, "no instruction for this cpu level");
 624             break;
 625         case MERR_BADMODE:
 626             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 627                   bits);
 628             break;
 629         default:
 630             error(ERR_NONFATAL,
 631                   "invalid combination of opcode and operands");
 632             break;
 633         }
 634     }
 635     return 0;
 636 }
 637
 638 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 639                   insn * instruction, efunc error)
 640 {
 641     const struct itemplate *temp;
 642     enum match_result m;
 643
 644     errfunc = error;            /* to pass to other functions */
 645     cpu = cp;
 646
 647     if (instruction->opcode == I_none)
 648         return 0;
 649
 650     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 651         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 652         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 653         instruction->opcode == I_DY) {
 654         extop *e;
 655         int32_t isize, osize, wsize;
 656
 657         isize = 0;
 658         wsize = idata_bytes(instruction->opcode);
 659
 660         list_for_each(e, instruction->eops) {
 661             int32_t align;
 662
 663             osize = 0;
 664             if (e->type == EOT_DB_NUMBER) {
 665                 osize = 1;
 666                 warn_overflow_const(e->offset, wsize);
 667             } else if (e->type == EOT_DB_STRING ||
 668                        e->type == EOT_DB_STRING_FREE)
 669                 osize = e->stringlen;
 670
 671             align = (-osize) % wsize;
 672             if (align < 0)
 673                 align += wsize;
 674             isize += osize + align;
 675         }
 676         return isize * instruction->times;
 677     }
 678
 679     if (instruction->opcode == I_INCBIN) {
 680         const char *fname = instruction->eops->stringval;
 681         FILE *fp;
 682         int64_t val = 0;
 683         size_t len;
 684
 685         fp = fopen(fname, "rb");
 686         if (!fp)
 687             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 688                   fname);
 689         else if (fseek(fp, 0L, SEEK_END) < 0)
 690             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 691                   fname);
 692         else {
 693             len = ftell(fp);
 694             if (instruction->eops->next) {
 695                 len -= instruction->eops->next->offset;
 696                 if (instruction->eops->next->next &&
 697                     len > (size_t)instruction->eops->next->next->offset) {
 698                     len = (size_t)instruction->eops->next->next->offset;
 699                 }
 700             }
 701             val = instruction->times * len;
 702         }
 703         if (fp)
 704             fclose(fp);
 705         return val;
 706     }
 707
 708     /* Check to see if we need an address-size prefix */
 709     add_asp(instruction, bits);
 710
 711     m = find_match(&temp, instruction, segment, offset, bits);
 712     if (m == MOK_GOOD) {
 713         /* we've matched an instruction. */
 714         int64_t isize;
 715         const uint8_t *codes = temp->code;
 716         int j;
 717
 718         isize = calcsize(segment, offset, bits, instruction, codes);
 719         if (isize < 0)
 720             return -1;
 721         for (j = 0; j < MAXPREFIX; j++) {
 722             switch (instruction->prefixes[j]) {
 723             case P_A16:
 724                 if (bits != 16)
 725                     isize++;
 726                 break;
 727             case P_A32:
 728                 if (bits != 32)
 729                     isize++;
 730                 break;
 731             case P_O16:
 732                 if (bits != 16)
 733                     isize++;
 734                 break;
 735             case P_O32:
 736                 if (bits == 16)
 737                     isize++;
 738                 break;
 739             case P_A64:
 740             case P_O64:
 741             case P_none:
 742                 break;
 743             default:
 744                 isize++;
 745                 break;
 746             }
 747         }
 748         return isize * instruction->times;
 749     } else {
 750         return -1;                  /* didn't match any instruction */
 751     }
 752 }
 753
 754 static bool possible_sbyte(operand *o)
 755 {
 756     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 757         !(o->opflags & OPFLAG_UNKNOWN) &&
 758         optimizing >= 0 && !(o->type & STRICT);
 759 }
 760
 761 /* check that opn[op]  is a signed byte of size 16 or 32 */
 762 static bool is_sbyte16(operand *o)
 763 {
 764     int16_t v;
 765
 766     if (!possible_sbyte(o))
 767         return false;
 768
 769     v = o->offset;
 770     return v >= -128 && v <= 127;
 771 }
 772
 773 static bool is_sbyte32(operand *o)
 774 {
 775     int32_t v;
 776
 777     if (!possible_sbyte(o))
 778         return false;
 779
 780     v = o->offset;
 781     return v >= -128 && v <= 127;
 782 }
 783
 784 /* Common construct */
 785 #define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 786
/*
 * Compute the encoded length, in bytes, of instruction `ins' by
 * interpreting the zero-terminated template byte-code string `codes'
 * for `bits'-bit mode.
 *
 * Besides counting bytes this pass records state on `ins':
 *   - ins->rex is reset and then accumulates REX_* flags;
 *   - ins->vexreg, ins->vex_cm and ins->vex_wlp are filled in by the
 *     0260..0263 and 0270 codes;
 *   - empty ins->prefixes[] slots (PPS_OSIZE, PPS_LREP, PPS_WAIT) may be
 *     given a default by the 0320, 0321, 0336, 0337 and 0341 codes.
 *
 * `segment' and `offset' are accepted but not used.
 *
 * Returns the length, or -1 when the template cannot be encoded: a mode
 * restriction code fails (0310, 0313), process_ea() does not yield the
 * expected EA type, a high register conflicts with REX/VEX, or the
 * accumulated REX flags are not valid for a non-64-bit `bits'.
 */
static int64_t calcsize(int32_t segment, int64_t offset, int bits,
                        insn * ins, const uint8_t *codes)
{
    int64_t length = 0;
    uint8_t c;
    /* REX flags allowed to survive the loop; 0323 removes REX_W */
    int rex_mask = ~0;
    int op1, op2;
    struct operand *opx;
    /* Set by codes 05..07; extends the operand indices of the next code */
    uint8_t opex = 0;
    /* EA type that process_ea() must return; changed by 0374/0375 */
    enum ea_type eat;

    ins->rex = 0;               /* Ensure REX is reset */
    eat = EA_SCALAR;            /* Expect a scalar EA */

    if (ins->prefixes[PPS_OSIZE] == P_O64)
        ins->rex |= REX_W;

    (void)segment;              /* Don't warn that this parameter is unused */
    (void)offset;               /* Don't warn that this parameter is unused */

    while (*codes) {
        c = *codes++;
        /*
         * Operand indices: bits 0-1 of c select op1 and bits 3-4 select
         * op2; bit 0 / bit 1 of a preceding 05..07 code add 4 to op1 / op2.
         */
        op1 = (c & 3) + ((opex & 1) << 2);
        op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
        opx = &ins->oprs[op1];
        opex = 0;               /* For the next iteration */

        switch (c) {
        /* c literal bytes follow in the template: skip and count them */
        case 01:
        case 02:
        case 03:
        case 04:
            codes += c, length += c;
            break;

        /* Operand-index extension for the next code; emits nothing */
        case 05:
        case 06:
        case 07:
            opex = c;
            break;

        /*
         * One template byte follows (skipped) and one byte is counted;
         * REX flags derived from operand opx are accumulated.
         */
        case4(010):
            ins->rex |=
                op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
            codes++, length++;
            break;

        /* One byte each */
        case4(014):
        case4(020):
        case4(024):
            length++;
            break;

        /* Two bytes */
        case4(030):
            length += 2;
            break;

        /*
         * Two or four bytes: taken from the operand's size flag when any
         * of BITS16/BITS32/BITS64 is set (BITS16 -> 2, otherwise 4), else
         * from the current mode (16 -> 2, otherwise 4).
         */
        case4(034):
            if (opx->type & (BITS16 | BITS32 | BITS64))
                length += (opx->type & BITS16) ? 2 : 4;
            else
                length += (bits == 16) ? 2 : 4;
            break;

        /* Four bytes */
        case4(040):
            length += 4;
            break;

        /* As many bytes as the address size (addr_size is in bits) */
        case4(044):
            length += ins->addr_size >> 3;
            break;

        /* One byte */
        case4(050):
            length++;
            break;

        case4(054):
            length += 8; /* MOV reg64/imm */
            break;

        /* Two bytes */
        case4(060):
            length += 2;
            break;

        /* Two or four bytes, chosen exactly as for 034..037 above */
        case4(064):
            if (opx->type & (BITS16 | BITS32 | BITS64))
                length += (opx->type & BITS16) ? 2 : 4;
            else
                length += (bits == 16) ? 2 : 4;
            break;

        /* Four bytes */
        case4(070):
            length += 4;
            break;

        /* Two bytes */
        case4(074):
            length += 2;
            break;

        /* One byte if the operand qualifies per is_sbyte16(), else two */
        case4(0140):
            length += is_sbyte16(opx) ? 1 : 2;
            break;

        /* One template byte follows (skipped); one byte is counted */
        case4(0144):
            codes++;
            length++;
            break;

        /* One byte if the operand qualifies per is_sbyte32(), else four */
        case4(0150):
            length += is_sbyte32(opx) ? 1 : 4;
            break;

        /* One template byte follows (skipped); one byte is counted */
        case4(0154):
            codes++;
            length++;
            break;

        /* One template byte follows (skipped); one byte is counted */
        case 0172:
        case 0173:
        case 0174:
            codes++;
            length++;
            break;

        /* One byte if the operand qualifies per is_sbyte32(), else four */
        case4(0250):
            length += is_sbyte32(opx) ? 1 : 4;
            break;

        /* Four bytes */
        case4(0254):
            length += 4;
            break;

        /*
         * VEX template: flag REX_V, take vexreg from operand opx and read
         * the next two template bytes into vex_cm and vex_wlp.  The bytes
         * of the prefix itself are added after the loop.
         */
        case4(0260):
            ins->rex |= REX_V;
            ins->vexreg = regval(opx);
            ins->vex_cm = *codes++;
            ins->vex_wlp = *codes++;
            break;

        /* Same as 0260..0263 but with vexreg fixed at 0 */
        case 0270:
            ins->rex |= REX_V;
            ins->vexreg = 0;
            ins->vex_cm = *codes++;
            ins->vex_wlp = *codes++;
            break;

        /* One byte */
        case4(0274):
            length++;
            break;

        /* No effect on the length */
        case4(0300):
            break;

        /*
         * Not encodable when bits == 64.  Otherwise counts one byte unless
         * bits == 16 or has_prefix(ins, PPS_ASIZE, P_A16) is true (the
         * boolean expression contributes 0 or 1).
         */
        case 0310:
            if (bits == 64)
                return -1;
            length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
            break;

        /*
         * Counts one byte unless bits == 32 or
         * has_prefix(ins, PPS_ASIZE, P_A32) is true.
         */
        case 0311:
            length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
            break;

        /* No effect on the length */
        case 0312:
            break;

        /* Only encodable when bits == 64 and no P_A16/P_A32 is present */
        case 0313:
            if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
                has_prefix(ins, PPS_ASIZE, P_A32))
                return -1;
            break;

        /* No effect on the length */
        case4(0314):
            break;

        /*
         * Default the PPS_OSIZE slot to P_O16: nothing to do if it already
         * holds P_O16, warn if it holds some other prefix, otherwise set it.
         */
        case 0320:
        {
            enum prefixes pfx = ins->prefixes[PPS_OSIZE];
            if (pfx == P_O16)
                break;
            if (pfx != P_none)
                errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
            else
                ins->prefixes[PPS_OSIZE] = P_O16;
            break;
        }

        /* Same as 0320, but for P_O32 */
        case 0321:
        {
            enum prefixes pfx = ins->prefixes[PPS_OSIZE];
            if (pfx == P_O32)
                break;
            if (pfx != P_none)
                errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
            else
                ins->prefixes[PPS_OSIZE] = P_O32;
            break;
        }

        /* No effect on the length */
        case 0322:
            break;

        /* REX_W will be stripped from ins->rex once the loop is done */
        case 0323:
            rex_mask &= ~REX_W;
            break;

        /* Request REX_W */
        case 0324:
            ins->rex |= REX_W;
            break;

        /* Set REX_NH; it is checked against REX_H after the loop */
        case 0325:
            ins->rex |= REX_NH;
            break;

        /* One template byte follows (skipped); one byte is counted */
        case 0330:
            codes++, length++;
            break;

        /* No effect on the length */
        case 0331:
            break;

        /* One byte */
        case 0332:
        case 0333:
            length++;
            break;

        /* Set REX_L; see the "LOCK-as-REX.R" branch after the loop */
        case 0334:
            ins->rex |= REX_L;
            break;

        /* No effect on the length */
        case 0335:
            break;

        /* Default an empty PPS_LREP slot to P_REP */
        case 0336:
            if (!ins->prefixes[PPS_LREP])
                ins->prefixes[PPS_LREP] = P_REP;
            break;

        /* Default an empty PPS_LREP slot to P_REPNE */
        case 0337:
            if (!ins->prefixes[PPS_LREP])
                ins->prefixes[PPS_LREP] = P_REPNE;
            break;

        /*
         * The length grows by the offset of operand 0, which must have no
         * segment; otherwise an error is reported and nothing is added.
         */
        case 0340:
            if (ins->oprs[0].segment != NO_SEG)
                errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
                        " quantity of BSS space");
            else
                length += ins->oprs[0].offset;
            break;

        /* Default an empty PPS_WAIT slot to P_WAIT */
        case 0341:
            if (!ins->prefixes[PPS_WAIT])
                ins->prefixes[PPS_WAIT] = P_WAIT;
            break;

        /* One byte (covers 0344..0347) */
        case4(0344):
            length++;
            break;

        /* No effect on the length */
        case 0360:
            break;

        /* One byte */
        case 0361:
        case 0362:
        case 0363:
            length++;
            break;

        /* No effect on the length */
        case 0364:
        case 0365:
            break;

        /* One byte */
        case 0366:
        case 0367:
            length++;
            break;

        /* No effect on the length */
        case 0370:
        case 0371:
        case 0372:
            break;

        /* One byte */
        case 0373:
            length++;
            break;

        /* The effective address that follows must be of type EA_XMMVSIB */
        case 0374:
            eat = EA_XMMVSIB;
            break;

        /* The effective address that follows must be of type EA_YMMVSIB */
        case 0375:
            eat = EA_YMMVSIB;
            break;

        /*
         * Effective address.  Operand op2 (opy) is the EA itself.  For
         * codes up to 0177 the register field and its flags come from
         * operand opx; for the 02xx codes the register field is the
         * constant held in the low three bits of the code.
         */
        case4(0100):
        case4(0110):
        case4(0120):
        case4(0130):
        case4(0200):
        case4(0204):
        case4(0210):
        case4(0214):
        case4(0220):
        case4(0224):
        case4(0230):
        case4(0234):
            {
                ea ea_data;
                int rfield;
                opflags_t rflags;
                struct operand *opy = &ins->oprs[op2];

                ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */

                if (c <= 0177) {
                    /* pick rfield from operand b (opx) */
                    rflags = regflag(opx);
                    rfield = nasm_regvals[opx->basereg];
                } else {
                    rflags = 0;
                    rfield = c & 7;
                }
                /* The EA type produced must match the one expected (eat) */
                if (process_ea(opy, &ea_data, bits,ins->addr_size,
                               rfield, rflags) != eat) {
                    errfunc(ERR_NONFATAL, "invalid effective address");
                    return -1;
                } else {
                    ins->rex |= ea_data.rex;
                    length += ea_data.size;
                }
            }
            break;

        default:
            errfunc(ERR_PANIC, "internal instruction table corrupt"
                    ": instruction code \\%o (0x%02X) given", c, c);
            break;
        }
    }

    /* Drop any REX flags that the template excluded via rex_mask */
    ins->rex &= rex_mask;

    /* REX_NH (set by code 0325) is incompatible with REX_H */
    if (ins->rex & REX_NH) {
        if (ins->rex & REX_H) {
            errfunc(ERR_NONFATAL, "instruction cannot use high registers");
            return -1;
        }
        ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
    }

    if (ins->rex & REX_V) {
        /* Flags that are rejected below when bits != 64 */
        int bad32 = REX_R|REX_W|REX_X|REX_B;

        if (ins->rex & REX_H) {
            errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
            return -1;
        }
        /*
         * Bits 060 of vex_wlp decide how REX_W is treated: 000 and 040
         * clear it, 020 forces it on (and tolerates it outside 64-bit
         * mode), 060 leaves whatever was accumulated.
         */
        switch (ins->vex_wlp & 060) {
        case 000:
        case 040:
            ins->rex &= ~REX_W;
            break;
        case 020:
            ins->rex |= REX_W;
            bad32 &= ~REX_W;
            break;
        case 060:
            /* Follow REX_W */
            break;
        }

        if (bits != 64 && ((ins->rex & bad32) || ins->vexreg > 7)) {
            errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
            return -1;
        }
        /*
         * Three prefix bytes when vex_cm != 1 or any of REX_W/REX_X/REX_B
         * is set, otherwise two; gencode() tests the same condition when
         * it writes the prefix.
         */
        if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
            length += 3;
        else
            length += 2;
    } else if (ins->rex & REX_REAL) {
        /* Non-VEX instruction with at least one REX_REAL flag set */
        if (ins->rex & REX_H) {
            errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
            return -1;
        } else if (bits == 64) {
            length++;
        } else if ((ins->rex & REX_L) &&
                   !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
                   cpu >= IF_X86_64) {
            /* LOCK-as-REX.R */
            assert_no_prefix(ins, PPS_LREP);
            length++;
        } else {
            errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
            return -1;
        }
    }

    return length;
}
1187
/*
 * Emit a pending REX prefix byte.
 *
 * Expands in a scope that provides `ins', `bits', `offset' and `segment'
 * (as gencode() does).  When the instruction is not a VEX one (REX_V
 * clear), at least one REX_REAL flag is set and bits == 64, one byte
 * holding (ins->rex & REX_REAL) | REX_P is written through out() and
 * `offset' advances by one.  ins->rex is then cleared, so a later
 * expansion emits nothing unless the flags are set again.
 *
 * The byte is staged in a local uint8_t instead of passing &ins->rex, so
 * exactly that byte is written whatever the width or byte order of
 * ins->rex.  The do { } while (0) wrapper makes "EMIT_REX();" a single
 * statement that is safe in an unbraced if/else.
 */
#define EMIT_REX()                                                          \
    do {                                                                    \
        if (!(ins->rex & REX_V) && (ins->rex & REX_REAL) && (bits == 64)) { \
            uint8_t emit_rex_byte = (ins->rex & REX_REAL) | REX_P;          \
            out(offset, segment, &emit_rex_byte, OUT_RAWDATA, 1,            \
                NO_SEG, NO_SEG);                                            \
            ins->rex = 0;                                                   \
            offset += 1;                                                    \
        }                                                                   \
    } while (0)
1195
1196 static void gencode(int32_t segment, int64_t offset, int bits,
1197                     insn * ins, const struct itemplate *temp,
1198                     int64_t insn_end)
1199 {
1200     static const char condval[] = {   /* conditional opcodes */
1201         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1202         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1203         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1204     };
1205     uint8_t c;
1206     uint8_t bytes[4];
1207     int64_t size;
1208     int64_t data;
1209     int op1, op2;
1210     struct operand *opx;
1211     const uint8_t *codes = temp->code;
1212     uint8_t opex = 0;
1213     enum ea_type eat = EA_SCALAR;
1214
1215     while (*codes) {
1216         c = *codes++;
1217         op1 = (c & 3) + ((opex & 1) << 2);
1218         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1219         opx = &ins->oprs[op1];
1220         opex = 0;                /* For the next iteration */
1221
1222         switch (c) {
1223         case 01:
1224         case 02:
1225         case 03:
1226         case 04:
1227             EMIT_REX();
1228             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1229             codes += c;
1230             offset += c;
1231             break;
1232
1233         case 05:
1234         case 06:
1235         case 07:
1236             opex = c;
1237             break;
1238
1239         case4(010):
1240             EMIT_REX();
1241             bytes[0] = *codes++ + (regval(opx) & 7);
1242             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1243             offset += 1;
1244             break;
1245
1246         case4(014):
1247             /*
1248              * The test for BITS8 and SBYTE here is intended to avoid
1249              * warning on optimizer actions due to SBYTE, while still
1250              * warn on explicit BYTE directives.  Also warn, obviously,
1251              * if the optimizer isn't enabled.
1252              */
1253             if (((opx->type & BITS8) ||
1254                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1255                 (opx->offset < -128 || opx->offset > 127)) {
1256                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1257                         "signed byte value exceeds bounds");
1258             }
1259             if (opx->segment != NO_SEG) {
1260                 data = opx->offset;
1261                 out(offset, segment, &data, OUT_ADDRESS, 1,
1262                     opx->segment, opx->wrt);
1263             } else {
1264                 bytes[0] = opx->offset;
1265                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1266                     NO_SEG);
1267             }
1268             offset += 1;
1269             break;
1270
1271         case4(020):
1272             if (opx->offset < -256 || opx->offset > 255) {
1273                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1274                         "byte value exceeds bounds");
1275             }
1276             if (opx->segment != NO_SEG) {
1277                 data = opx->offset;
1278                 out(offset, segment, &data, OUT_ADDRESS, 1,
1279                     opx->segment, opx->wrt);
1280             } else {
1281                 bytes[0] = opx->offset;
1282                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1283                     NO_SEG);
1284             }
1285             offset += 1;
1286             break;
1287
1288         case4(024):
1289             if (opx->offset < 0 || opx->offset > 255)
1290                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1291                         "unsigned byte value exceeds bounds");
1292             if (opx->segment != NO_SEG) {
1293                 data = opx->offset;
1294                 out(offset, segment, &data, OUT_ADDRESS, 1,
1295                     opx->segment, opx->wrt);
1296             } else {
1297                 bytes[0] = opx->offset;
1298                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1299                     NO_SEG);
1300             }
1301             offset += 1;
1302             break;
1303
1304         case4(030):
1305             warn_overflow_opd(opx, 2);
1306             data = opx->offset;
1307             out(offset, segment, &data, OUT_ADDRESS, 2,
1308                 opx->segment, opx->wrt);
1309             offset += 2;
1310             break;
1311
1312         case4(034):
1313             if (opx->type & (BITS16 | BITS32))
1314                 size = (opx->type & BITS16) ? 2 : 4;
1315             else
1316                 size = (bits == 16) ? 2 : 4;
1317             warn_overflow_opd(opx, size);
1318             data = opx->offset;
1319             out(offset, segment, &data, OUT_ADDRESS, size,
1320                 opx->segment, opx->wrt);
1321             offset += size;
1322             break;
1323
1324         case4(040):
1325             warn_overflow_opd(opx, 4);
1326             data = opx->offset;
1327             out(offset, segment, &data, OUT_ADDRESS, 4,
1328                 opx->segment, opx->wrt);
1329             offset += 4;
1330             break;
1331
1332         case4(044):
1333             data = opx->offset;
1334             size = ins->addr_size >> 3;
1335             warn_overflow_opd(opx, size);
1336             out(offset, segment, &data, OUT_ADDRESS, size,
1337                 opx->segment, opx->wrt);
1338             offset += size;
1339             break;
1340
1341         case4(050):
1342             if (opx->segment != segment) {
1343                 data = opx->offset;
1344                 out(offset, segment, &data,
1345                     OUT_REL1ADR, insn_end - offset,
1346                     opx->segment, opx->wrt);
1347             } else {
1348                 data = opx->offset - insn_end;
1349                 if (data > 127 || data < -128)
1350                     errfunc(ERR_NONFATAL, "short jump is out of range");
1351                 out(offset, segment, &data,
1352                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1353             }
1354             offset += 1;
1355             break;
1356
1357         case4(054):
1358             data = (int64_t)opx->offset;
1359             out(offset, segment, &data, OUT_ADDRESS, 8,
1360                 opx->segment, opx->wrt);
1361             offset += 8;
1362             break;
1363
1364         case4(060):
1365             if (opx->segment != segment) {
1366                 data = opx->offset;
1367                 out(offset, segment, &data,
1368                     OUT_REL2ADR, insn_end - offset,
1369                     opx->segment, opx->wrt);
1370             } else {
1371                 data = opx->offset - insn_end;
1372                 out(offset, segment, &data,
1373                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1374             }
1375             offset += 2;
1376             break;
1377
1378         case4(064):
1379             if (opx->type & (BITS16 | BITS32 | BITS64))
1380                 size = (opx->type & BITS16) ? 2 : 4;
1381             else
1382                 size = (bits == 16) ? 2 : 4;
1383             if (opx->segment != segment) {
1384                 data = opx->offset;
1385                 out(offset, segment, &data,
1386                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1387                     insn_end - offset, opx->segment, opx->wrt);
1388             } else {
1389                 data = opx->offset - insn_end;
1390                 out(offset, segment, &data,
1391                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1392             }
1393             offset += size;
1394             break;
1395
1396         case4(070):
1397             if (opx->segment != segment) {
1398                 data = opx->offset;
1399                 out(offset, segment, &data,
1400                     OUT_REL4ADR, insn_end - offset,
1401                     opx->segment, opx->wrt);
1402             } else {
1403                 data = opx->offset - insn_end;
1404                 out(offset, segment, &data,
1405                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1406             }
1407             offset += 4;
1408             break;
1409
1410         case4(074):
1411             if (opx->segment == NO_SEG)
1412                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1413                         " relocatable");
1414             data = 0;
1415             out(offset, segment, &data, OUT_ADDRESS, 2,
1416                 outfmt->segbase(1 + opx->segment),
1417                 opx->wrt);
1418             offset += 2;
1419             break;
1420
1421         case4(0140):
1422             data = opx->offset;
1423             warn_overflow_opd(opx, 2);
1424             if (is_sbyte16(opx)) {
1425                 bytes[0] = data;
1426                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1427                     NO_SEG);
1428                 offset++;
1429             } else {
1430                 out(offset, segment, &data, OUT_ADDRESS, 2,
1431                     opx->segment, opx->wrt);
1432                 offset += 2;
1433             }
1434             break;
1435
1436         case4(0144):
1437             EMIT_REX();
1438             bytes[0] = *codes++;
1439             if (is_sbyte16(opx))
1440                 bytes[0] |= 2;  /* s-bit */
1441             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1442             offset++;
1443             break;
1444
1445         case4(0150):
1446             data = opx->offset;
1447             warn_overflow_opd(opx, 4);
1448             if (is_sbyte32(opx)) {
1449                 bytes[0] = data;
1450                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1451                     NO_SEG);
1452                 offset++;
1453             } else {
1454                 out(offset, segment, &data, OUT_ADDRESS, 4,
1455                     opx->segment, opx->wrt);
1456                 offset += 4;
1457             }
1458             break;
1459
1460         case4(0154):
1461             EMIT_REX();
1462             bytes[0] = *codes++;
1463             if (is_sbyte32(opx))
1464                 bytes[0] |= 2;  /* s-bit */
1465             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1466             offset++;
1467             break;
1468
1469         case 0172:
1470             c = *codes++;
1471             opx = &ins->oprs[c >> 3];
1472             bytes[0] = nasm_regvals[opx->basereg] << 4;
1473             opx = &ins->oprs[c & 7];
1474             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1475                 errfunc(ERR_NONFATAL,
1476                         "non-absolute expression not permitted as argument %d",
1477                         c & 7);
1478             } else {
1479                 if (opx->offset & ~15) {
1480                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1481                             "four-bit argument exceeds bounds");
1482                 }
1483                 bytes[0] |= opx->offset & 15;
1484             }
1485             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1486             offset++;
1487             break;
1488
1489         case 0173:
1490             c = *codes++;
1491             opx = &ins->oprs[c >> 4];
1492             bytes[0] = nasm_regvals[opx->basereg] << 4;
1493             bytes[0] |= c & 15;
1494             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1495             offset++;
1496             break;
1497
1498         case 0174:
1499             c = *codes++;
1500             opx = &ins->oprs[c];
1501             bytes[0] = nasm_regvals[opx->basereg] << 4;
1502             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1503             offset++;
1504             break;
1505
1506         case4(0250):
1507             data = opx->offset;
1508             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1509                 (int32_t)data != (int64_t)data) {
1510                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1511                         "signed dword immediate exceeds bounds");
1512             }
1513             if (is_sbyte32(opx)) {
1514                 bytes[0] = data;
1515                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1516                     NO_SEG);
1517                 offset++;
1518             } else {
1519                 out(offset, segment, &data, OUT_ADDRESS, 4,
1520                     opx->segment, opx->wrt);
1521                 offset += 4;
1522             }
1523             break;
1524
1525         case4(0254):
1526             data = opx->offset;
1527             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1528                 (int32_t)data != (int64_t)data) {
1529                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1530                         "signed dword immediate exceeds bounds");
1531             }
1532             out(offset, segment, &data, OUT_ADDRESS, 4,
1533                 opx->segment, opx->wrt);
1534             offset += 4;
1535             break;
1536
1537         case4(0260):
1538         case 0270:
1539             codes += 2;
1540             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1541                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1542                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1543                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1544                     ((~ins->vexreg & 15)<< 3) | (ins->vex_wlp & 07);
1545                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1546                 offset += 3;
1547             } else {
1548                 bytes[0] = 0xc5;
1549                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1550                     ((~ins->vexreg & 15) << 3) | (ins->vex_wlp & 07);
1551                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1552                 offset += 2;
1553             }
1554             break;
1555
1556         case4(0274):
1557         {
1558             uint64_t uv, um;
1559             int s;
1560
1561             if (ins->rex & REX_W)
1562                 s = 64;
1563             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1564                 s = 16;
1565             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1566                 s = 32;
1567             else
1568                 s = bits;
1569
1570             um = (uint64_t)2 << (s-1);
1571             uv = opx->offset;
1572
1573             if (uv > 127 && uv < (uint64_t)-128 &&
1574                 (uv < um-128 || uv > um-1)) {
1575                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1576                         "signed byte value exceeds bounds");
1577             }
1578             if (opx->segment != NO_SEG) {
1579                 data = uv;
1580                 out(offset, segment, &data, OUT_ADDRESS, 1,
1581                     opx->segment, opx->wrt);
1582             } else {
1583                 bytes[0] = uv;
1584                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1585                     NO_SEG);
1586             }
1587             offset += 1;
1588             break;
1589         }
1590
1591         case4(0300):
1592             break;
1593
1594         case 0310:
1595             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1596                 *bytes = 0x67;
1597                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1598                 offset += 1;
1599             } else
1600                 offset += 0;
1601             break;
1602
1603         case 0311:
1604             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1605                 *bytes = 0x67;
1606                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1607                 offset += 1;
1608             } else
1609                 offset += 0;
1610             break;
1611
1612         case 0312:
1613             break;
1614
1615         case 0313:
1616             ins->rex = 0;
1617             break;
1618
1619         case4(0314):
1620             break;
1621
1622         case 0320:
1623         case 0321:
1624             break;
1625
1626         case 0322:
1627         case 0323:
1628             break;
1629
1630         case 0324:
1631             ins->rex |= REX_W;
1632             break;
1633
1634         case 0325:
1635             break;
1636
1637         case 0330:
1638             *bytes = *codes++ ^ condval[ins->condition];
1639             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1640             offset += 1;
1641             break;
1642
1643         case 0331:
1644             break;
1645
1646         case 0332:
1647         case 0333:
1648             *bytes = c - 0332 + 0xF2;
1649             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1650             offset += 1;
1651             break;
1652
1653         case 0334:
1654             if (ins->rex & REX_R) {
1655                 *bytes = 0xF0;
1656                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1657                 offset += 1;
1658             }
1659             ins->rex &= ~(REX_L|REX_R);
1660             break;
1661
1662         case 0335:
1663             break;
1664
1665         case 0336:
1666         case 0337:
1667             break;
1668
1669         case 0340:
1670             if (ins->oprs[0].segment != NO_SEG)
1671                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1672             else {
1673                 int64_t size = ins->oprs[0].offset;
1674                 if (size > 0)
1675                     out(offset, segment, NULL,
1676                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1677                 offset += size;
1678             }
1679             break;
1680
1681         case 0341:
1682             break;
1683
1684         case 0344:
1685         case 0345:
1686             bytes[0] = c & 1;
1687             switch (ins->oprs[0].basereg) {
1688             case R_CS:
1689                 bytes[0] += 0x0E;
1690                 break;
1691             case R_DS:
1692                 bytes[0] += 0x1E;
1693                 break;
1694             case R_ES:
1695                 bytes[0] += 0x06;
1696                 break;
1697             case R_SS:
1698                 bytes[0] += 0x16;
1699                 break;
1700             default:
1701                 errfunc(ERR_PANIC,
1702                         "bizarre 8086 segment register received");
1703             }
1704             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1705             offset++;
1706             break;
1707
1708         case 0346:
1709         case 0347:
1710             bytes[0] = c & 1;
1711             switch (ins->oprs[0].basereg) {
1712             case R_FS:
1713                 bytes[0] += 0xA0;
1714                 break;
1715             case R_GS:
1716                 bytes[0] += 0xA8;
1717                 break;
1718             default:
1719                 errfunc(ERR_PANIC,
1720                         "bizarre 386 segment register received");
1721             }
1722             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1723             offset++;
1724             break;
1725
1726         case 0360:
1727             break;
1728
1729         case 0361:
1730             bytes[0] = 0x66;
1731             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1732             offset += 1;
1733             break;
1734
1735         case 0362:
1736         case 0363:
1737             bytes[0] = c - 0362 + 0xf2;
1738             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1739             offset += 1;
1740             break;
1741
1742         case 0364:
1743         case 0365:
1744             break;
1745
1746         case 0366:
1747         case 0367:
1748             *bytes = c - 0366 + 0x66;
1749             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1750             offset += 1;
1751             break;
1752
1753         case 0370:
1754         case 0371:
1755         case 0372:
1756             break;
1757
1758         case 0373:
1759             *bytes = bits == 16 ? 3 : 5;
1760             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1761             offset += 1;
1762             break;
1763
1764         case 0374:
1765             eat = EA_XMMVSIB;
1766             break;
1767
1768         case 0375:
1769             eat = EA_YMMVSIB;
1770             break;
1771
1772         case4(0100):
1773         case4(0110):
1774         case4(0120):
1775         case4(0130):
1776         case4(0200):
1777         case4(0204):
1778         case4(0210):
1779         case4(0214):
1780         case4(0220):
1781         case4(0224):
1782         case4(0230):
1783         case4(0234):
1784             {
1785                 ea ea_data;
1786                 int rfield;
1787                 opflags_t rflags;
1788                 uint8_t *p;
1789                 int32_t s;
1790                 enum out_type type;
1791                 struct operand *opy = &ins->oprs[op2];
1792
1793                 if (c <= 0177) {
1794                     /* pick rfield from operand b (opx) */
1795                     rflags = regflag(opx);
1796                     rfield = nasm_regvals[opx->basereg];
1797                 } else {
1798                     /* rfield is constant */
1799                     rflags = 0;
1800                     rfield = c & 7;
1801                 }
1802
1803                 if (process_ea(opy, &ea_data, bits, ins->addr_size,
1804                                rfield, rflags) != eat) {
1805                     errfunc(ERR_NONFATAL, "invalid effective address");
1806                 }
1807
1808
1809                 p = bytes;
1810                 *p++ = ea_data.modrm;
1811                 if (ea_data.sib_present)
1812                     *p++ = ea_data.sib;
1813
1814                 s = p - bytes;
1815                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1816
1817                 /*
1818                  * Make sure the address gets the right offset in case
1819                  * the line breaks in the .lst file (BR 1197827)
1820                  */
1821                 offset += s;
1822                 s = 0;
1823
1824                 switch (ea_data.bytes) {
1825                 case 0:
1826                     break;
1827                 case 1:
1828                 case 2:
1829                 case 4:
1830                 case 8:
1831                     data = opy->offset;
1832                     s += ea_data.bytes;
1833                     if (ea_data.rip) {
1834                         if (opy->segment == segment) {
1835                             data -= insn_end;
1836                             if (overflow_signed(data, ea_data.bytes))
1837                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1838                             out(offset, segment, &data, OUT_ADDRESS,
1839                                 ea_data.bytes, NO_SEG, NO_SEG);
1840                         } else {
1841                             /* overflow check in output/linker? */
1842                             out(offset, segment, &data,        OUT_REL4ADR,
1843                                 insn_end - offset, opy->segment, opy->wrt);
1844                         }
1845                     } else {
1846                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1847                             signed_bits(opy->offset, ins->addr_size) !=
1848                             signed_bits(opy->offset, ea_data.bytes * 8))
1849                             warn_overflow(ERR_PASS2, ea_data.bytes);
1850
1851                         type = OUT_ADDRESS;
1852                         out(offset, segment, &data, OUT_ADDRESS,
1853                             ea_data.bytes, opy->segment, opy->wrt);
1854                     }
1855                     break;
1856                 default:
1857                     /* Impossible! */
1858                     errfunc(ERR_PANIC,
1859                             "Invalid amount of bytes (%d) for offset?!",
1860                             ea_data.bytes);
1861                     break;
1862                 }
1863                 offset += s;
1864             }
1865             break;
1866
1867         default:
1868             errfunc(ERR_PANIC, "internal instruction table corrupt"
1869                     ": instruction code \\%o (0x%02X) given", c, c);
1870             break;
1871         }
1872     }
1873 }
1874
1875 static opflags_t regflag(const operand * o)
1876 {
1877     if (!is_register(o->basereg))
1878         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1879     return nasm_reg_flags[o->basereg];
1880 }
1881
1882 static int32_t regval(const operand * o)
1883 {
1884     if (!is_register(o->basereg))
1885         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1886     return nasm_regvals[o->basereg];
1887 }
1888
1889 static int op_rexflags(const operand * o, int mask)
1890 {
1891     opflags_t flags;
1892     int val;
1893
1894     if (!is_register(o->basereg))
1895         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1896
1897     flags = nasm_reg_flags[o->basereg];
1898     val = nasm_regvals[o->basereg];
1899
1900     return rexflags(val, flags, mask);
1901 }
1902
1903 static int rexflags(int val, opflags_t flags, int mask)
1904 {
1905     int rex = 0;
1906
1907     if (val >= 8)
1908         rex |= REX_B|REX_X|REX_R;
1909     if (flags & BITS64)
1910         rex |= REX_W;
1911     if (!(REG_HIGH & ~flags))                   /* AH, CH, DH, BH */
1912         rex |= REX_H;
1913     else if (!(REG8 & ~flags) && val >= 4)      /* SPL, BPL, SIL, DIL */
1914         rex |= REX_P;
1915
1916     return rex & mask;
1917 }
1918
1919 static enum match_result find_match(const struct itemplate **tempp,
1920                                     insn *instruction,
1921                                     int32_t segment, int64_t offset, int bits)
1922 {
1923     const struct itemplate *temp;
1924     enum match_result m, merr;
1925     opflags_t xsizeflags[MAX_OPERANDS];
1926     bool opsizemissing = false;
1927     int i;
1928
1929     for (i = 0; i < instruction->operands; i++)
1930         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1931
1932     merr = MERR_INVALOP;
1933
1934     for (temp = nasm_instructions[instruction->opcode];
1935          temp->opcode != I_none; temp++) {
1936         m = matches(temp, instruction, bits);
1937         if (m == MOK_JUMP) {
1938             if (jmp_match(segment, offset, bits, instruction, temp->code))
1939                 m = MOK_GOOD;
1940             else
1941                 m = MERR_INVALOP;
1942         } else if (m == MERR_OPSIZEMISSING &&
1943                    (temp->flags & IF_SMASK) != IF_SX) {
1944             /*
1945              * Missing operand size and a candidate for fuzzy matching...
1946              */
1947             for (i = 0; i < temp->operands; i++) {
1948                 if ((temp->opd[i] & SAME_AS) == 0)
1949                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1950             }
1951             opsizemissing = true;
1952         }
1953         if (m > merr)
1954             merr = m;
1955         if (merr == MOK_GOOD)
1956             goto done;
1957     }
1958
1959     /* No match, but see if we can get a fuzzy operand size match... */
1960     if (!opsizemissing)
1961         goto done;
1962
1963     for (i = 0; i < instruction->operands; i++) {
1964         /*
1965          * We ignore extrinsic operand sizes on registers, so we should
1966          * never try to fuzzy-match on them.  This also resolves the case
1967          * when we have e.g. "xmmrm128" in two different positions.
1968          */
1969         if (is_class(REGISTER, instruction->oprs[i].type))
1970             continue;
1971
1972         /* This tests if xsizeflags[i] has more than one bit set */
1973         if ((xsizeflags[i] & (xsizeflags[i]-1)))
1974             goto done;                /* No luck */
1975
1976         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
1977     }
1978
1979     /* Try matching again... */
1980     for (temp = nasm_instructions[instruction->opcode];
1981          temp->opcode != I_none; temp++) {
1982         m = matches(temp, instruction, bits);
1983         if (m == MOK_JUMP) {
1984             if (jmp_match(segment, offset, bits, instruction, temp->code))
1985                 m = MOK_GOOD;
1986             else
1987                 m = MERR_INVALOP;
1988         }
1989         if (m > merr)
1990             merr = m;
1991         if (merr == MOK_GOOD)
1992             goto done;
1993     }
1994
1995 done:
1996     *tempp = temp;
1997     return merr;
1998 }
1999
2000 static enum match_result matches(const struct itemplate *itemp,
2001                                  insn *instruction, int bits)
2002 {
2003     int i, size[MAX_OPERANDS], asize, oprs;
2004     bool opsizemissing = false;
2005
2006     /*
2007      * Check the opcode
2008      */
2009     if (itemp->opcode != instruction->opcode)
2010         return MERR_INVALOP;
2011
2012     /*
2013      * Count the operands
2014      */
2015     if (itemp->operands != instruction->operands)
2016         return MERR_INVALOP;
2017
2018     /*
2019      * Is it legal?
2020      */
2021     if (!(optimizing > 0) && (itemp->flags & IF_OPT))
2022         return MERR_INVALOP;
2023
2024     /*
2025      * Check that no spurious colons or TOs are present
2026      */
2027     for (i = 0; i < itemp->operands; i++)
2028         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2029             return MERR_INVALOP;
2030
2031     /*
2032      * Process size flags
2033      */
2034     switch (itemp->flags & IF_SMASK) {
2035     case IF_SB:
2036         asize = BITS8;
2037         break;
2038     case IF_SW:
2039         asize = BITS16;
2040         break;
2041     case IF_SD:
2042         asize = BITS32;
2043         break;
2044     case IF_SQ:
2045         asize = BITS64;
2046         break;
2047     case IF_SO:
2048         asize = BITS128;
2049         break;
2050     case IF_SY:
2051         asize = BITS256;
2052         break;
2053     case IF_SZ:
2054         switch (bits) {
2055         case 16:
2056             asize = BITS16;
2057             break;
2058         case 32:
2059             asize = BITS32;
2060             break;
2061         case 64:
2062             asize = BITS64;
2063             break;
2064         default:
2065             asize = 0;
2066             break;
2067         }
2068         break;
2069     default:
2070         asize = 0;
2071         break;
2072     }
2073
2074     if (itemp->flags & IF_ARMASK) {
2075         /* S- flags only apply to a specific operand */
2076         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2077         memset(size, 0, sizeof size);
2078         size[i] = asize;
2079     } else {
2080         /* S- flags apply to all operands */
2081         for (i = 0; i < MAX_OPERANDS; i++)
2082             size[i] = asize;
2083     }
2084
2085     /*
2086      * Check that the operand flags all match up,
2087      * it's a bit tricky so lets be verbose:
2088      *
2089      * 1) Find out the size of operand. If instruction
2090      *    doesn't have one specified -- we're trying to
2091      *    guess it either from template (IF_S* flag) or
2092      *    from code bits.
2093      *
2094      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2095      *    (ie the same operand as was specified somewhere in template, and
2096      *    this referred operand index is being achieved via ~SAME_AS)
2097      *    we are to be sure that both registers (in template and instruction)
2098      *    do exactly match.
2099      *
2100      * 3) If template operand do not match the instruction OR
2101      *    template has an operand size specified AND this size differ
2102      *    from which instruction has (perhaps we got it from code bits)
2103      *    we are:
2104      *      a)  Check that only size of instruction and operand is differ
2105      *          other characteristics do match
2106      *      b)  Perhaps it's a register specified in instruction so
2107      *          for such a case we just mark that operand as "size
2108      *          missing" and this will turn on fuzzy operand size
2109      *          logic facility (handled by a caller)
2110      */
2111     for (i = 0; i < itemp->operands; i++) {
2112         opflags_t type = instruction->oprs[i].type;
2113         if (!(type & SIZE_MASK))
2114             type |= size[i];
2115
2116         if (itemp->opd[i] & SAME_AS) {
2117             int j = itemp->opd[i] & ~SAME_AS;
2118             if (type != instruction->oprs[j].type ||
2119                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2120                 return MERR_INVALOP;
2121         } else if (itemp->opd[i] & ~type ||
2122             ((itemp->opd[i] & SIZE_MASK) &&
2123              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2124             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2125                 return MERR_INVALOP;
2126             } else if (!is_class(REGISTER, type)) {
2127                 /*
2128                  * Note: we don't honor extrinsic operand sizes for registers,
2129                  * so "missing operand size" for a register should be
2130                  * considered a wildcard match rather than an error.
2131                  */
2132                 opsizemissing = true;
2133             }
2134         }
2135     }
2136
2137     if (opsizemissing)
2138         return MERR_OPSIZEMISSING;
2139
2140     /*
2141      * Check operand sizes
2142      */
2143     if (itemp->flags & (IF_SM | IF_SM2)) {
2144         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2145         for (i = 0; i < oprs; i++) {
2146             asize = itemp->opd[i] & SIZE_MASK;
2147             if (asize) {
2148                 for (i = 0; i < oprs; i++)
2149                     size[i] = asize;
2150                 break;
2151             }
2152         }
2153     } else {
2154         oprs = itemp->operands;
2155     }
2156
2157     for (i = 0; i < itemp->operands; i++) {
2158         if (!(itemp->opd[i] & SIZE_MASK) &&
2159             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2160             return MERR_OPSIZEMISMATCH;
2161     }
2162
2163     /*
2164      * Check template is okay at the set cpu level
2165      */
2166     if (((itemp->flags & IF_PLEVEL) > cpu))
2167         return MERR_BADCPU;
2168
2169     /*
2170      * Verify the appropriate long mode flag.
2171      */
2172     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2173         return MERR_BADMODE;
2174
2175     /*
2176      * Check if special handling needed for Jumps
2177      */
2178     if ((itemp->code[0] & 0374) == 0370)
2179         return MOK_JUMP;
2180
2181     return MOK_GOOD;
2182 }
2183
2184 static enum ea_type process_ea(operand * input, ea * output, int bits,
2185                                int addrbits, int rfield, opflags_t rflags)
2186 {
2187     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2188
2189     output->type = EA_SCALAR;
2190     output->rip = false;
2191
2192     /* REX flags for the rfield operand */
2193     output->rex |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
2194
2195     if (is_class(REGISTER, input->type)) {  /* register direct */
2196         int i;
2197         opflags_t f;
2198
2199         if (!is_register(input->basereg))
2200             goto err;
2201         f = regflag(input);
2202         i = nasm_regvals[input->basereg];
2203
2204         if (REG_EA & ~f)
2205             goto err;
2206
2207         output->rex |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
2208
2209         output->sib_present = false;    /* no SIB necessary */
2210         output->bytes = 0;              /* no offset necessary either */
2211         output->modrm = 0xC0 | ((rfield & 7) << 3) | (i & 7);
2212     } else {                    /* it's a memory reference */
2213         if (input->basereg == -1 &&
2214             (input->indexreg == -1 || input->scale == 0)) {
2215             /* it's a pure offset */
2216
2217             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2218                 input->segment == NO_SEG) {
2219                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2220                 input->type &= ~IP_REL;
2221                 input->type |= MEMORY;
2222             }
2223
2224             if (input->eaflags & EAF_BYTEOFFS ||
2225                 (input->eaflags & EAF_WORDOFFS &&
2226                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2227                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2228             }
2229
2230             if (bits == 64 && (~input->type & IP_REL)) {
2231                 int scale, index, base;
2232                 output->sib_present = true;
2233                 scale = 0;
2234                 index = 4;
2235                 base = 5;
2236                 output->sib = (scale << 6) | (index << 3) | base;
2237                 output->bytes = 4;
2238                 output->modrm = 4 | ((rfield & 7) << 3);
2239                 output->rip = false;
2240             } else {
2241                 output->sib_present = false;
2242                 output->bytes = (addrbits != 16 ? 4 : 2);
2243                 output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
2244                 output->rip = bits == 64;
2245             }
2246         } else {                /* it's an indirection */
2247             int i = input->indexreg, b = input->basereg, s = input->scale;
2248             int32_t seg = input->segment;
2249             int hb = input->hintbase, ht = input->hinttype;
2250             int t, it, bt;              /* register numbers */
2251             opflags_t x, ix, bx;        /* register flags */
2252
2253             if (s == 0)
2254                 i = -1;         /* make this easy, at least */
2255
2256             if (is_register(i)) {
2257                 it = nasm_regvals[i];
2258                 ix = nasm_reg_flags[i];
2259             } else {
2260                 it = -1;
2261                 ix = 0;
2262             }
2263
2264             if (is_register(b)) {
2265                 bt = nasm_regvals[b];
2266                 bx = nasm_reg_flags[b];
2267             } else {
2268                 bt = -1;
2269                 bx = 0;
2270             }
2271
2272             /* if either one are a vector register... */
2273             if ((ix|bx) & (XMMREG|YMMREG) & ~REG_EA) {
2274                 int32_t sok = BITS32 | BITS64;
2275                 int32_t o = input->offset;
2276                 int mod, scale, index, base;
2277
2278                 printf("bt = %x, bx = %x, it = %x, ix = %x, s = %d\n",
2279                        bt, bx, it, ix, s);
2280
2281                 /*
2282                  * For a vector SIB, one has to be a vector and the other,
2283                  * if present, a GPR.  The vector must be the index operand.
2284                  */
2285                 if (it == -1 || (bx & (XMMREG|YMMREG) & ~REG_EA)) {
2286                     if (s == 0)
2287                         s = 1;
2288                     else if (s != 1)
2289                         goto err;
2290
2291                     t = bt, bt = it, it = t;
2292                     x = bx, bx = ix, ix = x;
2293                 }
2294
2295                 if (bt != -1) {
2296                     if (REG_GPR & ~bx)
2297                         goto err;
2298                     if (!(REG64 & ~bx) || !(REG32 & ~bx))
2299                         sok &= bx;
2300                     else
2301                         goto err;
2302                 }
2303
2304                 /*
2305                  * While we're here, ensure the user didn't specify
2306                  * WORD or QWORD
2307                  */
2308                 if (input->disp_size == 16 || input->disp_size == 64)
2309                     goto err;
2310
2311                 if (addrbits == 16 ||
2312                     (addrbits == 32 && !(sok & BITS32)) ||
2313                     (addrbits == 64 && !(sok & BITS64)))
2314                     goto err;
2315
2316                 output->type = (ix & YMMREG & ~REG_EA)
2317                     ? EA_YMMVSIB : EA_XMMVSIB;
2318
2319                 output->rex |= rexflags(it, ix, REX_X);
2320                 output->rex |= rexflags(bt, bx, REX_B);
2321
2322                 index = it & 7; /* it is known to be != -1 */
2323
2324                 switch (s) {
2325                 case 1:
2326                     scale = 0;
2327                     break;
2328                 case 2:
2329                     scale = 1;
2330                     break;
2331                 case 4:
2332                     scale = 2;
2333                     break;
2334                 case 8:
2335                     scale = 3;
2336                     break;
2337                 default:   /* then what the smeg is it? */
2338                     goto err;    /* panic */
2339                 }
2340
2341                 if (bt == -1) {
2342                     base = 5;
2343                     mod = 0;
2344                 } else {
2345                     base = (bt & 7);
2346                     if (base != REG_NUM_EBP && o == 0 &&
2347                         seg == NO_SEG && !forw_ref &&
2348                         !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2349                         mod = 0;
2350                     else if (input->eaflags & EAF_BYTEOFFS ||
2351                              (o >= -128 && o <= 127 &&
2352                               seg == NO_SEG && !forw_ref &&
2353                               !(input->eaflags & EAF_WORDOFFS)))
2354                         mod = 1;
2355                     else
2356                         mod = 2;
2357                 }
2358
2359                 output->sib_present = true;
2360                 output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2361                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2362                 output->sib = (scale << 6) | (index << 3) | base;
2363             } else if ((ix|bx) & (BITS32|BITS64)) {
2364                 /*
2365                  * it must be a 32/64-bit memory reference. Firstly we have
2366                  * to check that all registers involved are type E/Rxx.
2367                  */
2368                 int32_t sok = BITS32 | BITS64;
2369                 int32_t o = input->offset;
2370
2371                 if (it != -1) {
2372                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2373                         sok &= ix;
2374                     else
2375                         goto err;
2376                 }
2377
2378                 if (bt != -1) {
2379                     if (REG_GPR & ~bx)
2380                         goto err; /* Invalid register */
2381                     if (~sok & bx & SIZE_MASK)
2382                         goto err; /* Invalid size */
2383                     sok &= bx;
2384                 }
2385
2386                 /*
2387                  * While we're here, ensure the user didn't specify
2388                  * WORD or QWORD
2389                  */
2390                 if (input->disp_size == 16 || input->disp_size == 64)
2391                     goto err;
2392
2393                 if (addrbits == 16 ||
2394                     (addrbits == 32 && !(sok & BITS32)) ||
2395                     (addrbits == 64 && !(sok & BITS64)))
2396                     goto err;
2397
2398                 /* now reorganize base/index */
2399                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2400                     ((hb == b && ht == EAH_NOTBASE) ||
2401                      (hb == i && ht == EAH_MAKEBASE))) {
2402                     /* swap if hints say so */
2403                     t = bt, bt = it, it = t;
2404                     x = bx, bx = ix, ix = x;
2405                 }
2406                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2407                     bt = -1, bx = 0, s++;
2408                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2409                     /* make single reg base, unless hint */
2410                     bt = it, bx = ix, it = -1, ix = 0;
2411                 }
2412                 if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
2413                       s == 3 || s == 5 || s == 9) && bt == -1)
2414                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2415                 if (it == -1 && (bt & 7) != REG_NUM_ESP &&
2416                     (input->eaflags & EAF_TIMESTWO))
2417                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2418                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2419                 if (s == 1 && it == REG_NUM_ESP) {
2420                     /* swap ESP into base if scale is 1 */
2421                     t = it, it = bt, bt = t;
2422                     x = ix, ix = bx, bx = x;
2423                 }
2424                 if (it == REG_NUM_ESP ||
2425                     (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2426                     goto err;        /* wrong, for various reasons */
2427
2428                 output->rex |= rexflags(it, ix, REX_X);
2429                 output->rex |= rexflags(bt, bx, REX_B);
2430
2431                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2432                     /* no SIB needed */
2433                     int mod, rm;
2434
2435                     if (bt == -1) {
2436                         rm = 5;
2437                         mod = 0;
2438                     } else {
2439                         rm = (bt & 7);
2440                         if (rm != REG_NUM_EBP && o == 0 &&
2441                             seg == NO_SEG && !forw_ref &&
2442                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2443                             mod = 0;
2444                         else if (input->eaflags & EAF_BYTEOFFS ||
2445                                  (o >= -128 && o <= 127 &&
2446                                   seg == NO_SEG && !forw_ref &&
2447                                   !(input->eaflags & EAF_WORDOFFS)))
2448                             mod = 1;
2449                         else
2450                             mod = 2;
2451                     }
2452
2453                     output->sib_present = false;
2454                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2455                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2456                 } else {
2457                     /* we need a SIB */
2458                     int mod, scale, index, base;
2459
2460                     if (it == -1)
2461                         index = 4, s = 1;
2462                     else
2463                         index = (it & 7);
2464
2465                     switch (s) {
2466                     case 1:
2467                         scale = 0;
2468                         break;
2469                     case 2:
2470                         scale = 1;
2471                         break;
2472                     case 4:
2473                         scale = 2;
2474                         break;
2475                     case 8:
2476                         scale = 3;
2477                         break;
2478                     default:   /* then what the smeg is it? */
2479                         goto err;    /* panic */
2480                     }
2481
2482                     if (bt == -1) {
2483                         base = 5;
2484                         mod = 0;
2485                     } else {
2486                         base = (bt & 7);
2487                         if (base != REG_NUM_EBP && o == 0 &&
2488                             seg == NO_SEG && !forw_ref &&
2489                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2490                             mod = 0;
2491                         else if (input->eaflags & EAF_BYTEOFFS ||
2492                                  (o >= -128 && o <= 127 &&
2493                                   seg == NO_SEG && !forw_ref &&
2494                                   !(input->eaflags & EAF_WORDOFFS)))
2495                             mod = 1;
2496                         else
2497                             mod = 2;
2498                     }
2499
2500                     output->sib_present = true;
2501                     output->bytes = (bt == -1 || mod == 2 ? 4 : mod);
2502                     output->modrm = (mod << 6) | ((rfield & 7) << 3) | 4;
2503                     output->sib = (scale << 6) | (index << 3) | base;
2504                 }
2505             } else {            /* it's 16-bit */
2506                 int mod, rm;
2507                 int16_t o = input->offset;
2508
2509                 /* check for 64-bit long mode */
2510                 if (addrbits == 64)
2511                     goto err;
2512
2513                 /* check all registers are BX, BP, SI or DI */
2514                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
2515                     (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
2516                     goto err;
2517
2518                 /* ensure the user didn't specify DWORD/QWORD */
2519                 if (input->disp_size == 32 || input->disp_size == 64)
2520                     goto err;
2521
2522                 if (s != 1 && i != -1)
2523                     goto err;        /* no can do, in 16-bit EA */
2524                 if (b == -1 && i != -1) {
2525                     int tmp = b;
2526                     b = i;
2527                     i = tmp;
2528                 }               /* swap */
2529                 if ((b == R_SI || b == R_DI) && i != -1) {
2530                     int tmp = b;
2531                     b = i;
2532                     i = tmp;
2533                 }
2534                 /* have BX/BP as base, SI/DI index */
2535                 if (b == i)
2536                     goto err;        /* shouldn't ever happen, in theory */
2537                 if (i != -1 && b != -1 &&
2538                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2539                     goto err;        /* invalid combinations */
2540                 if (b == -1)            /* pure offset: handled above */
2541                     goto err;        /* so if it gets to here, panic! */
2542
2543                 rm = -1;
2544                 if (i != -1)
2545                     switch (i * 256 + b) {
2546                     case R_SI * 256 + R_BX:
2547                         rm = 0;
2548                         break;
2549                     case R_DI * 256 + R_BX:
2550                         rm = 1;
2551                         break;
2552                     case R_SI * 256 + R_BP:
2553                         rm = 2;
2554                         break;
2555                     case R_DI * 256 + R_BP:
2556                         rm = 3;
2557                         break;
2558                 } else
2559                     switch (b) {
2560                     case R_SI:
2561                         rm = 4;
2562                         break;
2563                     case R_DI:
2564                         rm = 5;
2565                         break;
2566                     case R_BP:
2567                         rm = 6;
2568                         break;
2569                     case R_BX:
2570                         rm = 7;
2571                         break;
2572                     }
2573                 if (rm == -1)           /* can't happen, in theory */
2574                     goto err;        /* so panic if it does */
2575
2576                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2577                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2578                     mod = 0;
2579                 else if (input->eaflags & EAF_BYTEOFFS ||
2580                          (o >= -128 && o <= 127 && seg == NO_SEG &&
2581                           !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
2582                     mod = 1;
2583                 else
2584                     mod = 2;
2585
2586                 output->sib_present = false;    /* no SIB - it's 16-bit */
2587                 output->bytes = mod;            /* bytes of offset needed */
2588                 output->modrm = (mod << 6) | ((rfield & 7) << 3) | rm;
2589             }
2590         }
2591     }
2592
2593     output->size = 1 + output->sib_present + output->bytes;
2594     return output->type;
2595
2596 err:
2597     return output->type = EA_INVALID;
2598 }
2599 /* add_asp(): settle ins->addr_size for an instruction assembled in addrbits-bit mode, recording a P_A16/P_A32 prefix when only the other size fits. */
2600 static void add_asp(insn *ins, int addrbits)
2601 {
2602     int j, valid;      /* valid: OR of the address sizes (16/32/64) that are still possible */
2603     int defdisp;       /* displacement size assumed for an operand whose disp_size is 0 */
2604     /* Start from the two sizes considered for this mode: 64 and 32 when addrbits is 64, otherwise 32 and 16. */
2605     valid = (addrbits == 64) ? 64|32 : 32|16;
2606     /* An address-size prefix already present on the instruction narrows the candidates. */
2607     switch (ins->prefixes[PPS_ASIZE]) {
2608     case P_A16:
2609         valid &= 16;
2610         break;
2611     case P_A32:
2612         valid &= 32;
2613         break;
2614     case P_A64:
2615         valid &= 64;
2616         break;
2617     case P_ASP:        /* keeps 16 when addrbits is 32, and 32 for any other addrbits */
2618         valid &= (addrbits == 32) ? 16 : 32;
2619         break;
2620     default:
2621         break;
2622     }
2623     /* Let the registers or displacement size of every operand that is_class() reports as MEMORY narrow the candidates further. */
2624     for (j = 0; j < ins->operands; j++) {
2625         if (is_class(MEMORY, ins->oprs[j].type)) {
2626             opflags_t i, b;
2627
2628             /* i: nasm_reg_flags entry of the index register, or 0 when is_register() rejects it */
2629             if (!is_register(ins->oprs[j].indexreg))
2630                 i = 0;
2631             else
2632                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2633
2634             /* b: nasm_reg_flags entry of the base register, or 0 when is_register() rejects it */
2635             if (!is_register(ins->oprs[j].basereg))
2636                 b = 0;
2637             else
2638                 b = nasm_reg_flags[ins->oprs[j].basereg];
2639             /* With a zero scale the index register is disregarded. */
2640             if (ins->oprs[j].scale == 0)
2641                 i = 0;
2642             /* No base and no index: only the operand's displacement size can constrain the address size. */
2643             if (!i && !b) {
2644                 int ds = ins->oprs[j].disp_size;
2645                 if ((addrbits != 64 && ds > 8) ||
2646                     (addrbits == 64 && ds == 16))
2647                     valid &= ds;        /* ds is used directly as the mask; in 64-bit mode only ds == 16 is applied */
2648             } else {                    /* !(REGnn & ~x) holds exactly when x has every bit of REGnn set */
2649                 if (!(REG16 & ~b))
2650                     valid &= 16;
2651                 if (!(REG32 & ~b))
2652                     valid &= 32;
2653                 if (!(REG64 & ~b))
2654                     valid &= 64;
2655                 /* The same three tests, applied to the index register's flags. */
2656                 if (!(REG16 & ~i))
2657                     valid &= 16;
2658                 if (!(REG32 & ~i))
2659                     valid &= 32;
2660                 if (!(REG64 & ~i))
2661                     valid &= 64;
2662             }
2663         }
2664     }
2665     /* Prefer the mode's own size; otherwise try the alternate size (16 when addrbits is 32, else 32). */
2666     if (valid & addrbits) {
2667         ins->addr_size = addrbits;
2668     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2669         /* Only the alternate size fits: record the matching address-size prefix */
2670         enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
2671         ins->prefixes[PPS_ASIZE] = pref;
2672         ins->addr_size = (addrbits == 32) ? 16 : 32;
2673     } else {
2674         /* Neither candidate size is left: report a non-fatal error */
2675         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2676         ins->addr_size = addrbits; /* Error recovery: carry on with the mode's own size */
2677     }
2678     /* Default displacement size: 16 for a 16-bit address size, 32 for every other address size (64 included). */
2679     defdisp = ins->addr_size == 16 ? 16 : 32;
2680     /* Re-check each operand whose type has all MEM_OFFS bits set against the address size chosen above. */
2681     for (j = 0; j < ins->operands; j++) {
2682         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2683             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
2684             /*
2685              * A mem_offs displacement size must equal the address size; if it
2686              * does not, clear the MEM_OFFS bits outside MEMORY so only EA forms match
2687              */
2688             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2689         }
2690     }
2691 }