assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2012 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \20..\23      - a byte immediate operand, from operand 0..3
  46  * \24..\27      - a zero-extended byte immediate operand, from operand 0..3
  47  * \30..\33      - a word immediate operand, from operand 0..3
  48  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  49  *                 assembly mode or the operand-size override on the operand
  50  * \40..\43      - a long immediate operand, from operand 0..3
  51  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  52  *                 depending on the address size of the instruction.
  53  * \50..\53      - a byte relative operand, from operand 0..3
  54  * \54..\57      - a qword immediate operand, from operand 0..3
  55  * \60..\63      - a word relative operand, from operand 0..3
  56  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  57  *                 assembly mode or the operand-size override on the operand
  58  * \70..\73      - a long relative operand, from operand 0..3
  59  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  60  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  61  *                 field the register value of operand b.
  62  * \172\ab       - the register number from operand a in bits 7..4, with
  63  *                 the 4-bit immediate from operand b in bits 3..0.
  64  * \173\xab      - the register number from operand a in bits 7..4, with
  65  *                 the value b in bits 3..0.
  66  * \174..\177    - the register number from operand 0..3 in bits 7..4, and
  67  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  68  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  69  *                 field equal to digit b.
  70  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  71  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  72  *                 V field taken from operand 0..3.
  73  * \270          - this instruction uses VEX/XOP rather than REX, with the
  74  *                 V field set to 1111b.
  75  *
  76  * VEX/XOP prefixes are followed by the sequence:
  77  * \tmm\wlp        where mm is the M field; and wlp is:
  78  *                 00 wwl lpp
  79  *                 [l0]  ll = 0 for L = 0 (.128, .lz)
  80  *                 [l1]  ll = 1 for L = 1 (.256)
  81  *                 [lig] ll = 2 for L don't care (always assembled as 0)
  82  *
  83  *                 [w0]  ww = 0 for W = 0
  84  *                 [w1 ] ww = 1 for W = 1
  85  *                 [wig] ww = 2 for W don't care (always assembled as 0)
  86  *                 [ww]  ww = 3 for W used as REX.W
  87  *
  88  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
  89  *
  90  * \271          - instruction takes XRELEASE (F3) with or without lock
  91  * \272          - instruction takes XACQUIRE/XRELEASE with or without lock
  92  * \273          - instruction takes XACQUIRE/XRELEASE with lock only
  93  * \274..\277    - a byte immediate operand, from operand 0..3, sign-extended
  94  *                 to the operand size (if o16/o32/o64 present) or the bit size
  95  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
  96  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
  97  * \312          - (disassembler only) invalid with non-default address size.
  98  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
  99  * \314          - (disassembler only) invalid with REX.B
 100  * \315          - (disassembler only) invalid with REX.X
 101  * \316          - (disassembler only) invalid with REX.R
 102  * \317          - (disassembler only) invalid with REX.W
 103  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 104  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 105  * \322          - indicates that this instruction is only valid when the
 106  *                 operand size is the default (instruction to disassembler,
 107  *                 generates no code in the assembler)
 108  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 109  * \324          - indicates 64-bit operand size requiring REX prefix.
 110  * \325          - instruction which always uses spl/bpl/sil/dil
 * \326          - instruction not valid with 0xF3 REP prefix.  Hint for
 *                 disassembler only; for SSE instructions.
 113  * \330          - a literal byte follows in the code stream, to be added
 114  *                 to the condition code value of the instruction.
 115  * \331          - instruction not valid with REP prefix.  Hint for
 116  *                 disassembler only; for SSE instructions.
 117  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 118  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 119  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 120  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
 121  * \336          - force a REP(E) prefix (0xF3) even if not specified.
 122  * \337          - force a REPNE prefix (0xF2) even if not specified.
 123  *                 \336-\337 are still listed as prefixes in the disassembler.
 124  * \340          - reserve <operand 0> bytes of uninitialized storage.
 125  *                 Operand 0 had better be a segmentless constant.
 126  * \341          - this instruction needs a WAIT "prefix"
 127  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 128  *                 (POP is never used for CS) depending on operand 0
 129  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 130  *                 on operand 0
 131  * \360          - no SSE prefix (== \364\331)
 132  * \361          - 66 SSE prefix (== \366\331)
 133  * \362          - F2 SSE prefix (== \364\332)
 134  * \363          - F3 SSE prefix (== \364\333)
 135  * \364          - operand-size prefix (0x66) not permitted
 136  * \365          - address-size prefix (0x67) not permitted
 137  * \366          - operand-size prefix (0x66) used as opcode extension
 138  * \367          - address-size prefix (0x67) used as opcode extension
 139  * \370,\371     - match only if operand 0 meets byte jump criteria.
 140  *                 370 is used for Jcc, 371 is used for JMP.
 141  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 142  *                 used for conditional jump over longer jump
 143  * \374          - this instruction takes an XMM VSIB memory EA
 144  * \375          - this instruction takes an YMM VSIB memory EA
 145  */
 146
 147 #include "compiler.h"
 148
 149 #include <stdio.h>
 150 #include <string.h>
 151 #include <inttypes.h>
 152
 153 #include "nasm.h"
 154 #include "nasmlib.h"
 155 #include "assemble.h"
 156 #include "insns.h"
 157 #include "tables.h"
 158
 159 enum match_result {
 160     /*
 161      * Matching errors.  These should be sorted so that more specific
 162      * errors come later in the sequence.
 163      */
 164     MERR_INVALOP,
 165     MERR_OPSIZEMISSING,
 166     MERR_OPSIZEMISMATCH,
 167     MERR_BADCPU,
 168     MERR_BADMODE,
 169     MERR_BADHLE,
 170     /*
 171      * Matching success; the conditional ones first
 172      */
 173     MOK_JUMP,   /* Matching OK but needs jmp_match() */
 174     MOK_GOOD    /* Matching unconditionally OK */
 175 };
 176
 177 typedef struct {
 178     enum ea_type type;            /* what kind of EA is this? */
 179     int sib_present;              /* is a SIB byte necessary? */
 180     int bytes;                    /* # of bytes of offset needed */
 181     int size;                     /* lazy - this is sib+bytes+1 */
 182     uint8_t modrm, sib, rex, rip; /* the bytes themselves */
 183 } ea;
 184
 185 #define GEN_SIB(scale, index, base)                 \
 186         (((scale) << 6) | ((index) << 3) | ((base)))
 187
 188 #define GEN_MODRM(mod, reg, rm)                     \
 189         (((mod) << 6) | (((reg) & 7) << 3) | ((rm) & 7))
 190
 191 static uint32_t cpu;            /* cpu level received from nasm.c */
 192 static efunc errfunc;
 193 static struct ofmt *outfmt;
 194 static ListGen *list;
 195
 196 static int64_t calcsize(int32_t, int64_t, int, insn *,
 197                         const struct itemplate *);
 198 static void gencode(int32_t segment, int64_t offset, int bits,
 199                     insn * ins, const struct itemplate *temp,
 200                     int64_t insn_end);
 201 static enum match_result find_match(const struct itemplate **tempp,
 202                                     insn *instruction,
 203                                     int32_t segment, int64_t offset, int bits);
 204 static enum match_result matches(const struct itemplate *, insn *, int bits);
 205 static opflags_t regflag(const operand *);
 206 static int32_t regval(const operand *);
 207 static int rexflags(int, opflags_t, int);
 208 static int op_rexflags(const operand *, int);
 209 static void add_asp(insn *, int);
 210
 211 static enum ea_type process_ea(operand *, ea *, int, int, int, opflags_t);
 212
 213 static int has_prefix(insn * ins, enum prefix_pos pos, int prefix)
 214 {
 215     return ins->prefixes[pos] == prefix;
 216 }
 217
 218 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 219 {
 220     if (ins->prefixes[pos])
 221         errfunc(ERR_NONFATAL, "invalid %s prefix",
 222                 prefix_name(ins->prefixes[pos]));
 223 }
 224
 225 static const char *size_name(int size)
 226 {
 227     switch (size) {
 228     case 1:
 229         return "byte";
 230     case 2:
 231         return "word";
 232     case 4:
 233         return "dword";
 234     case 8:
 235         return "qword";
 236     case 10:
 237         return "tword";
 238     case 16:
 239         return "oword";
 240     case 32:
 241         return "yword";
 242     default:
 243         return "???";
 244     }
 245 }
 246
 247 static void warn_overflow(int pass, int size)
 248 {
 249     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 250             "%s data exceeds bounds", size_name(size));
 251 }
 252
 253 static void warn_overflow_const(int64_t data, int size)
 254 {
 255     if (overflow_general(data, size))
 256         warn_overflow(ERR_PASS1, size);
 257 }
 258
 259 static void warn_overflow_opd(const struct operand *o, int size)
 260 {
 261     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 262         if (overflow_general(o->offset, size))
 263             warn_overflow(ERR_PASS2, size);
 264     }
 265 }
 266
 267 /*
 268  * This routine wrappers the real output format's output routine,
 269  * in order to pass a copy of the data off to the listing file
 270  * generator at the same time.
 271  */
 272 static void out(int64_t offset, int32_t segto, const void *data,
 273                 enum out_type type, uint64_t size,
 274                 int32_t segment, int32_t wrt)
 275 {
 276     static int32_t lineno = 0;     /* static!!! */
 277     static char *lnfname = NULL;
 278     uint8_t p[8];
 279
 280     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 281         /*
 282          * This is a non-relocated address, and we're going to
 283          * convert it into RAWDATA format.
 284          */
 285         uint8_t *q = p;
 286
 287         if (size > 8) {
 288             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 289             return;
 290         }
 291
 292         WRITEADDR(q, *(int64_t *)data, size);
 293         data = p;
 294         type = OUT_RAWDATA;
 295     }
 296
 297     list->output(offset, data, type, size);
 298
 299     /*
 300      * this call to src_get determines when we call the
 301      * debug-format-specific "linenum" function
 302      * it updates lineno and lnfname to the current values
 303      * returning 0 if "same as last time", -2 if lnfname
 304      * changed, and the amount by which lineno changed,
 305      * if it did. thus, these variables must be static
 306      */
 307
 308     if (src_get(&lineno, &lnfname))
 309         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 310
 311     outfmt->output(segto, data, type, size, segment, wrt);
 312 }
 313
 314 static void out_imm8(int64_t offset, int32_t segment, struct operand *opx)
 315 {
 316     if (opx->segment != NO_SEG) {
 317         uint64_t data = opx->offset;
 318         out(offset, segment, &data, OUT_ADDRESS, 1, opx->segment, opx->wrt);
 319     } else {
 320         uint8_t byte = opx->offset;
 321         out(offset, segment, &byte, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
 322     }
 323 }
 324
 325 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 326                       insn * ins, const struct itemplate *temp)
 327 {
 328     int64_t isize;
 329     const uint8_t *code = temp->code;
 330     uint8_t c = code[0];
 331
 332     if (((c & ~1) != 0370) || (ins->oprs[0].type & STRICT))
 333         return false;
 334     if (!optimizing)
 335         return false;
 336     if (optimizing < 0 && c == 0371)
 337         return false;
 338
 339     isize = calcsize(segment, offset, bits, ins, temp);
 340
 341     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 342         /* Be optimistic in pass 1 */
 343         return true;
 344
 345     if (ins->oprs[0].segment != segment)
 346         return false;
 347
 348     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 349     return (isize >= -128 && isize <= 127); /* is it byte size? */
 350 }
 351
 352 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 353                  insn * instruction, struct ofmt *output, efunc error,
 354                  ListGen * listgen)
 355 {
 356     const struct itemplate *temp;
 357     int j;
 358     enum match_result m;
 359     int64_t insn_end;
 360     int32_t itimes;
 361     int64_t start = offset;
 362     int64_t wsize;              /* size for DB etc. */
 363
 364     errfunc = error;            /* to pass to other functions */
 365     cpu = cp;
 366     outfmt = output;            /* likewise */
 367     list = listgen;             /* and again */
 368
 369     wsize = idata_bytes(instruction->opcode);
 370     if (wsize == -1)
 371         return 0;
 372
 373     if (wsize) {
 374         extop *e;
 375         int32_t t = instruction->times;
 376         if (t < 0)
 377             errfunc(ERR_PANIC,
 378                     "instruction->times < 0 (%ld) in assemble()", t);
 379
 380         while (t--) {           /* repeat TIMES times */
 381             list_for_each(e, instruction->eops) {
 382                 if (e->type == EOT_DB_NUMBER) {
 383                     if (wsize > 8) {
 384                         errfunc(ERR_NONFATAL,
 385                                 "integer supplied to a DT, DO or DY"
 386                                 " instruction");
 387                     } else {
 388                         out(offset, segment, &e->offset,
 389                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 390                         offset += wsize;
 391                     }
 392                 } else if (e->type == EOT_DB_STRING ||
 393                            e->type == EOT_DB_STRING_FREE) {
 394                     int align;
 395
 396                     out(offset, segment, e->stringval,
 397                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 398                     align = e->stringlen % wsize;
 399
 400                     if (align) {
 401                         align = wsize - align;
 402                         out(offset, segment, zero_buffer,
 403                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 404                     }
 405                     offset += e->stringlen + align;
 406                 }
 407             }
 408             if (t > 0 && t == instruction->times - 1) {
 409                 /*
 410                  * Dummy call to list->output to give the offset to the
 411                  * listing module.
 412                  */
 413                 list->output(offset, NULL, OUT_RAWDATA, 0);
 414                 list->uplevel(LIST_TIMES);
 415             }
 416         }
 417         if (instruction->times > 1)
 418             list->downlevel(LIST_TIMES);
 419         return offset - start;
 420     }
 421
 422     if (instruction->opcode == I_INCBIN) {
 423         const char *fname = instruction->eops->stringval;
 424         FILE *fp;
 425
 426         fp = fopen(fname, "rb");
 427         if (!fp) {
 428             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 429                   fname);
 430         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 431             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 432                   fname);
 433         } else {
 434             static char buf[4096];
 435             size_t t = instruction->times;
 436             size_t base = 0;
 437             size_t len;
 438
 439             len = ftell(fp);
 440             if (instruction->eops->next) {
 441                 base = instruction->eops->next->offset;
 442                 len -= base;
 443                 if (instruction->eops->next->next &&
 444                     len > (size_t)instruction->eops->next->next->offset)
 445                     len = (size_t)instruction->eops->next->next->offset;
 446             }
 447             /*
 448              * Dummy call to list->output to give the offset to the
 449              * listing module.
 450              */
 451             list->output(offset, NULL, OUT_RAWDATA, 0);
 452             list->uplevel(LIST_INCBIN);
 453             while (t--) {
 454                 size_t l;
 455
 456                 fseek(fp, base, SEEK_SET);
 457                 l = len;
 458                 while (l > 0) {
 459                     int32_t m;
 460                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 461                     if (!m) {
 462                         /*
 463                          * This shouldn't happen unless the file
 464                          * actually changes while we are reading
 465                          * it.
 466                          */
 467                         error(ERR_NONFATAL,
 468                               "`incbin': unexpected EOF while"
 469                               " reading file `%s'", fname);
 470                         t = 0;  /* Try to exit cleanly */
 471                         break;
 472                     }
 473                     out(offset, segment, buf, OUT_RAWDATA, m,
 474                         NO_SEG, NO_SEG);
 475                     l -= m;
 476                 }
 477             }
 478             list->downlevel(LIST_INCBIN);
 479             if (instruction->times > 1) {
 480                 /*
 481                  * Dummy call to list->output to give the offset to the
 482                  * listing module.
 483                  */
 484                 list->output(offset, NULL, OUT_RAWDATA, 0);
 485                 list->uplevel(LIST_TIMES);
 486                 list->downlevel(LIST_TIMES);
 487             }
 488             fclose(fp);
 489             return instruction->times * len;
 490         }
 491         return 0;               /* if we're here, there's an error */
 492     }
 493
 494     /* Check to see if we need an address-size prefix */
 495     add_asp(instruction, bits);
 496
 497     m = find_match(&temp, instruction, segment, offset, bits);
 498
 499     if (m == MOK_GOOD) {
 500         /* Matches! */
 501         int64_t insn_size = calcsize(segment, offset, bits, instruction, temp);
 502         itimes = instruction->times;
 503         if (insn_size < 0)  /* shouldn't be, on pass two */
 504             error(ERR_PANIC, "errors made it through from pass one");
 505         else
 506             while (itimes--) {
 507                 for (j = 0; j < MAXPREFIX; j++) {
 508                     uint8_t c = 0;
 509                     switch (instruction->prefixes[j]) {
 510                     case P_WAIT:
 511                         c = 0x9B;
 512                         break;
 513                     case P_LOCK:
 514                         c = 0xF0;
 515                         break;
 516                     case P_REPNE:
 517                     case P_REPNZ:
 518                     case P_XACQUIRE:
 519                         c = 0xF2;
 520                         break;
 521                     case P_REPE:
 522                     case P_REPZ:
 523                     case P_REP:
 524                     case P_XRELEASE:
 525                         c = 0xF3;
 526                         break;
 527                     case R_CS:
 528                         if (bits == 64) {
 529                             error(ERR_WARNING | ERR_PASS2,
 530                                   "cs segment base generated, but will be ignored in 64-bit mode");
 531                         }
 532                         c = 0x2E;
 533                         break;
 534                     case R_DS:
 535                         if (bits == 64) {
 536                             error(ERR_WARNING | ERR_PASS2,
 537                                   "ds segment base generated, but will be ignored in 64-bit mode");
 538                         }
 539                         c = 0x3E;
 540                         break;
 541                     case R_ES:
 542                         if (bits == 64) {
 543                             error(ERR_WARNING | ERR_PASS2,
 544                                   "es segment base generated, but will be ignored in 64-bit mode");
 545                         }
 546                         c = 0x26;
 547                         break;
 548                     case R_FS:
 549                         c = 0x64;
 550                         break;
 551                     case R_GS:
 552                         c = 0x65;
 553                         break;
 554                     case R_SS:
 555                         if (bits == 64) {
 556                             error(ERR_WARNING | ERR_PASS2,
 557                                   "ss segment base generated, but will be ignored in 64-bit mode");
 558                         }
 559                         c = 0x36;
 560                         break;
 561                     case R_SEGR6:
 562                     case R_SEGR7:
 563                         error(ERR_NONFATAL,
 564                               "segr6 and segr7 cannot be used as prefixes");
 565                         break;
 566                     case P_A16:
 567                         if (bits == 64) {
 568                             error(ERR_NONFATAL,
 569                                   "16-bit addressing is not supported "
 570                                   "in 64-bit mode");
 571                         } else if (bits != 16)
 572                             c = 0x67;
 573                         break;
 574                     case P_A32:
 575                         if (bits != 32)
 576                             c = 0x67;
 577                         break;
 578                     case P_A64:
 579                         if (bits != 64) {
 580                             error(ERR_NONFATAL,
 581                                   "64-bit addressing is only supported "
 582                                   "in 64-bit mode");
 583                         }
 584                         break;
 585                     case P_ASP:
 586                         c = 0x67;
 587                         break;
 588                     case P_O16:
 589                         if (bits != 16)
 590                             c = 0x66;
 591                         break;
 592                     case P_O32:
 593                         if (bits == 16)
 594                             c = 0x66;
 595                         break;
 596                     case P_O64:
 597                         /* REX.W */
 598                         break;
 599                     case P_OSP:
 600                         c = 0x66;
 601                         break;
 602                     case P_none:
 603                         break;
 604                     default:
 605                         error(ERR_PANIC, "invalid instruction prefix");
 606                     }
 607                     if (c != 0) {
 608                         out(offset, segment, &c, OUT_RAWDATA, 1,
 609                             NO_SEG, NO_SEG);
 610                         offset++;
 611                     }
 612                 }
 613                 insn_end = offset + insn_size;
 614                 gencode(segment, offset, bits, instruction,
 615                         temp, insn_end);
 616                 offset += insn_size;
 617                 if (itimes > 0 && itimes == instruction->times - 1) {
 618                     /*
 619                      * Dummy call to list->output to give the offset to the
 620                      * listing module.
 621                      */
 622                     list->output(offset, NULL, OUT_RAWDATA, 0);
 623                     list->uplevel(LIST_TIMES);
 624                 }
 625             }
 626         if (instruction->times > 1)
 627             list->downlevel(LIST_TIMES);
 628         return offset - start;
 629     } else {
 630         /* No match */
 631         switch (m) {
 632         case MERR_OPSIZEMISSING:
 633             error(ERR_NONFATAL, "operation size not specified");
 634             break;
 635         case MERR_OPSIZEMISMATCH:
 636             error(ERR_NONFATAL, "mismatch in operand sizes");
 637             break;
 638         case MERR_BADCPU:
 639             error(ERR_NONFATAL, "no instruction for this cpu level");
 640             break;
 641         case MERR_BADMODE:
 642             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 643                   bits);
 644             break;
 645         default:
 646             error(ERR_NONFATAL,
 647                   "invalid combination of opcode and operands");
 648             break;
 649         }
 650     }
 651     return 0;
 652 }
 653
 654 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 655                   insn * instruction, efunc error)
 656 {
 657     const struct itemplate *temp;
 658     enum match_result m;
 659
 660     errfunc = error;            /* to pass to other functions */
 661     cpu = cp;
 662
 663     if (instruction->opcode == I_none)
 664         return 0;
 665
 666     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 667         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 668         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 669         instruction->opcode == I_DY) {
 670         extop *e;
 671         int32_t isize, osize, wsize;
 672
 673         isize = 0;
 674         wsize = idata_bytes(instruction->opcode);
 675
 676         list_for_each(e, instruction->eops) {
 677             int32_t align;
 678
 679             osize = 0;
 680             if (e->type == EOT_DB_NUMBER) {
 681                 osize = 1;
 682                 warn_overflow_const(e->offset, wsize);
 683             } else if (e->type == EOT_DB_STRING ||
 684                        e->type == EOT_DB_STRING_FREE)
 685                 osize = e->stringlen;
 686
 687             align = (-osize) % wsize;
 688             if (align < 0)
 689                 align += wsize;
 690             isize += osize + align;
 691         }
 692         return isize * instruction->times;
 693     }
 694
 695     if (instruction->opcode == I_INCBIN) {
 696         const char *fname = instruction->eops->stringval;
 697         FILE *fp;
 698         int64_t val = 0;
 699         size_t len;
 700
 701         fp = fopen(fname, "rb");
 702         if (!fp)
 703             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 704                   fname);
 705         else if (fseek(fp, 0L, SEEK_END) < 0)
 706             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 707                   fname);
 708         else {
 709             len = ftell(fp);
 710             if (instruction->eops->next) {
 711                 len -= instruction->eops->next->offset;
 712                 if (instruction->eops->next->next &&
 713                     len > (size_t)instruction->eops->next->next->offset) {
 714                     len = (size_t)instruction->eops->next->next->offset;
 715                 }
 716             }
 717             val = instruction->times * len;
 718         }
 719         if (fp)
 720             fclose(fp);
 721         return val;
 722     }
 723
 724     /* Check to see if we need an address-size prefix */
 725     add_asp(instruction, bits);
 726
 727     m = find_match(&temp, instruction, segment, offset, bits);
 728     if (m == MOK_GOOD) {
 729         /* we've matched an instruction. */
 730         int64_t isize;
 731         int j;
 732
 733         isize = calcsize(segment, offset, bits, instruction, temp);
 734         if (isize < 0)
 735             return -1;
 736         for (j = 0; j < MAXPREFIX; j++) {
 737             switch (instruction->prefixes[j]) {
 738             case P_A16:
 739                 if (bits != 16)
 740                     isize++;
 741                 break;
 742             case P_A32:
 743                 if (bits != 32)
 744                     isize++;
 745                 break;
 746             case P_O16:
 747                 if (bits != 16)
 748                     isize++;
 749                 break;
 750             case P_O32:
 751                 if (bits == 16)
 752                     isize++;
 753                 break;
 754             case P_A64:
 755             case P_O64:
 756             case P_none:
 757                 break;
 758             default:
 759                 isize++;
 760                 break;
 761             }
 762         }
 763         return isize * instruction->times;
 764     } else {
 765         return -1;                  /* didn't match any instruction */
 766     }
 767 }
 768
 769 static void bad_hle_warn(const insn * ins, uint8_t hleok)
 770 {
 771     enum prefixes rep_pfx = ins->prefixes[PPS_REP];
 772     enum whatwarn { w_none, w_lock, w_inval } ww;
 773     static const enum whatwarn warn[2][4] =
 774     {
 775         { w_inval, w_inval, w_none, w_lock }, /* XACQUIRE */
 776         { w_inval, w_none,  w_none, w_lock }, /* XRELEASE */
 777     };
 778     unsigned int n;
 779
 780     n = (unsigned int)rep_pfx - P_XACQUIRE;
 781     if (n > 1)
 782         return;                 /* Not XACQUIRE/XRELEASE */
 783
 784     ww = warn[n][hleok];
 785     if (!is_class(MEMORY, ins->oprs[0].type))
 786         ww = w_inval;           /* HLE requires operand 0 to be memory */
 787
 788     switch (ww) {
 789     case w_none:
 790         break;
 791
 792     case w_lock:
 793         if (ins->prefixes[PPS_LOCK] != P_LOCK) {
 794             errfunc(ERR_WARNING | ERR_WARN_HLE | ERR_PASS2,
 795                     "%s with this instruction requires lock",
 796                     prefix_name(rep_pfx));
 797         }
 798         break;
 799
 800     case w_inval:
 801         errfunc(ERR_WARNING | ERR_WARN_HLE | ERR_PASS2,
 802                 "%s invalid with this instruction",
 803                 prefix_name(rep_pfx));
 804         break;
 805     }
 806 }
 807
 808 /* Common construct */
 809 #define case3(x) case (x): case (x)+1: case (x)+2
 810 #define case4(x) case3(x): case (x)+3
 811
 812 static int64_t calcsize(int32_t segment, int64_t offset, int bits,
 813                         insn * ins, const struct itemplate *temp)
 814 {
 815     const uint8_t *codes = temp->code;
 816     int64_t length = 0;
 817     uint8_t c;
 818     int rex_mask = ~0;
 819     int op1, op2;
 820     struct operand *opx;
 821     uint8_t opex = 0;
 822     enum ea_type eat;
 823     uint8_t hleok = 0;
 824     bool lockcheck = true;
 825
 826     ins->rex = 0;               /* Ensure REX is reset */
 827     eat = EA_SCALAR;            /* Expect a scalar EA */
 828
 829     if (ins->prefixes[PPS_OSIZE] == P_O64)
 830         ins->rex |= REX_W;
 831
 832     (void)segment;              /* Don't warn that this parameter is unused */
 833     (void)offset;               /* Don't warn that this parameter is unused */
 834
 835     while (*codes) {
 836         c = *codes++;
 837         op1 = (c & 3) + ((opex & 1) << 2);
 838         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
 839         opx = &ins->oprs[op1];
 840         opex = 0;               /* For the next iteration */
 841
 842         switch (c) {
 843         case4(01):
 844             codes += c, length += c;
 845             break;
 846
 847         case3(05):
 848             opex = c;
 849             break;
 850
 851         case4(010):
 852             ins->rex |=
 853                 op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
 854             codes++, length++;
 855             break;
 856
 857         case4(020):
 858         case4(024):
 859             length++;
 860             break;
 861
 862         case4(030):
 863             length += 2;
 864             break;
 865
 866         case4(034):
 867             if (opx->type & (BITS16 | BITS32 | BITS64))
 868                 length += (opx->type & BITS16) ? 2 : 4;
 869             else
 870                 length += (bits == 16) ? 2 : 4;
 871             break;
 872
 873         case4(040):
 874             length += 4;
 875             break;
 876
 877         case4(044):
 878             length += ins->addr_size >> 3;
 879             break;
 880
 881         case4(050):
 882             length++;
 883             break;
 884
 885         case4(054):
 886             length += 8; /* MOV reg64/imm */
 887             break;
 888
 889         case4(060):
 890             length += 2;
 891             break;
 892
 893         case4(064):
 894             if (opx->type & (BITS16 | BITS32 | BITS64))
 895                 length += (opx->type & BITS16) ? 2 : 4;
 896             else
 897                 length += (bits == 16) ? 2 : 4;
 898             break;
 899
 900         case4(070):
 901             length += 4;
 902             break;
 903
 904         case4(074):
 905             length += 2;
 906             break;
 907
 908         case 0172:
 909         case 0173:
 910             codes++;
 911             length++;
 912             break;
 913
 914         case4(0174):
 915             length++;
 916             break;
 917
 918         case4(0254):
 919             length += 4;
 920             break;
 921
 922         case4(0260):
 923             ins->rex |= REX_V;
 924             ins->vexreg = regval(opx);
 925             ins->vex_cm = *codes++;
 926             ins->vex_wlp = *codes++;
 927             break;
 928
 929         case 0270:
 930             ins->rex |= REX_V;
 931             ins->vexreg = 0;
 932             ins->vex_cm = *codes++;
 933             ins->vex_wlp = *codes++;
 934             break;
 935
 936         case3(0271):
 937             hleok = c & 3;
 938             break;
 939
 940         case4(0274):
 941             length++;
 942             break;
 943
 944         case4(0300):
 945             break;
 946
 947         case 0310:
 948             if (bits == 64)
 949                 return -1;
 950             length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
 951             break;
 952
 953         case 0311:
 954             length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
 955             break;
 956
 957         case 0312:
 958             break;
 959
 960         case 0313:
 961             if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
 962                 has_prefix(ins, PPS_ASIZE, P_A32))
 963                 return -1;
 964             break;
 965
 966         case4(0314):
 967             break;
 968
 969         case 0320:
 970         {
 971             enum prefixes pfx = ins->prefixes[PPS_OSIZE];
 972             if (pfx == P_O16)
 973                 break;
 974             if (pfx != P_none)
 975                 errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
 976             else
 977                 ins->prefixes[PPS_OSIZE] = P_O16;
 978             break;
 979         }
 980
 981         case 0321:
 982         {
 983             enum prefixes pfx = ins->prefixes[PPS_OSIZE];
 984             if (pfx == P_O32)
 985                 break;
 986             if (pfx != P_none)
 987                 errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
 988             else
 989                 ins->prefixes[PPS_OSIZE] = P_O32;
 990             break;
 991         }
 992
 993         case 0322:
 994             break;
 995
 996         case 0323:
 997             rex_mask &= ~REX_W;
 998             break;
 999
1000         case 0324:
1001             ins->rex |= REX_W;
1002             break;
1003
1004         case 0325:
1005             ins->rex |= REX_NH;
1006             break;
1007
1008         case 0326:
1009             break;
1010
1011         case 0330:
1012             codes++, length++;
1013             break;
1014
1015         case 0331:
1016             break;
1017
1018         case 0332:
1019         case 0333:
1020             length++;
1021             break;
1022
1023         case 0334:
1024             ins->rex |= REX_L;
1025             break;
1026
1027         case 0335:
1028             break;
1029
1030         case 0336:
1031             if (!ins->prefixes[PPS_REP])
1032                 ins->prefixes[PPS_REP] = P_REP;
1033             break;
1034
1035         case 0337:
1036             if (!ins->prefixes[PPS_REP])
1037                 ins->prefixes[PPS_REP] = P_REPNE;
1038             break;
1039
1040         case 0340:
1041             if (ins->oprs[0].segment != NO_SEG)
1042                 errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
1043                         " quantity of BSS space");
1044             else
1045                 length += ins->oprs[0].offset;
1046             break;
1047
1048         case 0341:
1049             if (!ins->prefixes[PPS_WAIT])
1050                 ins->prefixes[PPS_WAIT] = P_WAIT;
1051             break;
1052
1053         case4(0344):
1054             length++;
1055             break;
1056
1057         case 0360:
1058             break;
1059
1060         case3(0361):
1061             length++;
1062             break;
1063
1064         case 0364:
1065         case 0365:
1066             break;
1067
1068         case 0366:
1069         case 0367:
1070             length++;
1071             break;
1072
1073         case3(0370):
1074             break;
1075
1076         case 0373:
1077             length++;
1078             break;
1079
1080         case 0374:
1081             eat = EA_XMMVSIB;
1082             break;
1083
1084         case 0375:
1085             eat = EA_YMMVSIB;
1086             break;
1087
1088         case4(0100):
1089         case4(0110):
1090         case4(0120):
1091         case4(0130):
1092         case4(0200):
1093         case4(0204):
1094         case4(0210):
1095         case4(0214):
1096         case4(0220):
1097         case4(0224):
1098         case4(0230):
1099         case4(0234):
1100             {
1101                 ea ea_data;
1102                 int rfield;
1103                 opflags_t rflags;
1104                 struct operand *opy = &ins->oprs[op2];
1105
1106                 ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */
1107
1108                 if (c <= 0177) {
1109                     /* pick rfield from operand b (opx) */
1110                     rflags = regflag(opx);
1111                     rfield = nasm_regvals[opx->basereg];
1112                 } else {
1113                     rflags = 0;
1114                     rfield = c & 7;
1115                 }
1116                 if (process_ea(opy, &ea_data, bits,ins->addr_size,
1117                                rfield, rflags) != eat) {
1118                     errfunc(ERR_NONFATAL, "invalid effective address");
1119                     return -1;
1120                 } else {
1121                     ins->rex |= ea_data.rex;
1122                     length += ea_data.size;
1123                 }
1124             }
1125             break;
1126
1127         default:
1128             errfunc(ERR_PANIC, "internal instruction table corrupt"
1129                     ": instruction code \\%o (0x%02X) given", c, c);
1130             break;
1131         }
1132     }
1133
1134     ins->rex &= rex_mask;
1135
1136     if (ins->rex & REX_NH) {
1137         if (ins->rex & REX_H) {
1138             errfunc(ERR_NONFATAL, "instruction cannot use high registers");
1139             return -1;
1140         }
1141         ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
1142     }
1143
1144     if (ins->rex & REX_V) {
1145         int bad32 = REX_R|REX_W|REX_X|REX_B;
1146
1147         if (ins->rex & REX_H) {
1148             errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
1149             return -1;
1150         }
1151         switch (ins->vex_wlp & 060) {
1152         case 000:
1153         case 040:
1154             ins->rex &= ~REX_W;
1155             break;
1156         case 020:
1157             ins->rex |= REX_W;
1158             bad32 &= ~REX_W;
1159             break;
1160         case 060:
1161             /* Follow REX_W */
1162             break;
1163         }
1164
1165         if (bits != 64 && ((ins->rex & bad32) || ins->vexreg > 7)) {
1166             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1167             return -1;
1168         }
1169         if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
1170             length += 3;
1171         else
1172             length += 2;
1173     } else if (ins->rex & REX_REAL) {
1174         if (ins->rex & REX_H) {
1175             errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
1176             return -1;
1177         } else if (bits == 64) {
1178             length++;
1179         } else if ((ins->rex & REX_L) &&
1180                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
1181                    cpu >= IF_X86_64) {
1182             /* LOCK-as-REX.R */
1183             assert_no_prefix(ins, PPS_LOCK);
1184             lockcheck = false;  /* Already errored, no need for warning */
1185             length++;
1186         } else {
1187             errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
1188             return -1;
1189         }
1190     }
1191
1192     if (has_prefix(ins, PPS_LOCK, P_LOCK) && lockcheck &&
1193         (!(temp->flags & IF_LOCK) || !is_class(MEMORY, ins->oprs[0].type))) {
1194         errfunc(ERR_WARNING | ERR_WARN_LOCK | ERR_PASS2 ,
1195                 "instruction is not lockable");
1196     }
1197
1198     bad_hle_warn(ins, hleok);
1199
1200     return length;
1201 }
1202
1203 static inline unsigned int emit_rex(insn *ins, int32_t segment, int64_t offset, int bits)
1204 {
1205     if (bits == 64) {
1206         if ((ins->rex & REX_REAL) && !(ins->rex & REX_V)) {
1207             ins->rex = (ins->rex & REX_REAL) | REX_P;
1208             out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1209             ins->rex = 0;
1210             return 1;
1211         }
1212     }
1213
1214     return 0;
1215 }
1216
1217 static void gencode(int32_t segment, int64_t offset, int bits,
1218                     insn * ins, const struct itemplate *temp,
1219                     int64_t insn_end)
1220 {
1221     uint8_t c;
1222     uint8_t bytes[4];
1223     int64_t size;
1224     int64_t data;
1225     int op1, op2;
1226     struct operand *opx;
1227     const uint8_t *codes = temp->code;
1228     uint8_t opex = 0;
1229     enum ea_type eat = EA_SCALAR;
1230
1231     while (*codes) {
1232         c = *codes++;
1233         op1 = (c & 3) + ((opex & 1) << 2);
1234         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1235         opx = &ins->oprs[op1];
1236         opex = 0;                /* For the next iteration */
1237
1238         switch (c) {
1239         case 01:
1240         case 02:
1241         case 03:
1242         case 04:
1243             offset += emit_rex(ins, segment, offset, bits);
1244             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1245             codes += c;
1246             offset += c;
1247             break;
1248
1249         case 05:
1250         case 06:
1251         case 07:
1252             opex = c;
1253             break;
1254
1255         case4(010):
1256             offset += emit_rex(ins, segment, offset, bits);
1257             bytes[0] = *codes++ + (regval(opx) & 7);
1258             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1259             offset += 1;
1260             break;
1261
1262         case4(020):
1263             if (opx->offset < -256 || opx->offset > 255) {
1264                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1265                         "byte value exceeds bounds");
1266             }
1267             out_imm8(offset, segment, opx);
1268             offset += 1;
1269             break;
1270
1271         case4(024):
1272             if (opx->offset < 0 || opx->offset > 255)
1273                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1274                         "unsigned byte value exceeds bounds");
1275             out_imm8(offset, segment, opx);
1276             offset += 1;
1277             break;
1278
1279         case4(030):
1280             warn_overflow_opd(opx, 2);
1281             data = opx->offset;
1282             out(offset, segment, &data, OUT_ADDRESS, 2,
1283                 opx->segment, opx->wrt);
1284             offset += 2;
1285             break;
1286
1287         case4(034):
1288             if (opx->type & (BITS16 | BITS32))
1289                 size = (opx->type & BITS16) ? 2 : 4;
1290             else
1291                 size = (bits == 16) ? 2 : 4;
1292             warn_overflow_opd(opx, size);
1293             data = opx->offset;
1294             out(offset, segment, &data, OUT_ADDRESS, size,
1295                 opx->segment, opx->wrt);
1296             offset += size;
1297             break;
1298
1299         case4(040):
1300             warn_overflow_opd(opx, 4);
1301             data = opx->offset;
1302             out(offset, segment, &data, OUT_ADDRESS, 4,
1303                 opx->segment, opx->wrt);
1304             offset += 4;
1305             break;
1306
1307         case4(044):
1308             data = opx->offset;
1309             size = ins->addr_size >> 3;
1310             warn_overflow_opd(opx, size);
1311             out(offset, segment, &data, OUT_ADDRESS, size,
1312                 opx->segment, opx->wrt);
1313             offset += size;
1314             break;
1315
1316         case4(050):
1317             if (opx->segment != segment) {
1318                 data = opx->offset;
1319                 out(offset, segment, &data,
1320                     OUT_REL1ADR, insn_end - offset,
1321                     opx->segment, opx->wrt);
1322             } else {
1323                 data = opx->offset - insn_end;
1324                 if (data > 127 || data < -128)
1325                     errfunc(ERR_NONFATAL, "short jump is out of range");
1326                 out(offset, segment, &data,
1327                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1328             }
1329             offset += 1;
1330             break;
1331
1332         case4(054):
1333             data = (int64_t)opx->offset;
1334             out(offset, segment, &data, OUT_ADDRESS, 8,
1335                 opx->segment, opx->wrt);
1336             offset += 8;
1337             break;
1338
1339         case4(060):
1340             if (opx->segment != segment) {
1341                 data = opx->offset;
1342                 out(offset, segment, &data,
1343                     OUT_REL2ADR, insn_end - offset,
1344                     opx->segment, opx->wrt);
1345             } else {
1346                 data = opx->offset - insn_end;
1347                 out(offset, segment, &data,
1348                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1349             }
1350             offset += 2;
1351             break;
1352
1353         case4(064):
1354             if (opx->type & (BITS16 | BITS32 | BITS64))
1355                 size = (opx->type & BITS16) ? 2 : 4;
1356             else
1357                 size = (bits == 16) ? 2 : 4;
1358             if (opx->segment != segment) {
1359                 data = opx->offset;
1360                 out(offset, segment, &data,
1361                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1362                     insn_end - offset, opx->segment, opx->wrt);
1363             } else {
1364                 data = opx->offset - insn_end;
1365                 out(offset, segment, &data,
1366                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1367             }
1368             offset += size;
1369             break;
1370
1371         case4(070):
1372             if (opx->segment != segment) {
1373                 data = opx->offset;
1374                 out(offset, segment, &data,
1375                     OUT_REL4ADR, insn_end - offset,
1376                     opx->segment, opx->wrt);
1377             } else {
1378                 data = opx->offset - insn_end;
1379                 out(offset, segment, &data,
1380                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1381             }
1382             offset += 4;
1383             break;
1384
1385         case4(074):
1386             if (opx->segment == NO_SEG)
1387                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1388                         " relocatable");
1389             data = 0;
1390             out(offset, segment, &data, OUT_ADDRESS, 2,
1391                 outfmt->segbase(1 + opx->segment),
1392                 opx->wrt);
1393             offset += 2;
1394             break;
1395
1396         case 0172:
1397             c = *codes++;
1398             opx = &ins->oprs[c >> 3];
1399             bytes[0] = nasm_regvals[opx->basereg] << 4;
1400             opx = &ins->oprs[c & 7];
1401             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1402                 errfunc(ERR_NONFATAL,
1403                         "non-absolute expression not permitted as argument %d",
1404                         c & 7);
1405             } else {
1406                 if (opx->offset & ~15) {
1407                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1408                             "four-bit argument exceeds bounds");
1409                 }
1410                 bytes[0] |= opx->offset & 15;
1411             }
1412             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1413             offset++;
1414             break;
1415
1416         case 0173:
1417             c = *codes++;
1418             opx = &ins->oprs[c >> 4];
1419             bytes[0] = nasm_regvals[opx->basereg] << 4;
1420             bytes[0] |= c & 15;
1421             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1422             offset++;
1423             break;
1424
1425         case4(0174):
1426             bytes[0] = nasm_regvals[opx->basereg] << 4;
1427             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1428             offset++;
1429             break;
1430
1431         case4(0254):
1432             data = opx->offset;
1433             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1434                 (int32_t)data != (int64_t)data) {
1435                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1436                         "signed dword immediate exceeds bounds");
1437             }
1438             out(offset, segment, &data, OUT_ADDRESS, 4,
1439                 opx->segment, opx->wrt);
1440             offset += 4;
1441             break;
1442
1443         case4(0260):
1444         case 0270:
1445             codes += 2;
1446             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1447                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1448                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1449                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1450                     ((~ins->vexreg & 15)<< 3) | (ins->vex_wlp & 07);
1451                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1452                 offset += 3;
1453             } else {
1454                 bytes[0] = 0xc5;
1455                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1456                     ((~ins->vexreg & 15) << 3) | (ins->vex_wlp & 07);
1457                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1458                 offset += 2;
1459             }
1460             break;
1461
1462         case 0271:
1463         case 0272:
1464         case 0273:
1465             break;
1466
1467         case4(0274):
1468         {
1469             uint64_t uv, um;
1470             int s;
1471
1472             if (ins->rex & REX_W)
1473                 s = 64;
1474             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1475                 s = 16;
1476             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1477                 s = 32;
1478             else
1479                 s = bits;
1480
1481             um = (uint64_t)2 << (s-1);
1482             uv = opx->offset;
1483
1484             if (uv > 127 && uv < (uint64_t)-128 &&
1485                 (uv < um-128 || uv > um-1)) {
1486                 /* If this wasn't explicitly byte-sized, warn as though we
1487                  * had fallen through to the imm16/32/64 case.
1488                  */
1489                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1490                         "%s value exceeds bounds",
1491                         (opx->type & BITS8) ? "signed byte" :
1492                         s == 16 ? "word" :
1493                         s == 32 ? "dword" :
1494                         "signed dword");
1495             }
1496             if (opx->segment != NO_SEG) {
1497                 data = uv;
1498                 out(offset, segment, &data, OUT_ADDRESS, 1,
1499                     opx->segment, opx->wrt);
1500             } else {
1501                 bytes[0] = uv;
1502                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1503                     NO_SEG);
1504             }
1505             offset += 1;
1506             break;
1507         }
1508
1509         case4(0300):
1510             break;
1511
1512         case 0310:
1513             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1514                 *bytes = 0x67;
1515                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1516                 offset += 1;
1517             } else
1518                 offset += 0;
1519             break;
1520
1521         case 0311:
1522             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1523                 *bytes = 0x67;
1524                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1525                 offset += 1;
1526             } else
1527                 offset += 0;
1528             break;
1529
1530         case 0312:
1531             break;
1532
1533         case 0313:
1534             ins->rex = 0;
1535             break;
1536
1537         case4(0314):
1538             break;
1539
1540         case 0320:
1541         case 0321:
1542             break;
1543
1544         case 0322:
1545         case 0323:
1546             break;
1547
1548         case 0324:
1549             ins->rex |= REX_W;
1550             break;
1551
1552         case 0325:
1553             break;
1554
1555         case 0326:
1556             break;
1557
1558         case 0330:
1559             *bytes = *codes++ ^ get_cond_opcode(ins->condition);
1560             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1561             offset += 1;
1562             break;
1563
1564         case 0331:
1565             break;
1566
1567         case 0332:
1568         case 0333:
1569             *bytes = c - 0332 + 0xF2;
1570             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1571             offset += 1;
1572             break;
1573
1574         case 0334:
1575             if (ins->rex & REX_R) {
1576                 *bytes = 0xF0;
1577                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1578                 offset += 1;
1579             }
1580             ins->rex &= ~(REX_L|REX_R);
1581             break;
1582
1583         case 0335:
1584             break;
1585
1586         case 0336:
1587         case 0337:
1588             break;
1589
1590         case 0340:
1591             if (ins->oprs[0].segment != NO_SEG)
1592                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1593             else {
1594                 int64_t size = ins->oprs[0].offset;
1595                 if (size > 0)
1596                     out(offset, segment, NULL,
1597                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1598                 offset += size;
1599             }
1600             break;
1601
1602         case 0341:
1603             break;
1604
1605         case 0344:
1606         case 0345:
1607             bytes[0] = c & 1;
1608             switch (ins->oprs[0].basereg) {
1609             case R_CS:
1610                 bytes[0] += 0x0E;
1611                 break;
1612             case R_DS:
1613                 bytes[0] += 0x1E;
1614                 break;
1615             case R_ES:
1616                 bytes[0] += 0x06;
1617                 break;
1618             case R_SS:
1619                 bytes[0] += 0x16;
1620                 break;
1621             default:
1622                 errfunc(ERR_PANIC,
1623                         "bizarre 8086 segment register received");
1624             }
1625             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1626             offset++;
1627             break;
1628
1629         case 0346:
1630         case 0347:
1631             bytes[0] = c & 1;
1632             switch (ins->oprs[0].basereg) {
1633             case R_FS:
1634                 bytes[0] += 0xA0;
1635                 break;
1636             case R_GS:
1637                 bytes[0] += 0xA8;
1638                 break;
1639             default:
1640                 errfunc(ERR_PANIC,
1641                         "bizarre 386 segment register received");
1642             }
1643             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1644             offset++;
1645             break;
1646
1647         case 0360:
1648             break;
1649
1650         case 0361:
1651             bytes[0] = 0x66;
1652             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1653             offset += 1;
1654             break;
1655
1656         case 0362:
1657         case 0363:
1658             bytes[0] = c - 0362 + 0xf2;
1659             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1660             offset += 1;
1661             break;
1662
1663         case 0364:
1664         case 0365:
1665             break;
1666
1667         case 0366:
1668         case 0367:
1669             *bytes = c - 0366 + 0x66;
1670             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1671             offset += 1;
1672             break;
1673
1674         case 0370:
1675         case 0371:
1676             break;
1677
1678         case 0373:
1679             *bytes = bits == 16 ? 3 : 5;
1680             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1681             offset += 1;
1682             break;
1683
1684         case 0374:
1685             eat = EA_XMMVSIB;
1686             break;
1687
1688         case 0375:
1689             eat = EA_YMMVSIB;
1690             break;
1691
1692         case4(0100):
1693         case4(0110):
1694         case4(0120):
1695         case4(0130):
1696         case4(0200):
1697         case4(0204):
1698         case4(0210):
1699         case4(0214):
1700         case4(0220):
1701         case4(0224):
1702         case4(0230):
1703         case4(0234):
1704             {
1705                 ea ea_data;
1706                 int rfield;
1707                 opflags_t rflags;
1708                 uint8_t *p;
1709                 int32_t s;
1710                 struct operand *opy = &ins->oprs[op2];
1711
1712                 if (c <= 0177) {
1713                     /* pick rfield from operand b (opx) */
1714                     rflags = regflag(opx);
1715                     rfield = nasm_regvals[opx->basereg];
1716                 } else {
1717                     /* rfield is constant */
1718                     rflags = 0;
1719                     rfield = c & 7;
1720                 }
1721
1722                 if (process_ea(opy, &ea_data, bits, ins->addr_size,
1723                                rfield, rflags) != eat)
1724                     errfunc(ERR_NONFATAL, "invalid effective address");
1725
1726                 p = bytes;
1727                 *p++ = ea_data.modrm;
1728                 if (ea_data.sib_present)
1729                     *p++ = ea_data.sib;
1730
1731                 s = p - bytes;
1732                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1733
1734                 /*
1735                  * Make sure the address gets the right offset in case
1736                  * the line breaks in the .lst file (BR 1197827)
1737                  */
1738                 offset += s;
1739                 s = 0;
1740
1741                 switch (ea_data.bytes) {
1742                 case 0:
1743                     break;
1744                 case 1:
1745                 case 2:
1746                 case 4:
1747                 case 8:
1748                     data = opy->offset;
1749                     s += ea_data.bytes;
1750                     if (ea_data.rip) {
1751                         if (opy->segment == segment) {
1752                             data -= insn_end;
1753                             if (overflow_signed(data, ea_data.bytes))
1754                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1755                             out(offset, segment, &data, OUT_ADDRESS,
1756                                 ea_data.bytes, NO_SEG, NO_SEG);
1757                         } else {
1758                             /* overflow check in output/linker? */
1759                             out(offset, segment, &data,        OUT_REL4ADR,
1760                                 insn_end - offset, opy->segment, opy->wrt);
1761                         }
1762                     } else {
1763                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1764                             signed_bits(opy->offset, ins->addr_size) !=
1765                             signed_bits(opy->offset, ea_data.bytes * 8))
1766                             warn_overflow(ERR_PASS2, ea_data.bytes);
1767
1768                         out(offset, segment, &data, OUT_ADDRESS,
1769                             ea_data.bytes, opy->segment, opy->wrt);
1770                     }
1771                     break;
1772                 default:
1773                     /* Impossible! */
1774                     errfunc(ERR_PANIC,
1775                             "Invalid amount of bytes (%d) for offset?!",
1776                             ea_data.bytes);
1777                     break;
1778                 }
1779                 offset += s;
1780             }
1781             break;
1782
1783         default:
1784             errfunc(ERR_PANIC, "internal instruction table corrupt"
1785                     ": instruction code \\%o (0x%02X) given", c, c);
1786             break;
1787         }
1788     }
1789 }
1790
1791 static opflags_t regflag(const operand * o)
1792 {
1793     if (!is_register(o->basereg))
1794         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1795     return nasm_reg_flags[o->basereg];
1796 }
1797
1798 static int32_t regval(const operand * o)
1799 {
1800     if (!is_register(o->basereg))
1801         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1802     return nasm_regvals[o->basereg];
1803 }
1804
1805 static int op_rexflags(const operand * o, int mask)
1806 {
1807     opflags_t flags;
1808     int val;
1809
1810     if (!is_register(o->basereg))
1811         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1812
1813     flags = nasm_reg_flags[o->basereg];
1814     val = nasm_regvals[o->basereg];
1815
1816     return rexflags(val, flags, mask);
1817 }
1818
1819 static int rexflags(int val, opflags_t flags, int mask)
1820 {
1821     int rex = 0;
1822
1823     if (val >= 8)
1824         rex |= REX_B|REX_X|REX_R;
1825     if (flags & BITS64)
1826         rex |= REX_W;
1827     if (!(REG_HIGH & ~flags))                   /* AH, CH, DH, BH */
1828         rex |= REX_H;
1829     else if (!(REG8 & ~flags) && val >= 4)      /* SPL, BPL, SIL, DIL */
1830         rex |= REX_P;
1831
1832     return rex & mask;
1833 }
1834
1835 static enum match_result find_match(const struct itemplate **tempp,
1836                                     insn *instruction,
1837                                     int32_t segment, int64_t offset, int bits)
1838 {
1839     const struct itemplate *temp;
1840     enum match_result m, merr;
1841     opflags_t xsizeflags[MAX_OPERANDS];
1842     bool opsizemissing = false;
1843     int i;
1844
1845     for (i = 0; i < instruction->operands; i++)
1846         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
1847
1848     merr = MERR_INVALOP;
1849
1850     for (temp = nasm_instructions[instruction->opcode];
1851          temp->opcode != I_none; temp++) {
1852         m = matches(temp, instruction, bits);
1853         if (m == MOK_JUMP) {
1854             if (jmp_match(segment, offset, bits, instruction, temp))
1855                 m = MOK_GOOD;
1856             else
1857                 m = MERR_INVALOP;
1858         } else if (m == MERR_OPSIZEMISSING &&
1859                    (temp->flags & IF_SMASK) != IF_SX) {
1860             /*
1861              * Missing operand size and a candidate for fuzzy matching...
1862              */
1863             for (i = 0; i < temp->operands; i++) {
1864                 if ((temp->opd[i] & SAME_AS) == 0)
1865                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
1866             }
1867             opsizemissing = true;
1868         }
1869         if (m > merr)
1870             merr = m;
1871         if (merr == MOK_GOOD)
1872             goto done;
1873     }
1874
1875     /* No match, but see if we can get a fuzzy operand size match... */
1876     if (!opsizemissing)
1877         goto done;
1878
1879     for (i = 0; i < instruction->operands; i++) {
1880         /*
1881          * We ignore extrinsic operand sizes on registers, so we should
1882          * never try to fuzzy-match on them.  This also resolves the case
1883          * when we have e.g. "xmmrm128" in two different positions.
1884          */
1885         if (is_class(REGISTER, instruction->oprs[i].type))
1886             continue;
1887
1888         /* This tests if xsizeflags[i] has more than one bit set */
1889         if ((xsizeflags[i] & (xsizeflags[i]-1)))
1890             goto done;                /* No luck */
1891
1892         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
1893     }
1894
1895     /* Try matching again... */
1896     for (temp = nasm_instructions[instruction->opcode];
1897          temp->opcode != I_none; temp++) {
1898         m = matches(temp, instruction, bits);
1899         if (m == MOK_JUMP) {
1900             if (jmp_match(segment, offset, bits, instruction, temp))
1901                 m = MOK_GOOD;
1902             else
1903                 m = MERR_INVALOP;
1904         }
1905         if (m > merr)
1906             merr = m;
1907         if (merr == MOK_GOOD)
1908             goto done;
1909     }
1910
1911 done:
1912     *tempp = temp;
1913     return merr;
1914 }
1915
1916 static enum match_result matches(const struct itemplate *itemp,
1917                                  insn *instruction, int bits)
1918 {
1919     opflags_t size[MAX_OPERANDS], asize;
1920     bool opsizemissing = false;
1921     int i, oprs;
1922
1923     /*
1924      * Check the opcode
1925      */
1926     if (itemp->opcode != instruction->opcode)
1927         return MERR_INVALOP;
1928
1929     /*
1930      * Count the operands
1931      */
1932     if (itemp->operands != instruction->operands)
1933         return MERR_INVALOP;
1934
1935     /*
1936      * Is it legal?
1937      */
1938     if (!(optimizing > 0) && (itemp->flags & IF_OPT))
1939         return MERR_INVALOP;
1940
1941     /*
1942      * Check that no spurious colons or TOs are present
1943      */
1944     for (i = 0; i < itemp->operands; i++)
1945         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
1946             return MERR_INVALOP;
1947
1948     /*
1949      * Process size flags
1950      */
1951     switch (itemp->flags & IF_SMASK) {
1952     case IF_SB:
1953         asize = BITS8;
1954         break;
1955     case IF_SW:
1956         asize = BITS16;
1957         break;
1958     case IF_SD:
1959         asize = BITS32;
1960         break;
1961     case IF_SQ:
1962         asize = BITS64;
1963         break;
1964     case IF_SO:
1965         asize = BITS128;
1966         break;
1967     case IF_SY:
1968         asize = BITS256;
1969         break;
1970     case IF_SZ:
1971         switch (bits) {
1972         case 16:
1973             asize = BITS16;
1974             break;
1975         case 32:
1976             asize = BITS32;
1977             break;
1978         case 64:
1979             asize = BITS64;
1980             break;
1981         default:
1982             asize = 0;
1983             break;
1984         }
1985         break;
1986     default:
1987         asize = 0;
1988         break;
1989     }
1990
1991     if (itemp->flags & IF_ARMASK) {
1992         /* S- flags only apply to a specific operand */
1993         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
1994         memset(size, 0, sizeof size);
1995         size[i] = asize;
1996     } else {
1997         /* S- flags apply to all operands */
1998         for (i = 0; i < MAX_OPERANDS; i++)
1999             size[i] = asize;
2000     }
2001
2002     /*
2003      * Check that the operand flags all match up,
2004      * it's a bit tricky so lets be verbose:
2005      *
2006      * 1) Find out the size of operand. If instruction
2007      *    doesn't have one specified -- we're trying to
2008      *    guess it either from template (IF_S* flag) or
2009      *    from code bits.
2010      *
2011      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2012      *    (ie the same operand as was specified somewhere in template, and
2013      *    this referred operand index is being achieved via ~SAME_AS)
2014      *    we are to be sure that both registers (in template and instruction)
2015      *    do exactly match.
2016      *
2017      * 3) If template operand do not match the instruction OR
2018      *    template has an operand size specified AND this size differ
2019      *    from which instruction has (perhaps we got it from code bits)
2020      *    we are:
2021      *      a)  Check that only size of instruction and operand is differ
2022      *          other characteristics do match
2023      *      b)  Perhaps it's a register specified in instruction so
2024      *          for such a case we just mark that operand as "size
2025      *          missing" and this will turn on fuzzy operand size
2026      *          logic facility (handled by a caller)
2027      */
2028     for (i = 0; i < itemp->operands; i++) {
2029         opflags_t type = instruction->oprs[i].type;
2030         if (!(type & SIZE_MASK))
2031             type |= size[i];
2032
2033         if (itemp->opd[i] & SAME_AS) {
2034             int j = itemp->opd[i] & ~SAME_AS;
2035             if (type != instruction->oprs[j].type ||
2036                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2037                 return MERR_INVALOP;
2038         } else if (itemp->opd[i] & ~type & ~SIZE_MASK) {
2039             return MERR_INVALOP;
2040         } else if ((itemp->opd[i] & SIZE_MASK) &&
2041                    (itemp->opd[i] & SIZE_MASK) != (type & SIZE_MASK)) {
2042             if (type & SIZE_MASK) {
2043                 return MERR_INVALOP;
2044             } else if (!is_class(REGISTER, type)) {
2045                 /*
2046                  * Note: we don't honor extrinsic operand sizes for registers,
2047                  * so "missing operand size" for a register should be
2048                  * considered a wildcard match rather than an error.
2049                  */
2050                 opsizemissing = true;
2051             }
2052         }
2053     }
2054
2055     if (opsizemissing)
2056         return MERR_OPSIZEMISSING;
2057
2058     /*
2059      * Check operand sizes
2060      */
2061     if (itemp->flags & (IF_SM | IF_SM2)) {
2062         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2063         for (i = 0; i < oprs; i++) {
2064             asize = itemp->opd[i] & SIZE_MASK;
2065             if (asize) {
2066                 for (i = 0; i < oprs; i++)
2067                     size[i] = asize;
2068                 break;
2069             }
2070         }
2071     } else {
2072         oprs = itemp->operands;
2073     }
2074
2075     for (i = 0; i < itemp->operands; i++) {
2076         if (!(itemp->opd[i] & SIZE_MASK) &&
2077             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2078             return MERR_OPSIZEMISMATCH;
2079     }
2080
2081     /*
2082      * Check template is okay at the set cpu level
2083      */
2084     if (((itemp->flags & IF_PLEVEL) > cpu))
2085         return MERR_BADCPU;
2086
2087     /*
2088      * Verify the appropriate long mode flag.
2089      */
2090     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2091         return MERR_BADMODE;
2092
2093     /*
2094      * If we have a HLE prefix, look for the NOHLE flag
2095      */
2096     if ((itemp->flags & IF_NOHLE) &&
2097         (has_prefix(instruction, PPS_REP, P_XACQUIRE) ||
2098          has_prefix(instruction, PPS_REP, P_XRELEASE)))
2099         return MERR_BADHLE;
2100
2101     /*
2102      * Check if special handling needed for Jumps
2103      */
2104     if ((itemp->code[0] & ~1) == 0370)
2105         return MOK_JUMP;
2106
2107     return MOK_GOOD;
2108 }
2109
/*
 * process_ea: work out the ModRM/SIB encoding for an effective-address
 * operand.
 *
 *   input    - the operand to encode.  Its type is modified (IP_REL
 *              cleared, MEMORY set) when a segment-less absolute address
 *              was requested RIP-relative in 64-bit mode.
 *   output   - receives type, rip, modrm, sib_present, sib, bytes (the
 *              displacement length in bytes) and size.  REX flags are
 *              OR-ed into output->rex; this function never clears that
 *              field itself.
 *   bits     - current mode; only tested against 64 here.
 *   addrbits - address size in effect (compared against 16, 32 and 64).
 *   rfield   - value placed in the reg field of the ModRM byte.
 *   rflags   - register flags belonging to rfield; only used to derive
 *              REX flags.
 *
 * Returns output->type: EA_SCALAR, EA_XMMVSIB or EA_YMMVSIB on success,
 * EA_INVALID when the operand cannot be encoded.  On the error path
 * output->size is not computed.
 */
2110 static enum ea_type process_ea(operand *input, ea *output, int bits,
2111                                int addrbits, int rfield, opflags_t rflags)
2112 {
2113     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2114
2115     output->type    = EA_SCALAR;
2116     output->rip     = false;
2117
2118     /* REX flags for the rfield operand */
2119     output->rex     |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
2120
2121     if (is_class(REGISTER, input->type)) {
2122         /*
2123          * It's a direct register.
2124          */
2125         if (!is_register(input->basereg))
2126             goto err;
2127
2128         if (!is_class(REG_EA, regflag(input)))
2129             goto err;
2130
2131         output->rex         |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
2132         output->sib_present = false;    /* no SIB necessary */
2133         output->bytes       = 0;        /* no offset necessary either */
2134         output->modrm       = GEN_MODRM(3, rfield, nasm_regvals[input->basereg]);
2135     } else {
2136         /*
2137          * It's a memory reference.
2138          */
2139         if (input->basereg == -1 &&
2140             (input->indexreg == -1 || input->scale == 0)) {
2141             /*
2142              * It's a pure offset.
2143              */
2144             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2145                 input->segment == NO_SEG) {
2146                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2147                 input->type &= ~IP_REL;
2148                 input->type |= MEMORY;
2149             }
2150
2151             if (input->eaflags & EAF_BYTEOFFS ||
2152                 (input->eaflags & EAF_WORDOFFS &&
2153                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2154                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2155             }
2156
                 /*
                  * 64-bit mode without IP_REL: use a SIB byte with the
                  * "no index" (4) and "no base" (5) encodings and a 4-byte
                  * displacement.  Otherwise use the plain displacement-only
                  * ModRM form, which is flagged RIP-relative in 64-bit mode.
                  */
2157             if (bits == 64 && (~input->type & IP_REL)) {
2158                 output->sib_present = true;
2159                 output->sib         = GEN_SIB(0, 4, 5);
2160                 output->bytes       = 4;
2161                 output->modrm       = GEN_MODRM(0, rfield, 4);
2162                 output->rip         = false;
2163             } else {
2164                 output->sib_present = false;
2165                 output->bytes       = (addrbits != 16 ? 4 : 2);
2166                 output->modrm       = GEN_MODRM(0, rfield, (addrbits != 16 ? 5 : 6));
2167                 output->rip         = bits == 64;
2168             }
2169         } else {
2170             /*
2171              * It's an indirection.
2172              */
2173             int i = input->indexreg, b = input->basereg, s = input->scale;
2174             int32_t seg = input->segment;
2175             int hb = input->hintbase, ht = input->hinttype;
2176             int t, it, bt;              /* register numbers */
2177             opflags_t x, ix, bx;        /* register flags */
2178
2179             if (s == 0)
2180                 i = -1;         /* make this easy, at least */
2181
2182             if (is_register(i)) {
2183                 it = nasm_regvals[i];
2184                 ix = nasm_reg_flags[i];
2185             } else {
2186                 it = -1;
2187                 ix = 0;
2188             }
2189
2190             if (is_register(b)) {
2191                 bt = nasm_regvals[b];
2192                 bx = nasm_reg_flags[b];
2193             } else {
2194                 bt = -1;
2195                 bx = 0;
2196             }
2197
2198             /* if either one are a vector register... */
2199             if ((ix|bx) & (XMMREG|YMMREG) & ~REG_EA) {
2200                 opflags_t sok = BITS32 | BITS64;
2201                 int32_t o = input->offset;
2202                 int mod, scale, index, base;
2203
2204                 /*
2205                  * For a vector SIB, one has to be a vector and the other,
2206                  * if present, a GPR.  The vector must be the index operand.
2207                  */
2208                 if (it == -1 || (bx & (XMMREG|YMMREG) & ~REG_EA)) {
2209                     if (s == 0)
2210                         s = 1;
2211                     else if (s != 1)
2212                         goto err;
2213
2214                     t = bt, bt = it, it = t;
2215                     x = bx, bx = ix, ix = x;
2216                 }
2217
2218                 if (bt != -1) {
2219                     if (REG_GPR & ~bx)
2220                         goto err;
2221                     if (!(REG64 & ~bx) || !(REG32 & ~bx))
2222                         sok &= bx;
2223                     else
2224                         goto err;
2225                 }
2226
2227                 /*
2228                  * While we're here, ensure the user didn't specify
2229                  * WORD or QWORD
2230                  */
2231                 if (input->disp_size == 16 || input->disp_size == 64)
2232                     goto err;
2233
2234                 if (addrbits == 16 ||
2235                     (addrbits == 32 && !(sok & BITS32)) ||
2236                     (addrbits == 64 && !(sok & BITS64)))
2237                     goto err;
2238
2239                 output->type = (ix & YMMREG & ~REG_EA)
2240                     ? EA_YMMVSIB : EA_XMMVSIB;
2241
2242                 output->rex |= rexflags(it, ix, REX_X);
2243                 output->rex |= rexflags(bt, bx, REX_B);
2244
2245                 index = it & 7; /* it is known to be != -1 */
2246
2247                 switch (s) {
2248                 case 1:
2249                     scale = 0;
2250                     break;
2251                 case 2:
2252                     scale = 1;
2253                     break;
2254                 case 4:
2255                     scale = 2;
2256                     break;
2257                 case 8:
2258                     scale = 3;
2259                     break;
2260                 default:   /* then what the smeg is it? */
2261                     goto err;    /* panic */
2262                 }
2263
2264                 if (bt == -1) {
2265                     base = 5;
2266                     mod = 0;
2267                 } else {
2268                     base = (bt & 7);
2269                     if (base != REG_NUM_EBP && o == 0 &&
2270                         seg == NO_SEG && !forw_ref &&
2271                         !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2272                         mod = 0;
2273                     else if (input->eaflags & EAF_BYTEOFFS ||
2274                              (o >= -128 && o <= 127 &&
2275                               seg == NO_SEG && !forw_ref &&
2276                               !(input->eaflags & EAF_WORDOFFS)))
2277                         mod = 1;
2278                     else
2279                         mod = 2;
2280                 }
2281
2282                 output->sib_present = true;
2283                 output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2284                 output->modrm       = GEN_MODRM(mod, rfield, 4);
2285                 output->sib         = GEN_SIB(scale, index, base);
2286             } else if ((ix|bx) & (BITS32|BITS64)) {
2287                 /*
2288                  * it must be a 32/64-bit memory reference. Firstly we have
2289                  * to check that all registers involved are type E/Rxx.
2290                  */
2291                 opflags_t sok = BITS32 | BITS64;
2292                 int32_t o = input->offset;
2293
2294                 if (it != -1) {
2295                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2296                         sok &= ix;
2297                     else
2298                         goto err;
2299                 }
2300
2301                 if (bt != -1) {
2302                     if (REG_GPR & ~bx)
2303                         goto err; /* Invalid register */
2304                     if (~sok & bx & SIZE_MASK)
2305                         goto err; /* Invalid size */
2306                     sok &= bx;
2307                 }
2308
2309                 /*
2310                  * While we're here, ensure the user didn't specify
2311                  * WORD or QWORD
2312                  */
2313                 if (input->disp_size == 16 || input->disp_size == 64)
2314                     goto err;
2315
2316                 if (addrbits == 16 ||
2317                     (addrbits == 32 && !(sok & BITS32)) ||
2318                     (addrbits == 64 && !(sok & BITS64)))
2319                     goto err;
2320
2321                 /* now reorganize base/index */
2322                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2323                     ((hb == b && ht == EAH_NOTBASE) ||
2324                      (hb == i && ht == EAH_MAKEBASE))) {
2325                     /* swap if hints say so */
2326                     t = bt, bt = it, it = t;
2327                     x = bx, bx = ix, ix = x;
2328                 }
2329                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2330                     bt = -1, bx = 0, s++;
2331                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2332                     /* make single reg base, unless hint */
2333                     bt = it, bx = ix, it = -1, ix = 0;
2334                 }
2335                 if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
2336                       s == 3 || s == 5 || s == 9) && bt == -1)
2337                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2338                 if (it == -1 && (bt & 7) != REG_NUM_ESP &&
2339                     (input->eaflags & EAF_TIMESTWO))
2340                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2341                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2342                 if (s == 1 && it == REG_NUM_ESP) {
2343                     /* swap ESP into base if scale is 1 */
2344                     t = it, it = bt, bt = t;
2345                     x = ix, ix = bx, bx = x;
2346                 }
2347                 if (it == REG_NUM_ESP ||
2348                     (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2349                     goto err;        /* wrong, for various reasons */
2350
2351                 output->rex |= rexflags(it, ix, REX_X);
2352                 output->rex |= rexflags(bt, bx, REX_B);
2353
2354                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2355                     /* no SIB needed */
2356                     int mod, rm;
2357
2358                     if (bt == -1) {
2359                         rm = 5;
2360                         mod = 0;
2361                     } else {
2362                         rm = (bt & 7);
2363                         if (rm != REG_NUM_EBP && o == 0 &&
2364                             seg == NO_SEG && !forw_ref &&
2365                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2366                             mod = 0;
2367                         else if (input->eaflags & EAF_BYTEOFFS ||
2368                                  (o >= -128 && o <= 127 &&
2369                                   seg == NO_SEG && !forw_ref &&
2370                                   !(input->eaflags & EAF_WORDOFFS)))
2371                             mod = 1;
2372                         else
2373                             mod = 2;
2374                     }
2375
2376                     output->sib_present = false;
2377                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2378                     output->modrm       = GEN_MODRM(mod, rfield, rm);
2379                 } else {
2380                     /* we need a SIB */
2381                     int mod, scale, index, base;
2382
2383                     if (it == -1)
2384                         index = 4, s = 1;
2385                     else
2386                         index = (it & 7);
2387
2388                     switch (s) {
2389                     case 1:
2390                         scale = 0;
2391                         break;
2392                     case 2:
2393                         scale = 1;
2394                         break;
2395                     case 4:
2396                         scale = 2;
2397                         break;
2398                     case 8:
2399                         scale = 3;
2400                         break;
2401                     default:   /* then what the smeg is it? */
2402                         goto err;    /* panic */
2403                     }
2404
2405                     if (bt == -1) {
2406                         base = 5;
2407                         mod = 0;
2408                     } else {
2409                         base = (bt & 7);
2410                         if (base != REG_NUM_EBP && o == 0 &&
2411                             seg == NO_SEG && !forw_ref &&
2412                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2413                             mod = 0;
2414                         else if (input->eaflags & EAF_BYTEOFFS ||
2415                                  (o >= -128 && o <= 127 &&
2416                                   seg == NO_SEG && !forw_ref &&
2417                                   !(input->eaflags & EAF_WORDOFFS)))
2418                             mod = 1;
2419                         else
2420                             mod = 2;
2421                     }
2422
2423                     output->sib_present = true;
2424                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2425                     output->modrm       = GEN_MODRM(mod, rfield, 4);
2426                     output->sib         = GEN_SIB(scale, index, base);
2427                 }
2428             } else {            /* it's 16-bit */
2429                 int mod, rm;
2430                 int16_t o = input->offset;
2431
2432                 /* check for 64-bit long mode */
2433                 if (addrbits == 64)
2434                     goto err;
2435
2436                 /* check all registers are BX, BP, SI or DI */
2437                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
2438                     (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
2439                     goto err;
2440
2441                 /* ensure the user didn't specify DWORD/QWORD */
2442                 if (input->disp_size == 32 || input->disp_size == 64)
2443                     goto err;
2444
2445                 if (s != 1 && i != -1)
2446                     goto err;        /* no can do, in 16-bit EA */
2447                 if (b == -1 && i != -1) {
2448                     int tmp = b;
2449                     b = i;
2450                     i = tmp;
2451                 }               /* swap */
2452                 if ((b == R_SI || b == R_DI) && i != -1) {
2453                     int tmp = b;
2454                     b = i;
2455                     i = tmp;
2456                 }
2457                 /* have BX/BP as base, SI/DI index */
2458                 if (b == i)
2459                     goto err;        /* shouldn't ever happen, in theory */
2460                 if (i != -1 && b != -1 &&
2461                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2462                     goto err;        /* invalid combinations */
2463                 if (b == -1)            /* pure offset: handled above */
2464                     goto err;        /* so if it gets to here, panic! */
2465
2466                 rm = -1;
2467                 if (i != -1)
2468                     switch (i * 256 + b) {
2469                     case R_SI * 256 + R_BX:
2470                         rm = 0;
2471                         break;
2472                     case R_DI * 256 + R_BX:
2473                         rm = 1;
2474                         break;
2475                     case R_SI * 256 + R_BP:
2476                         rm = 2;
2477                         break;
2478                     case R_DI * 256 + R_BP:
2479                         rm = 3;
2480                         break;
2481                 } else
2482                     switch (b) {
2483                     case R_SI:
2484                         rm = 4;
2485                         break;
2486                     case R_DI:
2487                         rm = 5;
2488                         break;
2489                     case R_BP:
2490                         rm = 6;
2491                         break;
2492                     case R_BX:
2493                         rm = 7;
2494                         break;
2495                     }
2496                 if (rm == -1)           /* can't happen, in theory */
2497                     goto err;        /* so panic if it does */
2498
2499                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2500                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2501                     mod = 0;
2502                 else if (input->eaflags & EAF_BYTEOFFS ||
2503                          (o >= -128 && o <= 127 && seg == NO_SEG &&
2504                           !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
2505                     mod = 1;
2506                 else
2507                     mod = 2;
2508
2509                 output->sib_present = false;    /* no SIB - it's 16-bit */
2510                 output->bytes       = mod;      /* bytes of offset needed */
2511                 output->modrm       = GEN_MODRM(mod, rfield, rm);
2512             }
2513         }
2514     }
2515
         /* Total length: the ModRM byte, the optional SIB byte, the displacement */
2516     output->size = 1 + output->sib_present + output->bytes;
2517     return output->type;
2518
         /* Every rejected operand form lands here; output->size is left untouched */
2519 err:
2520     return output->type = EA_INVALID;
2521 }
2522
2523 static void add_asp(insn *ins, int addrbits)
2524 {
2525     int j, valid;
2526     int defdisp;
2527
2528     valid = (addrbits == 64) ? 64|32 : 32|16;
2529
2530     switch (ins->prefixes[PPS_ASIZE]) {
2531     case P_A16:
2532         valid &= 16;
2533         break;
2534     case P_A32:
2535         valid &= 32;
2536         break;
2537     case P_A64:
2538         valid &= 64;
2539         break;
2540     case P_ASP:
2541         valid &= (addrbits == 32) ? 16 : 32;
2542         break;
2543     default:
2544         break;
2545     }
2546
2547     for (j = 0; j < ins->operands; j++) {
2548         if (is_class(MEMORY, ins->oprs[j].type)) {
2549             opflags_t i, b;
2550
2551             /* Verify as Register */
2552             if (!is_register(ins->oprs[j].indexreg))
2553                 i = 0;
2554             else
2555                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2556
2557             /* Verify as Register */
2558             if (!is_register(ins->oprs[j].basereg))
2559                 b = 0;
2560             else
2561                 b = nasm_reg_flags[ins->oprs[j].basereg];
2562
2563             if (ins->oprs[j].scale == 0)
2564                 i = 0;
2565
2566             if (!i && !b) {
2567                 int ds = ins->oprs[j].disp_size;
2568                 if ((addrbits != 64 && ds > 8) ||
2569                     (addrbits == 64 && ds == 16))
2570                     valid &= ds;
2571             } else {
2572                 if (!(REG16 & ~b))
2573                     valid &= 16;
2574                 if (!(REG32 & ~b))
2575                     valid &= 32;
2576                 if (!(REG64 & ~b))
2577                     valid &= 64;
2578
2579                 if (!(REG16 & ~i))
2580                     valid &= 16;
2581                 if (!(REG32 & ~i))
2582                     valid &= 32;
2583                 if (!(REG64 & ~i))
2584                     valid &= 64;
2585             }
2586         }
2587     }
2588
2589     if (valid & addrbits) {
2590         ins->addr_size = addrbits;
2591     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2592         /* Add an address size prefix */
2593         ins->prefixes[PPS_ASIZE] = (addrbits == 32) ? P_A16 : P_A32;;
2594         ins->addr_size = (addrbits == 32) ? 16 : 32;
2595     } else {
2596         /* Impossible... */
2597         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2598         ins->addr_size = addrbits; /* Error recovery */
2599     }
2600
2601     defdisp = ins->addr_size == 16 ? 16 : 32;
2602
2603     for (j = 0; j < ins->operands; j++) {
2604         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2605             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
2606             /*
2607              * mem_offs sizes must match the address size; if not,
2608              * strip the MEM_OFFS bit and match only EA instructions
2609              */
2610             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2611         }
2612     }
2613 }