1 /* assemble.c code generation for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * the actual codes (C syntax, i.e. octal):
9 * \0 - terminates the code. (Unless it's a literal of course.)
10 * \1, \2, \3 - that many literal bytes follow in the code stream
11 * \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS
12 * (POP is never used for CS) depending on operand 0
13 * \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending on operand 0
15 * \10, \11, \12 - a literal byte follows in the code stream, to be added
16 * to the register value of operand 0, 1 or 2
17 * \17 - encodes the literal byte 0. (Some compilers don't take
18 * kindly to a zero byte in the _middle_ of a compile time
19 * string constant, so I had to put this hack in.)
20 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
21 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
22 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
23 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
24 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
25 * assembly mode or the address-size override on the operand
26 * \37 - a word constant, from the _segment_ part of operand 0
27 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
28 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
29 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
30 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
31 * assembly mode or the address-size override on the operand
32 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
33 * \1ab - a ModRM, calculated on EA in operand a, with the spare
34 * field the register value of operand b.
35 * \130,\131,\132 - an immediate word or signed byte for operand 0, 1, or 2
36 * \133,\134,\135 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2
37 * is a signed byte rather than a word.
38 * \140,\141,\142 - an immediate dword or signed byte for operand 0, 1, or 2
39 * \143,\144,\145 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2
40 * is a signed byte rather than a dword.
41 * \2ab - a ModRM, calculated on EA in operand a, with the spare
42 * field equal to digit b.
43 * \30x - might be an 0x67 byte, depending on the address size of
44 * the memory reference in operand x.
45 * \310 - indicates fixed 16-bit address size, i.e. optional 0x67.
46 * \311 - indicates fixed 32-bit address size, i.e. optional 0x67.
47 * \312 - (disassembler only) marker on LOOP, LOOPxx instructions.
48 * \320 - indicates fixed 16-bit operand size, i.e. optional 0x66.
49 * \321 - indicates fixed 32-bit operand size, i.e. optional 0x66.
50 * \322 - indicates that this instruction is only valid when the
51 * operand size is the default (instruction to disassembler,
52 * generates no code in the assembler)
53 * \330 - a literal byte follows in the code stream, to be added
54 * to the condition code value of the instruction.
55 * \331 - instruction not valid with REP prefix. Hint for
56 * disassembler only; for SSE instructions.
57 * \332 - disassemble a rep (0xF3 byte) prefix as repe not rep.
58 * \333 - REP prefix (0xF3 byte); for SSE instructions. Not encoded
59 * as a literal byte in order to aid the disassembler.
60 * \340 - reserve <operand 0> bytes of uninitialised storage.
61 * Operand 0 had better be a segmentless constant.
62 * \370,\371,\372 - match only if operand 0 meets byte jump criteria.
63 * 370 is used for Jcc, 371 is used for JMP.
64 * \373 - assemble 0x03 if bits==16, 0x05 if bits==32;
65 * used for conditional jump over longer jump
76 extern struct itemplate *nasm_instructions[];
79 int sib_present; /* is a SIB byte necessary? */
80 int bytes; /* # of bytes of offset needed */
81 int size; /* lazy - this is sib+bytes+1 */
82 unsigned char modrm, sib; /* the bytes themselves */
/*
 * The field lines above (sib_present, bytes, size, modrm, sib) are the
 * members that process_ea() fills in through its `output` pointer.
 * NOTE(review): the lines that open and close the enclosing type are not
 * visible in this listing; confirm against the full file.
 *
 * File-scope state and forward declarations follow:
 *   cpu     - CPU level; compared against the template flags in matches().
 *   outfmt  - output-format driver; assigned in assemble() and used by out().
 * The prototypes declare the static helpers defined later in this file.
 */
85 static unsigned long cpu; /* cpu level received from nasm.c */
87 static struct ofmt *outfmt;
90 static long calcsize (long, long, int, insn *, char *);
91 static void gencode (long, long, int, insn *, char *, long);
92 static int regval (operand *o);
93 static int matches (struct itemplate *, insn *);
94 static ea * process_ea (operand *, ea *, int, int, int);
95 static int chsize (operand *, int);
98 * This routine wraps the real output format's output routine,
99 * in order to pass a copy of the data off to the listing file
100 * generator at the same time.
/*
 * out: send one chunk of output to the listing generator and then to the
 * real output format.
 *
 *   offset   - position passed to list->output for the listing file.
 *   segto    - destination segment, passed to outfmt->output and to the
 *              debug format linenum hook.
 *   data     - payload; for OUT_ADDRESS it is read through a (long *) cast,
 *              so callers must pass the address of a long in that case.
 *   type     - OUT_* kind combined with a size; split with OUT_TYPMASK and
 *              OUT_SIZMASK.
 *   segment, wrt - relocation information; both NO_SEG means not relocated.
 *
 * Listing behaviour by type:
 *   OUT_ADDRESS, relocated      - listed unchanged as OUT_ADDRESS.
 *   OUT_ADDRESS, not relocated  - the value is serialised into a local
 *                                 4-byte buffer with WRITELONG or WRITESHORT
 *                                 and listed as OUT_RAWDATA+4 or +2.
 *   OUT_RAWDATA                 - listed unchanged.
 *   OUT_RESERVE                 - listed with a NULL data pointer.
 *   OUT_REL2ADR / OUT_REL4ADR   - listed unchanged.
 *
 * When src_get() returns non-zero the debug format linenum hook is called
 * with the file name and line number it produced; lnfname is released with
 * nasm_free when it is non-NULL.  The last visible statement hands the data
 * to outfmt->output.
 * NOTE(review): several structural lines (braces, else, the lineno
 * declaration) are missing from this listing; confirm nesting in the full
 * file.
 */
102 static void out (long offset, long segto, void *data, unsigned long type,
103 long segment, long wrt)
106 char *lnfname = NULL;
108 if ((type & OUT_TYPMASK) == OUT_ADDRESS) {
109 if (segment != NO_SEG || wrt != NO_SEG) {
111 * This address is relocated. We must write it as
112 * OUT_ADDRESS, so there's no work to be done here.
114 list->output (offset, data, type);
117 unsigned char p[4], *q = p;
119 * This is a non-relocated address, and we're going to
120 * convert it into RAWDATA format.
122 if ((type & OUT_SIZMASK) == 4) {
123 WRITELONG (q, * (long *) data);
124 list->output (offset, p, OUT_RAWDATA+4);
127 WRITESHORT (q, * (long *) data);
128 list->output (offset, p, OUT_RAWDATA+2);
132 else if ((type & OUT_TYPMASK) == OUT_RAWDATA) {
133 list->output (offset, data, type);
135 else if ((type & OUT_TYPMASK) == OUT_RESERVE) {
136 list->output (offset, NULL, type);
138 else if ((type & OUT_TYPMASK) == OUT_REL2ADR ||
139 (type & OUT_TYPMASK) == OUT_REL4ADR) {
140 list->output (offset, data, type);
143 if (src_get(&lineno,&lnfname))
145 outfmt->current_dfmt->linenum(lnfname,lineno,segto);
146 if (lnfname) nasm_free(lnfname);
149 outfmt->output (segto, data, type, segment, wrt);
/*
 * jmp_match: decide whether a jump template may use its byte-sized
 * (short) form for this instruction.
 *
 *   segment, offset - location of the instruction being assembled.
 *   bits            - assembly mode, forwarded to calcsize().
 *   ins             - the instruction; only operand 0 is inspected.
 *   code            - template code string; only code[0] selects behaviour.
 *
 * Returns 0 at once unless code[0] is \370 or \371.
 * If operand 0 carries OPFLAG_FORWARD: returns 1 when optimizing < 0 and
 * the code is \370, otherwise returns (pass0 == 0).
 * Otherwise the instruction size is taken from calcsize(); a target in a
 * different segment yields 0; the displacement is computed as
 * target offset - offset - size, and 1 is returned when it lies in
 * -128..127.
 * NOTE(review): the final return for an out-of-range displacement is not
 * visible in this listing (presumably 0); confirm in the full file.
 */
152 static int jmp_match (long segment, long offset, int bits,
153 insn *ins, char *code)
155 unsigned char c = code[0];
158 if (c != 0370 && c != 0371) return 0;
159 if (ins->oprs[0].opflags & OPFLAG_FORWARD) {
160 if (optimizing<0 && c==0370) return 1;
161 else return (pass0==0); /* match a forward reference */
163 isize = calcsize (segment, offset, bits, ins, code);
164 if (ins->oprs[0].segment != segment) return 0;
165 isize = ins->oprs[0].offset - offset - isize; /* isize is now the delta */
166 if (isize >= -128L && isize <= 127L) return 1; /* it is byte size */
/*
 * assemble: generate the output bytes for one parsed source line and
 * return how many bytes were produced.
 *
 *   segment, offset - where the output goes; offset advances as bytes are
 *                     written and the data/instruction paths return
 *                     offset - start.
 *   bits            - assembly mode, forwarded to the helpers.
 *   cp              - CPU level (NOTE(review): its use is not visible in
 *                     this listing; presumably stored in the file-scope
 *                     cpu variable).
 *   instruction     - the parsed instruction, including TIMES count,
 *                     prefixes, operands and extended operands (eops).
 *   output          - output-format driver, stored in outfmt.
 *   error           - error callback, stored in errfunc.
 *   (a further parameter, used below as listgen and stored in list, is on a
 *   line missing from this listing.)
 *
 * Three paths are visible:
 *
 * 1. DB/DW/DD/DQ/DT (wsize 1, 2, 4, 8, 10).  For every TIMES repetition
 *    each extended operand is emitted: numbers go out as one raw byte when
 *    the element is a byte (a relocated byte is reported as an error) or
 *    as OUT_ADDRESS+wsize; numbers given to DQ/DT (wsize > 5) are reported
 *    as an error.  Strings are written raw and then zero-padded up to a
 *    multiple of wsize.  A negative TIMES count raises ERR_PANIC.
 *
 * 2. INCBIN.  The file name is copied into a static buffer, truncated to
 *    FILENAME_MAX-1 characters.  Open and seek failures are reported as
 *    non-fatal errors and the function returns 0.  An optional second
 *    operand supplies the start offset (base) and an optional third caps
 *    the length.  The file is streamed through a static 2048-byte buffer
 *    and the function returns times * len.
 *    NOTE(review): `l>sizeof(buf)` compares a value that appears to be a
 *    signed long with a size_t; fname and buf are static, so this path is
 *    not reentrant.
 *
 * 3. Ordinary instructions.  Templates from nasm_instructions[opcode] are
 *    tried in order until one with opcode -1 is reached.  matches() yields
 *    a score; the result of jmp_match() is added for jump templates (the
 *    guarding condition is on a missing line), and a score of 100 means
 *    the template matches.  For a match, each TIMES repetition emits the
 *    prefix bytes (segment overrides 0x2E/0x3E/0x26/0x64/0x65/0x36 are
 *    visible) and then calls gencode() with insn_end = offset + insn_size.
 *    A negative size from calcsize() raises ERR_PANIC.
 *    Non-matching scores are remembered in size_prob and turned into the
 *    messages for 1 (size not specified), 2 (size mismatch), 3 (cpu level)
 *    or the generic invalid-combination error.
 */
172 long assemble (long segment, long offset, int bits, unsigned long cp,
173 insn *instruction, struct ofmt *output, efunc error,
176 struct itemplate *temp;
182 long wsize = 0; /* size for DB etc. */
184 errfunc = error; /* to pass to other functions */
186 outfmt = output; /* likewise */
187 list = listgen; /* and again */
189 switch (instruction->opcode)
192 case I_DB: wsize = 1; break;
193 case I_DW: wsize = 2; break;
194 case I_DD: wsize = 4; break;
195 case I_DQ: wsize = 8; break;
196 case I_DT: wsize = 10; break;
201 long t = instruction->times;
203 errfunc(ERR_PANIC, "instruction->times < 0 (%ld) in assemble()",t);
205 while (t--) /* repeat TIMES times */
207 for (e = instruction->eops; e; e = e->next)
209 if (e->type == EOT_DB_NUMBER)
212 if (e->segment != NO_SEG)
213 errfunc (ERR_NONFATAL,
214 "one-byte relocation attempted");
216 unsigned char out_byte = e->offset;
217 out (offset, segment, &out_byte, OUT_RAWDATA+1,
221 else if (wsize > 5) {
222 errfunc (ERR_NONFATAL, "integer supplied to a D%c"
223 " instruction", wsize==8 ? 'Q' : 'T');
226 out (offset, segment, &e->offset,
227 OUT_ADDRESS+wsize, e->segment,
231 else if (e->type == EOT_DB_STRING)
235 out (offset, segment, e->stringval,
236 OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG);
237 align = e->stringlen % wsize;
240 align = wsize - align;
241 out (offset, segment, "\0\0\0\0\0\0\0\0",
242 OUT_RAWDATA+align, NO_SEG, NO_SEG);
244 offset += e->stringlen + align;
247 if (t > 0 && t == instruction->times-1)
250 * Dummy call to list->output to give the offset to the
253 list->output (offset, NULL, OUT_RAWDATA);
254 list->uplevel (LIST_TIMES);
257 if (instruction->times > 1)
258 list->downlevel (LIST_TIMES);
259 return offset - start;
262 if (instruction->opcode == I_INCBIN)
264 static char fname[FILENAME_MAX];
268 len = FILENAME_MAX-1;
269 if (len > instruction->eops->stringlen)
270 len = instruction->eops->stringlen;
271 strncpy (fname, instruction->eops->stringval, len);
274 if ( (fp = fopen(fname, "rb")) == NULL)
275 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
276 else if (fseek(fp, 0L, SEEK_END) < 0)
277 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
281 static char buf[2048];
282 long t = instruction->times;
286 if (instruction->eops->next) {
287 base = instruction->eops->next->offset;
289 if (instruction->eops->next->next &&
290 len > instruction->eops->next->next->offset)
291 len = instruction->eops->next->next->offset;
294 * Dummy call to list->output to give the offset to the
297 list->output (offset, NULL, OUT_RAWDATA);
298 list->uplevel(LIST_INCBIN);
303 fseek (fp, base, SEEK_SET);
306 long m = fread (buf, 1, (l>sizeof(buf)?sizeof(buf):l),
310 * This shouldn't happen unless the file
311 * actually changes while we are reading
314 error (ERR_NONFATAL, "`incbin': unexpected EOF while"
315 " reading file `%s'", fname);
316 t=0; /* Try to exit cleanly */
319 out (offset, segment, buf, OUT_RAWDATA+m,
324 list->downlevel(LIST_INCBIN);
325 if (instruction->times > 1) {
327 * Dummy call to list->output to give the offset to the
330 list->output (offset, NULL, OUT_RAWDATA);
331 list->uplevel(LIST_TIMES);
332 list->downlevel(LIST_TIMES);
335 return instruction->times * len;
337 return 0; /* if we're here, there's an error */
341 temp = nasm_instructions[instruction->opcode];
342 while (temp->opcode != -1) {
343 int m = matches (temp, instruction);
345 m += jmp_match(segment, offset, bits, instruction, temp->code);
347 if (m == 100) /* matches! */
349 char *codes = temp->code;
350 long insn_size = calcsize(segment, offset, bits,
352 itimes = instruction->times;
353 if (insn_size < 0) /* shouldn't be, on pass two */
354 error (ERR_PANIC, "errors made it through from pass one");
355 else while (itimes--) {
356 insn_end = offset + insn_size;
357 for (j=0; j<instruction->nprefix; j++) {
359 switch (instruction->prefixes[j]) {
362 case P_REPNE: case P_REPNZ:
364 case P_REPE: case P_REPZ: case P_REP:
366 case R_CS: c = 0x2E; break;
367 case R_DS: c = 0x3E; break;
368 case R_ES: c = 0x26; break;
369 case R_FS: c = 0x64; break;
370 case R_GS: c = 0x65; break;
371 case R_SS: c = 0x36; break;
390 "invalid instruction prefix");
393 out (offset, segment, &c, OUT_RAWDATA+1,
398 gencode (segment, offset, bits, instruction, codes, insn_end);
400 if (itimes > 0 && itimes == instruction->times-1) {
402 * Dummy call to list->output to give the offset to the
405 list->output (offset, NULL, OUT_RAWDATA);
406 list->uplevel (LIST_TIMES);
409 if (instruction->times > 1)
410 list->downlevel (LIST_TIMES);
411 return offset - start;
412 } else if (m > 0 && m > size_prob) {
418 if (temp->opcode == -1) { /* didn't match any instruction */
419 if (size_prob == 1) /* would have matched, but for size */
420 error (ERR_NONFATAL, "operation size not specified");
421 else if (size_prob == 2)
422 error (ERR_NONFATAL, "mismatch in operand sizes");
423 else if (size_prob == 3)
424 error (ERR_NONFATAL, "no instruction for this cpu level");
427 "invalid combination of opcode and operands");
/*
 * insn_size: compute how many bytes an instruction will occupy, without
 * emitting anything.
 *
 *   segment, offset, bits - forwarded to jmp_match() and calcsize().
 *   cp                    - CPU level (its use is not visible in this
 *                           listing).
 *   instruction           - the parsed instruction.
 *   error                 - error callback, stored in errfunc.
 *
 * Visible results:
 *   - DB/DW/DD/DQ/DT: for each extended operand the size (string length
 *     for strings; the number case is on a missing line) is rounded up to
 *     a multiple of wsize using align = (-osize) % wsize, and the total is
 *     multiplied by instruction->times.
 *   - INCBIN: the file is opened and sought to its end; the optional
 *     second operand is subtracted from the length and the optional third
 *     operand caps it; returns times * len, or 0 after an open/seek error.
 *     Here fname is an automatic (non-static) buffer.
 *   - Ordinary instructions: the first matching template is sized with
 *     calcsize(); the prefix loop tests each prefix against A16/O16 in
 *     16-bit mode and A32/O32 in 32-bit mode (the statement it guards is
 *     on a missing line, presumably a one-byte increment); returns
 *     isize * times.
 *   - Returns -1 when no template matches.
 * NOTE(review): the result for opcode == -1 and the match test on the
 * score m are on lines missing from this listing.
 */
432 long insn_size (long segment, long offset, int bits, unsigned long cp,
433 insn *instruction, efunc error)
435 struct itemplate *temp;
437 errfunc = error; /* to pass to other functions */
440 if (instruction->opcode == -1)
443 if (instruction->opcode == I_DB ||
444 instruction->opcode == I_DW ||
445 instruction->opcode == I_DD ||
446 instruction->opcode == I_DQ ||
447 instruction->opcode == I_DT)
450 long isize, osize, wsize = 0; /* placate gcc */
453 switch (instruction->opcode)
455 case I_DB: wsize = 1; break;
456 case I_DW: wsize = 2; break;
457 case I_DD: wsize = 4; break;
458 case I_DQ: wsize = 8; break;
459 case I_DT: wsize = 10; break;
462 for (e = instruction->eops; e; e = e->next)
467 if (e->type == EOT_DB_NUMBER)
469 else if (e->type == EOT_DB_STRING)
470 osize = e->stringlen;
472 align = (-osize) % wsize;
475 isize += osize + align;
477 return isize * instruction->times;
480 if (instruction->opcode == I_INCBIN)
482 char fname[FILENAME_MAX];
486 len = FILENAME_MAX-1;
487 if (len > instruction->eops->stringlen)
488 len = instruction->eops->stringlen;
489 strncpy (fname, instruction->eops->stringval, len);
491 if ( (fp = fopen(fname, "rb")) == NULL )
492 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
493 else if (fseek(fp, 0L, SEEK_END) < 0)
494 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
500 if (instruction->eops->next)
502 len -= instruction->eops->next->offset;
503 if (instruction->eops->next->next &&
504 len > instruction->eops->next->next->offset)
506 len = instruction->eops->next->next->offset;
509 return instruction->times * len;
511 return 0; /* if we're here, there's an error */
514 temp = nasm_instructions[instruction->opcode];
515 while (temp->opcode != -1) {
516 int m = matches(temp, instruction);
518 m += jmp_match(segment, offset, bits, instruction, temp->code);
521 /* we've matched an instruction. */
523 char * codes = temp->code;
526 isize = calcsize(segment, offset, bits, instruction, codes);
529 for (j = 0; j < instruction->nprefix; j++)
531 if ((instruction->prefixes[j] != P_A16 &&
532 instruction->prefixes[j] != P_O16 && bits==16) ||
533 (instruction->prefixes[j] != P_A32 &&
534 instruction->prefixes[j] != P_O32 && bits==32))
539 return isize * instruction->times;
543 return -1; /* didn't match any instruction */
547 /*
 * is_sbyte: report whether operand `op` of `ins` can be encoded as a
 * sign-extended byte immediate.  The result is a truth value, not the
 * operand value.
 *
 *   ins  - instruction whose operand is examined.
 *   op   - operand index.
 *   size - 16 or 32; with 16 the value is first narrowed through
 *          (signed short) so that it is sign-extended from 16 bits.
 *
 * Non-zero is returned only when all of the following hold:
 *   - not (ins->forw_ref together with non-zero operand opflags);
 *   - optimizing > 0, or the operand type carries neither BITS16 nor
 *     BITS32;
 *   - the operand has no segment and no WRT (both NO_SEG);
 *   - the (possibly narrowed) value lies in -128..127.
 */
549 static int is_sbyte (insn *ins, int op, int size)
554 ret = !(ins->forw_ref && ins->oprs[op].opflags ) && /* dead in the water on forward reference or External */
555 (optimizing>0 || !(ins->oprs[op].type & (BITS16|BITS32))) &&
556 ins->oprs[op].wrt==NO_SEG && ins->oprs[op].segment==NO_SEG;
558 v = ins->oprs[op].offset;
559 if (size==16) v = (signed short)v; /* sign extend if 16 bits */
561 return ret && v>=-128L && v<=127L;
/*
 * calcsize: walk a template code string and add up the number of bytes
 * each code will generate for this instruction.
 *
 *   segment, offset - unused (explicitly cast to void).
 *   bits            - assembly mode, 16 or 32.
 *   ins             - the instruction supplying operand details.
 *   codes           - zero-terminated template code string.
 *
 * Visible per-code accounting:
 *   \1..\3          skip that many literal bytes and count them.
 *   \10..\12        skip one literal byte, count one.
 *   \34..\36, \64..\66
 *                   2 or 4 bytes, chosen from the operand addr_size when
 *                   set, otherwise from bits.
 *   \130..\132      1 byte if is_sbyte(..., 16) holds, otherwise 2.
 *   \140..\142      1 byte if is_sbyte(..., 32) holds, otherwise 4.
 *   \133..\135, \143..\145
 *                   advance codes by 2 and count one byte.
 *   \300..\302      adds the result of chsize() for the operand.
 *   \340..\342      operand 0 offset shifted left by (c - 0340); a
 *                   non-NO_SEG operand is reported as an error.
 *   0100..0277      effective address: process_ea() fills ea_data and
 *                   ea_data.size is added; a NULL result is reported as
 *                   an invalid effective address.
 *   anything else   ERR_PANIC, instruction table corrupt.
 * NOTE(review): the amounts for the plain immediate/relative codes, the
 * case labels for the bits==16 / bits==32 increments, and the final
 * return statements are on lines missing from this listing.
 */
564 static long calcsize (long segment, long offset, int bits,
565 insn *ins, char *codes)
570 (void) segment; /* Don't warn that this parameter is unused */
571 (void) offset; /* Don't warn that this parameter is unused */
573 while (*codes) switch (c = *codes++) {
574 case 01: case 02: case 03:
575 codes += c, length += c; break;
576 case 04: case 05: case 06: case 07:
578 case 010: case 011: case 012:
579 codes++, length++; break;
582 case 014: case 015: case 016:
584 case 020: case 021: case 022:
586 case 024: case 025: case 026:
588 case 030: case 031: case 032:
590 case 034: case 035: case 036:
591 length += ((ins->oprs[c-034].addr_size ?
592 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4); break;
595 case 040: case 041: case 042:
597 case 050: case 051: case 052:
599 case 060: case 061: case 062:
601 case 064: case 065: case 066:
602 length += ((ins->oprs[c-064].addr_size ?
603 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4); break;
604 case 070: case 071: case 072:
606 case 0130: case 0131: case 0132:
607 length += is_sbyte(ins, c-0130, 16) ? 1 : 2; break;
608 case 0133: case 0134: case 0135:
609 codes+=2; length++; break;
610 case 0140: case 0141: case 0142:
611 length += is_sbyte(ins, c-0140, 32) ? 1 : 4; break;
612 case 0143: case 0144: case 0145:
613 codes+=2; length++; break;
614 case 0300: case 0301: case 0302:
615 length += chsize (&ins->oprs[c-0300], bits);
618 length += (bits==32);
621 length += (bits==16);
626 length += (bits==32);
629 length += (bits==16);
634 codes++, length++; break;
640 case 0340: case 0341: case 0342:
641 if (ins->oprs[0].segment != NO_SEG)
642 errfunc (ERR_NONFATAL, "attempt to reserve non-constant"
643 " quantity of BSS space");
645 length += ins->oprs[0].offset << (c-0340);
647 case 0370: case 0371: case 0372:
651 default: /* can't do it by 'case' statements */
652 if (c>=0100 && c<=0277) { /* it's an EA */
654 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, 0,
656 errfunc (ERR_NONFATAL, "invalid effective address");
659 length += ea_data.size;
661 errfunc (ERR_PANIC, "internal instruction table corrupt"
662 ": instruction code 0x%02X given", c);
/*
 * gencode: interpret a template code string and emit the instruction
 * bytes through out().
 *
 *   segment, offset - current output position.
 *   bits            - assembly mode, 16 or 32.
 *   ins             - instruction supplying operands and condition code.
 *   codes           - template code string.
 *   insn_end        - offset just past the whole instruction; relative
 *                     operands are computed as target offset - insn_end.
 *
 * Visible behaviour by code group:
 *   \1..\3      literal bytes copied straight from the code string.
 *   \4..\7      segment-register PUSH/POP opcodes chosen from operand 0
 *               basereg; an unexpected register raises ERR_PANIC.
 *   \10..\12    literal byte plus regval() of the operand.
 *   \14..\16    signed byte immediate; warns outside -128..127.
 *   \20..\22    byte immediate; warns outside -256..255.
 *   \24..\26    unsigned byte immediate; warns outside 0..255.
 *   \30..\32    word immediate; warns outside -65536..65535 when the
 *               operand is not relocated.
 *   \34..\36    word or dword chosen from addr_size or bits.
 *   \37         segment part of operand 0; an operand without a segment
 *               is reported as an error.
 *   \40..\42    dword immediate.
 *   \50..\52    byte relative; errors when the target is in another
 *               segment or the delta is outside -128..127.
 *   \60..\72    word/dword relative; targets in another segment are
 *               emitted as OUT_REL2ADR/OUT_REL4ADR with insn_end-offset
 *               added to the type, same-segment targets as a plain
 *               OUT_ADDRESS delta.
 *   \130..\145  word/dword immediates that shrink to a signed byte when
 *               is_sbyte() holds, and the matching s-bit (value 2) that
 *               is ORed into the opcode byte.
 *   \300..\302  emits a byte when chsize() is non-zero.
 *   \330        literal byte XORed with condval[ins->condition].
 *   \340..\342  reserves operand 0 offset << (c - 0340) bytes via
 *               OUT_RESERVE; a non-constant size raises ERR_PANIC.
 *   \373        emits 3 when bits==16, otherwise 5.
 *   0100..0277  effective address: rfield comes from regval() of operand
 *               (c & 7) when c <= 0177, otherwise it is a constant;
 *               process_ea() builds ModRM and optional SIB, followed by
 *               the displacement (a one-byte displacement is emitted as
 *               OUT_ADDRESS+1 when relocated, raw otherwise).
 *   other       ERR_PANIC, instruction table corrupt.
 *
 * NOTE(review): in \130..\132 and \140..\142 the address of `data` is
 * passed with OUT_RAWDATA+1.  If `data` is a long (its declaration is on a
 * missing line) this emits the low byte only on little-endian hosts.
 * NOTE(review): in \64..\66 the cross-segment branch picks OUT_REL2ADR or
 * OUT_REL4ADR, and the resulting size, from `bits` alone, discarding the
 * size just computed from the operand addr_size; confirm this is intended.
 * NOTE(review): many case labels, break statements and offset updates are
 * on lines missing from this listing.
 */
667 static void gencode (long segment, long offset, int bits,
668 insn *ins, char *codes, long insn_end)
670 static char condval[] = { /* conditional opcodes */
671 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
672 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
673 0x0, 0xA, 0xA, 0xB, 0x8, 0x4
676 unsigned char bytes[4];
680 switch (c = *codes++)
682 case 01: case 02: case 03:
683 out (offset, segment, codes, OUT_RAWDATA+c, NO_SEG, NO_SEG);
689 switch (ins->oprs[0].basereg)
692 bytes[0] = 0x0E + (c == 0x04 ? 1 : 0); break;
694 bytes[0] = 0x1E + (c == 0x04 ? 1 : 0); break;
696 bytes[0] = 0x06 + (c == 0x04 ? 1 : 0); break;
698 bytes[0] = 0x16 + (c == 0x04 ? 1 : 0); break;
700 errfunc (ERR_PANIC, "bizarre 8086 segment register received");
702 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
707 switch (ins->oprs[0].basereg) {
708 case R_FS: bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0); break;
709 case R_GS: bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0); break;
711 errfunc (ERR_PANIC, "bizarre 386 segment register received");
713 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
717 case 010: case 011: case 012:
718 bytes[0] = *codes++ + regval(&ins->oprs[c-010]);
719 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
725 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
729 case 014: case 015: case 016:
730 if (ins->oprs[c-014].offset < -128
731 || ins->oprs[c-014].offset > 127)
733 errfunc (ERR_WARNING, "signed byte value exceeds bounds");
736 if (ins->oprs[c-014].segment != NO_SEG)
738 data = ins->oprs[c-014].offset;
739 out (offset, segment, &data, OUT_ADDRESS+1,
740 ins->oprs[c-014].segment, ins->oprs[c-014].wrt);
743 bytes[0] = ins->oprs[c-014].offset;
744 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
749 case 020: case 021: case 022:
750 if (ins->oprs[c-020].offset < -256
751 || ins->oprs[c-020].offset > 255)
753 errfunc (ERR_WARNING, "byte value exceeds bounds");
755 if (ins->oprs[c-020].segment != NO_SEG) {
756 data = ins->oprs[c-020].offset;
757 out (offset, segment, &data, OUT_ADDRESS+1,
758 ins->oprs[c-020].segment, ins->oprs[c-020].wrt);
761 bytes[0] = ins->oprs[c-020].offset;
762 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
767 case 024: case 025: case 026:
768 if (ins->oprs[c-024].offset < 0 || ins->oprs[c-024].offset > 255)
769 errfunc (ERR_WARNING, "unsigned byte value exceeds bounds");
770 if (ins->oprs[c-024].segment != NO_SEG) {
771 data = ins->oprs[c-024].offset;
772 out (offset, segment, &data, OUT_ADDRESS+1,
773 ins->oprs[c-024].segment, ins->oprs[c-024].wrt);
776 bytes[0] = ins->oprs[c-024].offset;
777 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
782 case 030: case 031: case 032:
783 if (ins->oprs[c-030].segment == NO_SEG &&
784 ins->oprs[c-030].wrt == NO_SEG &&
785 (ins->oprs[c-030].offset < -65536L ||
786 ins->oprs[c-030].offset > 65535L))
788 errfunc (ERR_WARNING, "word value exceeds bounds");
790 data = ins->oprs[c-030].offset;
791 out (offset, segment, &data, OUT_ADDRESS+2,
792 ins->oprs[c-030].segment, ins->oprs[c-030].wrt);
796 case 034: case 035: case 036:
797 data = ins->oprs[c-034].offset;
798 size = ((ins->oprs[c-034].addr_size ?
799 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4);
800 if (size==2 && (data < -65536L || data > 65535L))
801 errfunc (ERR_WARNING, "word value exceeds bounds");
802 out (offset, segment, &data, OUT_ADDRESS+size,
803 ins->oprs[c-034].segment, ins->oprs[c-034].wrt);
808 if (ins->oprs[0].segment == NO_SEG)
809 errfunc (ERR_NONFATAL, "value referenced by FAR is not"
812 out (offset, segment, &data, OUT_ADDRESS+2,
813 outfmt->segbase(1+ins->oprs[0].segment),
818 case 040: case 041: case 042:
819 data = ins->oprs[c-040].offset;
820 out (offset, segment, &data, OUT_ADDRESS+4,
821 ins->oprs[c-040].segment, ins->oprs[c-040].wrt);
825 case 050: case 051: case 052:
826 if (ins->oprs[c-050].segment != segment)
827 errfunc (ERR_NONFATAL, "short relative jump outside segment");
828 data = ins->oprs[c-050].offset - insn_end;
829 if (data > 127 || data < -128)
830 errfunc (ERR_NONFATAL, "short jump is out of range");
832 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
836 case 060: case 061: case 062:
837 if (ins->oprs[c-060].segment != segment) {
838 data = ins->oprs[c-060].offset;
839 out (offset, segment, &data, OUT_REL2ADR+insn_end-offset,
840 ins->oprs[c-060].segment, ins->oprs[c-060].wrt);
842 data = ins->oprs[c-060].offset - insn_end;
843 out (offset, segment, &data,
844 OUT_ADDRESS+2, NO_SEG, NO_SEG);
849 case 064: case 065: case 066:
850 size = ((ins->oprs[c-064].addr_size ?
851 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4);
852 if (ins->oprs[c-064].segment != segment) {
853 data = ins->oprs[c-064].offset;
854 size = (bits == 16 ? OUT_REL2ADR : OUT_REL4ADR);
855 out (offset, segment, &data, size+insn_end-offset,
856 ins->oprs[c-064].segment, ins->oprs[c-064].wrt);
857 size = (bits == 16 ? 2 : 4);
859 data = ins->oprs[c-064].offset - insn_end;
860 out (offset, segment, &data,
861 OUT_ADDRESS+size, NO_SEG, NO_SEG);
866 case 070: case 071: case 072:
867 if (ins->oprs[c-070].segment != segment) {
868 data = ins->oprs[c-070].offset;
869 out (offset, segment, &data, OUT_REL4ADR+insn_end-offset,
870 ins->oprs[c-070].segment, ins->oprs[c-070].wrt);
872 data = ins->oprs[c-070].offset - insn_end;
873 out (offset, segment, &data,
874 OUT_ADDRESS+4, NO_SEG, NO_SEG);
879 case 0130: case 0131: case 0132:
880 data = ins->oprs[c-0130].offset;
881 if (is_sbyte(ins, c-0130, 16)) {
882 out (offset, segment, &data, OUT_RAWDATA+1, NO_SEG, NO_SEG);
885 if (ins->oprs[c-0130].segment == NO_SEG &&
886 ins->oprs[c-0130].wrt == NO_SEG &&
887 (data < -65536L || data > 65535L)) {
888 errfunc (ERR_WARNING, "word value exceeds bounds");
890 out (offset, segment, &data, OUT_ADDRESS+2,
891 ins->oprs[c-0130].segment, ins->oprs[c-0130].wrt);
896 case 0133: case 0134: case 0135:
899 if (is_sbyte(ins, c-0133, 16)) bytes[0] |= 2; /* s-bit */
900 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
904 case 0140: case 0141: case 0142:
905 data = ins->oprs[c-0140].offset;
906 if (is_sbyte(ins, c-0140, 32)) {
907 out (offset, segment, &data, OUT_RAWDATA+1, NO_SEG, NO_SEG);
910 out (offset, segment, &data, OUT_ADDRESS+4,
911 ins->oprs[c-0140].segment, ins->oprs[c-0140].wrt);
916 case 0143: case 0144: case 0145:
919 if (is_sbyte(ins, c-0143, 32)) bytes[0] |= 2; /* s-bit */
920 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
924 case 0300: case 0301: case 0302:
925 if (chsize (&ins->oprs[c-0300], bits)) {
927 out (offset, segment, bytes,
928 OUT_RAWDATA+1, NO_SEG, NO_SEG);
937 out (offset, segment, bytes,
938 OUT_RAWDATA+1, NO_SEG, NO_SEG);
947 out (offset, segment, bytes,
948 OUT_RAWDATA+1, NO_SEG, NO_SEG);
960 out (offset, segment, bytes,
961 OUT_RAWDATA+1, NO_SEG, NO_SEG);
970 out (offset, segment, bytes,
971 OUT_RAWDATA+1, NO_SEG, NO_SEG);
981 *bytes = *codes++ ^ condval[ins->condition];
982 out (offset, segment, bytes,
983 OUT_RAWDATA+1, NO_SEG, NO_SEG);
993 out (offset, segment, bytes,
994 OUT_RAWDATA+1, NO_SEG, NO_SEG);
998 case 0340: case 0341: case 0342:
999 if (ins->oprs[0].segment != NO_SEG)
1000 errfunc (ERR_PANIC, "non-constant BSS size in pass two");
1002 long size = ins->oprs[0].offset << (c-0340);
1004 out (offset, segment, NULL,
1005 OUT_RESERVE+size, NO_SEG, NO_SEG);
1010 case 0370: case 0371: case 0372:
1014 *bytes = bits==16 ? 3 : 5;
1015 out (offset, segment, bytes,
1016 OUT_RAWDATA+1, NO_SEG, NO_SEG);
1020 default: /* can't do it by 'case' statements */
1021 if (c>=0100 && c<=0277) { /* it's an EA */
1027 if (c<=0177) /* pick rfield from operand b */
1028 rfield = regval (&ins->oprs[c&7]);
1029 else /* rfield is constant */
1032 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, rfield,
1035 errfunc (ERR_NONFATAL, "invalid effective address");
1039 *p++ = ea_data.modrm;
1040 if (ea_data.sib_present)
1044 out (offset, segment, bytes, OUT_RAWDATA + s,
1047 switch (ea_data.bytes) {
1051 if (ins->oprs[(c>>3)&7].segment != NO_SEG) {
1052 data = ins->oprs[(c>>3)&7].offset;
1053 out (offset, segment, &data, OUT_ADDRESS+1,
1054 ins->oprs[(c>>3)&7].segment,
1055 ins->oprs[(c>>3)&7].wrt);
1057 *bytes = ins->oprs[(c>>3)&7].offset;
1058 out (offset, segment, bytes, OUT_RAWDATA+1,
1065 data = ins->oprs[(c>>3)&7].offset;
1066 out (offset, segment, &data,
1067 OUT_ADDRESS+ea_data.bytes,
1068 ins->oprs[(c>>3)&7].segment, ins->oprs[(c>>3)&7].wrt);
1074 errfunc (ERR_PANIC, "internal instruction table corrupt"
1075 ": instruction code 0x%02X given", c);
/*
 * regval: map the register in o->basereg to its encoding number.
 *
 * The case labels are grouped so that every register in a group shares one
 * value; the groups appear in the order EAX/AX/AL..., ECX..., EDX...,
 * EBX..., ESP/AH..., EBP/CH..., ESI/DH..., EDI/BH... and also cover
 * segment, control, debug, test, FPU, MMX and XMM registers.
 * NOTE(review): the return statement of each group is on a line missing
 * from this listing; presumably the groups return 0 through 7 in order.
 * Any register not listed raises ERR_PANIC.
 */
1079 static int regval (operand *o)
1081 switch (o->basereg) {
1082 case R_EAX: case R_AX: case R_AL: case R_ES: case R_CR0: case R_DR0:
1083 case R_ST0: case R_MM0: case R_XMM0:
1085 case R_ECX: case R_CX: case R_CL: case R_CS: case R_DR1: case R_ST1:
1086 case R_MM1: case R_XMM1:
1088 case R_EDX: case R_DX: case R_DL: case R_SS: case R_CR2: case R_DR2:
1089 case R_ST2: case R_MM2: case R_XMM2:
1091 case R_EBX: case R_BX: case R_BL: case R_DS: case R_CR3: case R_DR3:
1092 case R_TR3: case R_ST3: case R_MM3: case R_XMM3:
1094 case R_ESP: case R_SP: case R_AH: case R_FS: case R_CR4: case R_TR4:
1095 case R_ST4: case R_MM4: case R_XMM4:
1097 case R_EBP: case R_BP: case R_CH: case R_GS: case R_TR5: case R_ST5:
1098 case R_MM5: case R_XMM5:
1100 case R_ESI: case R_SI: case R_DH: case R_DR6: case R_TR6: case R_ST6:
1101 case R_MM6: case R_XMM6:
1103 case R_EDI: case R_DI: case R_BH: case R_DR7: case R_TR7: case R_ST7:
1104 case R_MM7: case R_XMM7:
1106 default: /* panic */
1107 errfunc (ERR_PANIC, "invalid register operand given to regval()");
/*
 * matches: score how well an instruction template fits a parsed
 * instruction.
 *
 *   itemp       - candidate template.
 *   instruction - the parsed instruction.
 *
 * Visible results:
 *   0   opcode differs or operand count differs.
 *   3   the template needs a higher CPU level than the file-scope cpu
 *       value ((itemp->flags & IF_PLEVEL) > cpu).
 *   99  the template fits but its first code byte is >= 0370, so the
 *       caller must add the result of jmp_match() before treating it as a
 *       match.
 * Callers in this file treat 100 as a full match and 1, 2, 3 as
 * size-not-specified, size-mismatch and cpu-level problems.
 *
 * Steps visible here: reject stray COLON/TO flags not allowed by the
 * template, check that operand type flags agree (including SIZE_MASK
 * bits), derive the expected operand sizes from IF_ARMASK, IF_SB, IF_SW,
 * IF_SD, IF_SM and IF_SM2, and then compare operand sizes for operands
 * whose template entry carries no size.
 * NOTE(review): the statements that return the 1, 2 and 100 scores and the
 * assignments of asize are on lines missing from this listing.
 */
1112 static int matches (struct itemplate *itemp, insn *instruction)
1114 int i, size[3], asize, oprs, ret;
1121 if (itemp->opcode != instruction->opcode) return 0;
1124 * Count the operands
1126 if (itemp->operands != instruction->operands) return 0;
1129 * Check that no spurious colons or TOs are present
1131 for (i=0; i<itemp->operands; i++)
1132 if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON|TO))
1136 * Check that the operand flags all match up
1138 for (i=0; i<itemp->operands; i++)
1139 if (itemp->opd[i] & ~instruction->oprs[i].type ||
1140 ((itemp->opd[i] & SIZE_MASK) &&
1141 ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK)))
1143 if ((itemp->opd[i] & ~instruction->oprs[i].type & NON_SIZE) ||
1144 (instruction->oprs[i].type & SIZE_MASK))
1152 * Check operand sizes
1154 if (itemp->flags & IF_ARMASK) {
1155 size[0] = size[1] = size[2] = 0;
1157 switch (itemp->flags & IF_ARMASK) {
1158 case IF_AR0: i = 0; break;
1159 case IF_AR1: i = 1; break;
1160 case IF_AR2: i = 2; break;
1161 default: break; /* Shouldn't happen */
1163 if (itemp->flags & IF_SB) {
1165 } else if (itemp->flags & IF_SW) {
1167 } else if (itemp->flags & IF_SD) {
1172 if (itemp->flags & IF_SB) {
1174 oprs = itemp->operands;
1175 } else if (itemp->flags & IF_SW) {
1177 oprs = itemp->operands;
1178 } else if (itemp->flags & IF_SD) {
1180 oprs = itemp->operands;
1182 size[0] = size[1] = size[2] = asize;
1185 if (itemp->flags & (IF_SM | IF_SM2)) {
1186 oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
1188 for (i=0; i<oprs; i++) {
1189 if ( (asize = itemp->opd[i] & SIZE_MASK) != 0) {
1191 for (j=0; j<oprs; j++)
1197 oprs = itemp->operands;
1200 for (i=0; i<itemp->operands; i++)
1201 if (!(itemp->opd[i] & SIZE_MASK) &&
1202 (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
1207 * Check template is okay at the set cpu level
1209 if ((itemp->flags & IF_PLEVEL) > cpu) return 3;
1212 * Check if special handling needed for Jumps
1214 if ((unsigned char)(itemp->code[0]) >= 0370) return 99;
/*
 * process_ea: translate an operand into ModRM / SIB / displacement-size
 * information.
 *
 *   input    - the operand (register or memory reference).
 *   output   - structure to fill: sib_present, bytes, modrm, sib, size.
 *   addrbits - default address size; replaced by input->addr_size for a
 *              pure offset when that is non-zero.
 *   rfield   - value placed in bits 3..5 of the ModRM byte.
 *   (a fifth parameter, used in the body as forw_ref, is on a line missing
 *   from this listing.)
 *
 * Returns NULL for an invalid effective address.  The last visible
 * statement sets output->size = 1 + sib_present + bytes.
 *
 * Cases:
 *   Single register - looked up in the regs[] table; ModRM is
 *     0xC0 | rfield<<3 | (index & 7), no SIB and no displacement.
 *   Pure offset (no base, and no index or zero scale) - 4 displacement
 *     bytes with rm 5 for 32-bit addressing, else 2 bytes with rm 6.
 *   32-bit indirection (any Exx register as base or index) - both
 *     registers must be Exx and a 16-bit addr_size is checked for; base
 *     and index are then normalised: swapped when the hints ask for it,
 *     a lone scale-1 index becomes the base, scales 3/5/9 (and 2, subject
 *     to EAF_TIMESTWO) without a base become base + (scale-1)*index, and
 *     ESP as index or an unsupported scale is rejected.  Without an index
 *     and with a base other than ESP no SIB is needed; otherwise a SIB
 *     byte (scale<<6 | index<<3 | base) is built.  The displacement is 4
 *     bytes when there is no base or mod is 2, otherwise mod bytes.
 *   16-bit indirection - registers must be BX, BP, SI or DI, the scale
 *     must be 1, and BX/BP is arranged as base with SI/DI as index before
 *     the rm value is looked up.  rm 6 (BP alone) never uses the
 *     no-displacement form.
 *   In both indirection forms the byte-displacement form is chosen when
 *     EAF_BYTEOFFS is set, or when the offset is in -128..127 with no
 *     segment, no forward reference and no EAF_WORDOFFS.
 *
 * NOTE(review): the two statements `b ^= i ^= b ^= i` modify b twice
 * without an intervening sequence point, which is undefined behaviour in
 * C.  A temporary-variable swap, as used earlier for the hint-driven swap
 * (`t = b, b = i, i = t`), would be well defined.
 * NOTE(review): the mod assignments and several return statements are on
 * lines missing from this listing.
 */
1219 static ea *process_ea (operand *input, ea *output, int addrbits, int rfield,
1222 if (!(REGISTER & ~input->type)) { /* it's a single register */
1223 static int regs[] = {
1224 R_AL, R_CL, R_DL, R_BL, R_AH, R_CH, R_DH, R_BH,
1225 R_AX, R_CX, R_DX, R_BX, R_SP, R_BP, R_SI, R_DI,
1226 R_EAX, R_ECX, R_EDX, R_EBX, R_ESP, R_EBP, R_ESI, R_EDI,
1227 R_MM0, R_MM1, R_MM2, R_MM3, R_MM4, R_MM5, R_MM6, R_MM7,
1228 R_XMM0, R_XMM1, R_XMM2, R_XMM3, R_XMM4, R_XMM5, R_XMM6, R_XMM7
1232 for (i=0; i<elements(regs); i++)
1233 if (input->basereg == regs[i]) break;
1234 if (i<elements(regs)) {
1235 output->sib_present = FALSE;/* no SIB necessary */
1236 output->bytes = 0; /* no offset necessary either */
1237 output->modrm = 0xC0 | (rfield << 3) | (i & 7);
1241 } else { /* it's a memory reference */
1242 if (input->basereg==-1 && (input->indexreg==-1 || input->scale==0)) {
1243 /* it's a pure offset */
1244 if (input->addr_size)
1245 addrbits = input->addr_size;
1246 output->sib_present = FALSE;
1247 output->bytes = (addrbits==32 ? 4 : 2);
1248 output->modrm = (addrbits==32 ? 5 : 6) | (rfield << 3);
1250 else { /* it's an indirection */
1251 int i=input->indexreg, b=input->basereg, s=input->scale;
1252 long o=input->offset, seg=input->segment;
1253 int hb=input->hintbase, ht=input->hinttype;
1256 if (s==0) i = -1; /* make this easy, at least */
1258 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
1259 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
1260 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
1261 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI) {
1262 /* it must be a 32-bit memory reference. Firstly we have
1263 * to check that all registers involved are type Exx. */
1264 if (i!=-1 && i!=R_EAX && i!=R_EBX && i!=R_ECX && i!=R_EDX
1265 && i!=R_EBP && i!=R_ESP && i!=R_ESI && i!=R_EDI)
1267 if (b!=-1 && b!=R_EAX && b!=R_EBX && b!=R_ECX && b!=R_EDX
1268 && b!=R_EBP && b!=R_ESP && b!=R_ESI && b!=R_EDI)
1271 /* While we're here, ensure the user didn't specify WORD. */
1272 if (input->addr_size == 16)
1275 /* now reorganise base/index */
1276 if (s == 1 && b != i && b != -1 && i != -1 &&
1277 ((hb==b&&ht==EAH_NOTBASE) || (hb==i&&ht==EAH_MAKEBASE)))
1278 t = b, b = i, i = t; /* swap if hints say so */
1279 if (b==i) /* convert EAX+2*EAX to 3*EAX */
1281 if (b==-1 && s==1 && !(hb == i && ht == EAH_NOTBASE))
1282 b = i, i = -1; /* make single reg base, unless hint */
1283 if (((s==2 && i!=R_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
1284 s==3 || s==5 || s==9) && b==-1)
1285 b = i, s--; /* convert 3*EAX to EAX+2*EAX */
1286 if (s==1 && i==R_ESP) /* swap ESP into base if scale is 1 */
1288 if (i==R_ESP || (s!=1 && s!=2 && s!=4 && s!=8 && i!=-1))
1289 return NULL; /* wrong, for various reasons */
1291 if (i==-1 && b!=R_ESP) {/* no SIB needed */
1294 case R_EAX: rm = 0; break;
1295 case R_ECX: rm = 1; break;
1296 case R_EDX: rm = 2; break;
1297 case R_EBX: rm = 3; break;
1298 case R_EBP: rm = 5; break;
1299 case R_ESI: rm = 6; break;
1300 case R_EDI: rm = 7; break;
1301 case -1: rm = 5; break;
1302 default: /* should never happen */
1305 if (b==-1 || (b!=R_EBP && o==0 &&
1306 seg==NO_SEG && !forw_ref &&
1308 (EAF_BYTEOFFS|EAF_WORDOFFS))))
1310 else if (input->eaflags & EAF_BYTEOFFS ||
1311 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1312 !(input->eaflags & EAF_WORDOFFS))) {
1318 output->sib_present = FALSE;
1319 output->bytes = (b==-1 || mod==2 ? 4 : mod);
1320 output->modrm = (mod<<6) | (rfield<<3) | rm;
1322 else { /* we need a SIB */
1323 int mod, scale, index, base;
1326 case R_EAX: base = 0; break;
1327 case R_ECX: base = 1; break;
1328 case R_EDX: base = 2; break;
1329 case R_EBX: base = 3; break;
1330 case R_ESP: base = 4; break;
1331 case R_EBP: case -1: base = 5; break;
1332 case R_ESI: base = 6; break;
1333 case R_EDI: base = 7; break;
1334 default: /* then what the smeg is it? */
1335 return NULL; /* panic */
1339 case R_EAX: index = 0; break;
1340 case R_ECX: index = 1; break;
1341 case R_EDX: index = 2; break;
1342 case R_EBX: index = 3; break;
1343 case -1: index = 4; break;
1344 case R_EBP: index = 5; break;
1345 case R_ESI: index = 6; break;
1346 case R_EDI: index = 7; break;
1347 default: /* then what the smeg is it? */
1348 return NULL; /* panic */
1353 case 1: scale = 0; break;
1354 case 2: scale = 1; break;
1355 case 4: scale = 2; break;
1356 case 8: scale = 3; break;
1357 default: /* then what the smeg is it? */
1358 return NULL; /* panic */
1361 if (b==-1 || (b!=R_EBP && o==0 &&
1362 seg==NO_SEG && !forw_ref &&
1364 (EAF_BYTEOFFS|EAF_WORDOFFS))))
1366 else if (input->eaflags & EAF_BYTEOFFS ||
1367 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1368 !(input->eaflags & EAF_WORDOFFS)))
1373 output->sib_present = TRUE;
1374 output->bytes = (b==-1 || mod==2 ? 4 : mod);
1375 output->modrm = (mod<<6) | (rfield<<3) | 4;
1376 output->sib = (scale<<6) | (index<<3) | base;
1379 else { /* it's 16-bit */
1382 /* check all registers are BX, BP, SI or DI */
1383 if ((b!=-1 && b!=R_BP && b!=R_BX && b!=R_SI && b!=R_DI) ||
1384 (i!=-1 && i!=R_BP && i!=R_BX && i!=R_SI && i!=R_DI))
1387 /* ensure the user didn't specify DWORD */
1388 if (input->addr_size == 32)
1391 if (s!=1 && i!=-1) return NULL;/* no can do, in 16-bit EA */
1392 if (b==-1 && i!=-1) b ^= i ^= b ^= i; /* swap them round */
1393 if ((b==R_SI || b==R_DI) && i!=-1)
1394 b ^= i ^= b ^= i; /* have BX/BP as base, SI/DI index */
1395 if (b==i) return NULL;/* shouldn't ever happen, in theory */
1396 if (i!=-1 && b!=-1 &&
1397 (i==R_BP || i==R_BX || b==R_SI || b==R_DI))
1398 return NULL; /* invalid combinations */
1399 if (b==-1) /* pure offset: handled above */
1400 return NULL; /* so if it gets to here, panic! */
1404 switch (i*256 + b) {
1405 case R_SI*256+R_BX: rm=0; break;
1406 case R_DI*256+R_BX: rm=1; break;
1407 case R_SI*256+R_BP: rm=2; break;
1408 case R_DI*256+R_BP: rm=3; break;
1412 case R_SI: rm=4; break;
1413 case R_DI: rm=5; break;
1414 case R_BP: rm=6; break;
1415 case R_BX: rm=7; break;
1417 if (rm==-1) /* can't happen, in theory */
1418 return NULL; /* so panic if it does */
1420 if (o==0 && seg==NO_SEG && !forw_ref && rm!=6 &&
1421 !(input->eaflags & (EAF_BYTEOFFS|EAF_WORDOFFS)))
1423 else if (input->eaflags & EAF_BYTEOFFS ||
1424 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1425 !(input->eaflags & EAF_WORDOFFS)))
1430 output->sib_present = FALSE; /* no SIB - it's 16-bit */
1431 output->bytes = mod; /* bytes of offset needed */
1432 output->modrm = (mod<<6) | (rfield<<3) | rm;
1436 output->size = 1 + output->sib_present + output->bytes;
/*
 * chsize: report whether a memory operand uses an address size different
 * from the current default, i.e. whether the caller must account for an
 * address-size change (calcsize() adds the result to the length and
 * gencode() emits a byte when it is non-zero).
 *
 *   input    - the operand to inspect.
 *   addrbits - the default address size in force (16 or 32).
 *
 * For a memory operand:
 *   - an index register with zero scale is ignored;
 *   - a pure offset needs a change only when addr_size is set and differs
 *     from addrbits;
 *   - if the base or index is one of the Exx registers the result is
 *     (addrbits == 16); the remaining visible return is (addrbits == 32).
 * NOTE(review): the tail of the function, including the result for
 * non-memory operands, is not visible in this listing.
 */
1440 static int chsize (operand *input, int addrbits)
1442 if (!(MEMORY & ~input->type)) {
1443 int i=input->indexreg, b=input->basereg;
1445 if (input->scale==0) i = -1;
1447 if (i == -1 && b == -1) /* pure offset */
1448 return (input->addr_size != 0 && input->addr_size != addrbits);
1450 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
1451 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
1452 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
1453 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI)
1454 return (addrbits==16);
1456 return (addrbits==32);