1 /* assemble.c code generation for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * the actual codes (C syntax, i.e. octal):
9 * \0 - terminates the code. (Unless it's a literal of course.)
10 * \1, \2, \3 - that many literal bytes follow in the code stream
11 * \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS
12 * (POP is never used for CS) depending on operand 0
13 * \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending on operand 0
15 * \10, \11, \12 - a literal byte follows in the code stream, to be added
16 * to the register value of operand 0, 1 or 2
17 * \17 - encodes the literal byte 0. (Some compilers don't take
18 * kindly to a zero byte in the _middle_ of a compile time
19 * string constant, so I had to put this hack in.)
20 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
21 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
22 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
23 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
24 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
25 * assembly mode or the address-size override on the operand
26 * \37 - a word constant, from the _segment_ part of operand 0
27 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
28 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
29 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
30 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
31 * assembly mode or the address-size override on the operand
32 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
33 * \1ab - a ModRM, calculated on EA in operand a, with the spare
34 * field the register value of operand b.
35 * \2ab - a ModRM, calculated on EA in operand a, with the spare
36 * field equal to digit b.
37 * \30x - might be an 0x67 byte, depending on the address size of
38 * the memory reference in operand x.
39 * \310 - indicates fixed 16-bit address size, i.e. optional 0x67.
40 * \311 - indicates fixed 32-bit address size, i.e. optional 0x67.
41 * \320 - indicates fixed 16-bit operand size, i.e. optional 0x66.
42 * \321 - indicates fixed 32-bit operand size, i.e. optional 0x66.
43 * \322 - indicates that this instruction is only valid when the
44 * operand size is the default (instruction to disassembler,
45 * generates no code in the assembler)
46 * \330 - a literal byte follows in the code stream, to be added
47 * to the condition code value of the instruction.
48 * \331 - instruction not valid with REP prefix. Hint for
49 * disassembler only; for SSE instructions.
50 * \332 - disassemble a rep (0xF3 byte) prefix as repe not rep.
51 * \333 - REP prefix (0xF3 byte); for SSE instructions. Not encoded
52 * as a literal byte in order to aid the disassembler.
53 * \340 - reserve <operand 0> bytes of uninitialised storage.
54 * Operand 0 had better be a segmentless constant.
/*
 * Table of instruction templates, indexed by opcode. assemble() and
 * insn_size() start at the entry for instruction->opcode and loop while
 * the current template has an opcode field other than -1.
 */
65 extern struct itemplate *nasm_instructions[];
/*
 * Members of the effective-address record (used through the type ea by
 * process_ea and its callers). The lines that open and close the
 * declaration are not visible in this view.
 */
68 int sib_present; /* is a SIB byte necessary? */
69 int bytes; /* # of bytes of offset needed */
70 int size; /* lazy - this is sib+bytes+1 */
71 unsigned char modrm, sib; /* the bytes themselves */
/* Output format driver: stored by assemble(), used by out() and gencode(). */
75 static struct ofmt *outfmt;
/* Forward declarations of the file-local helpers. */
78 static long calcsize (long, long, int, insn *, char *);
79 static void gencode (long, long, int, insn *, char *, long);
80 static int regval (operand *o);
81 static int matches (struct itemplate *, insn *);
82 static ea * process_ea (operand *, ea *, int, int, int);
83 static int chsize (operand *, int);
86 * This routine wraps the real output format's output routine,
87 * in order to pass a copy of the data off to the listing file
88 * generator at the same time.
/*
 * out: emit one piece of assembled output.
 *
 * The item is first mirrored to the listing generator through
 * list->output(). An OUT_ADDRESS item with neither a segment nor a wrt
 * is converted to raw bytes in a local buffer (WRITELONG when the size
 * bits equal 4, WRITESHORT otherwise) and listed as OUT_RAWDATA; a
 * relocated address, raw data and relative addresses are listed as they
 * are; OUT_RESERVE is listed with a NULL data pointer.
 * Then, if src_get() returns nonzero, the file name and line number it
 * filled in are passed to the linenum() routine of the current debug
 * format, and finally the item is handed to outfmt->output().
 *
 *   offset       - position reported to the listing generator
 *   segto        - segment the output goes to
 *   data         - payload; read as a long for OUT_ADDRESS items
 *   type         - kind in the OUT_TYPMASK bits, size in the OUT_SIZMASK bits
 *   segment, wrt - relocation information; both NO_SEG means not relocated
 */
90 static void out (long offset, long segto, void *data, unsigned long type,
91 long segment, long wrt)
96 if ((type & OUT_TYPMASK) == OUT_ADDRESS) {
97 if (segment != NO_SEG || wrt != NO_SEG) {
99 * This address is relocated. We must write it as
100 * OUT_ADDRESS, so there's no work to be done here.
102 list->output (offset, data, type);
105 unsigned char p[4], *q = p;
107 * This is a non-relocated address, and we're going to
108 * convert it into RAWDATA format.
110 if ((type & OUT_SIZMASK) == 4) {
111 WRITELONG (q, * (long *) data);
112 list->output (offset, p, OUT_RAWDATA+4);
115 WRITESHORT (q, * (long *) data);
116 list->output (offset, p, OUT_RAWDATA+2);
120 else if ((type & OUT_TYPMASK) == OUT_RAWDATA) {
121 list->output (offset, data, type);
123 else if ((type & OUT_TYPMASK) == OUT_RESERVE) {
124 list->output (offset, NULL, type);
126 else if ((type & OUT_TYPMASK) == OUT_REL2ADR ||
127 (type & OUT_TYPMASK) == OUT_REL4ADR) {
128 list->output (offset, data, type);
131 if (src_get(&lineno,&lnfname))
132 outfmt->current_dfmt->linenum(lnfname,lineno,segto);
134 outfmt->output (segto, data, type, segment, wrt);
/*
 * assemble: generate the output for one instruction.
 *
 * Three kinds of instruction are handled in turn:
 *   - DB/DW/DD/DQ/DT: the unit size is 1/2/4/8/10 bytes. A numeric
 *     operand is written either as one raw byte or as an OUT_ADDRESS
 *     item of the unit size (a number given to DQ or DT is reported as
 *     an error); a string operand is written raw and followed by zero
 *     bytes, the count being derived from stringlen modulo the unit size.
 *   - INCBIN: the named file is opened in binary mode and copied through
 *     out() using a 2048-byte buffer. A second operand, when present,
 *     gives the starting offset in the file, and a third one caps the
 *     length.
 *   - anything else: the templates for the opcode are tried with
 *     matches(); for one that returns 100 the instruction prefixes are
 *     written and gencode() produces the instruction bytes.
 * Each form is repeated instruction->times times, and LIST_TIMES levels
 * are opened and closed on the listing generator around the repeats.
 *
 *   segment, offset - where the output is placed
 *   bits            - assembly mode, passed on to calcsize() and gencode()
 *   instruction     - the parsed instruction
 *   output          - output format driver, saved in outfmt
 *   error           - error reporting function, saved in errfunc
 *   (one more parameter is declared on a line not visible in this view;
 *   the value stored in list is named listgen)
 *
 * Returns offset - start for data and ordinary instructions, times * len
 * for INCBIN, and 0 when the INCBIN file cannot be opened or seeked.
 * NOTE(review): start is declared on a line not visible here; presumably
 * it holds the offset on entry - confirm.
 */
137 long assemble (long segment, long offset, int bits,
138 insn *instruction, struct ofmt *output, efunc error,
141 struct itemplate *temp;
147 long wsize = 0; /* size for DB etc. */
149 errfunc = error; /* to pass to other functions */
150 outfmt = output; /* likewise */
151 list = listgen; /* and again */
153 switch (instruction->opcode)
156 case I_DB: wsize = 1; break;
157 case I_DW: wsize = 2; break;
158 case I_DD: wsize = 4; break;
159 case I_DQ: wsize = 8; break;
160 case I_DT: wsize = 10; break;
165 long t = instruction->times;
167 errfunc(ERR_PANIC, "instruction->times < 0 (%ld) in assemble()",t);
169 while (t--) /* repeat TIMES times */
171 for (e = instruction->eops; e; e = e->next)
173 if (e->type == EOT_DB_NUMBER)
176 if (e->segment != NO_SEG)
177 errfunc (ERR_NONFATAL,
178 "one-byte relocation attempted");
180 unsigned char out_byte = e->offset;
181 out (offset, segment, &out_byte, OUT_RAWDATA+1,
185 else if (wsize > 5) {
186 errfunc (ERR_NONFATAL, "integer supplied to a D%c"
187 " instruction", wsize==8 ? 'Q' : 'T');
190 out (offset, segment, &e->offset,
191 OUT_ADDRESS+wsize, e->segment,
195 else if (e->type == EOT_DB_STRING)
199 out (offset, segment, e->stringval,
200 OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG);
201 align = e->stringlen % wsize;
204 align = wsize - align;
205 out (offset, segment, "\0\0\0\0\0\0\0\0",
206 OUT_RAWDATA+align, NO_SEG, NO_SEG);
208 offset += e->stringlen + align;
/*
 * t was already decremented by the loop test, so this holds only during
 * the first repetition, and only when more repetitions are still to come.
 */
211 if (t > 0 && t == instruction->times-1)
214 * Dummy call to list->output to give the offset to the
217 list->output (offset, NULL, OUT_RAWDATA);
218 list->uplevel (LIST_TIMES);
221 if (instruction->times > 1)
222 list->downlevel (LIST_TIMES);
223 return offset - start;
226 if (instruction->opcode == I_INCBIN)
228 static char fname[FILENAME_MAX];
/* Copy at most FILENAME_MAX-1 characters of the file name operand. */
232 len = FILENAME_MAX-1;
233 if (len > instruction->eops->stringlen)
234 len = instruction->eops->stringlen;
235 strncpy (fname, instruction->eops->stringval, len);
238 if ( (fp = fopen(fname, "rb")) == NULL)
239 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
240 else if (fseek(fp, 0L, SEEK_END) < 0)
241 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
245 static char buf[2048];
246 long t = instruction->times;
/* Optional second operand: starting offset within the file. */
250 if (instruction->eops->next) {
251 base = instruction->eops->next->offset;
/* Optional third operand: upper limit on the number of bytes taken. */
253 if (instruction->eops->next->next &&
254 len > instruction->eops->next->next->offset)
255 len = instruction->eops->next->next->offset;
258 * Dummy call to list->output to give the offset to the
261 list->output (offset, NULL, OUT_RAWDATA);
262 list->uplevel(LIST_INCBIN);
267 fseek (fp, base, SEEK_SET);
270 long m = fread (buf, 1, (l>sizeof(buf)?sizeof(buf):l),
274 * This shouldn't happen unless the file
275 * actually changes while we are reading
278 error (ERR_NONFATAL, "`incbin': unexpected EOF while"
279 " reading file `%s'", fname);
280 t=0; /* Try to exit cleanly */
283 out (offset, segment, buf, OUT_RAWDATA+m,
288 list->downlevel(LIST_INCBIN);
289 if (instruction->times > 1) {
291 * Dummy call to list->output to give the offset to the
294 list->output (offset, NULL, OUT_RAWDATA);
295 list->uplevel(LIST_TIMES);
296 list->downlevel(LIST_TIMES);
299 return instruction->times * len;
301 return 0; /* if we're here, there's an error */
305 temp = nasm_instructions[instruction->opcode];
306 while (temp->opcode != -1) {
307 int m = matches (temp, instruction);
309 if (m == 100) /* matches! */
311 char *codes = temp->code;
312 long insn_size = calcsize(segment, offset, bits,
314 itimes = instruction->times;
315 if (insn_size < 0) /* shouldn't be, on pass two */
316 error (ERR_PANIC, "errors made it through from pass one");
317 else while (itimes--) {
318 insn_end = offset + insn_size;
319 for (j=0; j<instruction->nprefix; j++) {
321 switch (instruction->prefixes[j]) {
324 case P_REPNE: case P_REPNZ:
326 case P_REPE: case P_REPZ: case P_REP:
328 case R_CS: c = 0x2E; break;
329 case R_DS: c = 0x3E; break;
330 case R_ES: c = 0x26; break;
331 case R_FS: c = 0x64; break;
332 case R_GS: c = 0x65; break;
333 case R_SS: c = 0x36; break;
352 "invalid instruction prefix");
355 out (offset, segment, &c, OUT_RAWDATA+1,
360 gencode (segment, offset, bits, instruction, codes, insn_end);
362 if (itimes > 0 && itimes == instruction->times-1) {
364 * Dummy call to list->output to give the offset to the
367 list->output (offset, NULL, OUT_RAWDATA);
368 list->uplevel (LIST_TIMES);
371 if (instruction->times > 1)
372 list->downlevel (LIST_TIMES);
373 return offset - start;
380 if (temp->opcode == -1) { /* didn't match any instruction */
381 if (size_prob == 1) /* would have matched, but for size */
382 error (ERR_NONFATAL, "operation size not specified");
383 else if (size_prob == 2)
384 error (ERR_NONFATAL, "mismatch in operand sizes");
387 "invalid combination of opcode and operands");
/*
 * insn_size: work out how many bytes an instruction will occupy, without
 * emitting anything.
 *
 *   - DB/DW/DD/DQ/DT: adds up the operand sizes plus a per-operand
 *     alignment amount derived from the unit size (1/2/4/8/10).
 *   - INCBIN: opens the named file, seeks to its end, subtracts the
 *     optional offset operand from the length and caps the result by the
 *     optional length operand.
 *   - otherwise: calls calcsize() for a template on which matches()
 *     returns 100, then examines each instruction prefix.
 *
 * The data, INCBIN and template results are multiplied by
 * instruction->times. Returns 0 when the INCBIN file cannot be opened or
 * seeked, and -1 when no template matched.
 *
 *   segment, offset, bits - passed on to calcsize()
 *   instruction           - the parsed instruction
 *   error                 - error reporting function, saved in errfunc
 */
392 long insn_size (long segment, long offset, int bits,
393 insn *instruction, efunc error)
395 struct itemplate *temp;
397 errfunc = error; /* to pass to other functions */
399 if (instruction->opcode == -1)
402 if (instruction->opcode == I_DB ||
403 instruction->opcode == I_DW ||
404 instruction->opcode == I_DD ||
405 instruction->opcode == I_DQ ||
406 instruction->opcode == I_DT)
409 long isize, osize, wsize = 0; /* placate gcc */
412 switch (instruction->opcode)
414 case I_DB: wsize = 1; break;
415 case I_DW: wsize = 2; break;
416 case I_DD: wsize = 4; break;
417 case I_DQ: wsize = 8; break;
418 case I_DT: wsize = 10; break;
421 for (e = instruction->eops; e; e = e->next)
426 if (e->type == EOT_DB_NUMBER)
428 else if (e->type == EOT_DB_STRING)
429 osize = e->stringlen;
/*
 * NOTE(review): -osize is never positive, so under C99 and later this
 * remainder is zero or negative. The step that turns it into a padding
 * count is presumably on a line not visible here - confirm.
 */
431 align = (-osize) % wsize;
434 isize += osize + align;
436 return isize * instruction->times;
439 if (instruction->opcode == I_INCBIN)
441 char fname[FILENAME_MAX];
/* Copy at most FILENAME_MAX-1 characters of the file name operand. */
445 len = FILENAME_MAX-1;
446 if (len > instruction->eops->stringlen)
447 len = instruction->eops->stringlen;
448 strncpy (fname, instruction->eops->stringval, len);
450 if ( (fp = fopen(fname, "rb")) == NULL )
451 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
452 else if (fseek(fp, 0L, SEEK_END) < 0)
453 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
/* Optional second operand is a start offset, optional third a length cap. */
459 if (instruction->eops->next)
461 len -= instruction->eops->next->offset;
462 if (instruction->eops->next->next &&
463 len > instruction->eops->next->next->offset)
465 len = instruction->eops->next->next->offset;
468 return instruction->times * len;
470 return 0; /* if we're here, there's an error */
473 temp = nasm_instructions[instruction->opcode];
474 while (temp->opcode != -1) {
475 if (matches(temp, instruction) == 100) {
476 /* we've matched an instruction. */
478 char * codes = temp->code;
481 isize = calcsize(segment, offset, bits, instruction, codes);
484 for (j = 0; j < instruction->nprefix; j++)
/*
 * True for every prefix except a size prefix that matches the current
 * mode: A16/O16 when bits is 16, A32/O32 when bits is 32.
 */
486 if ((instruction->prefixes[j] != P_A16 &&
487 instruction->prefixes[j] != P_O16 && bits==16) ||
488 (instruction->prefixes[j] != P_A32 &&
489 instruction->prefixes[j] != P_O32 && bits==32))
494 return isize * instruction->times;
498 return -1; /* didn't match any instruction */
/*
 * calcsize: walk an instruction template code string and add up, in
 * length, the number of bytes the instruction takes for these operands.
 *
 *   segment, offset - unused
 *   bits            - assembly mode; decides the size of mode-dependent
 *                     fields and of the optional prefix bytes
 *   ins             - instruction whose operands are measured
 *   codes           - template code string, ended by a zero byte
 *
 * Problems found on the way (a reserve count that is not a plain
 * constant, an invalid effective address, an unknown code byte) are
 * reported through errfunc. assemble() treats a negative result as an
 * error left over from pass one.
 */
501 static long calcsize (long segment, long offset, int bits,
502 insn *ins, char *codes)
507 (void) segment; /* Don't warn that this parameter is unused */
508 (void) offset; /* Don't warn that this parameter is unused */
510 while (*codes) switch (c = *codes++) {
/* c literal bytes follow in the code string: skip them and count them. */
511 case 01: case 02: case 03:
512 codes += c, length += c; break;
513 case 04: case 05: case 06: case 07:
/* One byte follows in the code string and one byte is produced. */
515 case 010: case 011: case 012:
516 codes++, length++; break;
519 case 014: case 015: case 016:
521 case 020: case 021: case 022:
523 case 024: case 025: case 026:
525 case 030: case 031: case 032:
/* 2 or 4 bytes: the operand address size when set, otherwise the mode. */
527 case 034: case 035: case 036:
528 length += ((ins->oprs[c-034].addr_size ?
529 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4); break;
532 case 040: case 041: case 042:
534 case 050: case 051: case 052:
536 case 060: case 061: case 062:
538 case 064: case 065: case 066:
539 length += ((ins->oprs[c-064].addr_size ?
540 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4); break;
541 case 070: case 071: case 072:
/* chsize() yields 1 when an address-size prefix byte is required. */
543 case 0300: case 0301: case 0302:
544 length += chsize (&ins->oprs[c-0300], bits);
547 length += (bits==32);
550 length += (bits==16);
555 length += (bits==32);
558 length += (bits==16);
563 codes++, length++; break;
569 case 0340: case 0341: case 0342:
570 if (ins->oprs[0].segment != NO_SEG)
571 errfunc (ERR_NONFATAL, "attempt to reserve non-constant"
572 " quantity of BSS space");
/* Operand 0 scaled by 1, 2 or 4 according to the low bits of the code. */
574 length += ins->oprs[0].offset << (c-0340);
576 default: /* can't do it by 'case' statements */
577 if (c>=0100 && c<=0277) { /* it's an EA */
/* The operand index is held in bits 3 to 5 of the code. */
579 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, 0,
581 errfunc (ERR_NONFATAL, "invalid effective address");
584 length += ea_data.size;
586 errfunc (ERR_PANIC, "internal instruction table corrupt"
587 ": instruction code 0x%02X given", c);
/*
 * gencode: walk an instruction template code string and emit the
 * corresponding bytes through out().
 *
 *   segment, offset - current output position
 *   bits            - assembly mode, compared against 16 where a field
 *                     size depends on it
 *   ins             - instruction whose operands are encoded
 *   codes           - template code string; the meaning of each code is
 *                     listed in the comment at the top of the file
 *   insn_end        - offset just past the instruction (the caller passes
 *                     offset + size); used as the origin for relative
 *                     operands
 *
 * Range problems are reported as warnings and encoding problems as
 * errors through errfunc.
 */
592 static void gencode (long segment, long offset, int bits,
593 insn *ins, char *codes, long insn_end)
595 static char condval[] = { /* conditional opcodes */
596 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
597 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
598 0x0, 0xA, 0xA, 0xB, 0x8, 0x4
601 unsigned char bytes[4];
605 switch (c = *codes++)
/* c literal bytes are copied straight out of the code string. */
607 case 01: case 02: case 03:
608 out (offset, segment, codes, OUT_RAWDATA+c, NO_SEG, NO_SEG);
/* Segment register in operand 0; code 04 adds one to the base opcode. */
614 switch (ins->oprs[0].basereg)
617 bytes[0] = 0x0E + (c == 0x04 ? 1 : 0); break;
619 bytes[0] = 0x1E + (c == 0x04 ? 1 : 0); break;
621 bytes[0] = 0x06 + (c == 0x04 ? 1 : 0); break;
623 bytes[0] = 0x16 + (c == 0x04 ? 1 : 0); break;
625 errfunc (ERR_PANIC, "bizarre 8086 segment register received");
627 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
/* FS or GS in operand 0; code 05 adds one to the base byte. */
632 switch (ins->oprs[0].basereg) {
633 case R_FS: bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0); break;
634 case R_GS: bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0); break;
636 errfunc (ERR_PANIC, "bizarre 386 segment register received");
638 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
/* Next code byte plus the register value of the selected operand. */
642 case 010: case 011: case 012:
643 bytes[0] = *codes++ + regval(&ins->oprs[c-010]);
644 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
650 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
/* Signed byte immediate: warn outside -128..127, relocate if segmented. */
654 case 014: case 015: case 016:
655 if (ins->oprs[c-014].offset < -128
656 || ins->oprs[c-014].offset > 127)
658 errfunc (ERR_WARNING, "signed byte value exceeds bounds");
661 if (ins->oprs[c-014].segment != NO_SEG)
663 data = ins->oprs[c-014].offset;
664 out (offset, segment, &data, OUT_ADDRESS+1,
665 ins->oprs[c-014].segment, ins->oprs[c-014].wrt);
668 bytes[0] = ins->oprs[c-014].offset;
669 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
/*
 * Byte immediate of either signedness.
 * NOTE(review): the lower bound is -256, so -256 itself passes without a
 * warning although it does not fit in a byte; the word case further down
 * uses -65536 in the same way - confirm this is intended.
 */
674 case 020: case 021: case 022:
675 if (ins->oprs[c-020].offset < -256
676 || ins->oprs[c-020].offset > 255)
678 errfunc (ERR_WARNING, "byte value exceeds bounds");
680 if (ins->oprs[c-020].segment != NO_SEG) {
681 data = ins->oprs[c-020].offset;
682 out (offset, segment, &data, OUT_ADDRESS+1,
683 ins->oprs[c-020].segment, ins->oprs[c-020].wrt);
686 bytes[0] = ins->oprs[c-020].offset;
687 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
/* Unsigned byte immediate: warn outside 0..255. */
692 case 024: case 025: case 026:
693 if (ins->oprs[c-024].offset < 0 || ins->oprs[c-024].offset > 255)
694 errfunc (ERR_WARNING, "unsigned byte value exceeds bounds");
695 if (ins->oprs[c-024].segment != NO_SEG) {
696 data = ins->oprs[c-024].offset;
697 out (offset, segment, &data, OUT_ADDRESS+1,
698 ins->oprs[c-024].segment, ins->oprs[c-024].wrt);
701 bytes[0] = ins->oprs[c-024].offset;
702 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
/* Word immediate: the range warning applies only to unrelocated values. */
707 case 030: case 031: case 032:
708 if (ins->oprs[c-030].segment == NO_SEG &&
709 ins->oprs[c-030].wrt == NO_SEG &&
710 (ins->oprs[c-030].offset < -65536L ||
711 ins->oprs[c-030].offset > 65535L))
713 errfunc (ERR_WARNING, "word value exceeds bounds");
715 data = ins->oprs[c-030].offset;
716 out (offset, segment, &data, OUT_ADDRESS+2,
717 ins->oprs[c-030].segment, ins->oprs[c-030].wrt);
/* Word or long immediate, chosen by operand address size or the mode. */
721 case 034: case 035: case 036:
722 data = ins->oprs[c-034].offset;
723 size = ((ins->oprs[c-034].addr_size ?
724 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4);
/*
 * NOTE(review): size was just set to 2 or 4, so the size==16 test below
 * is never true and the warning can never be issued; size==2 was
 * probably intended.
 */
725 if (size==16 && (data < -65536L || data > 65535L))
726 errfunc (ERR_WARNING, "word value exceeds bounds");
727 out (offset, segment, &data, OUT_ADDRESS+size,
728 ins->oprs[c-034].segment, ins->oprs[c-034].wrt);
/* Segment part of operand 0, obtained from the output format segbase(). */
733 if (ins->oprs[0].segment == NO_SEG)
734 errfunc (ERR_NONFATAL, "value referenced by FAR is not"
737 out (offset, segment, &data, OUT_ADDRESS+2,
738 outfmt->segbase(1+ins->oprs[0].segment),
/* Long immediate, always written as a 4-byte address item. */
743 case 040: case 041: case 042:
744 data = ins->oprs[c-040].offset;
745 out (offset, segment, &data, OUT_ADDRESS+4,
746 ins->oprs[c-040].segment, ins->oprs[c-040].wrt);
/* Byte relative: target must be in this segment and within -128..127. */
750 case 050: case 051: case 052:
751 if (ins->oprs[c-050].segment != segment)
752 errfunc (ERR_NONFATAL, "short relative jump outside segment");
753 data = ins->oprs[c-050].offset - insn_end;
754 if (data > 127 || data < -128)
755 errfunc (ERR_NONFATAL, "short jump is out of range");
757 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
/*
 * Word relative. A target in another segment is passed on as an
 * OUT_REL2ADR item whose size field carries insn_end - offset; a target
 * in this segment has its displacement from insn_end computed here.
 */
761 case 060: case 061: case 062:
762 if (ins->oprs[c-060].segment != segment) {
763 data = ins->oprs[c-060].offset;
764 out (offset, segment, &data, OUT_REL2ADR+insn_end-offset,
765 ins->oprs[c-060].segment, ins->oprs[c-060].wrt);
767 data = ins->oprs[c-060].offset - insn_end;
768 out (offset, segment, &data,
769 OUT_ADDRESS+2, NO_SEG, NO_SEG);
/*
 * Word or long relative, chosen by operand address size or the mode.
 * NOTE(review): in the other-segment branch the relocation type and the
 * size restored afterwards are chosen from bits alone, whereas the size
 * computed just above, and the one calcsize() uses for these codes,
 * honours the operand addr_size. The two disagree whenever addr_size is
 * set and differs from bits - confirm.
 */
774 case 064: case 065: case 066:
775 size = ((ins->oprs[c-064].addr_size ?
776 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4);
777 if (ins->oprs[c-064].segment != segment) {
778 data = ins->oprs[c-064].offset;
779 size = (bits == 16 ? OUT_REL2ADR : OUT_REL4ADR);
780 out (offset, segment, &data, size+insn_end-offset,
781 ins->oprs[c-064].segment, ins->oprs[c-064].wrt);
782 size = (bits == 16 ? 2 : 4);
784 data = ins->oprs[c-064].offset - insn_end;
785 out (offset, segment, &data,
786 OUT_ADDRESS+size, NO_SEG, NO_SEG);
/* Long relative, handled like the word form but with 4-byte items. */
791 case 070: case 071: case 072:
792 if (ins->oprs[c-070].segment != segment) {
793 data = ins->oprs[c-070].offset;
794 out (offset, segment, &data, OUT_REL4ADR+insn_end-offset,
795 ins->oprs[c-070].segment, ins->oprs[c-070].wrt);
797 data = ins->oprs[c-070].offset - insn_end;
798 out (offset, segment, &data,
799 OUT_ADDRESS+4, NO_SEG, NO_SEG);
/* One byte is written when chsize() says the operand needs an override. */
804 case 0300: case 0301: case 0302:
805 if (chsize (&ins->oprs[c-0300], bits)) {
807 out (offset, segment, bytes,
808 OUT_RAWDATA+1, NO_SEG, NO_SEG);
817 out (offset, segment, bytes,
818 OUT_RAWDATA+1, NO_SEG, NO_SEG);
827 out (offset, segment, bytes,
828 OUT_RAWDATA+1, NO_SEG, NO_SEG);
840 out (offset, segment, bytes,
841 OUT_RAWDATA+1, NO_SEG, NO_SEG);
850 out (offset, segment, bytes,
851 OUT_RAWDATA+1, NO_SEG, NO_SEG);
/* Next code byte plus the table value for the instruction condition. */
861 *bytes = *codes++ + condval[ins->condition];
862 out (offset, segment, bytes,
863 OUT_RAWDATA+1, NO_SEG, NO_SEG);
873 out (offset, segment, bytes,
874 OUT_RAWDATA+1, NO_SEG, NO_SEG);
/* Reserve space: operand 0 scaled by 1, 2 or 4 according to the code. */
878 case 0340: case 0341: case 0342:
879 if (ins->oprs[0].segment != NO_SEG)
880 errfunc (ERR_PANIC, "non-constant BSS size in pass two");
882 long size = ins->oprs[0].offset << (c-0340);
884 out (offset, segment, NULL,
885 OUT_RESERVE+size, NO_SEG, NO_SEG);
890 default: /* can't do it by 'case' statements */
891 if (c>=0100 && c<=0277) { /* it's an EA */
/* Low three bits of the code select the operand that supplies rfield. */
897 if (c<=0177) /* pick rfield from operand b */
898 rfield = regval (&ins->oprs[c&7]);
899 else /* rfield is constant */
/* Bits 3 to 5 of the code select the operand holding the address. */
902 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, rfield,
905 errfunc (ERR_NONFATAL, "invalid effective address");
909 *p++ = ea_data.modrm;
910 if (ea_data.sib_present)
914 out (offset, segment, bytes, OUT_RAWDATA + s,
/* The displacement follows, ea_data.bytes bytes wide. */
917 switch (ea_data.bytes) {
921 if (ins->oprs[(c>>3)&7].segment != NO_SEG) {
922 data = ins->oprs[(c>>3)&7].offset;
923 out (offset, segment, &data, OUT_ADDRESS+1,
924 ins->oprs[(c>>3)&7].segment,
925 ins->oprs[(c>>3)&7].wrt);
927 *bytes = ins->oprs[(c>>3)&7].offset;
928 out (offset, segment, bytes, OUT_RAWDATA+1,
935 data = ins->oprs[(c>>3)&7].offset;
936 out (offset, segment, &data,
937 OUT_ADDRESS+ea_data.bytes,
938 ins->oprs[(c>>3)&7].segment, ins->oprs[(c>>3)&7].wrt);
944 errfunc (ERR_PANIC, "internal instruction table corrupt"
945 ": instruction code 0x%02X given", c);
/*
 * regval: give the register value for the register in o->basereg, as
 * used by gencode() for opcode bytes and for the spare field of a ModRM
 * byte.
 *
 * Registers are grouped under runs of case labels, one run per result.
 * The return statement of each run is on a line not visible in this
 * view; presumably the runs return 0 through 7 in the order listed -
 * confirm. A register that appears in no run raises ERR_PANIC.
 */
949 static int regval (operand *o)
951 switch (o->basereg) {
952 case R_EAX: case R_AX: case R_AL: case R_ES: case R_CR0: case R_DR0:
953 case R_ST0: case R_MM0: case R_XMM0:
955 case R_ECX: case R_CX: case R_CL: case R_CS: case R_DR1: case R_ST1:
956 case R_MM1: case R_XMM1:
958 case R_EDX: case R_DX: case R_DL: case R_SS: case R_CR2: case R_DR2:
959 case R_ST2: case R_MM2: case R_XMM2:
961 case R_EBX: case R_BX: case R_BL: case R_DS: case R_CR3: case R_DR3:
962 case R_TR3: case R_ST3: case R_MM3: case R_XMM3:
964 case R_ESP: case R_SP: case R_AH: case R_FS: case R_CR4: case R_TR4:
965 case R_ST4: case R_MM4: case R_XMM4:
967 case R_EBP: case R_BP: case R_CH: case R_GS: case R_TR5: case R_ST5:
968 case R_MM5: case R_XMM5:
970 case R_ESI: case R_SI: case R_DH: case R_DR6: case R_TR6: case R_ST6:
971 case R_MM6: case R_XMM6:
973 case R_EDI: case R_DI: case R_BH: case R_DR7: case R_TR7: case R_ST7:
974 case R_MM7: case R_XMM7:
977 errfunc (ERR_PANIC, "invalid register operand given to regval()");
/*
 * matches: test a parsed instruction against one instruction template.
 *
 * Returns 0 when the opcode or the operand count differs. assemble() and
 * insn_size() accept a template only when the result is 100. The other
 * results are produced on lines not visible in this view.
 *
 * The visible checks, in order:
 *   1. opcode and operand count;
 *   2. no COLON or TO flag on an operand unless the template has it;
 *   3. every template operand flag is present on the actual operand, and
 *      a sized template operand agrees with the operand in its size bits;
 *   4. operand sizes implied by the template flags IF_SB / IF_SW / IF_SD,
 *      IF_ARMASK and IF_SM / IF_SM2.
 */
982 static int matches (struct itemplate *itemp, insn *instruction)
984 int i, size[3], asize, oprs, ret;
991 if (itemp->opcode != instruction->opcode) return 0;
996 if (itemp->operands != instruction->operands) return 0;
999 * Check that no spurious colons or TOs are present
1001 for (i=0; i<itemp->operands; i++)
1002 if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON|TO))
1006 * Check that the operand flags all match up
1008 for (i=0; i<itemp->operands; i++)
1009 if (itemp->opd[i] & ~instruction->oprs[i].type ||
1010 ((itemp->opd[i] & SIZE_MASK) &&
1011 ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK)))
1013 if ((itemp->opd[i] & ~instruction->oprs[i].type & NON_SIZE) ||
1014 (instruction->oprs[i].type & SIZE_MASK))
1021 * Check operand sizes
/*
 * With an IF_ARx flag the three sizes start at zero and i names a single
 * operand; presumably only that operand then receives the template size
 * (the assignments are not visible here). Without it all three operands
 * share asize.
 */
1023 if (itemp->flags & IF_ARMASK) {
1024 size[0] = size[1] = size[2] = 0;
1026 switch (itemp->flags & IF_ARMASK) {
1027 case IF_AR0: i = 0; break;
1028 case IF_AR1: i = 1; break;
1029 case IF_AR2: i = 2; break;
1030 default: break; /* Shouldn't happen */
1032 if (itemp->flags & IF_SB) {
1034 } else if (itemp->flags & IF_SW) {
1036 } else if (itemp->flags & IF_SD) {
1041 if (itemp->flags & IF_SB) {
1043 oprs = itemp->operands;
1044 } else if (itemp->flags & IF_SW) {
1046 oprs = itemp->operands;
1047 } else if (itemp->flags & IF_SD) {
1049 oprs = itemp->operands;
1051 size[0] = size[1] = size[2] = asize;
/* IF_SM2 limits the size matching to the first two operands. */
1054 if (itemp->flags & (IF_SM | IF_SM2)) {
1055 oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
1057 for (i=0; i<oprs; i++) {
1058 if ( (asize = itemp->opd[i] & SIZE_MASK) != 0) {
1060 for (j=0; j<oprs; j++)
1066 oprs = itemp->operands;
/*
 * Picks out operands whose template entry is unsized but whose actual
 * type carries a size bit that is not set in size[i].
 */
1069 for (i=0; i<itemp->operands; i++)
1070 if (!(itemp->opd[i] & SIZE_MASK) &&
1071 (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
/*
 * process_ea: work out the ModRM encoding of an operand.
 *
 *   input    - the operand: a single register or a memory reference
 *   output   - receives modrm, sib, sib_present, bytes and size
 *   addrbits - address size of the current mode; for a pure offset it is
 *              replaced by input->addr_size when that is set
 *   rfield   - value placed in bits 3 to 5 of the ModRM byte
 *   (the last parameter is declared on a line not visible in this view;
 *   presumably it is forw_ref, which when nonzero rules out the forms
 *   with no displacement or a byte displacement - confirm)
 *
 * Returns NULL when the operand cannot be encoded; both callers only test
 * the result against NULL. The successful return is on a line not
 * visible in this view.
 *
 * Memory references take one of three routes: a pure offset, a 32-bit
 * form when any Exx register is involved (with or without a SIB byte),
 * and a 16-bit form otherwise.
 */
1077 static ea *process_ea (operand *input, ea *output, int addrbits, int rfield,
1080 if (!(REGISTER & ~input->type)) { /* it's a single register */
1081 static int regs[] = {
1082 R_AL, R_CL, R_DL, R_BL, R_AH, R_CH, R_DH, R_BH,
1083 R_AX, R_CX, R_DX, R_BX, R_SP, R_BP, R_SI, R_DI,
1084 R_EAX, R_ECX, R_EDX, R_EBX, R_ESP, R_EBP, R_ESI, R_EDI,
1085 R_MM0, R_MM1, R_MM2, R_MM3, R_MM4, R_MM5, R_MM6, R_MM7,
1086 R_XMM0, R_XMM1, R_XMM2, R_XMM3, R_XMM4, R_XMM5, R_XMM6, R_XMM7
1090 for (i=0; i<elements(regs); i++)
1091 if (input->basereg == regs[i]) break;
1092 if (i<elements(regs)) {
1093 output->sib_present = FALSE;/* no SIB necessary */
1094 output->bytes = 0; /* no offset necessary either */
/* Top two bits set; the low three bits are the table position modulo 8. */
1095 output->modrm = 0xC0 | (rfield << 3) | (i & 7);
1099 } else { /* it's a memory reference */
1100 if (input->basereg==-1 && (input->indexreg==-1 || input->scale==0)) {
1101 /* it's a pure offset */
1102 if (input->addr_size)
1103 addrbits = input->addr_size;
1104 output->sib_present = FALSE;
/* 4-byte offset with low bits 5 for 32-bit, 2-byte with low bits 6 else. */
1105 output->bytes = (addrbits==32 ? 4 : 2);
1106 output->modrm = (addrbits==32 ? 5 : 6) | (rfield << 3);
1108 else { /* it's an indirection */
1109 int i=input->indexreg, b=input->basereg, s=input->scale;
1110 long o=input->offset, seg=input->segment;
1111 int hb=input->hintbase, ht=input->hinttype;
1114 if (s==0) i = -1; /* make this easy, at least */
1116 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
1117 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
1118 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
1119 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI) {
1120 /* it must be a 32-bit memory reference. Firstly we have
1121 * to check that all registers involved are type Exx. */
1122 if (i!=-1 && i!=R_EAX && i!=R_EBX && i!=R_ECX && i!=R_EDX
1123 && i!=R_EBP && i!=R_ESP && i!=R_ESI && i!=R_EDI)
1125 if (b!=-1 && b!=R_EAX && b!=R_EBX && b!=R_ECX && b!=R_EDX
1126 && b!=R_EBP && b!=R_ESP && b!=R_ESI && b!=R_EDI)
1129 /* While we're here, ensure the user didn't specify WORD. */
1130 if (input->addr_size == 16)
1133 /* now reorganise base/index */
1134 if (s == 1 && b != i && b != -1 && i != -1 &&
1135 ((hb==b&&ht==EAH_NOTBASE) || (hb==i&&ht==EAH_MAKEBASE)))
1136 t = b, b = i, i = t; /* swap if hints say so */
1137 if (b==i) /* convert EAX+2*EAX to 3*EAX */
1139 if (b==-1 && s==1 && !(hb == i && ht == EAH_NOTBASE))
1140 b = i, i = -1; /* make single reg base, unless hint */
1141 if (((s==2 && i!=R_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
1142 s==3 || s==5 || s==9) && b==-1)
1143 b = i, s--; /* convert 3*EAX to EAX+2*EAX */
1144 if (s==1 && i==R_ESP) /* swap ESP into base if scale is 1 */
1146 if (i==R_ESP || (s!=1 && s!=2 && s!=4 && s!=8 && i!=-1))
1147 return NULL; /* wrong, for various reasons */
1149 if (i==-1 && b!=R_ESP) {/* no SIB needed */
1152 case R_EAX: rm = 0; break;
1153 case R_ECX: rm = 1; break;
1154 case R_EDX: rm = 2; break;
1155 case R_EBX: rm = 3; break;
1156 case R_EBP: rm = 5; break;
1157 case R_ESI: rm = 6; break;
1158 case R_EDI: rm = 7; break;
1159 case -1: rm = 5; break;
1160 default: /* should never happen */
1163 if (b==-1 || (b!=R_EBP && o==0 &&
1164 seg==NO_SEG && !forw_ref &&
1166 (EAF_BYTEOFFS|EAF_WORDOFFS))))
1168 else if (input->eaflags & EAF_BYTEOFFS ||
1169 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1170 !(input->eaflags & EAF_WORDOFFS))) {
1176 output->sib_present = FALSE;
/* A missing base or mod 2 takes a 4-byte offset; else mod is the count. */
1177 output->bytes = (b==-1 || mod==2 ? 4 : mod);
1178 output->modrm = (mod<<6) | (rfield<<3) | rm;
1180 else { /* we need a SIB */
1181 int mod, scale, index, base;
1184 case R_EAX: base = 0; break;
1185 case R_ECX: base = 1; break;
1186 case R_EDX: base = 2; break;
1187 case R_EBX: base = 3; break;
1188 case R_ESP: base = 4; break;
1189 case R_EBP: case -1: base = 5; break;
1190 case R_ESI: base = 6; break;
1191 case R_EDI: base = 7; break;
1192 default: /* then what the smeg is it? */
1193 return NULL; /* panic */
1197 case R_EAX: index = 0; break;
1198 case R_ECX: index = 1; break;
1199 case R_EDX: index = 2; break;
1200 case R_EBX: index = 3; break;
1201 case -1: index = 4; break;
1202 case R_EBP: index = 5; break;
1203 case R_ESI: index = 6; break;
1204 case R_EDI: index = 7; break;
1205 default: /* then what the smeg is it? */
1206 return NULL; /* panic */
1211 case 1: scale = 0; break;
1212 case 2: scale = 1; break;
1213 case 4: scale = 2; break;
1214 case 8: scale = 3; break;
1215 default: /* then what the smeg is it? */
1216 return NULL; /* panic */
1219 if (b==-1 || (b!=R_EBP && o==0 &&
1220 seg==NO_SEG && !forw_ref &&
1222 (EAF_BYTEOFFS|EAF_WORDOFFS))))
1224 else if (input->eaflags & EAF_BYTEOFFS ||
1225 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1226 !(input->eaflags & EAF_WORDOFFS)))
1231 output->sib_present = TRUE;
1232 output->bytes = (b==-1 || mod==2 ? 4 : mod);
/* Low bits 4 in the ModRM byte mean that a SIB byte follows. */
1233 output->modrm = (mod<<6) | (rfield<<3) | 4;
1234 output->sib = (scale<<6) | (index<<3) | base;
1237 else { /* it's 16-bit */
1240 /* check all registers are BX, BP, SI or DI */
1241 if ((b!=-1 && b!=R_BP && b!=R_BX && b!=R_SI && b!=R_DI) ||
1242 (i!=-1 && i!=R_BP && i!=R_BX && i!=R_SI && i!=R_DI))
1245 /* ensure the user didn't specify DWORD */
1246 if (input->addr_size == 32)
1249 if (s!=1 && i!=-1) return NULL;/* no can do, in 16-bit EA */
/*
 * NOTE(review): the one-expression XOR swap used on the next lines
 * modifies b twice with no sequencing between the modifications, which
 * is undefined behaviour in ISO C. A swap through a temporary, as done
 * in the 32-bit branch above, would be well defined.
 */
1250 if (b==-1 && i!=-1) b ^= i ^= b ^= i; /* swap them round */
1251 if ((b==R_SI || b==R_DI) && i!=-1)
1252 b ^= i ^= b ^= i; /* have BX/BP as base, SI/DI index */
1253 if (b==i) return NULL;/* shouldn't ever happen, in theory */
1254 if (i!=-1 && b!=-1 &&
1255 (i==R_BP || i==R_BX || b==R_SI || b==R_DI))
1256 return NULL; /* invalid combinations */
1257 if (b==-1) /* pure offset: handled above */
1258 return NULL; /* so if it gets to here, panic! */
/* Index and base are packed into one value for the switch. */
1262 switch (i*256 + b) {
1263 case R_SI*256+R_BX: rm=0; break;
1264 case R_DI*256+R_BX: rm=1; break;
1265 case R_SI*256+R_BP: rm=2; break;
1266 case R_DI*256+R_BP: rm=3; break;
1270 case R_SI: rm=4; break;
1271 case R_DI: rm=5; break;
1272 case R_BP: rm=6; break;
1273 case R_BX: rm=7; break;
1275 if (rm==-1) /* can't happen, in theory */
1276 return NULL; /* so panic if it does */
1278 if (o==0 && seg==NO_SEG && !forw_ref && rm!=6 &&
1279 !(input->eaflags & (EAF_BYTEOFFS|EAF_WORDOFFS)))
1281 else if (input->eaflags & EAF_BYTEOFFS ||
1282 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1283 !(input->eaflags & EAF_WORDOFFS)))
1288 output->sib_present = FALSE; /* no SIB - it's 16-bit */
1289 output->bytes = mod; /* bytes of offset needed */
1290 output->modrm = (mod<<6) | (rfield<<3) | rm;
/* Total: the ModRM byte, the SIB byte if any, and the offset bytes. */
1294 output->size = 1 + output->sib_present + output->bytes;
/*
 * chsize: report whether a memory operand needs an address-size override
 * in the given mode. calcsize() adds the result to the instruction
 * length and gencode() writes one byte when it is nonzero.
 *
 *   input    - the operand to examine
 *   addrbits - address size of the current mode
 *
 * For an operand whose type has every MEMORY bit set:
 *   - a pure offset (no base, and no index or a zero scale) needs the
 *     override when addr_size is set and differs from addrbits;
 *   - a reference using any of the eight 32-bit general registers as base
 *     or index needs it when addrbits is 16;
 *   - the last visible return tests addrbits against 32.
 * The end of the function, including the result for operands that are
 * not memory references, is not visible in this view.
 */
1298 static int chsize (operand *input, int addrbits)
1300 if (!(MEMORY & ~input->type)) {
1301 int i=input->indexreg, b=input->basereg;
/* A zero scale means the index register is not really used. */
1303 if (input->scale==0) i = -1;
1305 if (i == -1 && b == -1) /* pure offset */
1306 return (input->addr_size != 0 && input->addr_size != addrbits);
1308 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
1309 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
1310 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
1311 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI)
1312 return (addrbits==16);
1314 return (addrbits==32);