1 /* parser.c source line parser for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
8 * initial version 27/iii/95 by Simon Tatham
/*
 * Per-register flag table. It is indexed by a register number: parse_line()
 * looks up reg_flags[tokval.t_integer] to recognise segment registers
 * (REG_SREG test) and reg_flags[value->type] to build register operand types.
 * NOTE(review): the entry order presumably has to match the reg_names table
 * searched by nexttoken(); confirm before reordering or adding entries.
 */
24 static long reg_flags[] = { /* sizes and special flags */
25 0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL,
26 REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8,
27 REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG,
28 REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX,
29 REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS,
30 MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG,
31 REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG,
32 FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG,
/*
 * Codes carried in tokval.t_integer when nexttoken() returns TOKEN_SPECIAL.
 * nexttoken() produces them as the index returned by bsi() over
 * special_names[], so the enumerators presumably have to stay in the same
 * order as that table (TODO confirm against the part of the list not shown).
 */
36 enum { /* special tokens */
37 S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_QWORD, S_SHORT, S_TO,
/*
 * Spellings of the special tokens. nexttoken() looks identifiers up here
 * with bsi(), which is a binary search using strcmp(), so the entries must
 * be kept in ascending strcmp() order.
 */
41 static char *special_names[] = { /* and the actual text */
42 "byte", "dword", "far", "long", "near", "qword", "short", "to",
/*
 * Spellings of the instruction prefixes. nexttoken() searches this table
 * with bsi() (binary search, so keep it in ascending strcmp() order) and adds
 * PREFIX_ENUM_START to the resulting index to form the prefix code stored in
 * tokval.t_integer.
 */
46 static char *prefix_names[] = {
47 "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne",
48 "repnz", "repz", "times"
52 * Evaluator datatype. Expressions, within the evaluator, are
53 * stored as an array of these beasts, terminated by a record with
54 * type==0. Mostly, it's a vector type: each type denotes some kind
55 * of a component, and the value denotes the multiple of that
56 * component present in the expression. The exception is the WRT
57 * type, whose `value' field denotes the segment to which the
58 * expression is relative. These segments will be segment-base
59 * types, i.e. either odd segment values or SEG_ABS types. So it is
60 * still valid to assume that anything with a `value' field of zero
64 long type; /* a register, or EXPR_xxx */
65 long value; /* must be >= 32 bits */
68 static void eval_reset(void);
69 static expr *evaluate(int);
72 * ASSUMPTION MADE HERE. The number of distinct register names
73 * (i.e. possible "type" fields for an expr structure) does not
76 #define EXPR_SIMPLE 126
78 #define EXPR_SEGBASE 128
/*
 * Predicates and accessors over evaluator results (arrays of expr records
 * terminated by a record whose type is 0). All are defined later in this
 * file; the is_xxx functions return non-zero for "yes".
 */
80 static int is_reloc(expr *);
81 static int is_simple(expr *);
82 static int is_really_simple (expr *);
83 static long reloc_value(expr *);
84 static long reloc_seg(expr *);
85 static long reloc_wrt(expr *);
/*
 * Token codes returned by nexttoken(). Numbering starts at 256 so that these
 * codes cannot collide with single-character tokens, which nexttoken()
 * returns as their unsigned char value.
 */
87 enum { /* token types, other than chars */
88 TOKEN_ID = 256, TOKEN_NUM, TOKEN_REG, TOKEN_INSN, TOKEN_ERRNUM,
89 TOKEN_HERE, TOKEN_BASE, TOKEN_SPECIAL, TOKEN_PREFIX, TOKEN_SHL,
90 TOKEN_SHR, TOKEN_SDIV, TOKEN_SMOD, TOKEN_SEG, TOKEN_WRT,
95 long t_integer, t_inttwo;
/*
 * File-scope state shared by the parser, tokeniser and evaluator, plus
 * forward declarations of the tokeniser helpers.
 */
/* q is the write cursor nexttoken() uses when copying identifier text;
 * presumably it points into tempstorage (the assignment is not visible). */
99 static char tempstorage[1024], *q;
100 static int bsi (char *string, char **array, int size);/* binary search */
102 static int nexttoken (void);
103 static int is_comma_next (void);
107 static struct tokenval tokval; /* details of the token last returned by nexttoken() */
108 static lfunc labelfunc; /* label lookup callback; parse_line() sets it from lookup_label */
111 static struct ofmt *outfmt; /* output format; segment_part() calls its segbase() hook */
/* NOTE(review): presumably the segment and offset of the line being parsed,
 * taken from the parse_line() arguments; the assignment is not visible. */
113 static long seg, ofs;
/*
 * Parse one source line into *result and return result.
 *
 * What the visible code establishes:
 *  - lookup_label is saved in labelfunc for use by the evaluator.
 *  - result->eops is always reset to NULL first.
 *  - An empty line gives result->label == NULL and result->opcode == -1.
 *  - The line may begin with a label (TOKEN_ID), optionally followed by a
 *    colon; a label with nothing after it and no colon draws a warning on
 *    pass 1 only.
 *  - Any number of prefixes may follow, including segment registers used as
 *    prefixes and the TIMES prefix, whose argument must be a constant.
 *  - DB/DW/DD/DQ/DT/INCBIN collect an extended operand list through
 *    result->eops; every other instruction parses up to three operands into
 *    result->oprs[] and records the count in result->operands.
 *  - RESW/RESD/RESQ/REST are rewritten as RESB with a scaled operand.
 *  - On an unrecoverable error result->opcode is set to -1 and the partly
 *    filled result is returned.
 *
 * NOTE(review): parts of this function (including the end of the parameter
 * list) are not visible in this view; the roles of segment, offset, buffer
 * and output are presumably "current location", "text to parse" and "output
 * format" but that is not shown here - confirm against the full source.
 */
117 insn *parse_line (long segment, long offset, lfunc lookup_label, int pass,
118 char *buffer, insn *result, struct ofmt *output,
123 forward = result->forw_ref = FALSE;
126 labelfunc = lookup_label;
135 result->eops = NULL; /* must do this, whatever happens */
137 if (i==0) { /* blank line - ignore */
138 result->label = NULL; /* so, no label on it */
139 result->opcode = -1; /* and no instruction either */
142 if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX &&
143 (i!=TOKEN_REG || (REG_SREG & ~reg_flags[tokval.t_integer]))) {
144 error (ERR_NONFATAL, "label or instruction expected"
145 " at start of line");
146 result->label = NULL;
151 if (i == TOKEN_ID) { /* there's a label here */
152 label = result->label = tokval.t_charptr;
154 if (i == ':') { /* skip over the optional colon */
156 } else if (i == 0 && pass == 1) {
157 error (ERR_WARNING|ERR_WARN_OL,
158 "label alone on a line without a colon might be in error");
160 } else /* no label; so, moving swiftly on */
161 result->label = NULL;
164 result->opcode = -1; /* this line contains just a label */
171 while (i == TOKEN_PREFIX ||
172 (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) {
174 * Handle special case: the TIMES prefix.
176 if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
181 value = evaluate (pass);
182 if (!value) { /* but, error in evaluator */
183 result->opcode = -1; /* unrecoverable parse error: */
184 return result; /* ignore this instruction */
186 if (!is_simple (value)) {
188 "non-constant argument supplied to TIMES");
191 result->times = value->value;
192 if (value->value < 0)
/* NOTE(review): the value field of expr is declared long; if the argument on
 * the continuation line is value->value, the %d conversion does not match
 * its type - confirm and use %ld if so. */
193 error(ERR_NONFATAL, "TIMES value %d is negative",
197 if (result->nprefix == MAXPREFIX)
199 "instruction has more than %d prefixes", MAXPREFIX);
201 result->prefixes[result->nprefix++] = tokval.t_integer;
206 if (i != TOKEN_INSN) {
207 error (ERR_NONFATAL, "parser: instruction expected");
212 result->opcode = tokval.t_integer;
213 result->condition = tokval.t_inttwo;
216 * RESB, RESW and RESD cannot be satisfied with incorrectly
217 * evaluated operands, since the correct values _must_ be known
218 * on the first pass. Hence, even in pass one, we set the
219 * `critical' flag on calling evaluate(), so that it will bomb
220 * out on undefined symbols. Nasty, but there's nothing we can
223 * For the moment, EQU has the same difficulty, so we'll
226 if (result->opcode == I_RESB ||
227 result->opcode == I_RESW ||
228 result->opcode == I_RESD ||
229 result->opcode == I_RESQ ||
230 result->opcode == I_REST ||
231 result->opcode == I_EQU)
234 critical = (pass==2 ? 2 : 0);
236 if (result->opcode == I_DB ||
237 result->opcode == I_DW ||
238 result->opcode == I_DD ||
239 result->opcode == I_DQ ||
240 result->opcode == I_DT ||
241 result->opcode == I_INCBIN) {
242 extop *eop, **tail = &result->eops;
246 * Begin to read the DB/DW/DD/DQ/DT operands.
252 eop = *tail = nasm_malloc(sizeof(extop));
255 eop->type = EOT_NOTHING;
258 if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) {
259 eop->type = EOT_DB_STRING;
260 eop->stringval = tokval.t_charptr;
261 eop->stringlen = tokval.t_inttwo;
262 i = nexttoken(); /* eat the comma */
266 if (i == TOKEN_FLOAT || i == '-') {
273 if (i != TOKEN_FLOAT) {
279 if (i == TOKEN_FLOAT) {
280 eop->type = EOT_DB_STRING;
282 if (result->opcode == I_DD)
284 else if (result->opcode == I_DQ)
286 else if (result->opcode == I_DT)
289 error(ERR_NONFATAL, "floating-point constant"
290 " encountered in `D%c' instruction",
291 result->opcode == I_DW ? 'W' : 'B');
292 eop->type = EOT_NOTHING;
295 if (!float_const (tokval.t_charptr, sign,
296 (unsigned char *)eop->stringval,
297 eop->stringlen, error))
298 eop->type = EOT_NOTHING;
299 i = nexttoken(); /* eat the comma */
304 /* anything else */ {
307 value = evaluate (critical);
308 if (!value) { /* but, error in evaluator */
309 result->opcode = -1;/* unrecoverable parse error: */
310 return result; /* ignore this instruction */
312 if (is_reloc(value)) {
313 eop->type = EOT_DB_NUMBER;
314 eop->offset = reloc_value(value);
315 eop->segment = reloc_seg(value);
316 eop->wrt = reloc_wrt(value);
319 "`%s' operand %d: expression is not simple"
321 insn_names[result->opcode], oper_num);
326 * We're about to call nexttoken(), which will eat the
327 * comma that we're currently sitting on between
328 * arguments. However, we'd better check first that it
331 if (i == 0) /* also could be EOL */
334 error (ERR_NONFATAL, "comma expected after `%s' operand %d",
335 insn_names[result->opcode], oper_num);
336 result->opcode = -1;/* unrecoverable parse error: */
337 return result; /* ignore this instruction */
341 if (result->opcode == I_INCBIN) {
343 * Correct syntax for INCBIN is that there should be
344 * one string operand, followed by one or two numeric
347 if (!result->eops || result->eops->type != EOT_DB_STRING)
348 error (ERR_NONFATAL, "`incbin' expects a file name");
349 else if (result->eops->next &&
350 result->eops->next->type != EOT_DB_NUMBER)
/* NOTE(review): the message literal below (and the "third parameter" one
 * further down) is followed by a comma, so anything on the continuation line
 * becomes a separate argument instead of continuing the message text. If the
 * continuation is a string literal, the comma should be removed - confirm. */
351 error (ERR_NONFATAL, "`incbin': second parameter is",
353 else if (result->eops->next && result->eops->next->next &&
354 result->eops->next->next->type != EOT_DB_NUMBER)
355 error (ERR_NONFATAL, "`incbin': third parameter is",
357 else if (result->eops->next && result->eops->next->next &&
358 result->eops->next->next->next)
359 error (ERR_NONFATAL, "`incbin': more than three parameters");
363 * If we reach here, one of the above errors happened.
364 * Throw the instruction away.
373 /* right. Now we begin to parse the operands. There may be up to three
374 * of these, separated by commas, and terminated by a zero token. */
376 for (operand = 0; operand < 3; operand++) {
377 expr *seg, *value; /* used most of the time */
378 int mref; /* is this going to be a memory ref? */
379 int bracket; /* is it a [] mref, or a & mref? */
381 result->oprs[operand].addr_size = 0;/* have to zero this whatever */
383 if (i == 0) break; /* end of operands: get out of here */
384 result->oprs[operand].type = 0; /* so far, no override */
385 while (i == TOKEN_SPECIAL) {/* size specifiers */
386 switch ((int)tokval.t_integer) {
388 result->oprs[operand].type |= BITS8;
391 result->oprs[operand].type |= BITS16;
395 result->oprs[operand].type |= BITS32;
398 result->oprs[operand].type |= BITS64;
401 result->oprs[operand].type |= BITS80;
404 result->oprs[operand].type |= TO;
407 result->oprs[operand].type |= FAR;
410 result->oprs[operand].type |= NEAR;
413 result->oprs[operand].type |= SHORT;
419 if (i == '[' || i == '&') { /* memory reference */
421 bracket = (i == '[');
423 if (i == TOKEN_SPECIAL) { /* check for address size override */
424 switch ((int)tokval.t_integer) {
426 result->oprs[operand].addr_size = 16;
430 result->oprs[operand].addr_size = 32;
433 error (ERR_NONFATAL, "invalid size specification in"
434 " effective address");
438 } else { /* immediate operand, or register */
440 bracket = FALSE; /* placate optimisers */
445 value = evaluate (critical);
447 result->forw_ref = TRUE;
448 if (!value) { /* error in evaluator */
449 result->opcode = -1; /* unrecoverable parse error: */
450 return result; /* ignore this instruction */
452 if (i == ':' && mref) { /* it was seg:offset */
453 seg = value; /* so shift this into the segment */
454 i = nexttoken(); /* then skip the colon */
455 if (i == TOKEN_SPECIAL) { /* another check for size override */
456 switch ((int)tokval.t_integer) {
458 result->oprs[operand].addr_size = 16;
462 result->oprs[operand].addr_size = 32;
465 error (ERR_NONFATAL, "invalid size specification in"
466 " effective address");
470 value = evaluate (critical);
472 result->forw_ref = TRUE;
473 /* and get the offset */
474 if (!value) { /* but, error in evaluator */
475 result->opcode = -1; /* unrecoverable parse error: */
476 return result; /* ignore this instruction */
479 if (mref && bracket) { /* find ] at the end */
481 error (ERR_NONFATAL, "parser: expecting ]");
482 do { /* error recovery again */
484 } while (i != 0 && i != ',');
485 } else /* we got the required ] */
487 } else { /* immediate operand */
488 if (i != 0 && i != ',' && i != ':') {
489 error (ERR_NONFATAL, "comma or end of line expected");
490 do { /* error recovery */
492 } while (i != 0 && i != ',');
493 } else if (i == ':') {
494 result->oprs[operand].type |= COLON;
498 /* now convert the exprs returned from evaluate() into operand
501 if (mref) { /* it's a memory reference */
503 int b, i, s; /* basereg, indexreg, scale */
506 if (seg) { /* segment override */
507 if (seg[1].type!=0 || seg->value!=1 ||
508 REG_SREG & ~reg_flags[seg->type])
509 error (ERR_NONFATAL, "invalid segment override");
510 else if (result->nprefix == MAXPREFIX)
512 "instruction has more than %d prefixes",
515 result->prefixes[result->nprefix++] = seg->type;
518 b = i = -1, o = s = 0;
520 if (e->type < EXPR_SIMPLE) { /* this bit's a register */
521 if (e->value == 1) /* in fact it can be basereg */
523 else /* no, it has to be indexreg */
524 i = e->type, s = e->value;
527 if (e->type && e->type < EXPR_SIMPLE) {/* it's a second register */
528 if (e->value != 1) { /* it has to be indexreg */
529 if (i != -1) { /* but it can't be */
530 error(ERR_NONFATAL, "invalid effective address");
534 i = e->type, s = e->value;
535 } else { /* it can be basereg */
536 if (b != -1) /* or can it? */
543 if (e->type != 0) { /* is there an offset? */
544 if (e->type < EXPR_SIMPLE) {/* in fact, is there an error? */
545 error (ERR_NONFATAL, "invalid effective address");
549 if (e->type == EXPR_SIMPLE) {
553 if (e->type == EXPR_WRT) {
554 result->oprs[operand].wrt = e->value;
557 result->oprs[operand].wrt = NO_SEG;
559 * Look for a segment base type.
561 if (e->type && e->type < EXPR_SEGBASE) {
562 error (ERR_NONFATAL, "invalid effective address");
566 while (e->type && e->value == 0)
568 if (e->type && e->value != 1) {
569 error (ERR_NONFATAL, "invalid effective address");
574 result->oprs[operand].segment = e->type-EXPR_SEGBASE;
577 result->oprs[operand].segment = NO_SEG;
578 while (e->type && e->value == 0)
581 error (ERR_NONFATAL, "invalid effective address");
588 result->oprs[operand].wrt = NO_SEG;
589 result->oprs[operand].segment = NO_SEG;
592 if (e->type != 0) { /* there'd better be nothing left! */
593 error (ERR_NONFATAL, "invalid effective address");
598 result->oprs[operand].type |= MEMORY;
/* With no base register and no usable index (none at all, or a zero scale)
 * the reference is a bare offset, so it is additionally tagged MEM_OFFS. */
599 if (b==-1 && (i==-1 || s==0))
600 result->oprs[operand].type |= MEM_OFFS;
601 result->oprs[operand].basereg = b;
602 result->oprs[operand].indexreg = i;
603 result->oprs[operand].scale = s;
604 result->oprs[operand].offset = o;
605 } else { /* it's not a memory reference */
606 if (is_reloc(value)) { /* it's immediate */
607 result->oprs[operand].type |= IMMEDIATE;
608 result->oprs[operand].offset = reloc_value(value);
609 result->oprs[operand].segment = reloc_seg(value);
610 result->oprs[operand].wrt = reloc_wrt(value);
611 if (is_simple(value) && reloc_value(value)==1)
612 result->oprs[operand].type |= UNITY;
613 } else { /* it's a register */
614 if (value->type>=EXPR_SIMPLE || value->value!=1) {
615 error (ERR_NONFATAL, "invalid operand type");
619 /* clear overrides, except TO which applies to FPU regs */
620 result->oprs[operand].type &= TO;
621 result->oprs[operand].type |= REGISTER;
622 result->oprs[operand].type |= reg_flags[value->type];
623 result->oprs[operand].basereg = value->type;
628 result->operands = operand; /* set operand count */
630 while (operand<3) /* clear remaining operands */
631 result->oprs[operand++].type = 0;
634 * Transform RESW, RESD, RESQ, REST into RESB.
636 switch (result->opcode) {
/* The first operand is scaled by the element size (2, 4, 8 or 10 bytes) so
 * that it becomes a byte count for RESB. */
637 case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break;
638 case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break;
639 case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break;
640 case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break;
/*
 * Lookahead helper: returns non-zero when, after skipping white space, the
 * character at p is a comma, a semicolon (comment start) or the end of the
 * string. parse_line() uses it to decide whether a quoted constant stands
 * alone as a DB-style string operand.
 * NOTE(review): p presumably starts at the current bufptr and bufptr itself
 * is left untouched; the initialisation is not visible here. isspace() is
 * given a plain char, which is only safe for non-negative values - confirm
 * whether a cast to unsigned char is wanted.
 */
646 static int is_comma_next (void) {
650 while (isspace(*p)) p++;
651 return (*p == ',' || *p == ';' || !*p);
655 * This tokeniser routine has only one side effect, that of
656 * updating `bufptr'. Hence by saving `bufptr', lookahead may be
/*
 * Tokeniser. Skips white space at bufptr, classifies the token found there,
 * fills in tokval and returns the token code: 0 at end of line, one of the
 * TOKEN_xxx values, or the character itself (as unsigned char) for any other
 * single character. Besides advancing bufptr, the visible code also writes
 * tokval and, for identifiers, copies text through the q cursor.
 *
 * NOTE(review): identifiers are looked up via the local buffer ourcopy[256];
 * no length check is visible in this view, so confirm that an identifier
 * longer than 255 characters cannot overrun it. The ctype-style calls are
 * given plain char values - confirm behaviour for characters above 127.
 */
660 static int nexttoken (void) {
661 char ourcopy[256], *r, *s;
663 while (isspace(*bufptr)) bufptr++;
664 if (!*bufptr) return 0;
666 /* we have a token; either an id, a number or a char */
667 if (isidstart(*bufptr) ||
668 (*bufptr == '$' && isidstart(bufptr[1]))) {
669 /* now we've got an identifier */
673 if (*bufptr == '$') {
678 tokval.t_charptr = q;
680 while (isidchar(*bufptr)) *q++ = *bufptr++;
682 for (s=tokval.t_charptr, r=ourcopy; *s; s++)
686 return TOKEN_ID; /* bypass all other checks */
687 /* right, so we have an identifier sitting in temp storage. now,
688 * is it actually a register or instruction name, or what? */
/* Lookup order in the visible code: register names, instruction names,
 * conditional instruction stems (icn) plus a condition suffix, prefixes,
 * special tokens, then the SEG and WRT keywords. */
689 if ((tokval.t_integer=bsi(ourcopy, reg_names,
690 elements(reg_names)))>=0)
692 if ((tokval.t_integer=bsi(ourcopy, insn_names,
693 elements(insn_names)))>=0)
695 for (i=0; i<elements(icn); i++)
696 if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) {
697 char *p = ourcopy + strlen(icn[i]);
698 tokval.t_integer = ico[i];
699 if ((tokval.t_inttwo=bsi(p, conditions,
700 elements(conditions)))>=0)
703 if ((tokval.t_integer=bsi(ourcopy, prefix_names,
704 elements(prefix_names)))>=0) {
705 tokval.t_integer += PREFIX_ENUM_START;
708 if ((tokval.t_integer=bsi(ourcopy, special_names,
709 elements(special_names)))>=0)
710 return TOKEN_SPECIAL;
711 if (!strcmp(ourcopy, "seg"))
713 if (!strcmp(ourcopy, "wrt"))
716 } else if (*bufptr == '$' && !isnumchar(bufptr[1])) {
718 * It's a $ sign with no following hex number; this must
719 * mean it's a Here token ($), evaluating to the current
720 * assembly location, or a Base token ($$), evaluating to
721 * the base of the current segment.
724 if (*bufptr == '$') {
729 } else if (isnumstart(*bufptr)) { /* now we've got a number */
734 while (isnumchar(*bufptr)) {
737 if (*bufptr == '.') {
739 * a floating point constant
742 while (isnumchar(*bufptr)) {
746 tokval.t_charptr = r;
750 tokval.t_integer = readnum(r, &rn_error);
752 return TOKEN_ERRNUM; /* some malformation occurred */
753 tokval.t_charptr = NULL;
755 } else if (*bufptr == '\'' || *bufptr == '"') {/* a char constant */
756 char quote = *bufptr++, *r;
757 r = tokval.t_charptr = bufptr;
758 while (*bufptr && *bufptr != quote) bufptr++;
759 tokval.t_inttwo = bufptr - r; /* store full version */
761 return TOKEN_ERRNUM; /* unmatched quotes */
762 tokval.t_integer = 0;
763 r = bufptr++; /* skip over final quote */
/* Walk backwards from the closing quote to the opening one, shifting in one
 * byte per character, so the first character of the constant ends up in the
 * least significant byte of t_integer. */
764 while (quote != *--r) {
765 tokval.t_integer = (tokval.t_integer<<8) + (unsigned char) *r;
768 } else if (*bufptr == ';') { /* a comment has happened - stay */
/* A doubled operator character forms one of the two-character tokens
 * TOKEN_SHR, TOKEN_SHL, TOKEN_SDIV, or the remaining alternative on the line
 * not shown here. */
770 } else if ((*bufptr == '>' || *bufptr == '<' ||
771 *bufptr == '/' || *bufptr == '%') && bufptr[1] == *bufptr) {
773 return (bufptr[-2] == '>' ? TOKEN_SHR :
774 bufptr[-2] == '<' ? TOKEN_SHL :
775 bufptr[-2] == '/' ? TOKEN_SDIV :
777 } else /* just an ordinary char */
778 return (unsigned char) (*bufptr++);
781 /* return index of "string" in "array", or -1 if no match. */
/*
 * Binary search for string in array, which holds size entries and must be
 * sorted in ascending strcmp() order. i and j bracket the candidate range
 * exclusively (i starts at -1, j at size). Returns -1 when there is no
 * match; on a match it presumably returns the matching index (that return
 * statement is not visible here).
 */
782 static int bsi (char *string, char **array, int size) {
783 int i = -1, j = size; /* always, i < index < j */
786 int l = strcmp(string, array[k]);
787 if (l<0) /* it's in the first half */
789 else if (l>0) /* it's in the second half */
791 else /* we've got it :) */
794 return -1; /* we haven't got it :( */
/*
 * Release the extended-operand list hanging off an instruction. The visible
 * statement advances i->eops along the next links.
 * NOTE(review): presumably each detached node is freed in the lines not
 * shown, leaving i->eops NULL at the end - confirm.
 */
797 void cleanup_insn (insn *i) {
802 i->eops = i->eops->next;
807 /* ------------- Evaluator begins here ------------------ */
809 static expr exprtempstorage[1024], *tempptr; /* store exprs in here */
812 * Add two vector datatypes. We have some bizarre behaviour on far-
813 * absolute segment types: we preserve them during addition _only_
814 * if one of the segments is a truly pure scalar.
/*
 * Merge two expression vectors, both ordered by ascending type, writing the
 * result at tempptr. While both inputs have ordinary (non far-absolute)
 * terms left, the term with the smaller type is copied through, and terms
 * of equal type have their values added. The two trailing loops copy what
 * remains of p and of q; far-absolute segment terms (type at or above
 * EXPR_SEGBASE+SEG_ABS) are copied only when preserve is set, i.e. when one
 * of the operands is a pure scalar. A terminating record with type 0 is
 * appended.
 */
816 static expr *add_vectors(expr *p, expr *q) {
820 preserve = is_really_simple(p) || is_really_simple(q);
822 while (p->type && q->type &&
823 p->type < EXPR_SEGBASE+SEG_ABS &&
824 q->type < EXPR_SEGBASE+SEG_ABS)
825 if (p->type > q->type) {
826 tempptr->type = q->type;
827 tempptr->value = q->value;
829 } else if (p->type < q->type) {
830 tempptr->type = p->type;
831 tempptr->value = p->value;
833 } else { /* *p and *q have same type */
834 tempptr->type = p->type;
835 tempptr->value = p->value + q->value;
839 (preserve || p->type < EXPR_SEGBASE+SEG_ABS)) {
840 tempptr->type = p->type;
841 tempptr->value = p->value;
845 (preserve || q->type < EXPR_SEGBASE+SEG_ABS)) {
846 tempptr->type = q->type;
847 tempptr->value = q->value;
850 (tempptr++)->type = 0;
856 * Multiply a vector by a scalar. Strip far-absolute segment part
/*
 * Scale every ordinary term of a vector by scalar, in place. The loop stops
 * at the terminator or at the first far-absolute segment term (type at or
 * above EXPR_SEGBASE+SEG_ABS).
 * NOTE(review): p presumably starts at vect, and the vector is presumably
 * truncated at the stopping point before being returned - confirm.
 */
859 static expr *scalar_mult(expr *vect, long scalar) {
862 while (p->type && p->type < EXPR_SEGBASE+SEG_ABS) {
863 p->value = scalar * (p->value);
/*
 * Build a vector representing the plain number scalar: a single EXPR_SIMPLE
 * term written at tempptr.
 * NOTE(review): the terminator record and the returned pointer are set in
 * lines not visible here.
 */
871 static expr *scalarvect (long scalar) {
873 tempptr->type = EXPR_SIMPLE;
874 tempptr->value = scalar;
882 * Return TRUE if the argument is a simple scalar. (Or a far-
883 * absolute, which counts.)
/*
 * Test whether vect is a plain number. Terms whose value is zero are skipped
 * as if absent. The first live term must be EXPR_SIMPLE, and any live term
 * after it must be a far-absolute segment term: a later term with type below
 * EXPR_SEGBASE+SEG_ABS makes the answer 0.
 */
885 static int is_simple (expr *vect) {
886 while (vect->type && !vect->value)
890 if (vect->type != EXPR_SIMPLE)
894 } while (vect->type && !vect->value);
895 if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0;
900 * Return TRUE if the argument is a simple scalar, _NOT_ a far-
/*
 * Stricter form of is_simple(): zero-valued terms are skipped, the first
 * live term must be EXPR_SIMPLE, and no live term of any kind may follow it
 * (so a far-absolute segment part is rejected too).
 */
903 static int is_really_simple (expr *vect) {
904 while (vect->type && !vect->value)
908 if (vect->type != EXPR_SIMPLE)
912 } while (vect->type && !vect->value);
913 if (vect->type) return 0;
918 * Return TRUE if the argument is relocatable (i.e. a simple
919 * scalar, plus at most one segment-base, plus possibly a WRT).
/*
 * Test whether vect can be expressed as a relocation. Zero-valued terms are
 * ignored throughout. A live register term (type below EXPR_SIMPLE) is
 * tested first, then an optional EXPR_SIMPLE term is stepped over. A live
 * term other than EXPR_WRT whose multiplier is neither 0 nor 1 disqualifies
 * the vector.
 * NOTE(review): the checks between the visible lines are not shown; treat
 * this description as partial.
 */
921 static int is_reloc (expr *vect) {
922 while (vect->type && !vect->value)
926 if (vect->type < EXPR_SIMPLE)
928 if (vect->type == EXPR_SIMPLE) {
931 } while (vect->type && !vect->value);
935 if (vect->type != EXPR_WRT && vect->value != 0 && vect->value != 1)
936 return 0; /* segment base multiplier non-unity */
939 } while (vect->type && (vect->type == EXPR_WRT || !vect->value));
946 * Return the scalar part of a relocatable vector. (Including
947 * simple scalar vectors - those qualify as relocatable.)
/*
 * Fetch the numeric part of a vector. Zero-valued terms are skipped; an
 * empty vector yields 0. When the first live term is EXPR_SIMPLE its value
 * is presumably what gets returned (the return statements after the test are
 * not visible here).
 */
949 static long reloc_value (expr *vect) {
950 while (vect->type && !vect->value)
952 if (!vect->type) return 0;
953 if (vect->type == EXPR_SIMPLE)
960 * Return the segment number of a relocatable vector, or NO_SEG for
/*
 * Fetch the segment a vector is relative to. EXPR_WRT terms and zero-valued
 * terms are skipped, an EXPR_SIMPLE term is stepped over, and the segment
 * number is recovered from the next live term as type - EXPR_SEGBASE.
 * NOTE(review): the path that handles a vector with no segment term is not
 * visible here.
 */
963 static long reloc_seg (expr *vect) {
964 while (vect->type && (vect->type == EXPR_WRT || !vect->value))
966 if (vect->type == EXPR_SIMPLE) {
969 } while (vect->type && (vect->type == EXPR_WRT || !vect->value));
974 return vect->type - EXPR_SEGBASE;
978 * Return the WRT segment number of a relocatable vector, or NO_SEG
979 * if no WRT part is present.
/*
 * Locate the WRT term of a vector: every term with type below EXPR_WRT is
 * skipped, then the term reached is tested for EXPR_WRT. The values returned
 * in each case are produced by lines not visible here.
 */
981 static long reloc_wrt (expr *vect) {
982 while (vect->type && vect->type < EXPR_WRT)
984 if (vect->type == EXPR_WRT) {
/*
 * Rewind the evaluator scratch area: after this call new expr records are
 * written from the start of exprtempstorage again, so vectors returned by
 * earlier evaluations are overwritten by later ones.
 */
990 static void eval_reset(void) {
991 tempptr = exprtempstorage; /* initialise temporary storage */
995 * The SEG operator: calculate the segment part of a relocatable
996 * value. Return NULL, as usual, if an error occurs. Report the
/*
 * Implements the SEG operator on an evaluated expression e. An error is
 * reported when e is not relocatable or has no segment (seg == NO_SEG). A
 * far-absolute segment (SEG_ABS bit set) yields a plain scalar holding the
 * segment number with that bit cleared. Otherwise a two-record vector is
 * built whose single term has type EXPR_SEGBASE plus the output format's
 * segbase(seg+1).
 * NOTE(review): how seg is obtained from e, and the value given to the new
 * term, are in lines not visible here.
 */
999 static expr *segment_part (expr *e) {
1003 error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
1008 if (seg == NO_SEG) {
1009 error(ERR_NONFATAL, "cannot apply SEG to a non-relocatable value");
1011 } else if (seg & SEG_ABS)
1012 return scalarvect(seg & ~SEG_ABS);
1014 expr *f = tempptr++;
1015 tempptr++->type = 0;
1016 f->type = EXPR_SEGBASE+outfmt->segbase(seg+1);
1023 * Recursive-descent parser. Called with a single boolean operand,
1024 * which is TRUE if the evaluation is critical (i.e. unresolved
1025 * symbols are an error condition). Must update the global `i' to
1026 * reflect the token after the parsed string. May return NULL.
1028 * evaluate() should report its own errors: on return it is assumed
1029 * that if NULL has been returned, the error has already been
1034 * Grammar parsed is:
1036 * expr : expr0 [ WRT expr6 ]
1037 * expr0 : expr1 [ {|} expr1]
1038 * expr1 : expr2 [ {^} expr2]
1039 * expr2 : expr3 [ {&} expr3]
1040 * expr3 : expr4 [ {<<,>>} expr4...]
1041 * expr4 : expr5 [ {+,-} expr5...]
1042 * expr5 : expr6 [ {*,/,%,//,%%} expr6...]
1043 * expr6 : { ~,+,-,SEG } expr6
1050 static expr *expr0(int), *expr1(int), *expr2(int), *expr3(int);
1051 static expr *expr4(int), *expr5(int), *expr6(int);
/*
 * Lowest-precedence level: bitwise OR. Parses an expr1, then combines it
 * with a further expr1 operand; both sides must be plain numbers (per
 * is_simple), and the result is a fresh scalar vector. critical is passed
 * down unchanged.
 */
1053 static expr *expr0(int critical) {
1056 e = expr1(critical);
1061 f = expr1(critical);
1064 if (!is_simple(e) || !is_simple(f)) {
1065 error(ERR_NONFATAL, "`|' operator may only be applied to"
1068 e = scalarvect (reloc_value(e) | reloc_value(f));
/*
 * Bitwise XOR level. Parses an expr2, then combines it with a further expr2
 * operand; both sides must be plain numbers (per is_simple), and the result
 * is a fresh scalar vector.
 */
1073 static expr *expr1(int critical) {
1076 e = expr2(critical);
1081 f = expr2(critical);
1084 if (!is_simple(e) || !is_simple(f)) {
1085 error(ERR_NONFATAL, "`^' operator may only be applied to"
1088 e = scalarvect (reloc_value(e) ^ reloc_value(f));
/*
 * Bitwise AND level. Parses an expr3, then combines it with a further expr3
 * operand; both sides must be plain numbers (per is_simple), and the result
 * is a fresh scalar vector.
 */
1093 static expr *expr2(int critical) {
1096 e = expr3(critical);
1101 f = expr3(critical);
1104 if (!is_simple(e) || !is_simple(f)) {
1105 error(ERR_NONFATAL, "`&' operator may only be applied to"
1108 e = scalarvect (reloc_value(e) & reloc_value(f));
/*
 * Shift level. Parses an expr4, then while the current token is TOKEN_SHL or
 * TOKEN_SHR parses another expr4 and applies the shift. Both operands must
 * be plain numbers. The right shift is performed on the value converted to
 * unsigned long, i.e. it is a logical shift.
 * NOTE(review): the shift count is used as given; a negative count or one
 * not smaller than the width of long is undefined behaviour in C, and no
 * range check is visible here.
 */
1113 static expr *expr3(int critical) {
1116 e = expr4(critical);
1119 while (i == TOKEN_SHL || i == TOKEN_SHR) {
1122 f = expr4(critical);
1125 if (!is_simple(e) || !is_simple(f)) {
1126 error(ERR_NONFATAL, "shift operator may only be applied to"
1131 e = scalarvect (reloc_value(e) << reloc_value(f));
1134 e = scalarvect (((unsigned long)reloc_value(e)) >>
/*
 * Additive level. Parses an expr5, then while the current token is + or -
 * parses another expr5 and folds it in with add_vectors(). Subtraction is
 * done by adding the right operand scaled by -1, so it works on full
 * vectors (registers, segment bases), not just on plain numbers.
 */
1142 static expr *expr4(int critical) {
1145 e = expr5(critical);
1148 while (i == '+' || i == '-') {
1151 f = expr5(critical);
1156 e = add_vectors (e, f);
1159 e = add_vectors (e, scalar_mult(f, -1L));
/*
 * Multiplicative level. Parses an expr6, then while the current token is one
 * of the five multiplicative operators (multiply, unsigned divide, unsigned
 * modulo, signed divide TOKEN_SDIV, signed modulo TOKEN_SMOD) parses another
 * expr6 and combines the two.
 *
 * Multiplication needs at least one plain-number operand and scales the
 * other operand as a vector. Every division or modulo form requires both
 * operands to be plain numbers and rejects a zero divisor before computing.
 *
 * The loop condition previously tested for the multiply character twice and
 * never for the percent sign, so the unsigned modulo operator was never
 * accepted here even though the computation for it exists below; the
 * duplicate test now checks for the percent sign.
 */
1166 static expr *expr5(int critical) {
1169 e = expr6(critical);
1172 while (i == '*' || i == '/' || i == '%' ||
1173 i == TOKEN_SDIV || i == TOKEN_SMOD) {
1176 f = expr6(critical);
1179 if (j != '*' && (!is_simple(e) || !is_simple(f))) {
1180 error(ERR_NONFATAL, "division operator may only be applied to"
1184 if (j != '*' && reloc_value(f) == 0) {
1185 error(ERR_NONFATAL, "division by zero");
1191 e = scalar_mult (f, reloc_value(e));
1192 else if (is_simple(f))
1193 e = scalar_mult (e, reloc_value(f));
1195 error(ERR_NONFATAL, "unable to multiply two "
1196 "non-scalar objects");
1201 e = scalarvect (((unsigned long)reloc_value(e)) /
1202 ((unsigned long)reloc_value(f)));
1205 e = scalarvect (((unsigned long)reloc_value(e)) %
1206 ((unsigned long)reloc_value(f)));
1209 e = scalarvect (((signed long)reloc_value(e)) /
1210 ((signed long)reloc_value(f)));
1213 e = scalarvect (((signed long)reloc_value(e)) %
1214 ((signed long)reloc_value(f)));
/*
 * Highest-precedence level: unary operators and primaries.
 *
 * Visible cases: unary minus (result scaled by -1), unary plus, bitwise NOT
 * (operand must be a plain number), SEG (delegates to segment_part), a
 * parenthesised expr0, and the primaries TOKEN_NUM, TOKEN_REG, TOKEN_ID,
 * TOKEN_HERE and TOKEN_BASE. Anything else reports an expression syntax
 * error.
 *
 * For identifiers, the label being defined on the current line is compared
 * by name and resolved without calling labelfunc; other names go through
 * labelfunc, and an unknown name is an error when critical is 1 or 2, with
 * a different message for each. A resolved label produces an EXPR_SIMPLE
 * term holding the offset, plus a segment-base term when label_seg is not
 * NO_SEG.
 */
1221 static expr *expr6(int critical) {
1223 long label_seg, label_ofs;
1227 e = expr6(critical);
1230 return scalar_mult (e, -1L);
1231 } else if (i == '+') {
1233 return expr6(critical);
1234 } else if (i == '~') {
1236 e = expr6(critical);
1239 if (!is_simple(e)) {
1240 error(ERR_NONFATAL, "`~' operator may only be applied to"
1244 return scalarvect(~reloc_value(e));
1245 } else if (i == TOKEN_SEG) {
1247 e = expr6(critical);
1250 return segment_part(e);
1251 } else if (i == '(') {
1253 e = expr0(critical);
1257 error(ERR_NONFATAL, "expecting `)'");
1262 } else if (i == TOKEN_NUM || i == TOKEN_REG || i == TOKEN_ID ||
1263 i == TOKEN_HERE || i == TOKEN_BASE) {
1267 e->type = EXPR_SIMPLE;
1268 e->value = tokval.t_integer;
1271 e->type = tokval.t_integer;
1278 * Since the whole line is parsed before the label it
1279 * defines is given to the label manager, we have
1280 * problems with lines such as
1282 * end: TIMES 512-(end-start) DB 0
1284 * where `end' is not known on pass one, despite not
1285 * really being a forward reference, and due to
1286 * criticality it is _needed_. Hence we check our label
1287 * against the currently defined one, and do our own
1288 * resolution of it if we have to.
1290 if (i == TOKEN_BASE) {
1293 } else if (i == TOKEN_HERE || !strcmp(tokval.t_charptr, label)) {
1296 } else if (!labelfunc(tokval.t_charptr, &label_seg, &label_ofs)) {
1297 if (critical == 2) {
1298 error (ERR_NONFATAL, "symbol `%s' undefined",
1301 } else if (critical == 1) {
1302 error (ERR_NONFATAL, "symbol `%s' not defined before use",
1311 e->type = EXPR_SIMPLE;
1312 e->value = label_ofs;
1313 if (label_seg!=NO_SEG) {
1315 tempptr->type = EXPR_SEGBASE + label_seg;
1326 error(ERR_NONFATAL, "expression syntax error");
/*
 * Entry point of the expression evaluator: parses an expr0 and, when the
 * next token is TOKEN_WRT, a following expr6 naming the segment the value is
 * taken relative to.
 *
 * For the WRT form the left operand first has any far-absolute segment part
 * stripped (scalar_mult by 1), then a new term g is built and merged in with
 * add_vectors(). Its value is the segment of the right operand, or the
 * right operand's numeric value with SEG_ABS set when it has no segment. An
 * even, non-absolute segment value is rejected, but only when critical is
 * non-zero.
 * NOTE(review): the type assigned to g and the final return are in lines not
 * visible here.
 */
1331 static expr *evaluate (int critical) {
1335 e = expr0 (critical);
1339 if (i == TOKEN_WRT) {
1340 i = nexttoken(); /* eat the WRT */
1341 f = expr6 (critical);
1345 e = scalar_mult (e, 1L); /* strip far-absolute segment part */
1347 expr *g = tempptr++;
1348 tempptr++->type = 0;
1351 error(ERR_NONFATAL, "invalid right-hand operand to WRT");
1354 g->value = reloc_seg(f);
1355 if (g->value == NO_SEG)
1356 g->value = reloc_value(f) | SEG_ABS;
1357 else if (!(g->value & SEG_ABS) && !(g->value % 2) && critical) {
1358 error(ERR_NONFATAL, "invalid right-hand operand to WRT");
1361 e = add_vectors (e, g);