1 /* nasmlib.c library routines for the Netwide Assembler
3 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
4 * Julian Hall. All rights reserved. The software is
5 * redistributable under the licence given in the file "Licence"
6 * distributed in the NASM archive.
17 static efunc nasm_malloc_error;
23 void nasm_set_malloc_error (efunc error) {
24 nasm_malloc_error = error;
26 logfp = fopen ("malloc.log", "w");
27 setvbuf (logfp, NULL, _IOLBF, BUFSIZ);
28 fprintf (logfp, "null pointer is %p\n", NULL);
33 void *nasm_malloc_log (char *file, int line, size_t size)
35 void *nasm_malloc (size_t size)
38 void *p = malloc(size);
40 nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory");
43 fprintf(logfp, "%s %d malloc(%ld) returns %p\n",
44 file, line, (long)size, p);
50 void *nasm_realloc_log (char *file, int line, void *q, size_t size)
52 void *nasm_realloc (void *q, size_t size)
55 void *p = q ? realloc(q, size) : malloc(size);
57 nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory");
60 fprintf(logfp, "%s %d realloc(%p,%ld) returns %p\n",
61 file, line, q, (long)size, p);
63 fprintf(logfp, "%s %d malloc(%ld) returns %p\n",
64 file, line, (long)size, p);
/*
 * free() wrapper that ignores null pointers. The LOGALLOC variant
 * logs each non-null release with the caller's file/line.
 */
#ifdef LOGALLOC
void nasm_free_log (char *file, int line, void *q)
#else
void nasm_free (void *q)
#endif
{
    if (!q)
        return;

    free (q);
#ifdef LOGALLOC
    fprintf(logfp, "%s %d free(%p)\n",
            file, line, q);
#endif
}
85 char *nasm_strdup_log (char *file, int line, char *s)
87 char *nasm_strdup (char *s)
91 int size = strlen(s)+1;
95 nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory");
98 fprintf(logfp, "%s %d strdup(%ld) returns %p\n",
99 file, line, (long)size, p);
106 char *nasm_strndup_log (char *file, int line, char *s, size_t len)
108 char *nasm_strndup (char *s, size_t len)
116 nasm_malloc_error (ERR_FATAL | ERR_NOFILE, "out of memory");
119 fprintf(logfp, "%s %d strndup(%ld) returns %p\n",
120 file, line, (long)size, p);
/*
 * Case-insensitive comparison of two NUL-terminated strings.
 * Returns 0 when they are equal ignoring case, -1 when the first
 * differing character of s1 (upper-cased) is smaller than that of
 * s2, and 1 otherwise.
 */
int nasm_stricmp (char *s1, char *s2) {
    /*
     * The <ctype.h> functions need a value representable as unsigned
     * char; a negative plain char is undefined behaviour there, so
     * walk the strings through unsigned char pointers.
     */
    const unsigned char *a = (const unsigned char *)s1;
    const unsigned char *b = (const unsigned char *)s2;

    while (*a && toupper(*a) == toupper(*b))
        a++, b++;
    if (!*a && !*b)
        return 0;
    else if (toupper(*a) < toupper(*b))
        return -1;
    else
        return 1;
}
/*
 * Case-insensitive comparison of at most `n' characters of two
 * NUL-terminated strings. Returns 0 when the compared prefixes are
 * equal ignoring case (always the case for n == 0), -1 when the first
 * differing character of s1 (upper-cased) is smaller than that of s2,
 * and 1 otherwise.
 */
int nasm_strnicmp (char *s1, char *s2, int n) {
    /*
     * The <ctype.h> functions need a value representable as unsigned
     * char; a negative plain char is undefined behaviour there, so
     * walk the strings through unsigned char pointers.
     */
    const unsigned char *a = (const unsigned char *)s1;
    const unsigned char *b = (const unsigned char *)s2;

    while (n > 0 && *a && toupper(*a) == toupper(*b))
        a++, b++, n--;
    if ((!*a && !*b) || n==0)
        return 0;
    else if (toupper(*a) < toupper(*b))
        return -1;
    else
        return 1;
}
149 #define lib_isnumchar(c) ( isalnum(c) || (c) == '$')
150 #define numvalue(c) ((c)>='a' ? (c)-'a'+10 : (c)>='A' ? (c)-'A'+10 : (c)-'0')
152 long readnum (char *str, int *error) {
155 unsigned long result, checklimit;
160 while (isspace(*r)) r++; /* find start of number */
163 while (lib_isnumchar(*q)) q++; /* find end of number */
166 * If it begins 0x, 0X or $, or ends in H, it's in hex. if it
167 * ends in Q, it's octal. if it ends in B, it's binary.
168 * Otherwise, it's ordinary decimal.
170 if (*r=='0' && (r[1]=='x' || r[1]=='X'))
174 else if (q[-1]=='H' || q[-1]=='h')
176 else if (q[-1]=='Q' || q[-1]=='q')
178 else if (q[-1]=='B' || q[-1]=='b')
184 * If this number has been found for us by something other than
185 * the ordinary scanners, then it might be malformed by having
186 * nothing between the prefix and the suffix. Check this case
195 * `checklimit' must be 2**32 / radix. We can't do that in
196 * 32-bit arithmetic, which we're (probably) using, so we
197 * cheat: since we know that all radices we use are even, we
198 * can divide 2**31 by radix/2 instead.
200 checklimit = 0x80000000UL / (radix>>1);
203 while (*r && r < q) {
204 if (*r<'0' || (*r>'9' && *r<'A') || numvalue(*r)>=radix) {
208 if (result >= checklimit)
210 result = radix * result + numvalue(*r);
215 nasm_malloc_error (ERR_WARNING | ERR_PASS1 | ERR_WARN_NOV,
216 "numeric constant %s does not fit in 32 bits",
/* Next segment index that seg_alloc() will hand out. */
static long next_seg;

/* Restart segment numbering from zero. */
void seg_init(void) {
    next_seg = 0;
}

/*
 * Return the next unused segment index and reserve it; successive
 * calls yield indices two apart.
 */
long seg_alloc(void) {
    long allocated = next_seg;

    next_seg = allocated + 2;
    return allocated;
}
/*
 * Write the low 16 bits of `data' to fp as two bytes, least
 * significant byte first.
 */
void fwriteshort (int data, FILE *fp) {
    /*
     * Work on an unsigned copy: right-shifting a negative signed
     * value is implementation-defined, whereas the unsigned shift is
     * fully specified and yields the two's-complement byte pattern.
     */
    unsigned int bits = (unsigned int) data;

    fputc ((int) (bits & 255), fp);
    fputc ((int) ((bits >> 8) & 255), fp);
}
/*
 * Write the low 32 bits of `data' to fp as four bytes, least
 * significant byte first.
 */
void fwritelong (long data, FILE *fp) {
    /*
     * Work on an unsigned copy: right-shifting a negative signed
     * value is implementation-defined, whereas the unsigned shift is
     * fully specified and yields the two's-complement byte pattern.
     */
    unsigned long bits = (unsigned long) data;

    fputc ((int) (bits & 255), fp);
    fputc ((int) ((bits >> 8) & 255), fp);
    fputc ((int) ((bits >> 16) & 255), fp);
    fputc ((int) ((bits >> 24) & 255), fp);
}
244 void standard_extension (char *inname, char *outname, char *extension,
248 if (*outname) /* file name already exists, */
249 return; /* so do nothing */
252 while (*q) *p++ = *q++; /* copy, and find end of string */
253 *p = '\0'; /* terminate it */
254 while (p > outname && *--p != '.');/* find final period (or whatever) */
255 if (*p != '.') while (*p) p++; /* go back to end if none found */
256 if (!strcmp(p, extension)) { /* is the extension already there? */
258 error(ERR_WARNING | ERR_NOFILE,
259 "file name already ends in `%s': "
260 "output will be in `nasm.out'",
263 error(ERR_WARNING | ERR_NOFILE,
264 "file name already has no extension: "
265 "output will be in `nasm.out'");
266 strcpy(outname, "nasm.out");
268 strcpy(p, extension);
271 #define RAA_BLKSIZE 4096 /* this many longs allocated at once */
272 #define RAA_LAYERSIZE 1024 /* this many _pointers_ allocated */
274 typedef struct RAA RAA;
275 typedef union RAA_UNION RAA_UNION;
276 typedef struct RAA_LEAF RAA_LEAF;
277 typedef struct RAA_BRANCH RAA_BRANCH;
281 * Number of layers below this one to get to the real data. 0
282 * means this structure is a leaf, holding RAA_BLKSIZE real
283 * data items; 1 and above mean it's a branch, holding
284 * RAA_LAYERSIZE pointers to the next level branch or leaf
289 * Number of real data items spanned by one position in the
290 * `data' array at this level. This number is 1, trivially, for
291 * a leaf (level 0): for a level 1 branch it should be
292 * RAA_BLKSIZE, and for a level 2 branch it's
293 * RAA_LAYERSIZE*RAA_BLKSIZE.
298 long data[RAA_BLKSIZE];
301 struct RAA *data[RAA_LAYERSIZE];
306 #define LEAFSIZ (sizeof(RAA)-sizeof(RAA_UNION)+sizeof(RAA_LEAF))
307 #define BRANCHSIZ (sizeof(RAA)-sizeof(RAA_UNION)+sizeof(RAA_BRANCH))
309 #define LAYERSIZ(r) ( (r)->layers==0 ? RAA_BLKSIZE : RAA_LAYERSIZE )
311 static struct RAA *real_raa_init (int layers) {
315 r = nasm_malloc (LEAFSIZ);
316 memset (r->u.l.data, 0, sizeof(r->u.l.data));
320 r = nasm_malloc (BRANCHSIZ);
321 memset (r->u.b.data, 0, sizeof(r->u.b.data));
323 r->stepsize = RAA_BLKSIZE;
325 r->stepsize *= RAA_LAYERSIZE;
330 struct RAA *raa_init (void) {
331 return real_raa_init (0);
334 void raa_free (struct RAA *r) {
339 for (p = r->u.b.data; p - r->u.b.data < RAA_LAYERSIZE; p++)
345 long raa_read (struct RAA *r, long posn) {
346 if (posn > r->stepsize * LAYERSIZ(r))
348 while (r->layers > 0) {
350 l = ldiv (posn, r->stepsize);
351 r = r->u.b.data[l.quot];
353 if (!r) /* better check this */
356 return r->u.l.data[posn];
359 struct RAA *raa_write (struct RAA *r, long posn, long value) {
363 nasm_malloc_error (ERR_PANIC, "negative position in raa_write");
365 while (r->stepsize * LAYERSIZ(r) < posn) {
367 * Must go up a layer.
371 s = nasm_malloc (BRANCHSIZ);
372 memset (s->u.b.data, 0, sizeof(r->u.b.data));
373 s->layers = r->layers + 1;
374 s->stepsize = RAA_LAYERSIZE * r->stepsize;
381 while (r->layers > 0) {
384 l = ldiv (posn, r->stepsize);
385 s = &r->u.b.data[l.quot];
387 *s = real_raa_init (r->layers - 1);
392 r->u.l.data[posn] = value;
397 #define SAA_MAXLEN 8192
401 * members `end' and `elem_len' are only valid in first link in
402 * list; `rptr' and `rpos' are used for reading
404 struct SAA *next, *end, *rptr;
405 long elem_len, length, posn, start, rpos;
409 struct SAA *saa_init (long elem_len) {
412 if (elem_len > SAA_MAXLEN)
413 nasm_malloc_error (ERR_PANIC | ERR_NOFILE, "SAA with huge elements");
415 s = nasm_malloc (sizeof(struct SAA));
416 s->posn = s->start = 0L;
417 s->elem_len = elem_len;
418 s->length = SAA_MAXLEN - (SAA_MAXLEN % elem_len);
419 s->data = nasm_malloc (s->length);
426 void saa_free (struct SAA *s) {
437 void *saa_wstruct (struct SAA *s) {
440 if (s->end->length - s->end->posn < s->elem_len) {
441 s->end->next = nasm_malloc (sizeof(struct SAA));
442 s->end->next->start = s->end->start + s->end->posn;
443 s->end = s->end->next;
444 s->end->length = s->length;
447 s->end->data = nasm_malloc (s->length);
450 p = s->end->data + s->end->posn;
451 s->end->posn += s->elem_len;
455 void saa_wbytes (struct SAA *s, void *data, long len) {
459 long l = s->end->length - s->end->posn;
464 memcpy (s->end->data + s->end->posn, d, l);
467 memset (s->end->data + s->end->posn, 0, l);
472 s->end->next = nasm_malloc (sizeof(struct SAA));
473 s->end->next->start = s->end->start + s->end->posn;
474 s->end = s->end->next;
475 s->end->length = s->length;
478 s->end->data = nasm_malloc (s->length);
483 void saa_rewind (struct SAA *s) {
488 void *saa_rstruct (struct SAA *s) {
494 if (s->rptr->posn - s->rpos < s->elem_len) {
495 s->rptr = s->rptr->next;
497 return NULL; /* end of array */
501 p = s->rptr->data + s->rpos;
502 s->rpos += s->elem_len;
506 void *saa_rbytes (struct SAA *s, long *len) {
512 p = s->rptr->data + s->rpos;
513 *len = s->rptr->posn - s->rpos;
514 s->rptr = s->rptr->next;
519 void saa_rnbytes (struct SAA *s, void *data, long len) {
528 l = s->rptr->posn - s->rpos;
532 memcpy (d, s->rptr->data + s->rpos, l);
538 s->rptr = s->rptr->next;
544 void saa_fread (struct SAA *s, long posn, void *data, long len) {
549 if (!s->rptr || posn > s->rptr->start + s->rpos)
551 while (posn >= s->rptr->start + s->rptr->posn) {
552 s->rptr = s->rptr->next;
554 return; /* what else can we do?! */
558 pos = posn - s->rptr->start;
560 long l = s->rptr->posn - pos;
563 memcpy (cdata, s->rptr->data+pos, l);
573 void saa_fwrite (struct SAA *s, long posn, void *data, long len) {
578 if (!s->rptr || posn > s->rptr->start + s->rpos)
580 while (posn >= s->rptr->start + s->rptr->posn) {
581 s->rptr = s->rptr->next;
583 return; /* what else can we do?! */
587 pos = posn - s->rptr->start;
589 long l = s->rptr->posn - pos;
592 memcpy (s->rptr->data+pos, cdata, l);
602 void saa_fpwrite (struct SAA *s, FILE *fp) {
607 while ( (data = saa_rbytes (s, &len)) )
608 fwrite (data, 1, len, fp);
612 * Register, instruction, condition-code and prefix keywords used
616 static char *special_names[] = {
617 "byte", "dword", "far", "long", "near", "nosplit", "qword",
618 "short", "to", "tword", "word"
620 static char *prefix_names[] = {
621 "a16", "a32", "lock", "o16", "o32", "rep", "repe", "repne",
622 "repnz", "repz", "times"
627 * Standard scanner routine used by parser.c and some output
628 * formats. It keeps a succession of temporary-storage strings in
629 * stdscan_tempstorage, which can be cleared using stdscan_reset.
631 static char **stdscan_tempstorage = NULL;
632 static int stdscan_tempsize = 0, stdscan_templen = 0;
633 #define STDSCAN_TEMP_DELTA 256
635 static void stdscan_pop(void) {
636 nasm_free (stdscan_tempstorage[--stdscan_templen]);
639 void stdscan_reset(void) {
640 while (stdscan_templen > 0)
644 static char *stdscan_copy(char *p, int len) {
647 text = nasm_malloc(len+1);
648 strncpy (text, p, len);
651 if (stdscan_templen >= stdscan_tempsize) {
652 stdscan_tempsize += STDSCAN_TEMP_DELTA;
653 stdscan_tempstorage = nasm_realloc(stdscan_tempstorage,
654 stdscan_tempsize*sizeof(char *));
656 stdscan_tempstorage[stdscan_templen++] = text;
661 char *stdscan_bufptr = NULL;
662 int stdscan (void *private_data, struct tokenval *tv) {
663 char ourcopy[256], *r, *s;
665 while (isspace(*stdscan_bufptr)) stdscan_bufptr++;
666 if (!*stdscan_bufptr)
667 return tv->t_type = 0;
669 /* we have a token; either an id, a number or a char */
670 if (isidstart(*stdscan_bufptr) ||
671 (*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1]))) {
672 /* now we've got an identifier */
676 if (*stdscan_bufptr == '$') {
681 r = stdscan_bufptr++;
682 while (isidchar(*stdscan_bufptr)) stdscan_bufptr++;
683 tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r);
685 for (s=tv->t_charptr, r=ourcopy; *s; s++)
689 return tv->t_type = TOKEN_ID;/* bypass all other checks */
690 /* right, so we have an identifier sitting in temp storage. now,
691 * is it actually a register or instruction name, or what? */
692 if ((tv->t_integer=bsi(ourcopy, reg_names,
693 elements(reg_names)))>=0) {
694 tv->t_integer += EXPR_REG_START;
695 return tv->t_type = TOKEN_REG;
696 } else if ((tv->t_integer=bsi(ourcopy, insn_names,
697 elements(insn_names)))>=0) {
698 return tv->t_type = TOKEN_INSN;
700 for (i=0; i<elements(icn); i++)
701 if (!strncmp(ourcopy, icn[i], strlen(icn[i]))) {
702 char *p = ourcopy + strlen(icn[i]);
703 tv->t_integer = ico[i];
704 if ((tv->t_inttwo=bsi(p, conditions,
705 elements(conditions)))>=0)
706 return tv->t_type = TOKEN_INSN;
708 if ((tv->t_integer=bsi(ourcopy, prefix_names,
709 elements(prefix_names)))>=0) {
710 tv->t_integer += PREFIX_ENUM_START;
711 return tv->t_type = TOKEN_PREFIX;
713 if ((tv->t_integer=bsi(ourcopy, special_names,
714 elements(special_names)))>=0)
715 return tv->t_type = TOKEN_SPECIAL;
716 if (!strcmp(ourcopy, "seg"))
717 return tv->t_type = TOKEN_SEG;
718 if (!strcmp(ourcopy, "wrt"))
719 return tv->t_type = TOKEN_WRT;
720 return tv->t_type = TOKEN_ID;
721 } else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) {
723 * It's a $ sign with no following hex number; this must
724 * mean it's a Here token ($), evaluating to the current
725 * assembly location, or a Base token ($$), evaluating to
726 * the base of the current segment.
729 if (*stdscan_bufptr == '$') {
731 return tv->t_type = TOKEN_BASE;
733 return tv->t_type = TOKEN_HERE;
734 } else if (isnumstart(*stdscan_bufptr)) { /* now we've got a number */
737 r = stdscan_bufptr++;
738 while (isnumchar(*stdscan_bufptr))
741 if (*stdscan_bufptr == '.') {
743 * a floating point constant
746 while (isnumchar(*stdscan_bufptr)) {
749 tv->t_charptr = stdscan_copy(r, stdscan_bufptr - r);
750 return tv->t_type = TOKEN_FLOAT;
752 r = stdscan_copy(r, stdscan_bufptr - r);
753 tv->t_integer = readnum(r, &rn_error);
756 return tv->t_type = TOKEN_ERRNUM;/* some malformation occurred */
757 tv->t_charptr = NULL;
758 return tv->t_type = TOKEN_NUM;
759 } else if (*stdscan_bufptr == '\'' ||
760 *stdscan_bufptr == '"') {/* a char constant */
761 char quote = *stdscan_bufptr++, *r;
762 r = tv->t_charptr = stdscan_bufptr;
763 while (*stdscan_bufptr && *stdscan_bufptr != quote) stdscan_bufptr++;
764 tv->t_inttwo = stdscan_bufptr - r; /* store full version */
765 if (!*stdscan_bufptr)
766 return tv->t_type = TOKEN_ERRNUM; /* unmatched quotes */
768 r = stdscan_bufptr++; /* skip over final quote */
769 while (quote != *--r) {
770 tv->t_integer = (tv->t_integer<<8) + (unsigned char) *r;
772 return tv->t_type = TOKEN_NUM;
773 } else if (*stdscan_bufptr == ';') { /* a comment has happened - stay */
774 return tv->t_type = 0;
775 } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '>') {
777 return tv->t_type = TOKEN_SHR;
778 } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '<') {
780 return tv->t_type = TOKEN_SHL;
781 } else if (stdscan_bufptr[0] == '/' && stdscan_bufptr[1] == '/') {
783 return tv->t_type = TOKEN_SDIV;
784 } else if (stdscan_bufptr[0] == '%' && stdscan_bufptr[1] == '%') {
786 return tv->t_type = TOKEN_SMOD;
787 } else if (stdscan_bufptr[0] == '=' && stdscan_bufptr[1] == '=') {
789 return tv->t_type = TOKEN_EQ;
790 } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '>') {
792 return tv->t_type = TOKEN_NE;
793 } else if (stdscan_bufptr[0] == '!' && stdscan_bufptr[1] == '=') {
795 return tv->t_type = TOKEN_NE;
796 } else if (stdscan_bufptr[0] == '<' && stdscan_bufptr[1] == '=') {
798 return tv->t_type = TOKEN_LE;
799 } else if (stdscan_bufptr[0] == '>' && stdscan_bufptr[1] == '=') {
801 return tv->t_type = TOKEN_GE;
802 } else if (stdscan_bufptr[0] == '&' && stdscan_bufptr[1] == '&') {
804 return tv->t_type = TOKEN_DBL_AND;
805 } else if (stdscan_bufptr[0] == '^' && stdscan_bufptr[1] == '^') {
807 return tv->t_type = TOKEN_DBL_XOR;
808 } else if (stdscan_bufptr[0] == '|' && stdscan_bufptr[1] == '|') {
810 return tv->t_type = TOKEN_DBL_OR;
811 } else /* just an ordinary char */
812 return tv->t_type = (unsigned char) (*stdscan_bufptr++);
816 * Return TRUE if the argument is a simple scalar. (Or a far-
817 * absolute, which counts.)
819 int is_simple (expr *vect) {
820 while (vect->type && !vect->value)
824 if (vect->type != EXPR_SIMPLE)
828 } while (vect->type && !vect->value);
829 if (vect->type && vect->type < EXPR_SEGBASE+SEG_ABS) return 0;
834 * Return TRUE if the argument is a simple scalar, _NOT_ a far-
837 int is_really_simple (expr *vect) {
838 while (vect->type && !vect->value)
842 if (vect->type != EXPR_SIMPLE)
846 } while (vect->type && !vect->value);
847 if (vect->type) return 0;
852 * Return TRUE if the argument is relocatable (i.e. a simple
853 * scalar, plus at most one segment-base, plus possibly a WRT).
855 int is_reloc (expr *vect) {
856 while (vect->type && !vect->value) /* skip initial value-0 terms */
858 if (!vect->type) /* trivially return TRUE if nothing */
859 return 1; /* is present apart from value-0s */
860 if (vect->type < EXPR_SIMPLE) /* FALSE if a register is present */
862 if (vect->type == EXPR_SIMPLE) { /* skip over a pure number term... */
865 } while (vect->type && !vect->value);
866 if (!vect->type) /* ...returning TRUE if that's all */
869 if (vect->type == EXPR_WRT) { /* skip over a WRT term... */
872 } while (vect->type && !vect->value);
873 if (!vect->type) /* ...returning TRUE if that's all */
876 if (vect->value != 0 && vect->value != 1)
877 return 0; /* segment base multiplier non-unity */
878 do { /* skip over _one_ seg-base term... */
880 } while (vect->type && !vect->value);
881 if (!vect->type) /* ...returning TRUE if that's all */
883 return 0; /* And return FALSE if there's more */
887 * Return TRUE if the argument contains an `unknown' part.
889 int is_unknown(expr *vect) {
890 while (vect->type && vect->type < EXPR_UNKNOWN)
892 return (vect->type == EXPR_UNKNOWN);
896 * Return TRUE if the argument contains nothing but an `unknown'
899 int is_just_unknown(expr *vect) {
900 while (vect->type && !vect->value)
902 return (vect->type == EXPR_UNKNOWN);
906 * Return the scalar part of a relocatable vector. (Including
907 * simple scalar vectors - those qualify as relocatable.)
909 long reloc_value (expr *vect) {
910 while (vect->type && !vect->value)
912 if (!vect->type) return 0;
913 if (vect->type == EXPR_SIMPLE)
920 * Return the segment number of a relocatable vector, or NO_SEG for
923 long reloc_seg (expr *vect) {
924 while (vect->type && (vect->type == EXPR_WRT || !vect->value))
926 if (vect->type == EXPR_SIMPLE) {
929 } while (vect->type && (vect->type == EXPR_WRT || !vect->value));
934 return vect->type - EXPR_SEGBASE;
938 * Return the WRT segment number of a relocatable vector, or NO_SEG
939 * if no WRT part is present.
941 long reloc_wrt (expr *vect) {
942 while (vect->type && vect->type < EXPR_WRT)
944 if (vect->type == EXPR_WRT) {
/*
 * Binary search: look `string' up in `array', which holds `size'
 * strings sorted in strcmp() order. Returns the index of the match,
 * or -1 if the string is not present.
 */
int bsi (char *string, char **array, int size) {
    int lo = -1;                       /* invariant: lo < match index < hi */
    int hi = size;

    while (hi - lo >= 2) {
        int mid = (lo + hi) / 2;
        int cmp = strcmp(string, array[mid]);

        if (cmp == 0)
            return mid;
        if (cmp < 0)
            hi = mid;                  /* target lies in the lower half */
        else
            lo = mid;                  /* target lies in the upper half */
    }
    return -1;
}