1 /* -----------------------------------------------------------------------------
2 * See the LICENSE file for information on copyright, usage and redistribution
3 * of SWIG, and the README file for authors - http://www.swig.org/release.html.
7 * This file implements a general purpose C/C++ compatible lexical scanner.
8 * This scanner isn't intended to be plugged directly into a parser built
9 * with yacc. Rather, it contains a lot of generic code that could be used
10 * to easily construct yacc-compatible scanners.
11 * ----------------------------------------------------------------------------- */
13 char cvsroot_scanner_c[] = "$Id: scanner.c 11470 2009-07-29 20:50:39Z wsfulton $";
18 extern String *cparse_file;
19 extern int cparse_start_line;
22 String *text; /* Current token value */
23 List *scanobjs; /* Objects being scanned */
24 String *str; /* Current object being scanned */
25 char *idstart; /* Optional identifier start characters */
26 int nexttoken; /* Next token to be returned */
27 int start_line; /* Starting line of certain declarations */
29 int yylen; /* Length of text pushed into text */
31 String *error; /* Last error message (if any) */
32 int error_line; /* Error line number */
33 int freeze_line; /* Suspend line number updates */
36 /* -----------------------------------------------------------------------------
39 * Create a new scanner object
40 * ----------------------------------------------------------------------------- */
42 Scanner *NewScanner(void) {
44 s = (Scanner *) malloc(sizeof(Scanner));
51 s->scanobjs = NewList();
52 s->text = NewStringEmpty();
59 /* -----------------------------------------------------------------------------
62 * Delete a scanner object.
63 * ----------------------------------------------------------------------------- */
65 void DelScanner(Scanner * s) {
76 /* -----------------------------------------------------------------------------
79 * Clear the contents of a scanner object.
80 * ----------------------------------------------------------------------------- */
82 void Scanner_clear(Scanner * s) {
96 /* -----------------------------------------------------------------------------
99 * Push some new text into the scanner. The scanner parses this text first and
100 * only then returns to the old text.
101 * ----------------------------------------------------------------------------- */
103 void Scanner_push(Scanner * s, String *txt) {
105 Push(s->scanobjs, txt);
107 Setline(s->str,s->line);
112 s->line = Getline(txt);
115 /* -----------------------------------------------------------------------------
116 * Scanner_pushtoken()
118 * Push a token into the scanner. This token will be returned on the next
119 * call to Scanner_token().
120 * ----------------------------------------------------------------------------- */
122 void Scanner_pushtoken(Scanner * s, int nt, const_String_or_char_ptr val) {
124 assert((nt >= 0) && (nt < SWIG_MAXTOKENS));
126 if ( Char(val) != Char(s->text) ) {
132 /* -----------------------------------------------------------------------------
133 * Scanner_set_location()
135 * Set the file and line number location of the scanner.
136 * ----------------------------------------------------------------------------- */
138 void Scanner_set_location(Scanner * s, String *file, int line) {
139 Setline(s->str, line);
140 Setfile(s->str, file);
144 /* -----------------------------------------------------------------------------
147 * Get the current file.
148 * ----------------------------------------------------------------------------- */
150 String *Scanner_file(Scanner * s) {
151 return Getfile(s->str);
154 /* -----------------------------------------------------------------------------
157 * Get the current line number
158 * ----------------------------------------------------------------------------- */
159 int Scanner_line(Scanner * s) {
163 /* -----------------------------------------------------------------------------
164 * Scanner_start_line()
166 * Get the line number on which the current token starts
167 * ----------------------------------------------------------------------------- */
168 int Scanner_start_line(Scanner * s) {
169 return s->start_line;
172 /* -----------------------------------------------------------------------------
175 * Change the set of additional characters that can be used to start an identifier.
176 * ----------------------------------------------------------------------------- */
178 void Scanner_idstart(Scanner * s, const char *id) {
180 s->idstart = Swig_copy_string(id);
183 /* -----------------------------------------------------------------------------
186 * Returns the next character from the scanner or 0 if end of the string.
187 * ----------------------------------------------------------------------------- */
188 static char nextchar(Scanner * s) {
192 while ((nc = Getc(s->str)) == EOF) {
195 Delitem(s->scanobjs, 0);
196 if (Len(s->scanobjs) == 0)
198 s->str = Getitem(s->scanobjs, 0);
200 s->line = Getline(s->str);
204 if ((nc == '\n') && (!s->freeze_line))
210 /* -----------------------------------------------------------------------------
213 * Sets error information on the scanner.
214 * ----------------------------------------------------------------------------- */
216 static void set_error(Scanner *s, int line, const_String_or_char_ptr msg) {
217 s->error_line = line;
218 s->error = NewString(msg);
221 /* -----------------------------------------------------------------------------
225 * Returns error information (if any)
226 * ----------------------------------------------------------------------------- */
229 Scanner_errmsg(Scanner *s) {
234 Scanner_errline(Scanner *s) {
235 return s->error_line;
238 /* -----------------------------------------------------------------------------
239 * Scanner_freeze_line()
241 * Freezes the current line number.
242 * ----------------------------------------------------------------------------- */
245 Scanner_freeze_line(Scanner *s, int val) {
246 s->freeze_line = val;
249 /* -----------------------------------------------------------------------------
252 * Retract n characters
253 * ----------------------------------------------------------------------------- */
254 static void retract(Scanner * s, int n) {
261 for (i = 0; i < n; i++) {
262 if (str[l - 1] == '\n') {
263 if (!s->freeze_line) s->line--;
265 Seek(s->str, -1, SEEK_CUR);
266 Delitem(s->text, DOH_END);
270 /* -----------------------------------------------------------------------------
273 * Get escape sequence. Called when a backslash is found in a string
274 * ----------------------------------------------------------------------------- */
276 static void get_escape(Scanner *s) {
288 Delitem(s->text, DOH_END);
289 Append(s->text,"\n");
293 Delitem(s->text, DOH_END);
294 Append(s->text,"\r");
298 Delitem(s->text, DOH_END);
299 Append(s->text,"\t");
303 Delitem(s->text, DOH_END);
304 Append(s->text,"\a");
308 Delitem(s->text, DOH_END);
309 Append(s->text,"\b");
313 Delitem(s->text, DOH_END);
314 Append(s->text,"\f");
318 Delitem(s->text, DOH_END);
319 Append(s->text,"\\");
323 Delitem(s->text, DOH_END);
324 Append(s->text,"\v");
328 Delitem(s->text, DOH_END);
329 Append(s->text,"\033");
333 Delitem(s->text, DOH_END);
334 Append(s->text,"\'");
338 Delitem(s->text, DOH_END);
339 Append(s->text,"\"");
343 Delitem(s->text, DOH_END);
349 Delitem(s->text, DOH_END);
350 } else if (c == 'x') {
352 Delitem(s->text, DOH_END);
358 Delitem(s->text, DOH_END);
359 Append(s->text, tmp);
366 Putc((char)result,s->text);
369 result = (result << 3) + (c - '0');
370 Delitem(s->text, DOH_END);
375 Putc((char)result, s->text);
379 result = (result << 4) + (c - '0');
381 result = (result << 4) + (10 + tolower(c) - 'a');
382 Delitem(s->text, DOH_END);
389 /* -----------------------------------------------------------------------------
392 * Return the raw value of the next token.
393 * ----------------------------------------------------------------------------- */
395 static int look(Scanner * s) {
401 s->start_line = s->line;
402 Setfile(s->text, Getfile(s->str));
406 if ((c = nextchar(s)) == 0)
409 /* Process delimiters */
412 return SWIG_TOKEN_ENDLINE;
413 } else if (!isspace(c)) {
417 Setline(s->text, s->line);
418 Setfile(s->text, Getfile(s->str));
423 if ((c = nextchar(s)) == 0)
426 state = 4; /* Possibly a SWIG directive */
428 /* Look for possible identifiers */
430 else if ((isalpha(c)) || (c == '_') ||
431 (s->idstart && strchr(s->idstart, c)))
434 /* Look for single character symbols */
437 return SWIG_TOKEN_LPAREN;
439 return SWIG_TOKEN_RPAREN;
441 return SWIG_TOKEN_SEMI;
443 return SWIG_TOKEN_COMMA;
447 return SWIG_TOKEN_RBRACE;
449 return SWIG_TOKEN_LBRACE;
467 return SWIG_TOKEN_NOT;
471 return SWIG_TOKEN_BACKSLASH;
473 return SWIG_TOKEN_LBRACKET;
475 return SWIG_TOKEN_RBRACKET;
477 return SWIG_TOKEN_AT;
481 return SWIG_TOKEN_POUND;
483 return SWIG_TOKEN_QUESTION;
485 /* Look for multi-character sequences */
488 state = 1; /* Comment (maybe) */
489 s->start_line = s->line;
491 else if (c == '\"') {
492 state = 2; /* Possibly a string */
493 s->start_line = s->line;
498 state = 5; /* maybe double colon */
500 state = 83; /* An octal or hex value */
501 else if (c == '\'') {
502 s->start_line = s->line;
504 state = 9; /* A character constant */
505 } else if (c == '`') {
506 s->start_line = s->line;
512 state = 100; /* Maybe a number, maybe just a period */
514 state = 8; /* A numerical value */
516 state = 99; /* An error */
519 case 1: /* Comment block */
520 if ((c = nextchar(s)) == 0)
523 state = 10; /* C++ style comment */
525 Setline(s->text, Getline(s->str));
526 Setfile(s->text, Getfile(s->str));
527 Append(s->text, "//");
528 } else if (c == '*') {
529 state = 11; /* C style comment */
531 Setline(s->text, Getline(s->str));
532 Setfile(s->text, Getfile(s->str));
533 Append(s->text, "/*");
534 } else if (c == '=') {
535 return SWIG_TOKEN_DIVEQUAL;
538 return SWIG_TOKEN_SLASH;
541 case 10: /* C++ style comment */
542 if ((c = nextchar(s)) == 0) {
543 Swig_error(cparse_file, cparse_start_line, "Unterminated comment\n");
544 return SWIG_TOKEN_ERROR;
548 return SWIG_TOKEN_COMMENT;
553 case 11: /* C style comment block */
554 if ((c = nextchar(s)) == 0) {
555 Swig_error(cparse_file, cparse_start_line, "Unterminated comment\n");
556 return SWIG_TOKEN_ERROR;
564 case 12: /* Still in C style comment */
565 if ((c = nextchar(s)) == 0) {
566 Swig_error(cparse_file, cparse_start_line, "Unterminated comment\n");
567 return SWIG_TOKEN_ERROR;
571 } else if (c == '/') {
572 return SWIG_TOKEN_COMMENT;
578 case 2: /* Processing a string */
579 if ((c = nextchar(s)) == 0) {
580 Swig_error(cparse_file, cparse_start_line, "Unterminated string\n");
581 return SWIG_TOKEN_ERROR;
584 Delitem(s->text, DOH_END);
585 return SWIG_TOKEN_STRING;
586 } else if (c == '\\') {
587 Delitem(s->text, DOH_END);
593 case 3: /* Maybe a not equals */
594 if ((c = nextchar(s)) == 0)
595 return SWIG_TOKEN_LNOT;
597 return SWIG_TOKEN_NOTEQUAL;
600 return SWIG_TOKEN_LNOT;
604 case 31: /* AND or Logical AND or ANDEQUAL */
605 if ((c = nextchar(s)) == 0)
606 return SWIG_TOKEN_AND;
608 return SWIG_TOKEN_LAND;
610 return SWIG_TOKEN_ANDEQUAL;
613 return SWIG_TOKEN_AND;
617 case 32: /* OR or Logical OR */
618 if ((c = nextchar(s)) == 0)
619 return SWIG_TOKEN_OR;
621 return SWIG_TOKEN_LOR;
623 return SWIG_TOKEN_OREQUAL;
626 return SWIG_TOKEN_OR;
630 case 33: /* EQUAL or EQUALTO */
631 if ((c = nextchar(s)) == 0)
632 return SWIG_TOKEN_EQUAL;
634 return SWIG_TOKEN_EQUALTO;
637 return SWIG_TOKEN_EQUAL;
641 case 4: /* A wrapper generator directive (maybe) */
642 if ((c = nextchar(s)) == 0)
643 return SWIG_TOKEN_PERCENT;
645 state = 40; /* Include block */
647 Setline(s->text, Getline(s->str));
648 Setfile(s->text, Getfile(s->str));
649 s->start_line = s->line;
650 } else if (s->idstart && strchr(s->idstart, '%') &&
651 ((isalpha(c)) || (c == '_'))) {
653 } else if (c == '=') {
654 return SWIG_TOKEN_MODEQUAL;
657 return SWIG_TOKEN_PERCENT;
661 case 40: /* Process an include block */
662 if ((c = nextchar(s)) == 0) {
663 Swig_error(cparse_file, cparse_start_line, "Unterminated block\n");
664 return SWIG_TOKEN_ERROR;
669 case 41: /* Still processing include block */
670 if ((c = nextchar(s)) == 0) {
671 set_error(s,s->start_line,"Unterminated code block");
675 Delitem(s->text, DOH_END);
676 Delitem(s->text, DOH_END);
677 Seek(s->text,0,SEEK_SET);
678 return SWIG_TOKEN_CODEBLOCK;
684 case 5: /* Maybe a double colon */
686 if ((c = nextchar(s)) == 0)
687 return SWIG_TOKEN_COLON;
692 return SWIG_TOKEN_COLON;
696 case 50: /* DCOLON, DCOLONSTAR */
697 if ((c = nextchar(s)) == 0)
698 return SWIG_TOKEN_DCOLON;
700 return SWIG_TOKEN_DCOLONSTAR;
703 return SWIG_TOKEN_DCOLON;
707 case 60: /* shift operators */
708 if ((c = nextchar(s)) == 0)
709 return SWIG_TOKEN_LESSTHAN;
713 return SWIG_TOKEN_LTEQUAL;
716 return SWIG_TOKEN_LESSTHAN;
720 if ((c = nextchar(s)) == 0)
721 return SWIG_TOKEN_GREATERTHAN;
725 return SWIG_TOKEN_GTEQUAL;
728 return SWIG_TOKEN_GREATERTHAN;
731 case 7: /* Identifier */
732 if ((c = nextchar(s)) == 0)
733 return SWIG_TOKEN_ID;
734 if (isalnum(c) || (c == '_') || (c == '$')) {
738 return SWIG_TOKEN_ID;
742 case 75: /* Special identifier $ */
743 if ((c = nextchar(s)) == 0)
744 return SWIG_TOKEN_DOLLAR;
745 if (isalnum(c) || (c == '_') || (c == '*') || (c == '&')) {
749 if (Len(s->text) == 1) return SWIG_TOKEN_DOLLAR;
750 return SWIG_TOKEN_ID;
754 case 8: /* A numerical digit */
755 if ((c = nextchar(s)) == 0)
756 return SWIG_TOKEN_INT;
759 } else if ((c == 'e') || (c == 'E')) {
761 } else if ((c == 'f') || (c == 'F')) {
762 Delitem(s->text, DOH_END);
763 return SWIG_TOKEN_FLOAT;
764 } else if (isdigit(c)) {
766 } else if ((c == 'l') || (c == 'L')) {
768 } else if ((c == 'u') || (c == 'U')) {
772 return SWIG_TOKEN_INT;
775 case 81: /* A floating pointer number of some sort */
776 if ((c = nextchar(s)) == 0)
777 return SWIG_TOKEN_DOUBLE;
780 else if ((c == 'e') || (c == 'E'))
782 else if ((c == 'f') || (c == 'F')) {
783 Delitem(s->text, DOH_END);
784 return SWIG_TOKEN_FLOAT;
785 } else if ((c == 'l') || (c == 'L')) {
786 Delitem(s->text, DOH_END);
787 return SWIG_TOKEN_DOUBLE;
790 return (SWIG_TOKEN_DOUBLE);
794 if ((c = nextchar(s)) == 0) {
796 return SWIG_TOKEN_INT;
798 if ((isdigit(c)) || (c == '-') || (c == '+'))
802 return (SWIG_TOKEN_INT);
806 /* Like case 82, but we've seen a decimal point. */
807 if ((c = nextchar(s)) == 0) {
809 return SWIG_TOKEN_DOUBLE;
811 if ((isdigit(c)) || (c == '-') || (c == '+'))
815 return (SWIG_TOKEN_DOUBLE);
819 /* Might be a hexadecimal or octal number */
820 if ((c = nextchar(s)) == 0)
821 return SWIG_TOKEN_INT;
824 else if ((c == 'x') || (c == 'X'))
828 else if ((c == 'l') || (c == 'L')) {
830 } else if ((c == 'u') || (c == 'U')) {
834 return SWIG_TOKEN_INT;
838 /* This is an octal number */
839 if ((c = nextchar(s)) == 0)
840 return SWIG_TOKEN_INT;
843 else if ((c == 'l') || (c == 'L')) {
845 } else if ((c == 'u') || (c == 'U')) {
849 return SWIG_TOKEN_INT;
853 /* This is a hex number */
854 if ((c = nextchar(s)) == 0)
855 return SWIG_TOKEN_INT;
858 else if ((c == 'l') || (c == 'L')) {
860 } else if ((c == 'u') || (c == 'U')) {
864 return SWIG_TOKEN_INT;
869 /* Rest of floating point number */
871 if ((c = nextchar(s)) == 0)
872 return SWIG_TOKEN_DOUBLE;
875 else if ((c == 'f') || (c == 'F')) {
876 Delitem(s->text, DOH_END);
877 return SWIG_TOKEN_FLOAT;
878 } else if ((c == 'l') || (c == 'L')) {
879 Delitem(s->text, DOH_END);
880 return SWIG_TOKEN_DOUBLE;
883 return SWIG_TOKEN_DOUBLE;
888 /* A long integer of some sort */
889 if ((c = nextchar(s)) == 0)
890 return SWIG_TOKEN_LONG;
891 if ((c == 'u') || (c == 'U')) {
892 return SWIG_TOKEN_ULONG;
893 } else if ((c == 'l') || (c == 'L')) {
897 return SWIG_TOKEN_LONG;
901 /* A long long integer */
904 if ((c = nextchar(s)) == 0)
905 return SWIG_TOKEN_LONGLONG;
906 if ((c == 'u') || (c == 'U')) {
907 return SWIG_TOKEN_ULONGLONG;
910 return SWIG_TOKEN_LONGLONG;
913 /* An unsigned number */
916 if ((c = nextchar(s)) == 0)
917 return SWIG_TOKEN_UINT;
918 if ((c == 'l') || (c == 'L')) {
922 return SWIG_TOKEN_UINT;
926 /* Possibly an unsigned long long or unsigned long */
928 if ((c = nextchar(s)) == 0)
929 return SWIG_TOKEN_ULONG;
930 if ((c == 'l') || (c == 'L'))
931 return SWIG_TOKEN_ULONGLONG;
934 return SWIG_TOKEN_ULONG;
937 /* A character constant */
939 if ((c = nextchar(s)) == 0) {
940 Swig_error(cparse_file, cparse_start_line, "Unterminated character constant\n");
941 return SWIG_TOKEN_ERROR;
944 Delitem(s->text, DOH_END);
945 return (SWIG_TOKEN_CHAR);
946 } else if (c == '\\') {
947 Delitem(s->text, DOH_END);
952 /* A period or maybe a floating point number */
955 if ((c = nextchar(s)) == 0)
961 return SWIG_TOKEN_PERIOD;
965 case 200: /* PLUS, PLUSPLUS, PLUSEQUAL */
966 if ((c = nextchar(s)) == 0)
967 return SWIG_TOKEN_PLUS;
969 return SWIG_TOKEN_PLUSPLUS;
971 return SWIG_TOKEN_PLUSEQUAL;
974 return SWIG_TOKEN_PLUS;
978 case 210: /* MINUS, MINUSMINUS, MINUSEQUAL, ARROW */
979 if ((c = nextchar(s)) == 0)
980 return SWIG_TOKEN_MINUS;
982 return SWIG_TOKEN_MINUSMINUS;
984 return SWIG_TOKEN_MINUSEQUAL;
989 return SWIG_TOKEN_MINUS;
993 case 211: /* ARROW, ARROWSTAR */
994 if ((c = nextchar(s)) == 0)
995 return SWIG_TOKEN_ARROW;
997 return SWIG_TOKEN_ARROWSTAR;
1000 return SWIG_TOKEN_ARROW;
1005 case 220: /* STAR, TIMESEQUAL */
1006 if ((c = nextchar(s)) == 0)
1007 return SWIG_TOKEN_STAR;
1009 return SWIG_TOKEN_TIMESEQUAL;
1012 return SWIG_TOKEN_STAR;
1016 case 230: /* XOR, XOREQUAL */
1017 if ((c = nextchar(s)) == 0)
1018 return SWIG_TOKEN_XOR;
1020 return SWIG_TOKEN_XOREQUAL;
1023 return SWIG_TOKEN_XOR;
1027 case 240: /* LSHIFT, LSEQUAL */
1028 if ((c = nextchar(s)) == 0)
1029 return SWIG_TOKEN_LSHIFT;
1031 return SWIG_TOKEN_LSEQUAL;
1034 return SWIG_TOKEN_LSHIFT;
1038 case 250: /* RSHIFT, RSEQUAL */
1039 if ((c = nextchar(s)) == 0)
1040 return SWIG_TOKEN_RSHIFT;
1042 return SWIG_TOKEN_RSEQUAL;
1045 return SWIG_TOKEN_RSHIFT;
1050 /* An illegal character */
1052 /* Reverse string */
1054 if ((c = nextchar(s)) == 0) {
1055 Swig_error(cparse_file, cparse_start_line, "Unterminated character constant\n");
1056 return SWIG_TOKEN_ERROR;
1059 Delitem(s->text, DOH_END);
1060 return (SWIG_TOKEN_RSTRING);
1065 return SWIG_TOKEN_ILLEGAL;
1070 /* -----------------------------------------------------------------------------
1073 * Real entry point to return the next token. Returns 0 if at end of input.
1074 * ----------------------------------------------------------------------------- */
1076 int Scanner_token(Scanner * s) {
1079 if (s->nexttoken >= 0) {
1086 if (!s->start_line) {
1087 Setline(s->text,s->line);
1089 Setline(s->text,s->start_line);
1094 /* -----------------------------------------------------------------------------
1097 * Return the lexeme associated with the last returned token.
1098 * ----------------------------------------------------------------------------- */
1100 String *Scanner_text(Scanner * s) {
1104 /* -----------------------------------------------------------------------------
1105 * Scanner_skip_line()
1107 * Skips to the end of a line
1108 * ----------------------------------------------------------------------------- */
1110 void Scanner_skip_line(Scanner * s) {
1114 Setfile(s->text, Getfile(s->str));
1115 Setline(s->text, s->line);
1117 if ((c = nextchar(s)) == 0)
1121 } else if (c == '\n') {
1128 /* -----------------------------------------------------------------------------
1129 * Scanner_skip_balanced()
1131 * Skips a piece of code enclosed in begin/end symbols such as '{...}' or
1132 * (...). Ignores symbols inside comments or strings.
1133 * ----------------------------------------------------------------------------- */
1135 int Scanner_skip_balanced(Scanner * s, int startchar, int endchar) {
1140 char temp[2] = { 0, 0 };
1142 temp[0] = (char) startchar;
1144 Setfile(s->text, Getfile(s->str));
1145 Setline(s->text, s->line);
1147 Append(s->text, temp);
1148 while (num_levels > 0) {
1149 if ((c = nextchar(s)) == 0) {
1156 else if (c == endchar)
1216 /* -----------------------------------------------------------------------------
1217 * Scanner_isoperator()
1219 * Returns 0 or 1 depending on whether or not a token corresponds to a C/C++
1221 * ----------------------------------------------------------------------------- */
1224 Scanner_isoperator(int tokval) {
1225 if (tokval >= 100) return 1;