1 /* Parse C expressions for CCCP.
2 Copyright (C) 1987, 1992, 1994, 1995 Free Software Foundation.
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 2, or (at your option) any
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 In other words, you are welcome to use, share and improve this program.
19 You are forbidden to forbid anyone else to use, share and improve
20 what you give them. Help stamp out software-hoarding!
22 Adapted from expread.y of GDB by Paul Rubin, July 1986. */
24 /* Parse a C expression from text in a string */
29 /* #define YYDEBUG 1 */
31 #ifdef MULTIBYTE_CHARS
38 typedef unsigned char U_CHAR;
40 /* This is used for communicating lists of keywords with cccp.c. */
48 /* Define a generic NULL if one hasn't already been defined. */
55 #if defined (USE_PROTOTYPES) ? USE_PROTOTYPES : defined (__STDC__)
56 #define GENERIC_PTR void *
58 #define GENERIC_PTR char *
62 /* Find the largest host integer type and set its size and type. */
64 #ifndef HOST_BITS_PER_WIDE_INT
66 #if HOST_BITS_PER_LONG > HOST_BITS_PER_INT
67 #define HOST_BITS_PER_WIDE_INT HOST_BITS_PER_LONG
68 #define HOST_WIDE_INT long
70 #define HOST_BITS_PER_WIDE_INT HOST_BITS_PER_INT
71 #define HOST_WIDE_INT int
77 #define NULL_PTR ((GENERIC_PTR)0)
82 HOST_WIDE_INT expression_value;
84 static jmp_buf parse_return_error;
86 /* Nonzero means count most punctuation as part of a name. */
87 static int keyword_parsing = 0;
89 /* Nonzero means do not evaluate this expression.
90 This is a count, since unevaluated expressions can nest. */
91 static int skip_evaluation;
93 /* some external tables of character types */
94 extern unsigned char is_idstart[], is_idchar[], is_hor_space[];
96 extern char *xmalloc ();
98 /* Flag for -pedantic. */
101 /* Flag for -traditional. */
102 extern int traditional;
104 #ifndef CHAR_TYPE_SIZE
105 #define CHAR_TYPE_SIZE BITS_PER_UNIT
108 #ifndef INT_TYPE_SIZE
109 #define INT_TYPE_SIZE BITS_PER_WORD
112 #ifndef LONG_TYPE_SIZE
113 #define LONG_TYPE_SIZE BITS_PER_WORD
116 #ifndef WCHAR_TYPE_SIZE
117 #define WCHAR_TYPE_SIZE INT_TYPE_SIZE
120 #ifndef MAX_CHAR_TYPE_SIZE
121 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
124 #ifndef MAX_INT_TYPE_SIZE
125 #define MAX_INT_TYPE_SIZE INT_TYPE_SIZE
128 #ifndef MAX_LONG_TYPE_SIZE
129 #define MAX_LONG_TYPE_SIZE LONG_TYPE_SIZE
132 #ifndef MAX_WCHAR_TYPE_SIZE
133 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
136 /* Yield nonzero if adding two numbers with A's and B's signs can yield a
137 number with SUM's sign, where A, B, and SUM are all C integers. */
138 #define possible_sum_sign(a, b, sum) ((((a) ^ (b)) | ~ ((a) ^ (sum))) < 0)
140 static void integer_overflow ();
141 static long left_shift ();
142 static long right_shift ();
146 struct constant {long value; int unsignedp;} integer;
147 struct name {U_CHAR *address; int length;} name;
148 struct arglist *keywords;
151 %type <integer> exp exp1 start
152 %type <keywords> keywords
153 %token <integer> INT CHAR
155 %token <integer> ERROR
165 %left '<' '>' LEQ GEQ
176 { expression_value = $1.value; }
179 /* Expressions, including the comma operator. */
183 pedwarn ("comma operator in operand of `#if'");
187 /* Expressions, not including the comma operator. */
188 exp : '-' exp %prec UNARY
189 { $$.value = - $2.value;
190 if (($$.value & $2.value) < 0 && ! $2.unsignedp)
192 $$.unsignedp = $2.unsignedp; }
193 | '!' exp %prec UNARY
194 { $$.value = ! $2.value;
196 | '+' exp %prec UNARY
198 | '~' exp %prec UNARY
199 { $$.value = ~ $2.value;
200 $$.unsignedp = $2.unsignedp; }
202 { $$.value = check_assertion ($2.address, $2.length,
206 { keyword_parsing = 1; }
208 { $$.value = check_assertion ($2.address, $2.length,
216 /* Binary operators in order of decreasing precedence. */
218 { $$.unsignedp = $1.unsignedp || $3.unsignedp;
220 $$.value = (unsigned long) $1.value * $3.value;
223 $$.value = $1.value * $3.value;
225 && ($$.value / $1.value != $3.value
226 || ($$.value & $1.value & $3.value) < 0))
232 if (!skip_evaluation)
233 error ("division by zero in #if");
236 $$.unsignedp = $1.unsignedp || $3.unsignedp;
238 $$.value = (unsigned long) $1.value / $3.value;
241 $$.value = $1.value / $3.value;
242 if (($$.value & $1.value & $3.value) < 0)
248 if (!skip_evaluation)
249 error ("division by zero in #if");
252 $$.unsignedp = $1.unsignedp || $3.unsignedp;
254 $$.value = (unsigned long) $1.value % $3.value;
256 $$.value = $1.value % $3.value; }
258 { $$.value = $1.value + $3.value;
259 $$.unsignedp = $1.unsignedp || $3.unsignedp;
261 && ! possible_sum_sign ($1.value, $3.value,
263 integer_overflow (); }
265 { $$.value = $1.value - $3.value;
266 $$.unsignedp = $1.unsignedp || $3.unsignedp;
268 && ! possible_sum_sign ($$.value, $3.value,
270 integer_overflow (); }
272 { $$.unsignedp = $1.unsignedp;
273 if ($3.value < 0 && ! $3.unsignedp)
274 $$.value = right_shift (&$1, -$3.value);
276 $$.value = left_shift (&$1, $3.value); }
278 { $$.unsignedp = $1.unsignedp;
279 if ($3.value < 0 && ! $3.unsignedp)
280 $$.value = left_shift (&$1, -$3.value);
282 $$.value = right_shift (&$1, $3.value); }
284 { $$.value = ($1.value == $3.value);
287 { $$.value = ($1.value != $3.value);
291 if ($1.unsignedp || $3.unsignedp)
292 $$.value = (unsigned long) $1.value <= $3.value;
294 $$.value = $1.value <= $3.value; }
297 if ($1.unsignedp || $3.unsignedp)
298 $$.value = (unsigned long) $1.value >= $3.value;
300 $$.value = $1.value >= $3.value; }
303 if ($1.unsignedp || $3.unsignedp)
304 $$.value = (unsigned long) $1.value < $3.value;
306 $$.value = $1.value < $3.value; }
309 if ($1.unsignedp || $3.unsignedp)
310 $$.value = (unsigned long) $1.value > $3.value;
312 $$.value = $1.value > $3.value; }
314 { $$.value = $1.value & $3.value;
315 $$.unsignedp = $1.unsignedp || $3.unsignedp; }
317 { $$.value = $1.value ^ $3.value;
318 $$.unsignedp = $1.unsignedp || $3.unsignedp; }
320 { $$.value = $1.value | $3.value;
321 $$.unsignedp = $1.unsignedp || $3.unsignedp; }
323 { skip_evaluation += !$1.value; }
325 { skip_evaluation -= !$1.value;
326 $$.value = ($1.value && $4.value);
329 { skip_evaluation += !!$1.value; }
331 { skip_evaluation -= !!$1.value;
332 $$.value = ($1.value || $4.value);
335 { skip_evaluation += !$1.value; }
337 { skip_evaluation += !!$1.value - !$1.value; }
339 { skip_evaluation -= !!$1.value;
340 $$.value = $1.value ? $4.value : $7.value;
341 $$.unsignedp = $4.unsignedp || $7.unsignedp; }
343 { $$ = yylval.integer; }
345 { $$ = yylval.integer; }
353 | '(' keywords ')' keywords
354 { struct arglist *temp;
355 $$ = (struct arglist *) xmalloc (sizeof (struct arglist));
357 $$->name = (U_CHAR *) "(";
360 while (temp != 0 && temp->next != 0)
362 temp->next = (struct arglist *) xmalloc (sizeof (struct arglist));
363 temp->next->next = $4;
364 temp->next->name = (U_CHAR *) ")";
365 temp->next->length = 1; }
367 { $$ = (struct arglist *) xmalloc (sizeof (struct arglist));
368 $$->name = $1.address;
369 $$->length = $1.length;
374 /* During parsing of a C expression, the pointer to the next character
375 is in this variable. */
379 /* Take care of parsing a number (anything that starts with a digit).
380 Set yylval and return the token type; update lexptr.
381 LEN is the number of characters in it. */
383 /* maybe needs to actually deal with floating point numbers */
389 register char *p = lexptr;
391 register unsigned long n = 0, nd, ULONG_MAX_over_base;
392 register int base = 10;
393 register int len = olen;
394 register int overflow = 0;
395 register int digit, largest_digit = 0;
398 for (c = 0; c < len; c++)
400 /* It's a float since it contains a point. */
401 yyerror ("floating point numbers not allowed in #if expressions");
405 yylval.integer.unsignedp = 0;
407 if (len >= 3 && (!strncmp (p, "0x", 2) || !strncmp (p, "0X", 2))) {
415 ULONG_MAX_over_base = (unsigned long) -1 / base;
417 for (; len > 0; len--) {
420 if (c >= '0' && c <= '9')
422 else if (base == 16 && c >= 'a' && c <= 'f')
423 digit = c - 'a' + 10;
424 else if (base == 16 && c >= 'A' && c <= 'F')
425 digit = c - 'A' + 10;
427 /* `l' means long, and `u' means unsigned. */
429 if (c == 'l' || c == 'L')
432 yyerror ("two `l's in integer constant");
435 else if (c == 'u' || c == 'U')
437 if (yylval.integer.unsignedp)
438 yyerror ("two `u's in integer constant");
439 yylval.integer.unsignedp = 1;
448 /* Don't look for any more digits after the suffixes. */
451 if (largest_digit < digit)
452 largest_digit = digit;
453 nd = n * base + digit;
454 overflow |= ULONG_MAX_over_base < n | nd < n;
459 yyerror ("Invalid number in #if expression");
463 if (base <= largest_digit)
464 warning ("integer constant contains digits beyond the radix");
467 warning ("integer constant out of range");
469 /* If too big to be signed, consider it unsigned. */
470 if ((long) n < 0 && ! yylval.integer.unsignedp)
473 warning ("integer constant is so large that it is unsigned");
474 yylval.integer.unsignedp = 1;
478 yylval.integer.value = n;
487 static struct token tokentab2[] = {
501 /* Read one token, getting characters through lexptr. */
507 register int namelen;
508 register unsigned char *tokstart;
509 register struct token *toktab;
514 tokstart = (unsigned char *) lexptr;
516 /* See if it is a special token of length 2. */
517 if (! keyword_parsing)
518 for (toktab = tokentab2; toktab->operator != NULL; toktab++)
519 if (c == *toktab->operator && tokstart[1] == toktab->operator[1]) {
521 if (toktab->token == ERROR)
523 char *buf = (char *) alloca (40);
524 sprintf (buf, "`%s' not allowed in operand of `#if'", toktab->operator);
527 return toktab->token;
542 /* Capital L may start a wide-string or wide-character constant. */
543 if (lexptr[1] == '\'')
549 if (lexptr[1] == '"')
553 goto string_constant;
561 if (keyword_parsing) {
562 char *start_ptr = lexptr - 1;
566 c = parse_escape (&lexptr);
570 yylval.name.address = tokstart;
571 yylval.name.length = lexptr - start_ptr;
575 /* This code for reading a character constant
576 handles multicharacter constants and wide characters.
577 It is mostly copied from c-lex.c. */
579 register int result = 0;
580 register num_chars = 0;
581 unsigned width = MAX_CHAR_TYPE_SIZE;
587 width = MAX_WCHAR_TYPE_SIZE;
588 #ifdef MULTIBYTE_CHARS
589 max_chars = MB_CUR_MAX;
595 max_chars = MAX_LONG_TYPE_SIZE / width;
597 token_buffer = (char *) alloca (max_chars + 1);
603 if (c == '\'' || c == EOF)
608 c = parse_escape (&lexptr);
609 if (width < HOST_BITS_PER_INT
610 && (unsigned) c >= (1 << width))
611 pedwarn ("escape sequence out of range for character");
616 /* Merge character into result; ignore excess chars. */
617 if (num_chars < max_chars + 1)
619 if (width < HOST_BITS_PER_INT)
620 result = (result << width) | (c & ((1 << width) - 1));
623 token_buffer[num_chars - 1] = c;
627 token_buffer[num_chars] = 0;
630 error ("malformatted character constant");
631 else if (num_chars == 0)
632 error ("empty character constant");
633 else if (num_chars > max_chars)
635 num_chars = max_chars;
636 error ("character constant too long");
638 else if (num_chars != 1 && ! traditional)
639 warning ("multi-character character constant");
641 /* If char type is signed, sign-extend the constant. */
644 int num_bits = num_chars * width;
646 if (lookup ("__CHAR_UNSIGNED__", sizeof ("__CHAR_UNSIGNED__")-1, -1)
647 || ((result >> (num_bits - 1)) & 1) == 0)
649 = result & ((unsigned long) ~0 >> (HOST_BITS_PER_LONG - num_bits));
652 = result | ~((unsigned long) ~0 >> (HOST_BITS_PER_LONG - num_bits));
656 #ifdef MULTIBYTE_CHARS
657 /* Set the initial shift state and convert the next sequence. */
659 /* In all locales L'\0' is zero and mbtowc will return zero,
662 || (num_chars == 1 && token_buffer[0] != '\0'))
665 (void) mbtowc (NULL_PTR, NULL_PTR, 0);
666 if (mbtowc (& wc, token_buffer, num_chars) == num_chars)
669 warning ("Ignoring invalid multibyte character");
672 yylval.integer.value = result;
676 /* This is always a signed type. */
677 yylval.integer.unsignedp = 0;
681 /* some of these chars are invalid in constant expressions;
682 maybe do something about them later */
715 if (keyword_parsing) {
716 char *start_ptr = lexptr;
721 c = parse_escape (&lexptr);
725 yylval.name.address = tokstart;
726 yylval.name.length = lexptr - start_ptr;
729 yyerror ("string constants not allowed in #if expressions");
733 if (c >= '0' && c <= '9' && !keyword_parsing) {
736 c = tokstart[namelen], is_idchar[c] || c == '.';
739 return parse_number (namelen);
742 /* It is a name. See how long it is. */
744 if (keyword_parsing) {
745 for (namelen = 0;; namelen++) {
746 if (is_hor_space[tokstart[namelen]])
748 if (tokstart[namelen] == '(' || tokstart[namelen] == ')')
750 if (tokstart[namelen] == '"' || tokstart[namelen] == '\'')
754 if (!is_idstart[c]) {
755 yyerror ("Invalid token in expression");
759 for (namelen = 0; is_idchar[tokstart[namelen]]; namelen++)
764 yylval.name.address = tokstart;
765 yylval.name.length = namelen;
770 /* Parse a C escape sequence. STRING_PTR points to a variable
771 containing a pointer to the string to parse. That pointer
772 is updated past the characters we use. The value of the
773 escape sequence is returned.
775 A negative value means the sequence \ newline was seen,
776 which is supposed to be equivalent to nothing at all.
778 If \ is followed by a null character, we return a negative
779 value and leave the string pointer pointing at the null character.
781 If \ is followed by 000, we return 0 and leave the string pointer
782 after the zeros. A value of 0 does not mean end of string. */
785 parse_escape (string_ptr)
788 register int c = *(*string_ptr)++;
798 pedwarn ("non-ANSI-standard escape sequence, `\\%c'", c);
803 return TARGET_NEWLINE;
825 register int i = c - '0';
826 register int count = 0;
829 c = *(*string_ptr)++;
830 if (c >= '0' && c <= '7')
831 i = (i << 3) + c - '0';
838 if ((i & ~((1 << MAX_CHAR_TYPE_SIZE) - 1)) != 0)
840 i &= (1 << MAX_CHAR_TYPE_SIZE) - 1;
841 warning ("octal character constant does not fit in a byte");
847 register unsigned i = 0, overflow = 0, digits_found = 0, digit;
850 c = *(*string_ptr)++;
851 if (c >= '0' && c <= '9')
853 else if (c >= 'a' && c <= 'f')
854 digit = c - 'a' + 10;
855 else if (c >= 'A' && c <= 'F')
856 digit = c - 'A' + 10;
862 overflow |= i ^ (i << 4 >> 4);
863 i = (i << 4) + digit;
867 yyerror ("\\x used with no following hex digits");
868 if (overflow | (i & ~((1 << BITS_PER_UNIT) - 1)))
870 i &= (1 << BITS_PER_UNIT) - 1;
871 warning ("hex character constant does not fit in a byte");
886 longjmp (parse_return_error, 1);
892 if (!skip_evaluation && pedantic)
893 pedwarn ("integer overflow in preprocessor expression");
901 if (b >= HOST_BITS_PER_LONG)
903 if (! a->unsignedp && a->value != 0)
907 else if (a->unsignedp)
908 return (unsigned long) a->value << b;
911 long l = a->value << b;
912 if (l >> b != a->value)
923 if (b >= HOST_BITS_PER_LONG)
924 return a->unsignedp ? 0 : a->value >> (HOST_BITS_PER_LONG - 1);
925 else if (a->unsignedp)
926 return (unsigned long) a->value >> b;
928 return a->value >> b;
931 /* This page contains the entry point to this file. */
933 /* Parse STRING as an expression, and complain if this fails
934 to use up all of the contents of STRING. */
935 /* We do not support C comments. They should be removed before
936 this function is called. */
939 parse_c_expression (string)
944 if (lexptr == 0 || *lexptr == 0) {
945 error ("empty #if expression");
946 return 0; /* don't include the #if group */
949 /* If there is some sort of scanning error, just return 0 and assume
950 the parsing routine has printed an error message somewhere.
951 There is surely a better thing to do than this. */
952 if (setjmp (parse_return_error))
956 return 0; /* actually this is never reached
957 the way things stand. */
959 error ("Junk after end of expression.");
961 return expression_value; /* set by yyparse () */
964 #ifdef TEST_EXP_READER
967 /* Main program for testing purposes. */
977 initialize_random_junk ();
980 printf ("enter expression: ");
982 while ((buf[n] = getchar ()) != '\n' && buf[n] != EOF)
987 printf ("parser returned %ld\n", parse_c_expression (buf));
/* Table to tell if a char can be part of a C identifier.  */
unsigned char is_idchar[256];
/* Table to tell if a char can be the first char of a C identifier.  */
unsigned char is_idstart[256];
/* Table to tell if a char is horizontal space.  isspace () thinks that
   newline is space; this is not a good idea for this program.

   The element type is `unsigned char', like the two tables above, so
   that this definition has the same type as the
   `extern unsigned char ... is_hor_space[]' declaration earlier in this
   file.  `char' and `unsigned char' are distinct, incompatible types,
   so a plain `char' definition conflicts with that declaration.  */
unsigned char is_hor_space[256];
1002 * initialize random junk in the hash table and maybe other places
1004 initialize_random_junk ()
1009 * Set up is_idchar and is_idstart tables. These should be
1010 * faster than saying (is_alpha (c) || c == '_'), etc.
1011 * Must set up these things before calling any routines that
1014 for (i = 'a'; i <= 'z'; i++) {
1015 ++is_idchar[i - 'a' + 'A'];
1017 ++is_idstart[i - 'a' + 'A'];
1020 for (i = '0'; i <= '9'; i++)
1024 #if DOLLARS_IN_IDENTIFIERS
1029 /* horizontal space table */
1030 ++is_hor_space[' '];
1031 ++is_hor_space['\t'];
1036 printf ("error: %s\n", msg);
1041 printf ("warning: %s\n", msg);
1045 lookup (name, len, hash)
1050 return (DEFAULT_SIGNED_CHAR) ? 0 : ((struct hashnode *) -1);