1 /* YACC parser for D expressions, for GDB.
3 Copyright (C) 2014-2019 Free Software Foundation, Inc.
5 This file is part of GDB.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 /* This file is derived from c-exp.y, jv-exp.y. */
22 /* Parse a D expression from text in a string,
23 and return the result as a struct expression pointer.
24 That structure contains arithmetic operations in reverse polish,
25 with constants represented by operations that are followed by special data.
26 See expression.h for the details of the format.
27 What is important here is that it can be built up sequentially
28 during the process of parsing; the lower levels of the tree always
29 come first in the result.
31 Note that malloc's and realloc's in this file are transformed to
32 xmalloc and xrealloc respectively by the same sed command in the
33 makefile that remaps any other malloc/realloc inserted by the parser
34 generator. Doing this with #defines and trying to control the interaction
35 with include files (<malloc.h> and <stdlib.h> for example) just became
36 too messy, particularly when such includes can be inserted at random
37 times by the parser generator. */
43 #include "expression.h"
45 #include "parser-defs.h"
49 #include "bfd.h" /* Required by objfiles.h. */
50 #include "symfile.h" /* Required by objfiles.h. */
51 #include "objfiles.h" /* For have_full_symbols and have_partial_symbols */
55 #define parse_type(ps) builtin_type (ps->gdbarch ())
56 #define parse_d_type(ps) builtin_d_type (ps->gdbarch ())
58 /* Remap normal yacc parser interface names (yyparse, yylex, yyerror,
60 #define GDB_YY_REMAP_PREFIX d_
63 /* The state of the parser, used internally when we are parsing the
66 static struct parser_state *pstate = NULL;
70 static int yylex (void);
72 static void yyerror (const char *);
74 static int type_aggregate_p (struct type *);
78 /* Although the yacc "value" of an expression is not used,
79 since the result is stored in the structure being created,
80 other node types do have values. */
94 struct typed_stoken tsval;
100 enum exp_opcode opcode;
101 struct stoken_vector svec;
105 /* YYSTYPE gets defined by %union */
106 static int parse_number (struct parser_state *, const char *,
107 int, int, YYSTYPE *);
110 %token <sval> IDENTIFIER UNKNOWN_NAME
111 %token <tsym> TYPENAME
112 %token <voidval> COMPLETE
114 /* A NAME_OR_INT is a symbol which is not known in the symbol table,
115 but which would parse as a valid number in the current input radix.
116 E.g. "c" when input_radix==16. Depending on the parse, it will be
117 turned into a name or into a number. */
119 %token <sval> NAME_OR_INT
121 %token <typed_val_int> INTEGER_LITERAL
122 %token <typed_val_float> FLOAT_LITERAL
123 %token <tsval> CHARACTER_LITERAL
124 %token <tsval> STRING_LITERAL
126 %type <svec> StringExp
127 %type <tval> BasicType TypeExp
128 %type <sval> IdentifierExp
129 %type <ival> ArrayLiteral
134 /* Keywords that have a constant value. */
135 %token TRUE_KEYWORD FALSE_KEYWORD NULL_KEYWORD
136 /* Class 'super' accessor. */
139 %token CAST_KEYWORD SIZEOF_KEYWORD
140 %token TYPEOF_KEYWORD TYPEID_KEYWORD
142 /* Comparison keywords. */
143 /* Type storage classes. */
144 %token IMMUTABLE_KEYWORD CONST_KEYWORD SHARED_KEYWORD
145 /* Non-scalar type keywords. */
146 %token STRUCT_KEYWORD UNION_KEYWORD
147 %token CLASS_KEYWORD INTERFACE_KEYWORD
148 %token ENUM_KEYWORD TEMPLATE_KEYWORD
149 %token DELEGATE_KEYWORD FUNCTION_KEYWORD
151 %token <sval> DOLLAR_VARIABLE
153 %token <opcode> ASSIGN_MODIFY
156 %right '=' ASSIGN_MODIFY
163 %left EQUAL NOTEQUAL '<' '>' LEQ GEQ
168 %left IDENTITY NOTIDENTITY
169 %right INCREMENT DECREMENT
181 /* Expressions, including the comma operator. */
189 | AssignExpression ',' CommaExpression
190 { write_exp_elt_opcode (pstate, BINOP_COMMA); }
194 ConditionalExpression
195 | ConditionalExpression '=' AssignExpression
196 { write_exp_elt_opcode (pstate, BINOP_ASSIGN); }
197 | ConditionalExpression ASSIGN_MODIFY AssignExpression
198 { write_exp_elt_opcode (pstate, BINOP_ASSIGN_MODIFY);
199 write_exp_elt_opcode (pstate, $2);
200 write_exp_elt_opcode (pstate, BINOP_ASSIGN_MODIFY); }
203 ConditionalExpression:
205 | OrOrExpression '?' Expression ':' ConditionalExpression
206 { write_exp_elt_opcode (pstate, TERNOP_COND); }
211 | OrOrExpression OROR AndAndExpression
212 { write_exp_elt_opcode (pstate, BINOP_LOGICAL_OR); }
217 | AndAndExpression ANDAND OrExpression
218 { write_exp_elt_opcode (pstate, BINOP_LOGICAL_AND); }
223 | OrExpression '|' XorExpression
224 { write_exp_elt_opcode (pstate, BINOP_BITWISE_IOR); }
229 | XorExpression '^' AndExpression
230 { write_exp_elt_opcode (pstate, BINOP_BITWISE_XOR); }
235 | AndExpression '&' CmpExpression
236 { write_exp_elt_opcode (pstate, BINOP_BITWISE_AND); }
247 ShiftExpression EQUAL ShiftExpression
248 { write_exp_elt_opcode (pstate, BINOP_EQUAL); }
249 | ShiftExpression NOTEQUAL ShiftExpression
250 { write_exp_elt_opcode (pstate, BINOP_NOTEQUAL); }
254 ShiftExpression IDENTITY ShiftExpression
255 { write_exp_elt_opcode (pstate, BINOP_EQUAL); }
256 | ShiftExpression NOTIDENTITY ShiftExpression
257 { write_exp_elt_opcode (pstate, BINOP_NOTEQUAL); }
261 ShiftExpression '<' ShiftExpression
262 { write_exp_elt_opcode (pstate, BINOP_LESS); }
263 | ShiftExpression LEQ ShiftExpression
264 { write_exp_elt_opcode (pstate, BINOP_LEQ); }
265 | ShiftExpression '>' ShiftExpression
266 { write_exp_elt_opcode (pstate, BINOP_GTR); }
267 | ShiftExpression GEQ ShiftExpression
268 { write_exp_elt_opcode (pstate, BINOP_GEQ); }
273 | ShiftExpression LSH AddExpression
274 { write_exp_elt_opcode (pstate, BINOP_LSH); }
275 | ShiftExpression RSH AddExpression
276 { write_exp_elt_opcode (pstate, BINOP_RSH); }
281 | AddExpression '+' MulExpression
282 { write_exp_elt_opcode (pstate, BINOP_ADD); }
283 | AddExpression '-' MulExpression
284 { write_exp_elt_opcode (pstate, BINOP_SUB); }
285 | AddExpression '~' MulExpression
286 { write_exp_elt_opcode (pstate, BINOP_CONCAT); }
291 | MulExpression '*' UnaryExpression
292 { write_exp_elt_opcode (pstate, BINOP_MUL); }
293 | MulExpression '/' UnaryExpression
294 { write_exp_elt_opcode (pstate, BINOP_DIV); }
295 | MulExpression '%' UnaryExpression
296 { write_exp_elt_opcode (pstate, BINOP_REM); }
300 { write_exp_elt_opcode (pstate, UNOP_ADDR); }
301 | INCREMENT UnaryExpression
302 { write_exp_elt_opcode (pstate, UNOP_PREINCREMENT); }
303 | DECREMENT UnaryExpression
304 { write_exp_elt_opcode (pstate, UNOP_PREDECREMENT); }
305 | '*' UnaryExpression
306 { write_exp_elt_opcode (pstate, UNOP_IND); }
307 | '-' UnaryExpression
308 { write_exp_elt_opcode (pstate, UNOP_NEG); }
309 | '+' UnaryExpression
310 { write_exp_elt_opcode (pstate, UNOP_PLUS); }
311 | '!' UnaryExpression
312 { write_exp_elt_opcode (pstate, UNOP_LOGICAL_NOT); }
313 | '~' UnaryExpression
314 { write_exp_elt_opcode (pstate, UNOP_COMPLEMENT); }
315 | TypeExp '.' SIZEOF_KEYWORD
316 { write_exp_elt_opcode (pstate, UNOP_SIZEOF); }
322 CAST_KEYWORD '(' TypeExp ')' UnaryExpression
323 { write_exp_elt_opcode (pstate, UNOP_CAST_TYPE); }
324 /* C style cast is illegal D, but is still recognised in
325 the grammar, so we keep this around for convenience. */
326 | '(' TypeExp ')' UnaryExpression
327 { write_exp_elt_opcode (pstate, UNOP_CAST_TYPE); }
333 | PostfixExpression HATHAT UnaryExpression
334 { write_exp_elt_opcode (pstate, BINOP_EXP); }
339 | PostfixExpression '.' COMPLETE
341 mark_struct_expression (pstate);
342 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT);
345 write_exp_string (pstate, s);
346 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT); }
347 | PostfixExpression '.' IDENTIFIER
348 { write_exp_elt_opcode (pstate, STRUCTOP_STRUCT);
349 write_exp_string (pstate, $3);
350 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT); }
351 | PostfixExpression '.' IDENTIFIER COMPLETE
352 { mark_struct_expression (pstate);
353 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT);
354 write_exp_string (pstate, $3);
355 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT); }
356 | PostfixExpression '.' SIZEOF_KEYWORD
357 { write_exp_elt_opcode (pstate, UNOP_SIZEOF); }
358 | PostfixExpression INCREMENT
359 { write_exp_elt_opcode (pstate, UNOP_POSTINCREMENT); }
360 | PostfixExpression DECREMENT
361 { write_exp_elt_opcode (pstate, UNOP_POSTDECREMENT); }
370 | ArgumentList ',' AssignExpression
381 PostfixExpression '('
382 { start_arglist (); }
384 { write_exp_elt_opcode (pstate, OP_FUNCALL);
385 write_exp_elt_longcst (pstate, (LONGEST) end_arglist ());
386 write_exp_elt_opcode (pstate, OP_FUNCALL); }
390 PostfixExpression '[' ArgumentList ']'
391 { if (arglist_len > 0)
393 write_exp_elt_opcode (pstate, MULTI_SUBSCRIPT);
394 write_exp_elt_longcst (pstate, (LONGEST) arglist_len);
395 write_exp_elt_opcode (pstate, MULTI_SUBSCRIPT);
398 write_exp_elt_opcode (pstate, BINOP_SUBSCRIPT);
403 PostfixExpression '[' ']'
404 { /* Do nothing. */ }
405 | PostfixExpression '[' AssignExpression DOTDOT AssignExpression ']'
406 { write_exp_elt_opcode (pstate, TERNOP_SLICE); }
411 { /* Do nothing. */ }
413 { struct bound_minimal_symbol msymbol;
414 char *copy = copy_name ($1);
415 struct field_of_this_result is_a_field_of_this;
416 struct block_symbol sym;
418 /* Handle VAR, which could be local or global. */
419 sym = lookup_symbol (copy, pstate->expression_context_block,
420 VAR_DOMAIN, &is_a_field_of_this);
421 if (sym.symbol && SYMBOL_CLASS (sym.symbol) != LOC_TYPEDEF)
423 if (symbol_read_needs_frame (sym.symbol))
424 innermost_block.update (sym);
425 write_exp_elt_opcode (pstate, OP_VAR_VALUE);
426 write_exp_elt_block (pstate, sym.block);
427 write_exp_elt_sym (pstate, sym.symbol);
428 write_exp_elt_opcode (pstate, OP_VAR_VALUE);
430 else if (is_a_field_of_this.type != NULL)
432 /* It hangs off of `this'. Must not inadvertently convert from a
433 method call to data ref. */
434 innermost_block.update (sym);
435 write_exp_elt_opcode (pstate, OP_THIS);
436 write_exp_elt_opcode (pstate, OP_THIS);
437 write_exp_elt_opcode (pstate, STRUCTOP_PTR);
438 write_exp_string (pstate, $1);
439 write_exp_elt_opcode (pstate, STRUCTOP_PTR);
443 /* Lookup foreign name in global static symbols. */
444 msymbol = lookup_bound_minimal_symbol (copy);
445 if (msymbol.minsym != NULL)
446 write_exp_msymbol (pstate, msymbol);
447 else if (!have_full_symbols () && !have_partial_symbols ())
448 error (_("No symbol table is loaded. Use the \"file\" command"));
450 error (_("No symbol \"%s\" in current context."), copy);
453 | TypeExp '.' IdentifierExp
454 { struct type *type = check_typedef ($1);
456 /* Check if the qualified name is in the global
457 context. However if the symbol has not already
458 been resolved, it's not likely to be found. */
459 if (TYPE_CODE (type) == TYPE_CODE_MODULE)
461 struct bound_minimal_symbol msymbol;
462 struct block_symbol sym;
463 const char *type_name = TYPE_SAFE_NAME (type);
464 int type_name_len = strlen (type_name);
466 = string_printf ("%.*s.%.*s",
467 type_name_len, type_name,
471 lookup_symbol (name.c_str (),
472 (const struct block *) NULL,
476 write_exp_elt_opcode (pstate, OP_VAR_VALUE);
477 write_exp_elt_block (pstate, sym.block);
478 write_exp_elt_sym (pstate, sym.symbol);
479 write_exp_elt_opcode (pstate, OP_VAR_VALUE);
483 msymbol = lookup_bound_minimal_symbol (name.c_str ());
484 if (msymbol.minsym != NULL)
485 write_exp_msymbol (pstate, msymbol);
486 else if (!have_full_symbols () && !have_partial_symbols ())
487 error (_("No symbol table is loaded. Use the \"file\" command."));
489 error (_("No symbol \"%s\" in current context."),
493 /* Check if the qualified name resolves as a member
494 of an aggregate or an enum type. */
495 if (!type_aggregate_p (type))
496 error (_("`%s' is not defined as an aggregate type."),
497 TYPE_SAFE_NAME (type));
499 write_exp_elt_opcode (pstate, OP_SCOPE);
500 write_exp_elt_type (pstate, type);
501 write_exp_string (pstate, $3);
502 write_exp_elt_opcode (pstate, OP_SCOPE);
505 { write_dollar_variable (pstate, $1); }
508 parse_number (pstate, $1.ptr, $1.length, 0, &val);
509 write_exp_elt_opcode (pstate, OP_LONG);
510 write_exp_elt_type (pstate, val.typed_val_int.type);
511 write_exp_elt_longcst (pstate,
512 (LONGEST) val.typed_val_int.val);
513 write_exp_elt_opcode (pstate, OP_LONG); }
515 { struct type *type = parse_d_type (pstate)->builtin_void;
516 type = lookup_pointer_type (type);
517 write_exp_elt_opcode (pstate, OP_LONG);
518 write_exp_elt_type (pstate, type);
519 write_exp_elt_longcst (pstate, (LONGEST) 0);
520 write_exp_elt_opcode (pstate, OP_LONG); }
522 { write_exp_elt_opcode (pstate, OP_BOOL);
523 write_exp_elt_longcst (pstate, (LONGEST) 1);
524 write_exp_elt_opcode (pstate, OP_BOOL); }
526 { write_exp_elt_opcode (pstate, OP_BOOL);
527 write_exp_elt_longcst (pstate, (LONGEST) 0);
528 write_exp_elt_opcode (pstate, OP_BOOL); }
530 { write_exp_elt_opcode (pstate, OP_LONG);
531 write_exp_elt_type (pstate, $1.type);
532 write_exp_elt_longcst (pstate, (LONGEST)($1.val));
533 write_exp_elt_opcode (pstate, OP_LONG); }
535 { write_exp_elt_opcode (pstate, OP_FLOAT);
536 write_exp_elt_type (pstate, $1.type);
537 write_exp_elt_floatcst (pstate, $1.val);
538 write_exp_elt_opcode (pstate, OP_FLOAT); }
540 { struct stoken_vector vec;
543 write_exp_string_vector (pstate, $1.type, &vec); }
546 write_exp_string_vector (pstate, 0, &$1);
547 for (i = 0; i < $1.len; ++i)
548 free ($1.tokens[i].ptr);
551 { write_exp_elt_opcode (pstate, OP_ARRAY);
552 write_exp_elt_longcst (pstate, (LONGEST) 0);
553 write_exp_elt_longcst (pstate, (LONGEST) $1 - 1);
554 write_exp_elt_opcode (pstate, OP_ARRAY); }
555 | TYPEOF_KEYWORD '(' Expression ')'
556 { write_exp_elt_opcode (pstate, OP_TYPEOF); }
560 '[' ArgumentList_opt ']'
561 { $$ = arglist_len; }
570 { /* We copy the string here, and not in the
571 lexer, to guarantee that we do not leak a
572 string. Note that we follow the
573 NUL-termination convention of the
575 struct typed_stoken *vec = XNEW (struct typed_stoken);
580 vec->length = $1.length;
581 vec->ptr = (char *) malloc ($1.length + 1);
582 memcpy (vec->ptr, $1.ptr, $1.length + 1);
584 | StringExp STRING_LITERAL
585 { /* Note that we NUL-terminate here, but just
590 = XRESIZEVEC (struct typed_stoken, $$.tokens, $$.len);
592 p = (char *) malloc ($2.length + 1);
593 memcpy (p, $2.ptr, $2.length + 1);
595 $$.tokens[$$.len - 1].type = $2.type;
596 $$.tokens[$$.len - 1].length = $2.length;
597 $$.tokens[$$.len - 1].ptr = p;
603 { /* Do nothing. */ }
605 { write_exp_elt_opcode (pstate, OP_TYPE);
606 write_exp_elt_type (pstate, $1);
607 write_exp_elt_opcode (pstate, OP_TYPE); }
608 | BasicType BasicType2
609 { $$ = follow_types ($1);
610 write_exp_elt_opcode (pstate, OP_TYPE);
611 write_exp_elt_type (pstate, $$);
612 write_exp_elt_opcode (pstate, OP_TYPE);
618 { push_type (tp_pointer); }
620 { push_type (tp_pointer); }
621 | '[' INTEGER_LITERAL ']'
622 { push_type_int ($2.val);
623 push_type (tp_array); }
624 | '[' INTEGER_LITERAL ']' BasicType2
625 { push_type_int ($2.val);
626 push_type (tp_array); }
636 /* Return true if the type is aggregate-like. */
639 type_aggregate_p (struct type *type)
641 return (TYPE_CODE (type) == TYPE_CODE_STRUCT
642 || TYPE_CODE (type) == TYPE_CODE_UNION
643 || TYPE_CODE (type) == TYPE_CODE_MODULE
644 || (TYPE_CODE (type) == TYPE_CODE_ENUM
645 && TYPE_DECLARED_CLASS (type)));
648 /* Take care of parsing a number (anything that starts with a digit).
649 Set yylval and return the token type; update lexptr.
650 LEN is the number of characters in it. */
652 /*** Needs some error checking for the float case ***/
655 parse_number (struct parser_state *ps, const char *p,
656 int len, int parsed_float, YYSTYPE *putithere)
664 int base = input_radix;
668 /* We have found a "L" or "U" suffix. */
669 int found_suffix = 0;
672 struct type *signed_type;
673 struct type *unsigned_type;
679 /* Strip out all embedded '_' before passing to parse_float. */
680 s = (char *) alloca (len + 1);
691 /* Check suffix for `i' , `fi' or `li' (idouble, ifloat or ireal). */
692 if (len >= 1 && tolower (s[len - 1]) == 'i')
694 if (len >= 2 && tolower (s[len - 2]) == 'f')
696 putithere->typed_val_float.type
697 = parse_d_type (ps)->builtin_ifloat;
700 else if (len >= 2 && tolower (s[len - 2]) == 'l')
702 putithere->typed_val_float.type
703 = parse_d_type (ps)->builtin_ireal;
708 putithere->typed_val_float.type
709 = parse_d_type (ps)->builtin_idouble;
713 /* Check suffix for `f' or `l'' (float or real). */
714 else if (len >= 1 && tolower (s[len - 1]) == 'f')
716 putithere->typed_val_float.type
717 = parse_d_type (ps)->builtin_float;
720 else if (len >= 1 && tolower (s[len - 1]) == 'l')
722 putithere->typed_val_float.type
723 = parse_d_type (ps)->builtin_real;
726 /* Default type if no suffix. */
729 putithere->typed_val_float.type
730 = parse_d_type (ps)->builtin_double;
733 if (!parse_float (s, len,
734 putithere->typed_val_float.type,
735 putithere->typed_val_float.val))
738 return FLOAT_LITERAL;
741 /* Handle base-switching prefixes 0x, 0b, 0 */
774 continue; /* Ignore embedded '_'. */
775 if (c >= 'A' && c <= 'Z')
777 if (c != 'l' && c != 'u')
779 if (c >= '0' && c <= '9')
787 if (base > 10 && c >= 'a' && c <= 'f')
791 n += i = c - 'a' + 10;
793 else if (c == 'l' && long_p == 0)
798 else if (c == 'u' && unsigned_p == 0)
804 return ERROR; /* Char not a digit */
807 return ERROR; /* Invalid digit in this base. */
808 /* Portably test for integer overflow. */
809 if (c != 'l' && c != 'u')
811 ULONGEST n2 = prevn * base;
812 if ((n2 / base != prevn) || (n2 + i < prevn))
813 error (_("Numeric constant too large."));
818 /* An integer constant is an int or a long. An L suffix forces it to
819 be long, and a U suffix forces it to be unsigned. To figure out
820 whether it fits, we shift it right and see whether anything remains.
821 Note that we can't shift sizeof (LONGEST) * HOST_CHAR_BIT bits or
822 more in one operation, because many compilers will warn about such a
823 shift (which always produces a zero result). To deal with the case
824 where it is we just always shift the value more than once, with fewer
826 un = (ULONGEST) n >> 2;
827 if (long_p == 0 && (un >> 30) == 0)
829 high_bit = ((ULONGEST) 1) << 31;
830 signed_type = parse_d_type (ps)->builtin_int;
831 /* For decimal notation, keep the sign of the worked out type. */
832 if (base == 10 && !unsigned_p)
833 unsigned_type = parse_d_type (ps)->builtin_long;
835 unsigned_type = parse_d_type (ps)->builtin_uint;
840 if (sizeof (ULONGEST) * HOST_CHAR_BIT < 64)
841 /* A long long does not fit in a LONGEST. */
842 shift = (sizeof (ULONGEST) * HOST_CHAR_BIT - 1);
845 high_bit = (ULONGEST) 1 << shift;
846 signed_type = parse_d_type (ps)->builtin_long;
847 unsigned_type = parse_d_type (ps)->builtin_ulong;
850 putithere->typed_val_int.val = n;
852 /* If the high bit of the worked out type is set then this number
853 has to be unsigned_type. */
854 if (unsigned_p || (n & high_bit))
855 putithere->typed_val_int.type = unsigned_type;
857 putithere->typed_val_int.type = signed_type;
859 return INTEGER_LITERAL;
862 /* Temporary obstack used for holding strings. */
863 static struct obstack tempbuf;
864 static int tempbuf_init;
866 /* Parse a string or character literal from TOKPTR. The string or
867 character may be wide or unicode. *OUTPTR is set to just after the
868 end of the literal in the input string. The resulting token is
869 stored in VALUE. This returns a token value, either STRING or
870 CHAR, depending on what was parsed. *HOST_CHARS is set to the
871 number of host characters in the literal. */
874 parse_string_or_char (const char *tokptr, const char **outptr,
875 struct typed_stoken *value, int *host_chars)
879 /* Build the gdb internal form of the input string in tempbuf. Note
880 that the buffer is null byte terminated *only* for the
881 convenience of debugging gdb itself and printing the buffer
882 contents when the buffer contains no embedded nulls. Gdb does
883 not depend upon the buffer being null byte terminated, it uses
884 the length string instead. This allows gdb to handle C strings
885 (as well as strings in other languages) with embedded null
891 obstack_free (&tempbuf, NULL);
892 obstack_init (&tempbuf);
894 /* Skip the quote. */
906 *host_chars += c_parse_escape (&tokptr, &tempbuf);
912 obstack_1grow (&tempbuf, c);
914 /* FIXME: this does the wrong thing with multi-byte host
915 characters. We could use mbrlen here, but that would
916 make "set host-charset" a bit less useful. */
921 if (*tokptr != quote)
923 if (quote == '"' || quote == '`')
924 error (_("Unterminated string in expression."));
926 error (_("Unmatched single quote."));
930 /* FIXME: should instead use own language string_type enum
931 and handle D-specific string suffixes here. */
933 value->type = C_CHAR;
935 value->type = C_STRING;
937 value->ptr = (char *) obstack_base (&tempbuf);
938 value->length = obstack_object_size (&tempbuf);
942 return quote == '\'' ? CHARACTER_LITERAL : STRING_LITERAL;
949 enum exp_opcode opcode;
952 static const struct token tokentab3[] =
954 {"^^=", ASSIGN_MODIFY, BINOP_EXP},
955 {"<<=", ASSIGN_MODIFY, BINOP_LSH},
956 {">>=", ASSIGN_MODIFY, BINOP_RSH},
959 static const struct token tokentab2[] =
961 {"+=", ASSIGN_MODIFY, BINOP_ADD},
962 {"-=", ASSIGN_MODIFY, BINOP_SUB},
963 {"*=", ASSIGN_MODIFY, BINOP_MUL},
964 {"/=", ASSIGN_MODIFY, BINOP_DIV},
965 {"%=", ASSIGN_MODIFY, BINOP_REM},
966 {"|=", ASSIGN_MODIFY, BINOP_BITWISE_IOR},
967 {"&=", ASSIGN_MODIFY, BINOP_BITWISE_AND},
968 {"^=", ASSIGN_MODIFY, BINOP_BITWISE_XOR},
969 {"++", INCREMENT, BINOP_END},
970 {"--", DECREMENT, BINOP_END},
971 {"&&", ANDAND, BINOP_END},
972 {"||", OROR, BINOP_END},
973 {"^^", HATHAT, BINOP_END},
974 {"<<", LSH, BINOP_END},
975 {">>", RSH, BINOP_END},
976 {"==", EQUAL, BINOP_END},
977 {"!=", NOTEQUAL, BINOP_END},
978 {"<=", LEQ, BINOP_END},
979 {">=", GEQ, BINOP_END},
980 {"..", DOTDOT, BINOP_END},
983 /* Identifier-like tokens. */
984 static const struct token ident_tokens[] =
986 {"is", IDENTITY, BINOP_END},
987 {"!is", NOTIDENTITY, BINOP_END},
989 {"cast", CAST_KEYWORD, OP_NULL},
990 {"const", CONST_KEYWORD, OP_NULL},
991 {"immutable", IMMUTABLE_KEYWORD, OP_NULL},
992 {"shared", SHARED_KEYWORD, OP_NULL},
993 {"super", SUPER_KEYWORD, OP_NULL},
995 {"null", NULL_KEYWORD, OP_NULL},
996 {"true", TRUE_KEYWORD, OP_NULL},
997 {"false", FALSE_KEYWORD, OP_NULL},
999 {"init", INIT_KEYWORD, OP_NULL},
1000 {"sizeof", SIZEOF_KEYWORD, OP_NULL},
1001 {"typeof", TYPEOF_KEYWORD, OP_NULL},
1002 {"typeid", TYPEID_KEYWORD, OP_NULL},
1004 {"delegate", DELEGATE_KEYWORD, OP_NULL},
1005 {"function", FUNCTION_KEYWORD, OP_NULL},
1006 {"struct", STRUCT_KEYWORD, OP_NULL},
1007 {"union", UNION_KEYWORD, OP_NULL},
1008 {"class", CLASS_KEYWORD, OP_NULL},
1009 {"interface", INTERFACE_KEYWORD, OP_NULL},
1010 {"enum", ENUM_KEYWORD, OP_NULL},
1011 {"template", TEMPLATE_KEYWORD, OP_NULL},
1014 /* This is set if a NAME token appeared at the very end of the input
1015 string, with no whitespace separating the name from the EOF. This
1016 is used only when parsing to do field name completion. */
1017 static int saw_name_at_eof;
1019 /* This is set if the previously-returned token was a structure operator.
1020 This is used only when parsing to do field name completion. */
1021 static int last_was_structop;
1023 /* Depth of parentheses. */
1024 static int paren_depth;
1026 /* Read one token, getting characters through lexptr. */
1029 lex_one_token (struct parser_state *par_state)
1034 const char *tokstart;
1035 int saw_structop = last_was_structop;
1038 last_was_structop = 0;
1042 prev_lexptr = lexptr;
1045 /* See if it is a special token of length 3. */
1046 for (i = 0; i < sizeof tokentab3 / sizeof tokentab3[0]; i++)
1047 if (strncmp (tokstart, tokentab3[i].oper, 3) == 0)
1050 yylval.opcode = tokentab3[i].opcode;
1051 return tokentab3[i].token;
1054 /* See if it is a special token of length 2. */
1055 for (i = 0; i < sizeof tokentab2 / sizeof tokentab2[0]; i++)
1056 if (strncmp (tokstart, tokentab2[i].oper, 2) == 0)
1059 yylval.opcode = tokentab2[i].opcode;
1060 return tokentab2[i].token;
1063 switch (c = *tokstart)
1066 /* If we're parsing for field name completion, and the previous
1067 token allows such completion, return a COMPLETE token.
1068 Otherwise, we were already scanning the original text, and
1069 we're really done. */
1070 if (saw_name_at_eof)
1072 saw_name_at_eof = 0;
1075 else if (saw_structop)
1094 if (paren_depth == 0)
1101 if (pstate->comma_terminates && paren_depth == 0)
1107 /* Might be a floating point number. */
1108 if (lexptr[1] < '0' || lexptr[1] > '9')
1110 if (parse_completion)
1111 last_was_structop = 1;
1112 goto symbol; /* Nope, must be a symbol. */
1127 /* It's a number. */
1128 int got_dot = 0, got_e = 0, toktype;
1129 const char *p = tokstart;
1130 int hex = input_radix > 10;
1132 if (c == '0' && (p[1] == 'x' || p[1] == 'X'))
1140 /* Hex exponents start with 'p', because 'e' is a valid hex
1141 digit and thus does not indicate a floating point number
1142 when the radix is hex. */
1143 if ((!hex && !got_e && tolower (p[0]) == 'e')
1144 || (hex && !got_e && tolower (p[0] == 'p')))
1145 got_dot = got_e = 1;
1146 /* A '.' always indicates a decimal floating point number
1147 regardless of the radix. If we have a '..' then its the
1148 end of the number and the beginning of a slice. */
1149 else if (!got_dot && (p[0] == '.' && p[1] != '.'))
1151 /* This is the sign of the exponent, not the end of the number. */
1152 else if (got_e && (tolower (p[-1]) == 'e' || tolower (p[-1]) == 'p')
1153 && (*p == '-' || *p == '+'))
1155 /* We will take any letters or digits, ignoring any embedded '_'.
1156 parse_number will complain if past the radix, or if L or U are
1158 else if ((*p < '0' || *p > '9') && (*p != '_')
1159 && ((*p < 'a' || *p > 'z') && (*p < 'A' || *p > 'Z')))
1163 toktype = parse_number (par_state, tokstart, p - tokstart,
1164 got_dot|got_e, &yylval);
1165 if (toktype == ERROR)
1167 char *err_copy = (char *) alloca (p - tokstart + 1);
1169 memcpy (err_copy, tokstart, p - tokstart);
1170 err_copy[p - tokstart] = 0;
1171 error (_("Invalid number \"%s\"."), err_copy);
1179 const char *p = &tokstart[1];
1180 size_t len = strlen ("entry");
1182 while (isspace (*p))
1184 if (strncmp (p, "entry", len) == 0 && !isalnum (p[len])
1218 int result = parse_string_or_char (tokstart, &lexptr, &yylval.tsval,
1220 if (result == CHARACTER_LITERAL)
1223 error (_("Empty character constant."));
1224 else if (host_len > 2 && c == '\'')
1227 namelen = lexptr - tokstart - 1;
1230 else if (host_len > 1)
1231 error (_("Invalid character constant."));
1237 if (!(c == '_' || c == '$'
1238 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
1239 /* We must have come across a bad character (e.g. ';'). */
1240 error (_("Invalid character '%c' in expression"), c);
1242 /* It's a name. See how long it is. */
1244 for (c = tokstart[namelen];
1245 (c == '_' || c == '$' || (c >= '0' && c <= '9')
1246 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));)
1247 c = tokstart[++namelen];
1249 /* The token "if" terminates the expression and is NOT
1250 removed from the input stream. */
1251 if (namelen == 2 && tokstart[0] == 'i' && tokstart[1] == 'f')
1254 /* For the same reason (breakpoint conditions), "thread N"
1255 terminates the expression. "thread" could be an identifier, but
1256 an identifier is never followed by a number without intervening
1257 punctuation. "task" is similar. Handle abbreviations of these,
1258 similarly to breakpoint.c:find_condition_and_thread. */
1260 && (strncmp (tokstart, "thread", namelen) == 0
1261 || strncmp (tokstart, "task", namelen) == 0)
1262 && (tokstart[namelen] == ' ' || tokstart[namelen] == '\t'))
1264 const char *p = tokstart + namelen + 1;
1266 while (*p == ' ' || *p == '\t')
1268 if (*p >= '0' && *p <= '9')
1276 yylval.sval.ptr = tokstart;
1277 yylval.sval.length = namelen;
1279 /* Catch specific keywords. */
1280 copy = copy_name (yylval.sval);
1281 for (i = 0; i < sizeof ident_tokens / sizeof ident_tokens[0]; i++)
1282 if (strcmp (copy, ident_tokens[i].oper) == 0)
1284 /* It is ok to always set this, even though we don't always
1285 strictly need to. */
1286 yylval.opcode = ident_tokens[i].opcode;
1287 return ident_tokens[i].token;
1290 if (*tokstart == '$')
1291 return DOLLAR_VARIABLE;
1294 = language_lookup_primitive_type (par_state->language (),
1295 par_state->gdbarch (), copy);
1296 if (yylval.tsym.type != NULL)
1299 /* Input names that aren't symbols but ARE valid hex numbers,
1300 when the input radix permits them, can be names or numbers
1301 depending on the parse. Note we support radixes > 16 here. */
1302 if ((tokstart[0] >= 'a' && tokstart[0] < 'a' + input_radix - 10)
1303 || (tokstart[0] >= 'A' && tokstart[0] < 'A' + input_radix - 10))
1305 YYSTYPE newlval; /* Its value is ignored. */
1306 int hextype = parse_number (par_state, tokstart, namelen, 0, &newlval);
1307 if (hextype == INTEGER_LITERAL)
1311 if (parse_completion && *lexptr == '\0')
1312 saw_name_at_eof = 1;
1317 /* An object of this type is pushed on a FIFO by the "outer" lexer. */
1318 struct token_and_value
1325 /* A FIFO of tokens that have been read but not yet returned to the
1327 static std::vector<token_and_value> token_fifo;
1329 /* Non-zero if the lexer should return tokens from the FIFO. */
1332 /* Temporary storage for yylex; this holds symbol names as they are
1334 static auto_obstack name_obstack;
1336 /* Classify an IDENTIFIER token. The contents of the token are in `yylval'.
1337 Updates yylval and returns the new token type. BLOCK is the block
1338 in which lookups start; this can be NULL to mean the global scope. */
1341 classify_name (struct parser_state *par_state, const struct block *block)
1343 struct block_symbol sym;
1345 struct field_of_this_result is_a_field_of_this;
1347 copy = copy_name (yylval.sval);
1349 sym = lookup_symbol (copy, block, VAR_DOMAIN, &is_a_field_of_this);
1350 if (sym.symbol && SYMBOL_CLASS (sym.symbol) == LOC_TYPEDEF)
1352 yylval.tsym.type = SYMBOL_TYPE (sym.symbol);
1355 else if (sym.symbol == NULL)
1357 /* Look-up first for a module name, then a type. */
1358 sym = lookup_symbol (copy, block, MODULE_DOMAIN, NULL);
1359 if (sym.symbol == NULL)
1360 sym = lookup_symbol (copy, block, STRUCT_DOMAIN, NULL);
1362 if (sym.symbol != NULL)
1364 yylval.tsym.type = SYMBOL_TYPE (sym.symbol);
1368 return UNKNOWN_NAME;
1374 /* Like classify_name, but used by the inner loop of the lexer, when a
1375 name might have already been seen. CONTEXT is the context type, or
1376 NULL if this is the first component of a name. */
1379 classify_inner_name (struct parser_state *par_state,
1380 const struct block *block, struct type *context)
1385 if (context == NULL)
1386 return classify_name (par_state, block);
1388 type = check_typedef (context);
1389 if (!type_aggregate_p (type))
1392 copy = copy_name (yylval.ssym.stoken);
1393 yylval.ssym.sym = d_lookup_nested_symbol (type, copy, block);
1395 if (yylval.ssym.sym.symbol == NULL)
1398 if (SYMBOL_CLASS (yylval.ssym.sym.symbol) == LOC_TYPEDEF)
1400 yylval.tsym.type = SYMBOL_TYPE (yylval.ssym.sym.symbol);
1407 /* The outer level of a two-level lexer. This calls the inner lexer
1408 to return tokens. It then either returns these tokens, or
1409 aggregates them into a larger token. This lets us work around a
1410 problem in our parsing approach, where the parser could not
1411 distinguish between qualified names and qualified types at the
1417 token_and_value current;
1419 struct type *context_type = NULL;
1420 int last_to_examine, next_to_examine, checkpoint;
1421 const struct block *search_block;
1423 if (popping && !token_fifo.empty ())
1427 /* Read the first token and decide what to do. */
1428 current.token = lex_one_token (pstate);
1429 if (current.token != IDENTIFIER && current.token != '.')
1430 return current.token;
1432 /* Read any sequence of alternating "." and identifier tokens into
1434 current.value = yylval;
1435 token_fifo.push_back (current);
1436 last_was_dot = current.token == '.';
1440 current.token = lex_one_token (pstate);
1441 current.value = yylval;
1442 token_fifo.push_back (current);
1444 if ((last_was_dot && current.token != IDENTIFIER)
1445 || (!last_was_dot && current.token != '.'))
1448 last_was_dot = !last_was_dot;
1452 /* We always read one extra token, so compute the number of tokens
1453 to examine accordingly. */
1454 last_to_examine = token_fifo.size () - 2;
1455 next_to_examine = 0;
1457 current = token_fifo[next_to_examine];
1460 /* If we are not dealing with a typename, now is the time to find out. */
1461 if (current.token == IDENTIFIER)
1463 yylval = current.value;
1464 current.token = classify_name (pstate, pstate->expression_context_block);
1465 current.value = yylval;
1468 /* If the IDENTIFIER is not known, it could be a package symbol,
1469 first try building up a name until we find the qualified module. */
1470 if (current.token == UNKNOWN_NAME)
1472 name_obstack.clear ();
1473 obstack_grow (&name_obstack, current.value.sval.ptr,
1474 current.value.sval.length);
1478 while (next_to_examine <= last_to_examine)
1480 token_and_value next;
1482 next = token_fifo[next_to_examine];
1485 if (next.token == IDENTIFIER && last_was_dot)
1487 /* Update the partial name we are constructing. */
1488 obstack_grow_str (&name_obstack, ".");
1489 obstack_grow (&name_obstack, next.value.sval.ptr,
1490 next.value.sval.length);
1492 yylval.sval.ptr = (char *) obstack_base (&name_obstack);
1493 yylval.sval.length = obstack_object_size (&name_obstack);
1495 current.token = classify_name (pstate,
1496 pstate->expression_context_block);
1497 current.value = yylval;
1499 /* We keep going until we find a TYPENAME. */
1500 if (current.token == TYPENAME)
1502 /* Install it as the first token in the FIFO. */
1503 token_fifo[0] = current;
1504 token_fifo.erase (token_fifo.begin () + 1,
1505 token_fifo.begin () + next_to_examine);
1509 else if (next.token == '.' && !last_was_dot)
1513 /* We've reached the end of the name. */
1518 /* Reset our current token back to the start, if we found nothing
1519 this means that we will just jump to do pop. */
1520 current = token_fifo[0];
1521 next_to_examine = 1;
1523 if (current.token != TYPENAME && current.token != '.')
1526 name_obstack.clear ();
1528 if (current.token == '.')
1529 search_block = NULL;
1532 gdb_assert (current.token == TYPENAME);
1533 search_block = pstate->expression_context_block;
1534 obstack_grow (&name_obstack, current.value.sval.ptr,
1535 current.value.sval.length);
1536 context_type = current.value.tsym.type;
1540 last_was_dot = current.token == '.';
1542 while (next_to_examine <= last_to_examine)
1544 token_and_value next;
1546 next = token_fifo[next_to_examine];
1549 if (next.token == IDENTIFIER && last_was_dot)
1553 yylval = next.value;
1554 classification = classify_inner_name (pstate, search_block,
1556 /* We keep going until we either run out of names, or until
1557 we have a qualified name which is not a type. */
1558 if (classification != TYPENAME && classification != IDENTIFIER)
1561 /* Accept up to this token. */
1562 checkpoint = next_to_examine;
1564 /* Update the partial name we are constructing. */
1565 if (context_type != NULL)
1567 /* We don't want to put a leading "." into the name. */
1568 obstack_grow_str (&name_obstack, ".");
1570 obstack_grow (&name_obstack, next.value.sval.ptr,
1571 next.value.sval.length);
1573 yylval.sval.ptr = (char *) obstack_base (&name_obstack);
1574 yylval.sval.length = obstack_object_size (&name_obstack);
1575 current.value = yylval;
1576 current.token = classification;
1580 if (classification == IDENTIFIER)
1583 context_type = yylval.tsym.type;
1585 else if (next.token == '.' && !last_was_dot)
1589 /* We've reached the end of the name. */
1594 /* If we have a replacement token, install it as the first token in
1595 the FIFO, and delete the other constituent tokens. */
1598 token_fifo[0] = current;
1600 token_fifo.erase (token_fifo.begin () + 1,
1601 token_fifo.begin () + checkpoint);
1605 current = token_fifo[0];
1606 token_fifo.erase (token_fifo.begin ());
1607 yylval = current.value;
1608 return current.token;
1612 d_parse (struct parser_state *par_state)
1614 /* Setting up the parser state. */
1615 scoped_restore pstate_restore = make_scoped_restore (&pstate);
1616 gdb_assert (par_state != NULL);
1619 scoped_restore restore_yydebug = make_scoped_restore (&yydebug,
1622 /* Initialize some state used by the lexer. */
1623 last_was_structop = 0;
1624 saw_name_at_eof = 0;
1627 token_fifo.clear ();
1629 name_obstack.clear ();
1635 yyerror (const char *msg)
1638 lexptr = prev_lexptr;
1640 error (_("A %s in expression, near `%s'."), msg, lexptr);