1 /* YACC parser for D expressions, for GDB.
3 Copyright (C) 2014-2019 Free Software Foundation, Inc.
5 This file is part of GDB.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 /* This file is derived from c-exp.y, jv-exp.y. */
22 /* Parse a D expression from text in a string,
23 and return the result as a struct expression pointer.
24 That structure contains arithmetic operations in reverse polish,
25 with constants represented by operations that are followed by special data.
26 See expression.h for the details of the format.
27 What is important here is that it can be built up sequentially
28 during the process of parsing; the lower levels of the tree always
29 come first in the result.
31 Note that malloc's and realloc's in this file are transformed to
32 xmalloc and xrealloc respectively by the same sed command in the
33 makefile that remaps any other malloc/realloc inserted by the parser
34 generator. Doing this with #defines and trying to control the interaction
35 with include files (<malloc.h> and <stdlib.h> for example) just became
36 too messy, particularly when such includes can be inserted at random
37 times by the parser generator. */
43 #include "expression.h"
45 #include "parser-defs.h"
49 #include "bfd.h" /* Required by objfiles.h. */
50 #include "symfile.h" /* Required by objfiles.h. */
51 #include "objfiles.h" /* For have_full_symbols and have_partial_symbols */
55 #define parse_type(ps) builtin_type (parse_gdbarch (ps))
56 #define parse_d_type(ps) builtin_d_type (parse_gdbarch (ps))
58 /* Remap normal yacc parser interface names (yyparse, yylex, yyerror,
60 #define GDB_YY_REMAP_PREFIX d_
63 /* The state of the parser, used internally when we are parsing the
66 static struct parser_state *pstate = NULL;
70 static int yylex (void);
72 static void yyerror (const char *);
74 static int type_aggregate_p (struct type *);
78 /* Although the yacc "value" of an expression is not used,
79 since the result is stored in the structure being created,
80 other node types do have values. */
94 struct typed_stoken tsval;
101 enum exp_opcode opcode;
102 struct stoken_vector svec;
106 /* YYSTYPE gets defined by %union */
107 static int parse_number (struct parser_state *, const char *,
108 int, int, YYSTYPE *);
111 %token <sval> IDENTIFIER UNKNOWN_NAME
112 %token <tsym> TYPENAME
113 %token <voidval> COMPLETE
115 /* A NAME_OR_INT is a symbol which is not known in the symbol table,
116 but which would parse as a valid number in the current input radix.
117 E.g. "c" when input_radix==16. Depending on the parse, it will be
118 turned into a name or into a number. */
120 %token <sval> NAME_OR_INT
122 %token <typed_val_int> INTEGER_LITERAL
123 %token <typed_val_float> FLOAT_LITERAL
124 %token <tsval> CHARACTER_LITERAL
125 %token <tsval> STRING_LITERAL
127 %type <svec> StringExp
128 %type <tval> BasicType TypeExp
129 %type <sval> IdentifierExp
130 %type <ival> ArrayLiteral
135 /* Keywords that have a constant value. */
136 %token TRUE_KEYWORD FALSE_KEYWORD NULL_KEYWORD
137 /* Class 'super' accessor. */
140 %token CAST_KEYWORD SIZEOF_KEYWORD
141 %token TYPEOF_KEYWORD TYPEID_KEYWORD
143 /* Comparison keywords. */
144 /* Type storage classes. */
145 %token IMMUTABLE_KEYWORD CONST_KEYWORD SHARED_KEYWORD
146 /* Non-scalar type keywords. */
147 %token STRUCT_KEYWORD UNION_KEYWORD
148 %token CLASS_KEYWORD INTERFACE_KEYWORD
149 %token ENUM_KEYWORD TEMPLATE_KEYWORD
150 %token DELEGATE_KEYWORD FUNCTION_KEYWORD
152 %token <sval> DOLLAR_VARIABLE
154 %token <opcode> ASSIGN_MODIFY
157 %right '=' ASSIGN_MODIFY
164 %left EQUAL NOTEQUAL '<' '>' LEQ GEQ
169 %left IDENTITY NOTIDENTITY
170 %right INCREMENT DECREMENT
182 /* Expressions, including the comma operator. */
190 | AssignExpression ',' CommaExpression
191 { write_exp_elt_opcode (pstate, BINOP_COMMA); }
195 ConditionalExpression
196 | ConditionalExpression '=' AssignExpression
197 { write_exp_elt_opcode (pstate, BINOP_ASSIGN); }
198 | ConditionalExpression ASSIGN_MODIFY AssignExpression
199 { write_exp_elt_opcode (pstate, BINOP_ASSIGN_MODIFY);
200 write_exp_elt_opcode (pstate, $2);
201 write_exp_elt_opcode (pstate, BINOP_ASSIGN_MODIFY); }
204 ConditionalExpression:
206 | OrOrExpression '?' Expression ':' ConditionalExpression
207 { write_exp_elt_opcode (pstate, TERNOP_COND); }
212 | OrOrExpression OROR AndAndExpression
213 { write_exp_elt_opcode (pstate, BINOP_LOGICAL_OR); }
218 | AndAndExpression ANDAND OrExpression
219 { write_exp_elt_opcode (pstate, BINOP_LOGICAL_AND); }
224 | OrExpression '|' XorExpression
225 { write_exp_elt_opcode (pstate, BINOP_BITWISE_IOR); }
230 | XorExpression '^' AndExpression
231 { write_exp_elt_opcode (pstate, BINOP_BITWISE_XOR); }
236 | AndExpression '&' CmpExpression
237 { write_exp_elt_opcode (pstate, BINOP_BITWISE_AND); }
248 ShiftExpression EQUAL ShiftExpression
249 { write_exp_elt_opcode (pstate, BINOP_EQUAL); }
250 | ShiftExpression NOTEQUAL ShiftExpression
251 { write_exp_elt_opcode (pstate, BINOP_NOTEQUAL); }
255 ShiftExpression IDENTITY ShiftExpression
256 { write_exp_elt_opcode (pstate, BINOP_EQUAL); }
257 | ShiftExpression NOTIDENTITY ShiftExpression
258 { write_exp_elt_opcode (pstate, BINOP_NOTEQUAL); }
262 ShiftExpression '<' ShiftExpression
263 { write_exp_elt_opcode (pstate, BINOP_LESS); }
264 | ShiftExpression LEQ ShiftExpression
265 { write_exp_elt_opcode (pstate, BINOP_LEQ); }
266 | ShiftExpression '>' ShiftExpression
267 { write_exp_elt_opcode (pstate, BINOP_GTR); }
268 | ShiftExpression GEQ ShiftExpression
269 { write_exp_elt_opcode (pstate, BINOP_GEQ); }
274 | ShiftExpression LSH AddExpression
275 { write_exp_elt_opcode (pstate, BINOP_LSH); }
276 | ShiftExpression RSH AddExpression
277 { write_exp_elt_opcode (pstate, BINOP_RSH); }
282 | AddExpression '+' MulExpression
283 { write_exp_elt_opcode (pstate, BINOP_ADD); }
284 | AddExpression '-' MulExpression
285 { write_exp_elt_opcode (pstate, BINOP_SUB); }
286 | AddExpression '~' MulExpression
287 { write_exp_elt_opcode (pstate, BINOP_CONCAT); }
292 | MulExpression '*' UnaryExpression
293 { write_exp_elt_opcode (pstate, BINOP_MUL); }
294 | MulExpression '/' UnaryExpression
295 { write_exp_elt_opcode (pstate, BINOP_DIV); }
296 | MulExpression '%' UnaryExpression
297 { write_exp_elt_opcode (pstate, BINOP_REM); }
301 { write_exp_elt_opcode (pstate, UNOP_ADDR); }
302 | INCREMENT UnaryExpression
303 { write_exp_elt_opcode (pstate, UNOP_PREINCREMENT); }
304 | DECREMENT UnaryExpression
305 { write_exp_elt_opcode (pstate, UNOP_PREDECREMENT); }
306 | '*' UnaryExpression
307 { write_exp_elt_opcode (pstate, UNOP_IND); }
308 | '-' UnaryExpression
309 { write_exp_elt_opcode (pstate, UNOP_NEG); }
310 | '+' UnaryExpression
311 { write_exp_elt_opcode (pstate, UNOP_PLUS); }
312 | '!' UnaryExpression
313 { write_exp_elt_opcode (pstate, UNOP_LOGICAL_NOT); }
314 | '~' UnaryExpression
315 { write_exp_elt_opcode (pstate, UNOP_COMPLEMENT); }
316 | TypeExp '.' SIZEOF_KEYWORD
317 { write_exp_elt_opcode (pstate, UNOP_SIZEOF); }
323 CAST_KEYWORD '(' TypeExp ')' UnaryExpression
324 { write_exp_elt_opcode (pstate, UNOP_CAST_TYPE); }
325 /* C style cast is illegal D, but is still recognised in
326 the grammar, so we keep this around for convenience. */
327 | '(' TypeExp ')' UnaryExpression
328 { write_exp_elt_opcode (pstate, UNOP_CAST_TYPE); }
334 | PostfixExpression HATHAT UnaryExpression
335 { write_exp_elt_opcode (pstate, BINOP_EXP); }
340 | PostfixExpression '.' COMPLETE
342 mark_struct_expression (pstate);
343 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT);
346 write_exp_string (pstate, s);
347 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT); }
348 | PostfixExpression '.' IDENTIFIER
349 { write_exp_elt_opcode (pstate, STRUCTOP_STRUCT);
350 write_exp_string (pstate, $3);
351 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT); }
352 | PostfixExpression '.' IDENTIFIER COMPLETE
353 { mark_struct_expression (pstate);
354 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT);
355 write_exp_string (pstate, $3);
356 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT); }
357 | PostfixExpression '.' SIZEOF_KEYWORD
358 { write_exp_elt_opcode (pstate, UNOP_SIZEOF); }
359 | PostfixExpression INCREMENT
360 { write_exp_elt_opcode (pstate, UNOP_POSTINCREMENT); }
361 | PostfixExpression DECREMENT
362 { write_exp_elt_opcode (pstate, UNOP_POSTDECREMENT); }
371 | ArgumentList ',' AssignExpression
382 PostfixExpression '('
383 { start_arglist (); }
385 { write_exp_elt_opcode (pstate, OP_FUNCALL);
386 write_exp_elt_longcst (pstate, (LONGEST) end_arglist ());
387 write_exp_elt_opcode (pstate, OP_FUNCALL); }
391 PostfixExpression '[' ArgumentList ']'
392 { if (arglist_len > 0)
394 write_exp_elt_opcode (pstate, MULTI_SUBSCRIPT);
395 write_exp_elt_longcst (pstate, (LONGEST) arglist_len);
396 write_exp_elt_opcode (pstate, MULTI_SUBSCRIPT);
399 write_exp_elt_opcode (pstate, BINOP_SUBSCRIPT);
404 PostfixExpression '[' ']'
405 { /* Do nothing. */ }
406 | PostfixExpression '[' AssignExpression DOTDOT AssignExpression ']'
407 { write_exp_elt_opcode (pstate, TERNOP_SLICE); }
412 { /* Do nothing. */ }
414 { struct bound_minimal_symbol msymbol;
415 char *copy = copy_name ($1);
416 struct field_of_this_result is_a_field_of_this;
417 struct block_symbol sym;
419 /* Handle VAR, which could be local or global. */
420 sym = lookup_symbol (copy, expression_context_block, VAR_DOMAIN,
421 &is_a_field_of_this);
422 if (sym.symbol && SYMBOL_CLASS (sym.symbol) != LOC_TYPEDEF)
424 if (symbol_read_needs_frame (sym.symbol))
425 innermost_block.update (sym);
426 write_exp_elt_opcode (pstate, OP_VAR_VALUE);
427 write_exp_elt_block (pstate, sym.block);
428 write_exp_elt_sym (pstate, sym.symbol);
429 write_exp_elt_opcode (pstate, OP_VAR_VALUE);
431 else if (is_a_field_of_this.type != NULL)
433 /* It hangs off of `this'. Must not inadvertently convert from a
434 method call to data ref. */
435 innermost_block.update (sym);
436 write_exp_elt_opcode (pstate, OP_THIS);
437 write_exp_elt_opcode (pstate, OP_THIS);
438 write_exp_elt_opcode (pstate, STRUCTOP_PTR);
439 write_exp_string (pstate, $1);
440 write_exp_elt_opcode (pstate, STRUCTOP_PTR);
444 /* Lookup foreign name in global static symbols. */
445 msymbol = lookup_bound_minimal_symbol (copy);
446 if (msymbol.minsym != NULL)
447 write_exp_msymbol (pstate, msymbol);
448 else if (!have_full_symbols () && !have_partial_symbols ())
449 error (_("No symbol table is loaded. Use the \"file\" command"));
451 error (_("No symbol \"%s\" in current context."), copy);
454 | TypeExp '.' IdentifierExp
455 { struct type *type = check_typedef ($1);
457 /* Check if the qualified name is in the global
458 context. However if the symbol has not already
459 been resolved, it's not likely to be found. */
460 if (TYPE_CODE (type) == TYPE_CODE_MODULE)
462 struct bound_minimal_symbol msymbol;
463 struct block_symbol sym;
464 const char *type_name = TYPE_SAFE_NAME (type);
465 int type_name_len = strlen (type_name);
467 = string_printf ("%.*s.%.*s",
468 type_name_len, type_name,
472 lookup_symbol (name.c_str (),
473 (const struct block *) NULL,
477 write_exp_elt_opcode (pstate, OP_VAR_VALUE);
478 write_exp_elt_block (pstate, sym.block);
479 write_exp_elt_sym (pstate, sym.symbol);
480 write_exp_elt_opcode (pstate, OP_VAR_VALUE);
484 msymbol = lookup_bound_minimal_symbol (name.c_str ());
485 if (msymbol.minsym != NULL)
486 write_exp_msymbol (pstate, msymbol);
487 else if (!have_full_symbols () && !have_partial_symbols ())
488 error (_("No symbol table is loaded. Use the \"file\" command."));
490 error (_("No symbol \"%s\" in current context."),
494 /* Check if the qualified name resolves as a member
495 of an aggregate or an enum type. */
496 if (!type_aggregate_p (type))
497 error (_("`%s' is not defined as an aggregate type."),
498 TYPE_SAFE_NAME (type));
500 write_exp_elt_opcode (pstate, OP_SCOPE);
501 write_exp_elt_type (pstate, type);
502 write_exp_string (pstate, $3);
503 write_exp_elt_opcode (pstate, OP_SCOPE);
506 { write_dollar_variable (pstate, $1); }
509 parse_number (pstate, $1.ptr, $1.length, 0, &val);
510 write_exp_elt_opcode (pstate, OP_LONG);
511 write_exp_elt_type (pstate, val.typed_val_int.type);
512 write_exp_elt_longcst (pstate,
513 (LONGEST) val.typed_val_int.val);
514 write_exp_elt_opcode (pstate, OP_LONG); }
516 { struct type *type = parse_d_type (pstate)->builtin_void;
517 type = lookup_pointer_type (type);
518 write_exp_elt_opcode (pstate, OP_LONG);
519 write_exp_elt_type (pstate, type);
520 write_exp_elt_longcst (pstate, (LONGEST) 0);
521 write_exp_elt_opcode (pstate, OP_LONG); }
523 { write_exp_elt_opcode (pstate, OP_BOOL);
524 write_exp_elt_longcst (pstate, (LONGEST) 1);
525 write_exp_elt_opcode (pstate, OP_BOOL); }
527 { write_exp_elt_opcode (pstate, OP_BOOL);
528 write_exp_elt_longcst (pstate, (LONGEST) 0);
529 write_exp_elt_opcode (pstate, OP_BOOL); }
531 { write_exp_elt_opcode (pstate, OP_LONG);
532 write_exp_elt_type (pstate, $1.type);
533 write_exp_elt_longcst (pstate, (LONGEST)($1.val));
534 write_exp_elt_opcode (pstate, OP_LONG); }
536 { write_exp_elt_opcode (pstate, OP_FLOAT);
537 write_exp_elt_type (pstate, $1.type);
538 write_exp_elt_floatcst (pstate, $1.val);
539 write_exp_elt_opcode (pstate, OP_FLOAT); }
541 { struct stoken_vector vec;
544 write_exp_string_vector (pstate, $1.type, &vec); }
547 write_exp_string_vector (pstate, 0, &$1);
548 for (i = 0; i < $1.len; ++i)
549 free ($1.tokens[i].ptr);
552 { write_exp_elt_opcode (pstate, OP_ARRAY);
553 write_exp_elt_longcst (pstate, (LONGEST) 0);
554 write_exp_elt_longcst (pstate, (LONGEST) $1 - 1);
555 write_exp_elt_opcode (pstate, OP_ARRAY); }
556 | TYPEOF_KEYWORD '(' Expression ')'
557 { write_exp_elt_opcode (pstate, OP_TYPEOF); }
561 '[' ArgumentList_opt ']'
562 { $$ = arglist_len; }
571 { /* We copy the string here, and not in the
572 lexer, to guarantee that we do not leak a
573 string. Note that we follow the
574 NUL-termination convention of the
576 struct typed_stoken *vec = XNEW (struct typed_stoken);
581 vec->length = $1.length;
582 vec->ptr = (char *) malloc ($1.length + 1);
583 memcpy (vec->ptr, $1.ptr, $1.length + 1);
585 | StringExp STRING_LITERAL
586 { /* Note that we NUL-terminate here, but just
591 = XRESIZEVEC (struct typed_stoken, $$.tokens, $$.len);
593 p = (char *) malloc ($2.length + 1);
594 memcpy (p, $2.ptr, $2.length + 1);
596 $$.tokens[$$.len - 1].type = $2.type;
597 $$.tokens[$$.len - 1].length = $2.length;
598 $$.tokens[$$.len - 1].ptr = p;
604 { /* Do nothing. */ }
606 { write_exp_elt_opcode (pstate, OP_TYPE);
607 write_exp_elt_type (pstate, $1);
608 write_exp_elt_opcode (pstate, OP_TYPE); }
609 | BasicType BasicType2
610 { $$ = follow_types ($1);
611 write_exp_elt_opcode (pstate, OP_TYPE);
612 write_exp_elt_type (pstate, $$);
613 write_exp_elt_opcode (pstate, OP_TYPE);
619 { push_type (tp_pointer); }
621 { push_type (tp_pointer); }
622 | '[' INTEGER_LITERAL ']'
623 { push_type_int ($2.val);
624 push_type (tp_array); }
625 | '[' INTEGER_LITERAL ']' BasicType2
626 { push_type_int ($2.val);
627 push_type (tp_array); }
637 /* Return true if the type is aggregate-like. */
640 type_aggregate_p (struct type *type)
642 return (TYPE_CODE (type) == TYPE_CODE_STRUCT
643 || TYPE_CODE (type) == TYPE_CODE_UNION
644 || TYPE_CODE (type) == TYPE_CODE_MODULE
645 || (TYPE_CODE (type) == TYPE_CODE_ENUM
646 && TYPE_DECLARED_CLASS (type)));
649 /* Take care of parsing a number (anything that starts with a digit).
650 Set yylval and return the token type; update lexptr.
651 LEN is the number of characters in it. */
653 /*** Needs some error checking for the float case ***/
656 parse_number (struct parser_state *ps, const char *p,
657 int len, int parsed_float, YYSTYPE *putithere)
665 int base = input_radix;
669 /* We have found a "L" or "U" suffix. */
670 int found_suffix = 0;
673 struct type *signed_type;
674 struct type *unsigned_type;
680 /* Strip out all embedded '_' before passing to parse_float. */
681 s = (char *) alloca (len + 1);
692 /* Check suffix for `i' , `fi' or `li' (idouble, ifloat or ireal). */
693 if (len >= 1 && tolower (s[len - 1]) == 'i')
695 if (len >= 2 && tolower (s[len - 2]) == 'f')
697 putithere->typed_val_float.type
698 = parse_d_type (ps)->builtin_ifloat;
701 else if (len >= 2 && tolower (s[len - 2]) == 'l')
703 putithere->typed_val_float.type
704 = parse_d_type (ps)->builtin_ireal;
709 putithere->typed_val_float.type
710 = parse_d_type (ps)->builtin_idouble;
714 /* Check suffix for `f' or `l'' (float or real). */
715 else if (len >= 1 && tolower (s[len - 1]) == 'f')
717 putithere->typed_val_float.type
718 = parse_d_type (ps)->builtin_float;
721 else if (len >= 1 && tolower (s[len - 1]) == 'l')
723 putithere->typed_val_float.type
724 = parse_d_type (ps)->builtin_real;
727 /* Default type if no suffix. */
730 putithere->typed_val_float.type
731 = parse_d_type (ps)->builtin_double;
734 if (!parse_float (s, len,
735 putithere->typed_val_float.type,
736 putithere->typed_val_float.val))
739 return FLOAT_LITERAL;
742 /* Handle base-switching prefixes 0x, 0b, 0 */
775 continue; /* Ignore embedded '_'. */
776 if (c >= 'A' && c <= 'Z')
778 if (c != 'l' && c != 'u')
780 if (c >= '0' && c <= '9')
788 if (base > 10 && c >= 'a' && c <= 'f')
792 n += i = c - 'a' + 10;
794 else if (c == 'l' && long_p == 0)
799 else if (c == 'u' && unsigned_p == 0)
805 return ERROR; /* Char not a digit */
808 return ERROR; /* Invalid digit in this base. */
809 /* Portably test for integer overflow. */
810 if (c != 'l' && c != 'u')
812 ULONGEST n2 = prevn * base;
813 if ((n2 / base != prevn) || (n2 + i < prevn))
814 error (_("Numeric constant too large."));
819 /* An integer constant is an int or a long. An L suffix forces it to
820 be long, and a U suffix forces it to be unsigned. To figure out
821 whether it fits, we shift it right and see whether anything remains.
822 Note that we can't shift sizeof (LONGEST) * HOST_CHAR_BIT bits or
823 more in one operation, because many compilers will warn about such a
824 shift (which always produces a zero result). To deal with the case
825 where it is we just always shift the value more than once, with fewer
827 un = (ULONGEST) n >> 2;
828 if (long_p == 0 && (un >> 30) == 0)
830 high_bit = ((ULONGEST) 1) << 31;
831 signed_type = parse_d_type (ps)->builtin_int;
832 /* For decimal notation, keep the sign of the worked out type. */
833 if (base == 10 && !unsigned_p)
834 unsigned_type = parse_d_type (ps)->builtin_long;
836 unsigned_type = parse_d_type (ps)->builtin_uint;
841 if (sizeof (ULONGEST) * HOST_CHAR_BIT < 64)
842 /* A long long does not fit in a LONGEST. */
843 shift = (sizeof (ULONGEST) * HOST_CHAR_BIT - 1);
846 high_bit = (ULONGEST) 1 << shift;
847 signed_type = parse_d_type (ps)->builtin_long;
848 unsigned_type = parse_d_type (ps)->builtin_ulong;
851 putithere->typed_val_int.val = n;
853 /* If the high bit of the worked out type is set then this number
854 has to be unsigned_type. */
855 if (unsigned_p || (n & high_bit))
856 putithere->typed_val_int.type = unsigned_type;
858 putithere->typed_val_int.type = signed_type;
860 return INTEGER_LITERAL;
863 /* Temporary obstack used for holding strings. */
864 static struct obstack tempbuf;
865 static int tempbuf_init;
867 /* Parse a string or character literal from TOKPTR. The string or
868 character may be wide or unicode. *OUTPTR is set to just after the
869 end of the literal in the input string. The resulting token is
870 stored in VALUE. This returns a token value, either STRING or
871 CHAR, depending on what was parsed. *HOST_CHARS is set to the
872 number of host characters in the literal. */
875 parse_string_or_char (const char *tokptr, const char **outptr,
876 struct typed_stoken *value, int *host_chars)
880 /* Build the gdb internal form of the input string in tempbuf. Note
881 that the buffer is null byte terminated *only* for the
882 convenience of debugging gdb itself and printing the buffer
883 contents when the buffer contains no embedded nulls. Gdb does
884 not depend upon the buffer being null byte terminated, it uses
885 the length string instead. This allows gdb to handle C strings
886 (as well as strings in other languages) with embedded null
892 obstack_free (&tempbuf, NULL);
893 obstack_init (&tempbuf);
895 /* Skip the quote. */
907 *host_chars += c_parse_escape (&tokptr, &tempbuf);
913 obstack_1grow (&tempbuf, c);
915 /* FIXME: this does the wrong thing with multi-byte host
916 characters. We could use mbrlen here, but that would
917 make "set host-charset" a bit less useful. */
922 if (*tokptr != quote)
924 if (quote == '"' || quote == '`')
925 error (_("Unterminated string in expression."));
927 error (_("Unmatched single quote."));
931 /* FIXME: should instead use own language string_type enum
932 and handle D-specific string suffixes here. */
934 value->type = C_CHAR;
936 value->type = C_STRING;
938 value->ptr = (char *) obstack_base (&tempbuf);
939 value->length = obstack_object_size (&tempbuf);
943 return quote == '\'' ? CHARACTER_LITERAL : STRING_LITERAL;
950 enum exp_opcode opcode;
953 static const struct token tokentab3[] =
955 {"^^=", ASSIGN_MODIFY, BINOP_EXP},
956 {"<<=", ASSIGN_MODIFY, BINOP_LSH},
957 {">>=", ASSIGN_MODIFY, BINOP_RSH},
960 static const struct token tokentab2[] =
962 {"+=", ASSIGN_MODIFY, BINOP_ADD},
963 {"-=", ASSIGN_MODIFY, BINOP_SUB},
964 {"*=", ASSIGN_MODIFY, BINOP_MUL},
965 {"/=", ASSIGN_MODIFY, BINOP_DIV},
966 {"%=", ASSIGN_MODIFY, BINOP_REM},
967 {"|=", ASSIGN_MODIFY, BINOP_BITWISE_IOR},
968 {"&=", ASSIGN_MODIFY, BINOP_BITWISE_AND},
969 {"^=", ASSIGN_MODIFY, BINOP_BITWISE_XOR},
970 {"++", INCREMENT, BINOP_END},
971 {"--", DECREMENT, BINOP_END},
972 {"&&", ANDAND, BINOP_END},
973 {"||", OROR, BINOP_END},
974 {"^^", HATHAT, BINOP_END},
975 {"<<", LSH, BINOP_END},
976 {">>", RSH, BINOP_END},
977 {"==", EQUAL, BINOP_END},
978 {"!=", NOTEQUAL, BINOP_END},
979 {"<=", LEQ, BINOP_END},
980 {">=", GEQ, BINOP_END},
981 {"..", DOTDOT, BINOP_END},
984 /* Identifier-like tokens. */
985 static const struct token ident_tokens[] =
987 {"is", IDENTITY, BINOP_END},
988 {"!is", NOTIDENTITY, BINOP_END},
990 {"cast", CAST_KEYWORD, OP_NULL},
991 {"const", CONST_KEYWORD, OP_NULL},
992 {"immutable", IMMUTABLE_KEYWORD, OP_NULL},
993 {"shared", SHARED_KEYWORD, OP_NULL},
994 {"super", SUPER_KEYWORD, OP_NULL},
996 {"null", NULL_KEYWORD, OP_NULL},
997 {"true", TRUE_KEYWORD, OP_NULL},
998 {"false", FALSE_KEYWORD, OP_NULL},
1000 {"init", INIT_KEYWORD, OP_NULL},
1001 {"sizeof", SIZEOF_KEYWORD, OP_NULL},
1002 {"typeof", TYPEOF_KEYWORD, OP_NULL},
1003 {"typeid", TYPEID_KEYWORD, OP_NULL},
1005 {"delegate", DELEGATE_KEYWORD, OP_NULL},
1006 {"function", FUNCTION_KEYWORD, OP_NULL},
1007 {"struct", STRUCT_KEYWORD, OP_NULL},
1008 {"union", UNION_KEYWORD, OP_NULL},
1009 {"class", CLASS_KEYWORD, OP_NULL},
1010 {"interface", INTERFACE_KEYWORD, OP_NULL},
1011 {"enum", ENUM_KEYWORD, OP_NULL},
1012 {"template", TEMPLATE_KEYWORD, OP_NULL},
1015 /* This is set if a NAME token appeared at the very end of the input
1016 string, with no whitespace separating the name from the EOF. This
1017 is used only when parsing to do field name completion. */
1018 static int saw_name_at_eof;
1020 /* This is set if the previously-returned token was a structure operator.
1021 This is used only when parsing to do field name completion. */
1022 static int last_was_structop;
1024 /* Read one token, getting characters through lexptr. */
1027 lex_one_token (struct parser_state *par_state)
1032 const char *tokstart;
1033 int saw_structop = last_was_structop;
1036 last_was_structop = 0;
1040 prev_lexptr = lexptr;
1043 /* See if it is a special token of length 3. */
1044 for (i = 0; i < sizeof tokentab3 / sizeof tokentab3[0]; i++)
1045 if (strncmp (tokstart, tokentab3[i].oper, 3) == 0)
1048 yylval.opcode = tokentab3[i].opcode;
1049 return tokentab3[i].token;
1052 /* See if it is a special token of length 2. */
1053 for (i = 0; i < sizeof tokentab2 / sizeof tokentab2[0]; i++)
1054 if (strncmp (tokstart, tokentab2[i].oper, 2) == 0)
1057 yylval.opcode = tokentab2[i].opcode;
1058 return tokentab2[i].token;
1061 switch (c = *tokstart)
1064 /* If we're parsing for field name completion, and the previous
1065 token allows such completion, return a COMPLETE token.
1066 Otherwise, we were already scanning the original text, and
1067 we're really done. */
1068 if (saw_name_at_eof)
1070 saw_name_at_eof = 0;
1073 else if (saw_structop)
1092 if (paren_depth == 0)
1099 if (comma_terminates && paren_depth == 0)
1105 /* Might be a floating point number. */
1106 if (lexptr[1] < '0' || lexptr[1] > '9')
1108 if (parse_completion)
1109 last_was_structop = 1;
1110 goto symbol; /* Nope, must be a symbol. */
1125 /* It's a number. */
1126 int got_dot = 0, got_e = 0, toktype;
1127 const char *p = tokstart;
1128 int hex = input_radix > 10;
1130 if (c == '0' && (p[1] == 'x' || p[1] == 'X'))
1138 /* Hex exponents start with 'p', because 'e' is a valid hex
1139 digit and thus does not indicate a floating point number
1140 when the radix is hex. */
1141 if ((!hex && !got_e && tolower (p[0]) == 'e')
1142 || (hex && !got_e && tolower (p[0] == 'p')))
1143 got_dot = got_e = 1;
1144 /* A '.' always indicates a decimal floating point number
1145 regardless of the radix. If we have a '..' then its the
1146 end of the number and the beginning of a slice. */
1147 else if (!got_dot && (p[0] == '.' && p[1] != '.'))
1149 /* This is the sign of the exponent, not the end of the number. */
1150 else if (got_e && (tolower (p[-1]) == 'e' || tolower (p[-1]) == 'p')
1151 && (*p == '-' || *p == '+'))
1153 /* We will take any letters or digits, ignoring any embedded '_'.
1154 parse_number will complain if past the radix, or if L or U are
1156 else if ((*p < '0' || *p > '9') && (*p != '_')
1157 && ((*p < 'a' || *p > 'z') && (*p < 'A' || *p > 'Z')))
1161 toktype = parse_number (par_state, tokstart, p - tokstart,
1162 got_dot|got_e, &yylval);
1163 if (toktype == ERROR)
1165 char *err_copy = (char *) alloca (p - tokstart + 1);
1167 memcpy (err_copy, tokstart, p - tokstart);
1168 err_copy[p - tokstart] = 0;
1169 error (_("Invalid number \"%s\"."), err_copy);
1177 const char *p = &tokstart[1];
1178 size_t len = strlen ("entry");
1180 while (isspace (*p))
1182 if (strncmp (p, "entry", len) == 0 && !isalnum (p[len])
1216 int result = parse_string_or_char (tokstart, &lexptr, &yylval.tsval,
1218 if (result == CHARACTER_LITERAL)
1221 error (_("Empty character constant."));
1222 else if (host_len > 2 && c == '\'')
1225 namelen = lexptr - tokstart - 1;
1228 else if (host_len > 1)
1229 error (_("Invalid character constant."));
1235 if (!(c == '_' || c == '$'
1236 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
1237 /* We must have come across a bad character (e.g. ';'). */
1238 error (_("Invalid character '%c' in expression"), c);
1240 /* It's a name. See how long it is. */
1242 for (c = tokstart[namelen];
1243 (c == '_' || c == '$' || (c >= '0' && c <= '9')
1244 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));)
1245 c = tokstart[++namelen];
1247 /* The token "if" terminates the expression and is NOT
1248 removed from the input stream. */
1249 if (namelen == 2 && tokstart[0] == 'i' && tokstart[1] == 'f')
1252 /* For the same reason (breakpoint conditions), "thread N"
1253 terminates the expression. "thread" could be an identifier, but
1254 an identifier is never followed by a number without intervening
1255 punctuation. "task" is similar. Handle abbreviations of these,
1256 similarly to breakpoint.c:find_condition_and_thread. */
1258 && (strncmp (tokstart, "thread", namelen) == 0
1259 || strncmp (tokstart, "task", namelen) == 0)
1260 && (tokstart[namelen] == ' ' || tokstart[namelen] == '\t'))
1262 const char *p = tokstart + namelen + 1;
1264 while (*p == ' ' || *p == '\t')
1266 if (*p >= '0' && *p <= '9')
1274 yylval.sval.ptr = tokstart;
1275 yylval.sval.length = namelen;
1277 /* Catch specific keywords. */
1278 copy = copy_name (yylval.sval);
1279 for (i = 0; i < sizeof ident_tokens / sizeof ident_tokens[0]; i++)
1280 if (strcmp (copy, ident_tokens[i].oper) == 0)
1282 /* It is ok to always set this, even though we don't always
1283 strictly need to. */
1284 yylval.opcode = ident_tokens[i].opcode;
1285 return ident_tokens[i].token;
1288 if (*tokstart == '$')
1289 return DOLLAR_VARIABLE;
1292 = language_lookup_primitive_type (parse_language (par_state),
1293 parse_gdbarch (par_state), copy);
1294 if (yylval.tsym.type != NULL)
1297 /* Input names that aren't symbols but ARE valid hex numbers,
1298 when the input radix permits them, can be names or numbers
1299 depending on the parse. Note we support radixes > 16 here. */
1300 if ((tokstart[0] >= 'a' && tokstart[0] < 'a' + input_radix - 10)
1301 || (tokstart[0] >= 'A' && tokstart[0] < 'A' + input_radix - 10))
1303 YYSTYPE newlval; /* Its value is ignored. */
1304 int hextype = parse_number (par_state, tokstart, namelen, 0, &newlval);
1305 if (hextype == INTEGER_LITERAL)
1309 if (parse_completion && *lexptr == '\0')
1310 saw_name_at_eof = 1;
1315 /* An object of this type is pushed on a FIFO by the "outer" lexer. */
1316 struct token_and_value
1323 /* A FIFO of tokens that have been read but not yet returned to the
1325 static std::vector<token_and_value> token_fifo;
1327 /* Non-zero if the lexer should return tokens from the FIFO. */
1330 /* Temporary storage for yylex; this holds symbol names as they are
1332 static auto_obstack name_obstack;
1334 /* Classify an IDENTIFIER token. The contents of the token are in `yylval'.
1335 Updates yylval and returns the new token type. BLOCK is the block
1336 in which lookups start; this can be NULL to mean the global scope. */
1339 classify_name (struct parser_state *par_state, const struct block *block)
1341 struct block_symbol sym;
1343 struct field_of_this_result is_a_field_of_this;
1345 copy = copy_name (yylval.sval);
1347 sym = lookup_symbol (copy, block, VAR_DOMAIN, &is_a_field_of_this);
1348 if (sym.symbol && SYMBOL_CLASS (sym.symbol) == LOC_TYPEDEF)
1350 yylval.tsym.type = SYMBOL_TYPE (sym.symbol);
1353 else if (sym.symbol == NULL)
1355 /* Look-up first for a module name, then a type. */
1356 sym = lookup_symbol (copy, block, MODULE_DOMAIN, NULL);
1357 if (sym.symbol == NULL)
1358 sym = lookup_symbol (copy, block, STRUCT_DOMAIN, NULL);
1360 if (sym.symbol != NULL)
1362 yylval.tsym.type = SYMBOL_TYPE (sym.symbol);
1366 return UNKNOWN_NAME;
1372 /* Like classify_name, but used by the inner loop of the lexer, when a
1373 name might have already been seen. CONTEXT is the context type, or
1374 NULL if this is the first component of a name. */
1377 classify_inner_name (struct parser_state *par_state,
1378 const struct block *block, struct type *context)
1383 if (context == NULL)
1384 return classify_name (par_state, block);
1386 type = check_typedef (context);
1387 if (!type_aggregate_p (type))
1390 copy = copy_name (yylval.ssym.stoken);
1391 yylval.ssym.sym = d_lookup_nested_symbol (type, copy, block);
1393 if (yylval.ssym.sym.symbol == NULL)
1396 if (SYMBOL_CLASS (yylval.ssym.sym.symbol) == LOC_TYPEDEF)
1398 yylval.tsym.type = SYMBOL_TYPE (yylval.ssym.sym.symbol);
1405 /* The outer level of a two-level lexer. This calls the inner lexer
1406 to return tokens. It then either returns these tokens, or
1407 aggregates them into a larger token. This lets us work around a
1408 problem in our parsing approach, where the parser could not
1409 distinguish between qualified names and qualified types at the
1415 token_and_value current;
1417 struct type *context_type = NULL;
1418 int last_to_examine, next_to_examine, checkpoint;
1419 const struct block *search_block;
1421 if (popping && !token_fifo.empty ())
1425 /* Read the first token and decide what to do. */
1426 current.token = lex_one_token (pstate);
1427 if (current.token != IDENTIFIER && current.token != '.')
1428 return current.token;
1430 /* Read any sequence of alternating "." and identifier tokens into
1432 current.value = yylval;
1433 token_fifo.push_back (current);
1434 last_was_dot = current.token == '.';
1438 current.token = lex_one_token (pstate);
1439 current.value = yylval;
1440 token_fifo.push_back (current);
1442 if ((last_was_dot && current.token != IDENTIFIER)
1443 || (!last_was_dot && current.token != '.'))
1446 last_was_dot = !last_was_dot;
1450 /* We always read one extra token, so compute the number of tokens
1451 to examine accordingly. */
1452 last_to_examine = token_fifo.size () - 2;
1453 next_to_examine = 0;
1455 current = token_fifo[next_to_examine];
1458 /* If we are not dealing with a typename, now is the time to find out. */
1459 if (current.token == IDENTIFIER)
1461 yylval = current.value;
1462 current.token = classify_name (pstate, expression_context_block);
1463 current.value = yylval;
1466 /* If the IDENTIFIER is not known, it could be a package symbol,
1467 first try building up a name until we find the qualified module. */
1468 if (current.token == UNKNOWN_NAME)
1470 name_obstack.clear ();
1471 obstack_grow (&name_obstack, current.value.sval.ptr,
1472 current.value.sval.length);
1476 while (next_to_examine <= last_to_examine)
1478 token_and_value next;
1480 next = token_fifo[next_to_examine];
1483 if (next.token == IDENTIFIER && last_was_dot)
1485 /* Update the partial name we are constructing. */
1486 obstack_grow_str (&name_obstack, ".");
1487 obstack_grow (&name_obstack, next.value.sval.ptr,
1488 next.value.sval.length);
1490 yylval.sval.ptr = (char *) obstack_base (&name_obstack);
1491 yylval.sval.length = obstack_object_size (&name_obstack);
1493 current.token = classify_name (pstate, expression_context_block);
1494 current.value = yylval;
1496 /* We keep going until we find a TYPENAME. */
1497 if (current.token == TYPENAME)
1499 /* Install it as the first token in the FIFO. */
1500 token_fifo[0] = current;
1501 token_fifo.erase (token_fifo.begin () + 1,
1502 token_fifo.begin () + next_to_examine);
1506 else if (next.token == '.' && !last_was_dot)
1510 /* We've reached the end of the name. */
1515 /* Reset our current token back to the start, if we found nothing
1516 this means that we will just jump to do pop. */
1517 current = token_fifo[0];
1518 next_to_examine = 1;
1520 if (current.token != TYPENAME && current.token != '.')
1523 name_obstack.clear ();
1525 if (current.token == '.')
1526 search_block = NULL;
1529 gdb_assert (current.token == TYPENAME);
1530 search_block = expression_context_block;
1531 obstack_grow (&name_obstack, current.value.sval.ptr,
1532 current.value.sval.length);
1533 context_type = current.value.tsym.type;
1537 last_was_dot = current.token == '.';
1539 while (next_to_examine <= last_to_examine)
1541 token_and_value next;
1543 next = token_fifo[next_to_examine];
1546 if (next.token == IDENTIFIER && last_was_dot)
1550 yylval = next.value;
1551 classification = classify_inner_name (pstate, search_block,
1553 /* We keep going until we either run out of names, or until
1554 we have a qualified name which is not a type. */
1555 if (classification != TYPENAME && classification != IDENTIFIER)
1558 /* Accept up to this token. */
1559 checkpoint = next_to_examine;
1561 /* Update the partial name we are constructing. */
1562 if (context_type != NULL)
1564 /* We don't want to put a leading "." into the name. */
1565 obstack_grow_str (&name_obstack, ".");
1567 obstack_grow (&name_obstack, next.value.sval.ptr,
1568 next.value.sval.length);
1570 yylval.sval.ptr = (char *) obstack_base (&name_obstack);
1571 yylval.sval.length = obstack_object_size (&name_obstack);
1572 current.value = yylval;
1573 current.token = classification;
1577 if (classification == IDENTIFIER)
1580 context_type = yylval.tsym.type;
1582 else if (next.token == '.' && !last_was_dot)
1586 /* We've reached the end of the name. */
1591 /* If we have a replacement token, install it as the first token in
1592 the FIFO, and delete the other constituent tokens. */
1595 token_fifo[0] = current;
1597 token_fifo.erase (token_fifo.begin () + 1,
1598 token_fifo.begin () + checkpoint);
1602 current = token_fifo[0];
1603 token_fifo.erase (token_fifo.begin ());
1604 yylval = current.value;
1605 return current.token;
1609 d_parse (struct parser_state *par_state)
1611 /* Setting up the parser state. */
1612 scoped_restore pstate_restore = make_scoped_restore (&pstate);
1613 gdb_assert (par_state != NULL);
1616 scoped_restore restore_yydebug = make_scoped_restore (&yydebug,
1619 /* Initialize some state used by the lexer. */
1620 last_was_structop = 0;
1621 saw_name_at_eof = 0;
1623 token_fifo.clear ();
1625 name_obstack.clear ();
1631 yyerror (const char *msg)
1634 lexptr = prev_lexptr;
1636 error (_("A %s in expression, near `%s'."), msg, lexptr);