X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=parse.y;h=8461337eed87db6cddd33119740a0af9d24cd112;hb=3185942a5234e26ab13fa02f9c51d340cec514f8;hp=0f6d930a251c50e8fddc9f1d0d8f88f7c4234610;hpb=7117c2d221b2aed4ede8600f6a36b7c1454b4f55;p=platform%2Fupstream%2Fbash.git diff --git a/parse.y b/parse.y index 0f6d930..8461337 100644 --- a/parse.y +++ b/parse.y @@ -1,22 +1,22 @@ -/* Yacc grammar for bash. */ +/* parse.y - Yacc grammar for bash. */ -/* Copyright (C) 1989-2002 Free Software Foundation, Inc. +/* Copyright (C) 1989-2009 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell. - Bash is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2, or (at your option) any later - version. + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Bash is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with Bash; see the file LICENSE. If not, write to the Free Software - Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */ + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. 
+*/ %{ #include "config.h" @@ -40,6 +40,8 @@ #include "memalloc.h" +#include "bashintl.h" + #define NEED_STRFTIME_DECL /* used in externs.h */ #include "shell.h" @@ -48,6 +50,7 @@ #include "parser.h" #include "mailcheck.h" #include "test.h" +#include "builtins.h" #include "builtins/common.h" #include "builtins/builtext.h" @@ -69,6 +72,8 @@ #if defined (ALIAS) # include "alias.h" +#else +typedef void *alias_t; #endif /* ALIAS */ #if defined (PROMPT_STRING_DECODE) @@ -110,10 +115,9 @@ extern int extended_glob; extern int eof_encountered; extern int no_line_editing, running_under_emacs; extern int current_command_number; -extern int sourcelevel; +extern int sourcelevel, parse_and_execute_level; extern int posixly_correct; extern int last_command_exit_value; -extern int interrupt_immediately; extern char *shell_name, *current_host_name; extern char *dist_version; extern int patch_level; @@ -144,6 +148,7 @@ static int yy_readline_unget __P((int)); static int yy_string_get __P((void)); static int yy_string_unget __P((int)); +static void rewind_input_string __P((void)); static int yy_stream_get __P((void)); static int yy_stream_unget __P((int)); @@ -166,12 +171,13 @@ static int time_command_acceptable __P((void)); static int special_case_tokens __P((char *)); static int read_token __P((int)); static char *parse_matched_pair __P((int, int, int, int *, int)); +static char *parse_comsub __P((int, int, int, int *, int)); #if defined (ARRAY_VARS) static char *parse_compound_assignment __P((int *)); #endif #if defined (DPAREN_ARITHMETIC) || defined (ARITH_FOR_COMMAND) static int parse_dparen __P((int)); -static int parse_arith_cmd __P((char **)); +static int parse_arith_cmd __P((char **, int)); #endif #if defined (COND_COMMAND) static void cond_error __P((void)); @@ -201,10 +207,6 @@ static void reset_readline_prompt __P((void)); #endif static void print_prompt __P((void)); -#if defined (HISTORY) -char *history_delimiting_chars __P((void)); -#endif - #if defined 
(HANDLE_MULTIBYTE) static void set_line_mbstate __P((void)); static char *shell_input_line_property = NULL; @@ -239,6 +241,11 @@ int expand_aliases = 0; decode_prompt_string. */ int promptvars = 1; +/* If non-zero, $'...' and $"..." are expanded when they appear within + a ${...} expansion, even when the expansion appears within double + quotes. */ +int extended_quote = 1; + /* The decoded prompt string. Used if READLINE is not defined or if editing is turned off. Analogous to current_readline_prompt. */ static char *current_decoded_prompt; @@ -246,6 +253,12 @@ static char *current_decoded_prompt; /* The number of lines read from input while creating the current command. */ int current_command_line_count; +/* The token that currently denotes the end of parse. */ +int shell_eof_token; + +/* The token currently being read. */ +int current_token; + /* Variables to manage the task of reading here documents, because we need to defer the reading until after a complete command has been collected. */ static REDIRECT *redir_stack[10]; @@ -270,6 +283,33 @@ static int function_bstart; /* The line number in a script at which an arithmetic for command starts. */ static int arith_for_lineno; +/* The current parser state. */ +static int parser_state; + +/* The last read token, or NULL. read_token () uses this for context + checking. */ +static int last_read_token; + +/* The token read prior to last_read_token. */ +static int token_before_that; + +/* The token read prior to token_before_that. */ +static int two_tokens_ago; + +/* The line number in a script where the word in a `case WORD', `select WORD' + or `for WORD' begins. This is a nested command maximum, since the array + index is decremented after a case, select, or for command is parsed. */ +#define MAX_CASE_NEST 128 +static int word_lineno[MAX_CASE_NEST]; +static int word_top = -1; + +/* If non-zero, it is the token that we want read_token to return + regardless of what text is (or isn't) present to be read. 
This + is reset by read_token. If token_to_read == WORD or + ASSIGNMENT_WORD, yylval.word should be set to word_desc_to_read. */ +static int token_to_read; +static WORD_DESC *word_desc_to_read; + static REDIRECTEE redir; %} @@ -287,7 +327,7 @@ static REDIRECTEE redir; in the case that they are preceded by a list_terminator. Members of the second group are for [[...]] commands. Members of the third group are recognized only under special circumstances. */ -%token IF THEN ELSE ELIF FI CASE ESAC FOR SELECT WHILE UNTIL DO DONE FUNCTION +%token IF THEN ELSE ELIF FI CASE ESAC FOR SELECT WHILE UNTIL DO DONE FUNCTION COPROC %token COND_START COND_END COND_ERROR %token IN BANG TIME TIMEOPT @@ -297,8 +337,9 @@ static REDIRECTEE redir; %token ARITH_CMD ARITH_FOR_EXPRS %token COND_CMD %token AND_AND OR_OR GREATER_GREATER LESS_LESS LESS_AND LESS_LESS_LESS -%token GREATER_AND SEMI_SEMI LESS_LESS_MINUS AND_GREATER LESS_GREATER -%token GREATER_BAR +%token GREATER_AND SEMI_SEMI SEMI_AND SEMI_SEMI_AND +%token LESS_LESS_MINUS AND_GREATER AND_GREATER_GREATER LESS_GREATER +%token GREATER_BAR BAR_AND /* The types that the various syntactical units return. 
*/ @@ -309,18 +350,20 @@ static REDIRECTEE redir; %type arith_command %type cond_command %type arith_for_command +%type coproc %type function_def function_body if_command elif_clause subshell %type redirection redirection_list %type simple_command_element %type word_list pattern %type pattern_list case_clause_sequence case_clause %type timespec +%type list_terminator %start inputunit %left '&' ';' '\n' yacc_EOF %left AND_AND OR_OR -%right '|' +%right '|' BAR_AND %% inputunit: simple_list simple_list_terminator @@ -330,6 +373,8 @@ inputunit: simple_list simple_list_terminator global_command = $1; eof_encountered = 0; /* discard_parser_constructs (0); */ + if (parser_state & PST_CMDSUBST) + parser_state |= PST_EOFTOKEN; YYACCEPT; } | '\n' @@ -337,6 +382,8 @@ inputunit: simple_list simple_list_terminator /* Case of regular command, but not a very interesting one. Return a NULL command. */ global_command = (COMMAND *)NULL; + if (parser_state & PST_CMDSUBST) + parser_state |= PST_EOFTOKEN; YYACCEPT; } | error '\n' @@ -345,7 +392,7 @@ inputunit: simple_list simple_list_terminator global_command = (COMMAND *)NULL; eof_encountered = 0; /* discard_parser_constructs (1); */ - if (interactive) + if (interactive && parse_and_execute_level == 0) { YYACCEPT; } @@ -501,6 +548,11 @@ redirection: '>' WORD redir.filename = $2; $$ = make_redirection (1, r_err_and_out, redir); } + | AND_GREATER_GREATER WORD + { + redir.filename = $2; + $$ = make_redirection (1, r_append_err_and_out, redir); + } | NUMBER LESS_GREATER WORD { redir.filename = $3; @@ -574,6 +626,8 @@ command: simple_command } | function_def { $$ = $1; } + | coproc + { $$ = $1; } ; shell_command: for_command @@ -601,65 +655,116 @@ shell_command: for_command ; for_command: FOR WORD newline_list DO compound_list DONE - { $$ = make_for_command ($2, add_string_to_list ("\"$@\"", (WORD_LIST *)NULL), $5); } + { + $$ = make_for_command ($2, add_string_to_list ("\"$@\"", (WORD_LIST *)NULL), $5, word_lineno[word_top]); + if 
(word_top > 0) word_top--; + } | FOR WORD newline_list '{' compound_list '}' - { $$ = make_for_command ($2, add_string_to_list ("$@", (WORD_LIST *)NULL), $5); } + { + $$ = make_for_command ($2, add_string_to_list ("\"$@\"", (WORD_LIST *)NULL), $5, word_lineno[word_top]); + if (word_top > 0) word_top--; + } | FOR WORD ';' newline_list DO compound_list DONE - { $$ = make_for_command ($2, add_string_to_list ("\"$@\"", (WORD_LIST *)NULL), $6); } + { + $$ = make_for_command ($2, add_string_to_list ("\"$@\"", (WORD_LIST *)NULL), $6, word_lineno[word_top]); + if (word_top > 0) word_top--; + } | FOR WORD ';' newline_list '{' compound_list '}' - { $$ = make_for_command ($2, add_string_to_list ("\"$@\"", (WORD_LIST *)NULL), $6); } + { + $$ = make_for_command ($2, add_string_to_list ("\"$@\"", (WORD_LIST *)NULL), $6, word_lineno[word_top]); + if (word_top > 0) word_top--; + } | FOR WORD newline_list IN word_list list_terminator newline_list DO compound_list DONE - { $$ = make_for_command ($2, REVERSE_LIST ($5, WORD_LIST *), $9); } + { + $$ = make_for_command ($2, REVERSE_LIST ($5, WORD_LIST *), $9, word_lineno[word_top]); + if (word_top > 0) word_top--; + } | FOR WORD newline_list IN word_list list_terminator newline_list '{' compound_list '}' - { $$ = make_for_command ($2, REVERSE_LIST ($5, WORD_LIST *), $9); } + { + $$ = make_for_command ($2, REVERSE_LIST ($5, WORD_LIST *), $9, word_lineno[word_top]); + if (word_top > 0) word_top--; + } | FOR WORD newline_list IN list_terminator newline_list DO compound_list DONE - { $$ = make_for_command ($2, (WORD_LIST *)NULL, $8); } + { + $$ = make_for_command ($2, (WORD_LIST *)NULL, $8, word_lineno[word_top]); + if (word_top > 0) word_top--; + } | FOR WORD newline_list IN list_terminator newline_list '{' compound_list '}' - { $$ = make_for_command ($2, (WORD_LIST *)NULL, $8); } + { + $$ = make_for_command ($2, (WORD_LIST *)NULL, $8, word_lineno[word_top]); + if (word_top > 0) word_top--; + } ; arith_for_command: FOR ARITH_FOR_EXPRS 
list_terminator newline_list DO compound_list DONE - { $$ = make_arith_for_command ($2, $6, arith_for_lineno); } + { + $$ = make_arith_for_command ($2, $6, arith_for_lineno); + if (word_top > 0) word_top--; + } | FOR ARITH_FOR_EXPRS list_terminator newline_list '{' compound_list '}' - { $$ = make_arith_for_command ($2, $6, arith_for_lineno); } + { + $$ = make_arith_for_command ($2, $6, arith_for_lineno); + if (word_top > 0) word_top--; + } | FOR ARITH_FOR_EXPRS DO compound_list DONE - { $$ = make_arith_for_command ($2, $4, arith_for_lineno); } + { + $$ = make_arith_for_command ($2, $4, arith_for_lineno); + if (word_top > 0) word_top--; + } | FOR ARITH_FOR_EXPRS '{' compound_list '}' - { $$ = make_arith_for_command ($2, $4, arith_for_lineno); } + { + $$ = make_arith_for_command ($2, $4, arith_for_lineno); + if (word_top > 0) word_top--; + } ; select_command: SELECT WORD newline_list DO list DONE { - $$ = make_select_command ($2, add_string_to_list ("\"$@\"", (WORD_LIST *)NULL), $5); + $$ = make_select_command ($2, add_string_to_list ("\"$@\"", (WORD_LIST *)NULL), $5, word_lineno[word_top]); + if (word_top > 0) word_top--; } | SELECT WORD newline_list '{' list '}' { - $$ = make_select_command ($2, add_string_to_list ("$@", (WORD_LIST *)NULL), $5); + $$ = make_select_command ($2, add_string_to_list ("\"$@\"", (WORD_LIST *)NULL), $5, word_lineno[word_top]); + if (word_top > 0) word_top--; } | SELECT WORD ';' newline_list DO list DONE { - $$ = make_select_command ($2, add_string_to_list ("\"$@\"", (WORD_LIST *)NULL), $6); + $$ = make_select_command ($2, add_string_to_list ("\"$@\"", (WORD_LIST *)NULL), $6, word_lineno[word_top]); + if (word_top > 0) word_top--; } | SELECT WORD ';' newline_list '{' list '}' { - $$ = make_select_command ($2, add_string_to_list ("\"$@\"", (WORD_LIST *)NULL), $6); + $$ = make_select_command ($2, add_string_to_list ("\"$@\"", (WORD_LIST *)NULL), $6, word_lineno[word_top]); + if (word_top > 0) word_top--; } | SELECT WORD newline_list IN 
word_list list_terminator newline_list DO list DONE { - $$ = make_select_command ($2, REVERSE_LIST ($5, WORD_LIST *), $9); + $$ = make_select_command ($2, REVERSE_LIST ($5, WORD_LIST *), $9, word_lineno[word_top]); + if (word_top > 0) word_top--; } | SELECT WORD newline_list IN word_list list_terminator newline_list '{' list '}' { - $$ = make_select_command ($2, REVERSE_LIST ($5, WORD_LIST *), $9); + $$ = make_select_command ($2, REVERSE_LIST ($5, WORD_LIST *), $9, word_lineno[word_top]); + if (word_top > 0) word_top--; } ; case_command: CASE WORD newline_list IN newline_list ESAC - { $$ = make_case_command ($2, (PATTERN_LIST *)NULL); } + { + $$ = make_case_command ($2, (PATTERN_LIST *)NULL, word_lineno[word_top]); + if (word_top > 0) word_top--; + } | CASE WORD newline_list IN case_clause_sequence newline_list ESAC - { $$ = make_case_command ($2, $5); } + { + $$ = make_case_command ($2, $5, word_lineno[word_top]); + if (word_top > 0) word_top--; + } | CASE WORD newline_list IN case_clause ESAC - { $$ = make_case_command ($2, $5); } + { + $$ = make_case_command ($2, $5, word_lineno[word_top]); + if (word_top > 0) word_top--; + } ; function_def: WORD '(' ')' newline_list function_body @@ -672,7 +777,6 @@ function_def: WORD '(' ')' newline_list function_body { $$ = make_function_def ($2, $4, function_dstart, function_bstart); } ; - function_body: shell_command { $$ = $1; } | shell_command redirection_list @@ -713,6 +817,57 @@ subshell: '(' compound_list ')' } ; +coproc: COPROC shell_command + { + $$ = make_coproc_command ("COPROC", $2); + $$->flags |= CMD_WANT_SUBSHELL|CMD_COPROC_SUBSHELL; + } + | COPROC shell_command redirection_list + { + COMMAND *tc; + + tc = $2; + if (tc->redirects) + { + register REDIRECT *t; + for (t = tc->redirects; t->next; t = t->next) + ; + t->next = $3; + } + else + tc->redirects = $3; + $$ = make_coproc_command ("COPROC", $2); + $$->flags |= CMD_WANT_SUBSHELL|CMD_COPROC_SUBSHELL; + } + | COPROC WORD shell_command + { + $$ = 
make_coproc_command ($2->word, $3); + $$->flags |= CMD_WANT_SUBSHELL|CMD_COPROC_SUBSHELL; + } + | COPROC WORD shell_command redirection_list + { + COMMAND *tc; + + tc = $3; + if (tc->redirects) + { + register REDIRECT *t; + for (t = tc->redirects; t->next; t = t->next) + ; + t->next = $4; + } + else + tc->redirects = $4; + $$ = make_coproc_command ($2->word, $3); + $$->flags |= CMD_WANT_SUBSHELL|CMD_COPROC_SUBSHELL; + } + | COPROC simple_command + { + $$ = make_coproc_command ("COPROC", clean_simple_command ($2)); + $$->flags |= CMD_WANT_SUBSHELL|CMD_COPROC_SUBSHELL; + } + ; + if_command: IF compound_list THEN compound_list FI { $$ = make_if_command ($2, $4, (COMMAND *)NULL); } | IF compound_list THEN compound_list ELSE compound_list FI @@ -758,8 +913,17 @@ pattern_list: newline_list pattern ')' compound_list ; case_clause_sequence: pattern_list SEMI_SEMI + { $$ = $1; } | case_clause_sequence pattern_list SEMI_SEMI { $2->next = $1; $$ = $2; } + | pattern_list SEMI_AND + { $1->flags |= CASEPAT_FALLTHROUGH; $$ = $1; } + | case_clause_sequence pattern_list SEMI_AND + { $2->flags |= CASEPAT_FALLTHROUGH; $2->next = $1; $$ = $2; } + | pattern_list SEMI_SEMI_AND + { $1->flags |= CASEPAT_TESTNEXT; $$ = $1; } + | case_clause_sequence pattern_list SEMI_SEMI_AND + { $2->flags |= CASEPAT_TESTNEXT; $2->next = $1; $$ = $2; } ; pattern: WORD @@ -824,8 +988,11 @@ simple_list_terminator: '\n' ; list_terminator:'\n' + { $$ = '\n'; } | ';' + { $$ = ';'; } | yacc_EOF + { $$ = yacc_EOF; } ; newline_list: @@ -843,6 +1010,13 @@ simple_list: simple_list1 $$ = $1; if (need_here_doc) gather_here_documents (); + if ((parser_state & PST_CMDSUBST) && current_token == shell_eof_token) + { + global_command = $1; + eof_encountered = 0; + rewind_input_string (); + YYACCEPT; + } } | simple_list1 '&' { @@ -852,12 +1026,26 @@ simple_list: simple_list1 $$ = command_connect ($1, (COMMAND *)NULL, '&'); if (need_here_doc) gather_here_documents (); + if ((parser_state & PST_CMDSUBST) && current_token == 
shell_eof_token) + { + global_command = $1; + eof_encountered = 0; + rewind_input_string (); + YYACCEPT; + } } | simple_list1 ';' { $$ = $1; if (need_here_doc) gather_here_documents (); + if ((parser_state & PST_CMDSUBST) && current_token == shell_eof_token) + { + global_command = $1; + eof_encountered = 0; + rewind_input_string (); + YYACCEPT; + } } ; @@ -883,29 +1071,72 @@ pipeline_command: pipeline { $$ = $1; } | BANG pipeline { - $2->flags |= CMD_INVERT_RETURN; + if ($2) + $2->flags |= CMD_INVERT_RETURN; $$ = $2; } | timespec pipeline { - $2->flags |= $1; + if ($2) + $2->flags |= $1; $$ = $2; } | timespec BANG pipeline { - $3->flags |= $1|CMD_INVERT_RETURN; + if ($3) + $3->flags |= $1|CMD_INVERT_RETURN; $$ = $3; } | BANG timespec pipeline { - $3->flags |= $2|CMD_INVERT_RETURN; + if ($3) + $3->flags |= $2|CMD_INVERT_RETURN; $$ = $3; } + | timespec list_terminator + { + ELEMENT x; + + /* Boy, this is unclean. `time' by itself can + time a null command. We cheat and push a + newline back if the list_terminator was a newline + to avoid the double-newline problem (one to + terminate this, one to terminate the command) */ + x.word = 0; + x.redirect = 0; + $$ = make_simple_command (x, (COMMAND *)NULL); + $$->flags |= $1; + /* XXX - let's cheat and push a newline back */ + if ($2 == '\n') + token_to_read = '\n'; + } + ; -pipeline: - pipeline '|' newline_list pipeline +pipeline: pipeline '|' newline_list pipeline { $$ = command_connect ($1, $4, '|'); } + | pipeline BAR_AND newline_list pipeline + { + /* Make cmd1 |& cmd2 equivalent to cmd1 2>&1 | cmd2 */ + COMMAND *tc; + REDIRECTEE rd; + REDIRECT *r; + + tc = $1; + rd.dest = 1; + r = make_redirection (2, r_duplicating_output, rd); + if (tc->redirects) + { + register REDIRECT *t; + for (t = tc->redirects; t->next; t = t->next) + ; + t->next = r; + } + else + tc->redirects = r; + + $$ = command_connect ($1, $4, '|'); + } | command { $$ = $1; } ; @@ -917,46 +1148,20 @@ timespec: TIME ; %% -/* Possible states for the parser 
that require it to do special things. */ -#define PST_CASEPAT 0x001 /* in a case pattern list */ -#define PST_ALEXPNEXT 0x002 /* expand next word for aliases */ -#define PST_ALLOWOPNBRC 0x004 /* allow open brace for function def */ -#define PST_NEEDCLOSBRC 0x008 /* need close brace */ -#define PST_DBLPAREN 0x010 /* double-paren parsing */ -#define PST_SUBSHELL 0x020 /* ( ... ) subshell */ -#define PST_CMDSUBST 0x040 /* $( ... ) command substitution */ -#define PST_CASESTMT 0x080 /* parsing a case statement */ -#define PST_CONDCMD 0x100 /* parsing a [[...]] command */ -#define PST_CONDEXPR 0x200 /* parsing the guts of [[...]] */ -#define PST_ARITHFOR 0x400 /* parsing an arithmetic for command */ - /* Initial size to allocate for tokens, and the amount to grow them by. */ #define TOKEN_DEFAULT_INITIAL_SIZE 496 #define TOKEN_DEFAULT_GROW_SIZE 512 -/* The token currently being read. */ -static int current_token; - -/* The last read token, or NULL. read_token () uses this for context - checking. */ -static int last_read_token; - -/* The token read prior to last_read_token. */ -static int token_before_that; - -/* The token read prior to token_before_that. */ -static int two_tokens_ago; - -/* If non-zero, it is the token that we want read_token to return - regardless of what text is (or isn't) present to be read. This - is reset by read_token. If token_to_read == WORD or - ASSIGNMENT_WORD, yylval.word should be set to word_desc_to_read. */ -static int token_to_read; -static WORD_DESC *word_desc_to_read; +/* Should we call prompt_again? */ +#define SHOULD_PROMPT() \ + (interactive && (bash_input.type == st_stdin || bash_input.type == st_stream)) -/* The current parser state. */ -static int parser_state; +#if defined (ALIAS) +# define expanding_alias() (pushed_string_list && pushed_string_list->expander) +#else +# define expanding_alias() 0 +#endif /* Global var is non-zero when end of file has been reached. 
*/ int EOF_Reached = 0; @@ -1104,10 +1309,12 @@ yy_readline_get () old_sigint = (SigHandler *)set_signal_handler (SIGINT, sigint_sighandler); interrupt_immediately++; } + terminate_immediately = 1; current_readline_line = readline (current_readline_prompt ? current_readline_prompt : ""); + terminate_immediately = 0; if (signal_is_ignored (SIGINT) == 0 && old_sigint) { interrupt_immediately--; @@ -1218,6 +1425,33 @@ with_input_from_string (string, name) init_yy_io (yy_string_get, yy_string_unget, st_string, name, location); } +/* Count the number of characters we've consumed from bash_input.location.string + and read into shell_input_line, but have not returned from shell_getc. + That is the true input location. Rewind bash_input.location.string by + that number of characters, so it points to the last character actually + consumed by the parser. */ +static void +rewind_input_string () +{ + int xchars; + + /* number of unconsumed characters in the input -- XXX need to take newlines + into account, e.g., $(...\n) */ + xchars = shell_input_line_len - shell_input_line_index; + if (bash_input.location.string[-1] == '\n') + xchars++; + + /* XXX - how to reflect bash_input.location.string back to string passed to + parse_and_execute or xparse_dolparen? xparse_dolparen needs to know how + far into the string we parsed. parse_and_execute knows where bash_input. + location.string is, and how far from orig_string that is -- that's the + number of characters the command consumed. */ + + /* bash_input.location.string - xchars should be where we parsed to */ + /* need to do more validation on xchars value for sanity -- test cases. */ + bash_input.location.string -= xchars; +} + /* **************************************************************** */ /* */ /* Let input come from STREAM. 
*/ @@ -1237,8 +1471,19 @@ yy_stream_get () result = EOF; if (bash_input.location.file) - result = getc_with_restart (bash_input.location.file); - + { + if (interactive) + { + interrupt_immediately++; + terminate_immediately++; + } + result = getc_with_restart (bash_input.location.file); + if (interactive) + { + interrupt_immediately--; + terminate_immediately--; + } + } return (result); } @@ -1399,10 +1644,6 @@ restore_token_state (ts) #if defined (ALIAS) || defined (DPAREN_ARITHMETIC) -#if !defined (ALIAS) -typedef void *alias_t; -#endif - #define END_OF_ALIAS 0 /* @@ -1462,7 +1703,9 @@ push_string (s, expand, ap) shell_input_line_size = strlen (s); shell_input_line_index = 0; shell_input_line_terminator = '\0'; - parser_state &= ~PST_ALEXPNEXT; +#if 0 + parser_state &= ~PST_ALEXPNEXT; /* XXX */ +#endif set_line_mbstate (); } @@ -1523,6 +1766,14 @@ free_string_list () #endif /* ALIAS || DPAREN_ARITHMETIC */ +void +free_pushed_string_input () +{ +#if defined (ALIAS) || defined (DPAREN_ARITHMETIC) + free_string_list (); +#endif +} + /* Return a line of text, taken from wherever yylex () reads input. If there is no more input, then we return NULL. If REMOVE_QUOTED_NEWLINE is non-zero, we remove unquoted \<newline> pairs. This is used by @@ -1536,20 +1787,20 @@ read_a_line (remove_quoted_newline) int indx = 0, c, peekc, pass_next; #if defined (READLINE) - if (interactive && bash_input.type != st_string && no_line_editing) + if (no_line_editing && SHOULD_PROMPT ()) #else - if (interactive && bash_input.type != st_string) + if (SHOULD_PROMPT ()) #endif print_prompt (); pass_next = 0; while (1) { - c = yy_getc (); - /* Allow immediate exit if interrupted during input. */ QUIT; + c = yy_getc (); + /* Ignore null bytes in input. */ if (c == 0) { @@ -1587,7 +1838,10 @@ read_a_line (remove_quoted_newline) { peekc = yy_getc (); if (peekc == '\n') - continue; /* Make the unquoted \<newline> pair disappear. */ + { + line_number++; + continue; /* Make the unquoted \<newline> pair disappear. 
*/ + } else { yy_ungetc (peekc); @@ -1615,9 +1869,27 @@ char * read_secondary_line (remove_quoted_newline) int remove_quoted_newline; { + char *ret; + int n, c; + prompt_string_pointer = &ps2_prompt; - prompt_again (); - return (read_a_line (remove_quoted_newline)); + if (SHOULD_PROMPT()) + prompt_again (); + ret = read_a_line (remove_quoted_newline); +#if defined (HISTORY) + if (remember_on_history && (parser_state & PST_HEREDOC)) + { + /* To make adding the here-document body right, we need to rely + on history_delimiting_chars() returning \n for the first line of + the here-document body and the null string for the second and + subsequent lines, so we avoid double newlines. + current_command_line_count == 2 for the first line of the body. */ + + current_command_line_count++; + maybe_add_history (ret); + } +#endif /* HISTORY */ + return ret; } /* **************************************************************** */ @@ -1656,6 +1928,9 @@ STRING_INT_ALIST word_token_alist[] = { { "[[", COND_START }, { "]]", COND_END }, #endif +#if defined (COPROCESS_SUPPORT) + { "coproc", COPROC }, +#endif { (char *)NULL, 0} }; @@ -1670,11 +1945,15 @@ STRING_INT_ALIST other_token_alist[] = { { "<&", LESS_AND }, { ">&", GREATER_AND }, { ";;", SEMI_SEMI }, + { ";&", SEMI_AND }, + { ";;&", SEMI_SEMI_AND }, { "<<-", LESS_LESS_MINUS }, { "<<<", LESS_LESS_LESS }, { "&>", AND_GREATER }, + { "&>>", AND_GREATER_GREATER }, { "<>", LESS_GREATER }, { ">|", GREATER_BAR }, + { "|&", BAR_AND }, { "EOF", yacc_EOF }, /* Tokens whose value is the character itself */ { ">", '>' }, @@ -1746,10 +2025,15 @@ shell_getc (remove_quoted_newline) register int i; int c; unsigned char uc; - static int mustpop = 0; QUIT; + if (sigwinch_received) + { + sigwinch_received = 0; + get_new_window_size (0, (int *)0, (int *)0); + } + if (eol_ungetc_lookahead) { c = eol_ungetc_lookahead; @@ -1778,20 +2062,27 @@ shell_getc (remove_quoted_newline) i = 0; shell_input_line_terminator = 0; + /* If the shell is 
interactive, but not currently printing a prompt + (interactive_shell && interactive == 0), we don't want to print + notifies or cleanup the jobs -- we want to defer it until we do + print the next prompt. */ + if (interactive_shell == 0 || SHOULD_PROMPT()) + { #if defined (JOB_CONTROL) /* This can cause a problem when reading a command as the result of a trap, when the trap is called from flush_child. This call had better not cause jobs to disappear from the job table in that case, or we will have big trouble. */ - notify_and_cleanup (); + notify_and_cleanup (); #else /* !JOB_CONTROL */ - cleanup_dead_jobs (); + cleanup_dead_jobs (); #endif /* !JOB_CONTROL */ + } #if defined (READLINE) - if (interactive && bash_input.type != st_string && no_line_editing) + if (no_line_editing && SHOULD_PROMPT()) #else - if (interactive && bash_input.type != st_string) + if (SHOULD_PROMPT()) #endif print_prompt (); @@ -1911,7 +2202,8 @@ shell_getc (remove_quoted_newline) { shell_input_line_size = 0; prompt_string_pointer = &current_prompt_string; - prompt_again (); + if (SHOULD_PROMPT ()) + prompt_again (); goto restart_read; } @@ -1935,13 +2227,6 @@ shell_getc (remove_quoted_newline) if (uc) shell_input_line_index++; - if MBTEST(uc == '\\' && remove_quoted_newline && shell_input_line[shell_input_line_index] == '\n') - { - prompt_again (); - line_number++; - goto restart_read; - } - #if defined (ALIAS) || defined (DPAREN_ARITHMETIC) /* If UC is NULL, we have reached the end of the current input string. If pushed_string_list is non-empty, it's time to pop to the previous string @@ -1950,22 +2235,21 @@ shell_getc (remove_quoted_newline) to. 
*/ if (!uc && (pushed_string_list != (STRING_SAVER *)NULL)) { - if (mustpop) - { - pop_string (); - uc = shell_input_line[shell_input_line_index]; - if (uc) - shell_input_line_index++; - mustpop--; - } - else - { - mustpop++; - uc = ' '; - } + pop_string (); + uc = shell_input_line[shell_input_line_index]; + if (uc) + shell_input_line_index++; } #endif /* ALIAS || DPAREN_ARITHMETIC */ + if MBTEST(uc == '\\' && remove_quoted_newline && shell_input_line[shell_input_line_index] == '\n') + { + if (SHOULD_PROMPT ()) + prompt_again (); + line_number++; + goto restart_read; + } + if (!uc && shell_input_line_terminator == EOF) return ((shell_input_line_index != 0) ? '\n' : EOF); @@ -2013,29 +2297,21 @@ discard_until (character) } void -execute_prompt_command (command) - char *command; +execute_variable_command (command, vname) + char *command, *vname; { - sh_builtin_func_t *temp_last, *temp_this; char *last_lastarg; - int temp_exit_value, temp_eof_encountered; + sh_parser_state_t ps; - temp_last = last_shell_builtin; - temp_this = this_shell_builtin; - temp_exit_value = last_command_exit_value; - temp_eof_encountered = eof_encountered; + save_parser_state (&ps); last_lastarg = get_string_value ("_"); if (last_lastarg) last_lastarg = savestring (last_lastarg); - parse_and_execute (savestring (command), "PROMPT_COMMAND", SEVAL_NONINT|SEVAL_NOHIST); + parse_and_execute (savestring (command), vname, SEVAL_NONINT|SEVAL_NOHIST); - last_shell_builtin = temp_last; - this_shell_builtin = temp_this; - last_command_exit_value = temp_exit_value; - eof_encountered = temp_eof_encountered; - - bind_variable ("_", last_lastarg); + restore_parser_state (&ps); + bind_variable ("_", last_lastarg, 0); FREE (last_lastarg); if (token_to_read == '\n') /* reset_parser was called */ @@ -2074,7 +2350,7 @@ yylex () /* Avoid printing a prompt if we're not going to read anything, e.g. after resetting the parser with read_token (RESET). 
*/ - if (token_to_read == 0 && interactive) + if (token_to_read == 0 && SHOULD_PROMPT ()) prompt_again (); } @@ -2082,6 +2358,15 @@ yylex () token_before_that = last_read_token; last_read_token = current_token; current_token = read_token (READ); + + if ((parser_state & PST_EOFTOKEN) && current_token == shell_eof_token) + { + current_token = yacc_EOF; + if (bash_input.type == st_string) + rewind_input_string (); + } + parser_state &= ~PST_EOFTOKEN; + return (current_token); } @@ -2092,10 +2377,14 @@ static int esacs_needed_count; void gather_here_documents () { - int r = 0; + int r; + + r = 0; while (need_here_doc) { - make_here_document (redir_stack[r++]); + parser_state |= PST_HEREDOC; + make_here_document (redir_stack[r++], line_number); + parser_state &= ~PST_HEREDOC; need_here_doc--; } } @@ -2106,10 +2395,10 @@ static int open_brace_count; #define command_token_position(token) \ (((token) == ASSIGNMENT_WORD) || \ - ((token) != SEMI_SEMI && reserved_word_acceptable(token))) + ((token) != SEMI_SEMI && (token) != SEMI_AND && (token) != SEMI_SEMI_AND && reserved_word_acceptable(token))) -#define assignment_acceptable(token) command_token_position(token) && \ - ((parser_state & PST_CASEPAT) == 0) +#define assignment_acceptable(token) \ + (command_token_position(token) && ((parser_state & PST_CASEPAT) == 0)) /* Check to see if TOKEN is a reserved word and return the token value if it is. */ @@ -2124,7 +2413,7 @@ static int open_brace_count; { \ if ((parser_state & PST_CASEPAT) && (word_token_alist[i].token != ESAC)) \ break; \ - if (word_token_alist[i].token == TIME) \ + if (word_token_alist[i].token == TIME && time_command_acceptable () == 0) \ break; \ if (word_token_alist[i].token == ESAC) \ parser_state &= ~(PST_CASEPAT|PST_CASESTMT); \ @@ -2157,6 +2446,23 @@ static int open_brace_count; Special cases that disqualify: In a pattern list in a case statement (parser_state & PST_CASEPAT). 
*/ + +static char * +mk_alexpansion (s) + char *s; +{ + int l; + char *r; + + l = strlen (s); + r = xmalloc (l + 2); + strcpy (r, s); + if (r[l -1] != ' ') + r[l++] = ' '; + r[l] = '\0'; + return r; +} + static int alias_expand_token (tokstr) char *tokstr; @@ -2173,7 +2479,12 @@ alias_expand_token (tokstr) if (ap && (ap->flags & AL_BEINGEXPANDED)) return (NO_EXPANSION); - expanded = ap ? savestring (ap->value) : (char *)NULL; + /* mk_alexpansion puts an extra space on the end of the alias expansion, + so the lookahead by the parser works right. If this gets changed, + make sure the code in shell_getc that deals with reaching the end of + an expanded alias is changed with it. */ + expanded = ap ? mk_alexpansion (ap->value) : (char *)NULL; + if (expanded) { push_string (expanded, ap->flags & AL_EXPANDNEXT, ap); @@ -2317,10 +2628,12 @@ special_case_tokens (tokstr) return (TIMEOPT); #endif +#if 0 #if defined (COMMAND_TIMING) if (STREQ (token, "time") && ((parser_state & PST_CASEPAT) == 0) && time_command_acceptable ()) return (TIME); #endif /* COMMAND_TIMING */ +#endif #if defined (COND_COMMAND) /* [[ */ if ((parser_state & PST_CONDEXPR) && tokstr[0] == ']' && tokstr[1] == ']' && tokstr[2] == '\0') @@ -2411,7 +2724,7 @@ read_token (command) #endif /* ALIAS */ /* Read a single word from input. Start by skipping blanks. */ - while ((character = shell_getc (1)) != EOF && whitespace (character)) + while ((character = shell_getc (1)) != EOF && shellblank (character)) ; if (character == EOF) @@ -2439,9 +2752,14 @@ read_token (command) parser_state &= ~PST_ALEXPNEXT; #endif /* ALIAS */ + parser_state &= ~PST_ASSIGNOK; + return (character); } + if (parser_state & PST_REGEXP) + goto tokword; + /* Shell meta-characters. 
*/ if MBTEST(shellmeta (character) && ((parser_state & PST_DBLPAREN) == 0)) { @@ -2452,6 +2770,8 @@ read_token (command) parser_state &= ~PST_ALEXPNEXT; #endif /* ALIAS */ + parser_state &= ~PST_ASSIGNOK; + peek_char = shell_getc (1); if (character == peek_char) { @@ -2461,9 +2781,9 @@ read_token (command) /* If '<' then we could be at "<<" or at "<<-". We have to look ahead one more character. */ peek_char = shell_getc (1); - if (peek_char == '-') + if MBTEST(peek_char == '-') return (LESS_LESS_MINUS); - else if (peek_char == '<') + else if MBTEST(peek_char == '<') return (LESS_LESS_LESS); else { @@ -2479,7 +2799,15 @@ read_token (command) #if defined (ALIAS) parser_state &= ~PST_ALEXPNEXT; #endif /* ALIAS */ - return (SEMI_SEMI); + + peek_char = shell_getc (1); + if MBTEST(peek_char == '&') + return (SEMI_SEMI_AND); + else + { + shell_ungetc (peek_char); + return (SEMI_SEMI); + } case '&': return (AND_AND); @@ -2505,8 +2833,27 @@ read_token (command) return (LESS_GREATER); else if MBTEST(character == '>' && peek_char == '|') return (GREATER_BAR); - else if MBTEST(peek_char == '>' && character == '&') - return (AND_GREATER); + else if MBTEST(character == '&' && peek_char == '>') + { + peek_char = shell_getc (1); + if MBTEST(peek_char == '>') + return (AND_GREATER_GREATER); + else + { + shell_ungetc (peek_char); + return (AND_GREATER); + } + } + else if MBTEST(character == '|' && peek_char == '&') + return (BAR_AND); + else if MBTEST(character == ';' && peek_char == '&') + { + parser_state |= PST_CASEPAT; +#if defined (ALIAS) + parser_state &= ~PST_ALEXPNEXT; +#endif /* ALIAS */ + return (SEMI_AND); + } shell_ungetc (peek_char); @@ -2546,6 +2893,7 @@ read_token (command) if MBTEST(character == '-' && (last_read_token == LESS_AND || last_read_token == GREATER_AND)) return (character); +tokword: /* Okay, if we got this far, we have to read a word. Read one, and then check it against the known ones. 
*/ result = read_token_word (character); @@ -2559,27 +2907,70 @@ read_token (command) /* * Match a $(...) or other grouping construct. This has to handle embedded * quoted strings ('', ``, "") and nested constructs. It also must handle - * reprompting the user, if necessary, after reading a newline (unless the - * P_NONL flag is passed), and returning correct error values if it reads - * EOF. + * reprompting the user, if necessary, after reading a newline, and returning + * correct error values if it reads EOF. */ #define P_FIRSTCLOSE 0x01 #define P_ALLOWESC 0x02 +#define P_DQUOTE 0x04 +#define P_COMMAND 0x08 /* parsing a command, so look for comments */ +#define P_BACKQUOTE 0x10 /* parsing a backquoted command substitution */ + +/* Lexical state while parsing a grouping construct or $(...). */ +#define LEX_WASDOL 0x001 +#define LEX_CKCOMMENT 0x002 +#define LEX_INCOMMENT 0x004 +#define LEX_PASSNEXT 0x008 +#define LEX_RESWDOK 0x010 +#define LEX_CKCASE 0x020 +#define LEX_INCASE 0x040 +#define LEX_INHEREDOC 0x080 +#define LEX_HEREDELIM 0x100 /* reading here-doc delimiter */ +#define LEX_STRIPDOC 0x200 /* <<- strip tabs from here doc delim */ + +#define COMSUB_META(ch) ((ch) == ';' || (ch) == '&' || (ch) == '|') + +#define CHECK_NESTRET_ERROR() \ + do { \ + if (nestret == &matched_pair_error) \ + { \ + free (ret); \ + return &matched_pair_error; \ + } \ + } while (0) + +#define APPEND_NESTRET() \ + do { \ + if (nestlen) \ + { \ + RESIZE_MALLOCED_BUFFER (ret, retind, nestlen, retsize, 64); \ + strcpy (ret + retind, nestret); \ + retind += nestlen; \ + } \ + } while (0) static char matched_pair_error; + static char * parse_matched_pair (qc, open, close, lenp, flags) int qc; /* `"' if this construct is within double quotes */ int open, close; int *lenp, flags; { - int count, ch, was_dollar; - int pass_next_character, nestlen, ttranslen, start_lineno; + int count, ch, tflags; + int nestlen, ttranslen, start_lineno; char *ret, *nestret, *ttrans; - int retind, retsize; + int 
retind, retsize, rflags; +/* itrace("parse_matched_pair: open = %c close = %c flags = %d", open, close, flags); */ count = 1; - pass_next_character = was_dollar = 0; + tflags = 0; + + if ((flags & P_COMMAND) && qc != '`' && qc != '\'' && qc != '"' && (flags & P_DQUOTE) == 0) + tflags |= LEX_CKCOMMENT; + + /* RFLAGS is the set of flags we want to pass to recursive calls. */ + rflags = (qc == '"') ? P_DQUOTE : (flags & P_DQUOTE); ret = (char *)xmalloc (retsize = 64); retind = 0; @@ -2587,27 +2978,48 @@ parse_matched_pair (qc, open, close, lenp, flags) start_lineno = line_number; while (count) { - ch = shell_getc ((qc != '\'' || (flags & P_ALLOWESC)) && pass_next_character == 0); + ch = shell_getc (qc != '\'' && (tflags & LEX_PASSNEXT) == 0); + if (ch == EOF) { free (ret); - parser_error (start_lineno, "unexpected EOF while looking for matching `%c'", close); + parser_error (start_lineno, _("unexpected EOF while looking for matching `%c'"), close); EOF_Reached = 1; /* XXX */ return (&matched_pair_error); } /* Possible reprompting. */ - if (ch == '\n' && interactive && - (bash_input.type == st_stdin || bash_input.type == st_stream)) + if (ch == '\n' && SHOULD_PROMPT ()) prompt_again (); - if (pass_next_character) /* last char was backslash */ + /* Don't bother counting parens or doing anything else if in a comment + or part of a case statement */ + if (tflags & LEX_INCOMMENT) { - pass_next_character = 0; - if (qc != '\'' && ch == '\n') /* double-quoted \ disappears. */ - { - if (retind > 0) retind--; /* swallow previously-added backslash */ - continue; + /* Add this character. */ + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + + if (ch == '\n') + tflags &= ~LEX_INCOMMENT; + + continue; + } + + /* Not exactly right yet, should handle shell metacharacters, too. If + any changes are made to this test, make analogous changes to subst.c: + extract_delimited_string(). 
*/ + else if MBTEST((tflags & LEX_CKCOMMENT) && (tflags & LEX_INCOMMENT) == 0 && ch == '#' && (retind == 0 || ret[retind-1] == '\n' || shellblank (ret[retind - 1]))) + tflags |= LEX_INCOMMENT; + + if (tflags & LEX_PASSNEXT) /* last char was backslash */ + { + tflags &= ~LEX_PASSNEXT; + if (qc != '\'' && ch == '\n') /* double-quoted \ disappears. */ + { + if (retind > 0) + retind--; /* swallow previously-added backslash */ + continue; } RESIZE_MALLOCED_BUFFER (ret, retind, 2, retsize, 64); @@ -2616,6 +3028,16 @@ parse_matched_pair (qc, open, close, lenp, flags) ret[retind++] = ch; continue; } + /* If we're reparsing the input (e.g., from parse_string_to_word_list), + we've already prepended CTLESC to single-quoted results of $'...'. + We may want to do this for other CTLESC-quoted characters in + reparse, too. */ + else if MBTEST((parser_state & PST_REPARSE) && open == '\'' && (ch == CTLESC || ch == CTLNUL)) + { + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + continue; + } else if MBTEST(ch == CTLESC || ch == CTLNUL) /* special shell escapes */ { RESIZE_MALLOCED_BUFFER (ret, retind, 2, retsize, 64); @@ -2625,11 +3047,9 @@ parse_matched_pair (qc, open, close, lenp, flags) } else if MBTEST(ch == close) /* ending delimiter */ count--; -#if 1 /* handle nested ${...} specially. */ - else if MBTEST(open != close && was_dollar && open == '{' && ch == open) /* } */ + else if MBTEST(open != close && (tflags & LEX_WASDOL) && open == '{' && ch == open) /* } */ count++; -#endif else if MBTEST(((flags & P_FIRSTCLOSE) == 0) && ch == open) /* nested begin */ count++; @@ -2637,63 +3057,76 @@ parse_matched_pair (qc, open, close, lenp, flags) RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); ret[retind++] = ch; + /* If we just read the ending character, don't bother continuing. 
*/ + if (count == 0) + break; + if (open == '\'') /* '' inside grouping construct */ { if MBTEST((flags & P_ALLOWESC) && ch == '\\') - pass_next_character++; + tflags |= LEX_PASSNEXT; continue; } if MBTEST(ch == '\\') /* backslashes */ - pass_next_character++; + tflags |= LEX_PASSNEXT; + +#if 0 + /* The big hammer. Single quotes aren't special in double quotes. The + problem is that Posix says the single quotes are semi-special: + within a double-quoted ${...} construct "an even number of + unescaped double-quotes or single-quotes, if any, shall occur." */ + if MBTEST(open == '{' && (flags & P_DQUOTE) && ch == '\'') /* } */ + continue; +#endif + /* Could also check open == '`' if we want to parse grouping constructs + inside old-style command substitution. */ if (open != close) /* a grouping construct */ { if MBTEST(shellquote (ch)) { /* '', ``, or "" inside $(...) or other grouping construct. */ push_delimiter (dstack, ch); - if MBTEST(was_dollar && ch == '\'') /* $'...' inside group */ - nestret = parse_matched_pair (ch, ch, ch, &nestlen, P_ALLOWESC); + if MBTEST((tflags & LEX_WASDOL) && ch == '\'') /* $'...' inside group */ + nestret = parse_matched_pair (ch, ch, ch, &nestlen, P_ALLOWESC|rflags); else - nestret = parse_matched_pair (ch, ch, ch, &nestlen, 0); + nestret = parse_matched_pair (ch, ch, ch, &nestlen, rflags); pop_delimiter (dstack); - if (nestret == &matched_pair_error) - { - free (ret); - return &matched_pair_error; - } - if MBTEST(was_dollar && ch == '\'') + CHECK_NESTRET_ERROR (); + + if MBTEST((tflags & LEX_WASDOL) && ch == '\'' && (extended_quote || (rflags & P_DQUOTE) == 0)) { /* Translate $'...' here. 
*/ ttrans = ansiexpand (nestret, 0, nestlen - 1, &ttranslen); xfree (nestret); - nestret = sh_single_quote (ttrans); - free (ttrans); - nestlen = strlen (nestret); + + if ((rflags & P_DQUOTE) == 0) + { + nestret = sh_single_quote (ttrans); + free (ttrans); + nestlen = strlen (nestret); + } + else + { + nestret = ttrans; + nestlen = ttranslen; + } retind -= 2; /* back up before the $' */ } - else if MBTEST(was_dollar && ch == '"') + else if MBTEST((tflags & LEX_WASDOL) && ch == '"' && (extended_quote || (rflags & P_DQUOTE) == 0)) { /* Locale expand $"..." here. */ ttrans = localeexpand (nestret, 0, nestlen - 1, start_lineno, &ttranslen); xfree (nestret); - nestret = (char *)xmalloc (ttranslen + 3); - nestret[0] = '"'; - strcpy (nestret + 1, ttrans); - nestret[ttranslen + 1] = '"'; - nestret[ttranslen += 2] = '\0'; + + nestret = sh_mkdoublequoted (ttrans, ttranslen, 0); free (ttrans); - nestlen = ttranslen; + nestlen = ttranslen + 2; retind -= 2; /* back up before the $" */ } - if (nestlen) - { - RESIZE_MALLOCED_BUFFER (ret, retind, nestlen, retsize, 64); - strcpy (ret + retind, nestret); - retind += nestlen; - } + APPEND_NESTRET (); FREE (nestret); } } @@ -2702,50 +3135,466 @@ parse_matched_pair (qc, open, close, lenp, flags) /* XXX - sh and ksh93 don't do this - XXX */ else if MBTEST(open == '"' && ch == '`') { - nestret = parse_matched_pair (0, '`', '`', &nestlen, 0); - if (nestret == &matched_pair_error) - { - free (ret); - return &matched_pair_error; - } - if (nestlen) - { - RESIZE_MALLOCED_BUFFER (ret, retind, nestlen, retsize, 64); - strcpy (ret + retind, nestret); - retind += nestlen; - } + nestret = parse_matched_pair (0, '`', '`', &nestlen, rflags); + + CHECK_NESTRET_ERROR (); + APPEND_NESTRET (); + FREE (nestret); } - else if MBTEST(was_dollar && (ch == '(' || ch == '{' || ch == '[')) /* ) } ] */ + else if MBTEST(open != '`' && (tflags & LEX_WASDOL) && (ch == '(' || ch == '{' || ch == '[')) /* ) } ] */ /* check for $(), $[], or ${} inside quoted string. 
*/ { if (open == ch) /* undo previous increment */ count--; if (ch == '(') /* ) */ - nestret = parse_matched_pair (0, '(', ')', &nestlen, 0); + nestret = parse_comsub (0, '(', ')', &nestlen, (rflags|P_COMMAND) & ~P_DQUOTE); else if (ch == '{') /* } */ - nestret = parse_matched_pair (0, '{', '}', &nestlen, P_FIRSTCLOSE); + nestret = parse_matched_pair (0, '{', '}', &nestlen, P_FIRSTCLOSE|rflags); else if (ch == '[') /* ] */ - nestret = parse_matched_pair (0, '[', ']', &nestlen, 0); - if (nestret == &matched_pair_error) + nestret = parse_matched_pair (0, '[', ']', &nestlen, rflags); + + CHECK_NESTRET_ERROR (); + APPEND_NESTRET (); + + FREE (nestret); + } + if MBTEST(ch == '$') + tflags |= LEX_WASDOL; + else + tflags &= ~LEX_WASDOL; + } + + ret[retind] = '\0'; + if (lenp) + *lenp = retind; + return ret; +} + +/* Parse a $(...) command substitution. This is messier than I'd like, and + reproduces a lot more of the token-reading code than I'd like. */ +static char * +parse_comsub (qc, open, close, lenp, flags) + int qc; /* `"' if this construct is within double quotes */ + int open, close; + int *lenp, flags; +{ + int count, ch, peekc, tflags, lex_rwlen, lex_firstind; + int nestlen, ttranslen, start_lineno; + char *ret, *nestret, *ttrans, *heredelim; + int retind, retsize, rflags, hdlen; + +/*itrace("parse_comsub: qc = `%c' open = %c close = %c", qc, open, close);*/ + count = 1; + tflags = LEX_RESWDOK; + + if ((flags & P_COMMAND) && qc != '\'' && qc != '"' && (flags & P_DQUOTE) == 0) + tflags |= LEX_CKCASE; + if ((tflags & LEX_CKCASE) && (interactive == 0 || interactive_comments)) + tflags |= LEX_CKCOMMENT; + + /* RFLAGS is the set of flags we want to pass to recursive calls. 
*/ + rflags = (flags & P_DQUOTE); + + ret = (char *)xmalloc (retsize = 64); + retind = 0; + + start_lineno = line_number; + lex_rwlen = 0; + + heredelim = 0; + lex_firstind = -1; + + while (count) + { +comsub_readchar: + ch = shell_getc (qc != '\'' && (tflags & LEX_PASSNEXT) == 0); + + if (ch == EOF) + { +eof_error: + free (ret); + FREE (heredelim); + parser_error (start_lineno, _("unexpected EOF while looking for matching `%c'"), close); + EOF_Reached = 1; /* XXX */ + return (&matched_pair_error); + } + + /* If we hit the end of a line and are reading the contents of a here + document, and it's not the same line that the document starts on, + check for this line being the here doc delimiter. Otherwise, if + we're in a here document, mark the next character as the beginning + of a line. */ + if (ch == '\n') + { + if ((tflags & LEX_HEREDELIM) && heredelim) + { + tflags &= ~LEX_HEREDELIM; + tflags |= LEX_INHEREDOC; + lex_firstind = retind + 1; + } + else if (tflags & LEX_INHEREDOC) + { + int tind; + tind = lex_firstind; + while ((tflags & LEX_STRIPDOC) && ret[tind] == '\t') + tind++; + if (STREQN (ret + tind, heredelim, hdlen)) + { + tflags &= ~(LEX_STRIPDOC|LEX_INHEREDOC); +/*itrace("parse_comsub:%d: found here doc end `%s'", line_number, ret + tind);*/ + lex_firstind = -1; + } + else + lex_firstind = retind + 1; + } + } + + /* Possible reprompting. */ + if (ch == '\n' && SHOULD_PROMPT ()) + prompt_again (); + + /* Don't bother counting parens or doing anything else if in a comment */ + if (tflags & (LEX_INCOMMENT|LEX_INHEREDOC)) + { + /* Add this character. */ + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + + if ((tflags & LEX_INCOMMENT) && ch == '\n') + tflags &= ~LEX_INCOMMENT; + + continue; + } + + /* Skip whitespace */ + if MBTEST(shellblank (ch) && lex_rwlen == 0) + { + /* Add this character. 
*/ + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + continue; + } + + /* Either we are looking for the start of the here-doc delimiter + (lex_firstind == -1) or we are reading one (lex_firstind >= 0). + If this character is a shell break character and we are reading + the delimiter, save it and note that we are now reading a here + document. If we've found the start of the delimiter, note it by + setting lex_firstind. Backslashes can quote shell metacharacters + in here-doc delimiters. */ + if (tflags & LEX_HEREDELIM) + { + if (lex_firstind == -1 && shellbreak (ch) == 0) + lex_firstind = retind; + else if (lex_firstind >= 0 && (tflags & LEX_PASSNEXT) == 0 && shellbreak (ch)) + { + nestret = substring (ret, lex_firstind, retind); + heredelim = string_quote_removal (nestret, 0); + free (nestret); + hdlen = STRLEN(heredelim); +/*itrace("parse_comsub:%d: found here doc delimiter `%s' (%d)", line_number, heredelim, hdlen);*/ + if (ch == '\n') + { + tflags |= LEX_INHEREDOC; + tflags &= ~LEX_HEREDELIM; + lex_firstind = retind + 1; + } + else + lex_firstind = -1; + } + } + + /* Meta-characters that can introduce a reserved word. Not perfect yet. */ + if MBTEST((tflags & LEX_RESWDOK) == 0 && (tflags & LEX_CKCASE) && (tflags & LEX_INCOMMENT) == 0 && shellmeta(ch)) + { + /* Add this character. 
*/ + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + peekc = shell_getc (1); + if (ch == peekc && (ch == '&' || ch == '|' || ch == ';')) /* two-character tokens */ + { + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = peekc; +/*itrace("parse_comsub:%d: set lex_reswordok = 1, ch = `%c'", line_number, ch); */ + tflags |= LEX_RESWDOK; + lex_rwlen = 0; + continue; + } + else if (ch == '\n' || COMSUB_META(ch)) + { + shell_ungetc (peekc); + tflags |= LEX_RESWDOK; +/*itrace("parse_comsub:%d: set lex_reswordok = 1, ch = `%c'", line_number, ch);*/ + lex_rwlen = 0; + continue; + } + else if (ch == EOF) + goto eof_error; + else + { + /* `unget' the character we just added and fall through */ + retind--; + shell_ungetc (peekc); + } + } + + /* If we can read a reserved word, try to read one. */ + if (tflags & LEX_RESWDOK) + { + if MBTEST(islower (ch)) + { + /* Add this character. */ + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + lex_rwlen++; + continue; + } + else if MBTEST(lex_rwlen == 4 && shellbreak (ch)) + { + if (STREQN (ret + retind - 4, "case", 4)) +{ + tflags |= LEX_INCASE; +/*itrace("parse_comsub:%d: found `case', lex_incase -> 1", line_number);*/ +} + else if (STREQN (ret + retind - 4, "esac", 4)) +{ + tflags &= ~LEX_INCASE; +/*itrace("parse_comsub:%d: found `esac', lex_incase -> 0", line_number);*/ +} + tflags &= ~LEX_RESWDOK; + } + else if (shellbreak (ch) == 0) +{ + tflags &= ~LEX_RESWDOK; +/*itrace("parse_comsub:%d: found `%c', lex_reswordok -> 0", line_number, ch);*/ +} + } + + if MBTEST((tflags & LEX_INCOMMENT) == 0 && (tflags & LEX_CKCASE) && ch == '<') + { + /* Add this character. 
*/ + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + peekc = shell_getc (1); + if (peekc == EOF) + goto eof_error; + if (peekc == ch) + { + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = peekc; + peekc = shell_getc (1); + if (peekc == EOF) + goto eof_error; + if (peekc == '-') + { + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = peekc; + tflags |= LEX_STRIPDOC; + } + else + shell_ungetc (peekc); + tflags |= LEX_HEREDELIM; + lex_firstind = -1; + continue; + } + else + ch = peekc; /* fall through and continue XXX - this skips comments if peekc == '#' */ + } + /* Not exactly right yet, should handle shell metacharacters, too. If + any changes are made to this test, make analogous changes to subst.c: + extract_delimited_string(). */ + else if MBTEST((tflags & LEX_CKCOMMENT) && (tflags & LEX_INCOMMENT) == 0 && ch == '#' && (retind == 0 || ret[retind-1] == '\n' || shellblank (ret[retind - 1]))) + tflags |= LEX_INCOMMENT; + + if (tflags & LEX_PASSNEXT) /* last char was backslash */ + { + tflags &= ~LEX_PASSNEXT; + if (qc != '\'' && ch == '\n') /* double-quoted \ disappears. 
*/ + { + if (retind > 0) + retind--; /* swallow previously-added backslash */ + continue; + } + + RESIZE_MALLOCED_BUFFER (ret, retind, 2, retsize, 64); + if MBTEST(ch == CTLESC || ch == CTLNUL) + ret[retind++] = CTLESC; + ret[retind++] = ch; + continue; + } + else if MBTEST(ch == CTLESC || ch == CTLNUL) /* special shell escapes */ + { + RESIZE_MALLOCED_BUFFER (ret, retind, 2, retsize, 64); + ret[retind++] = CTLESC; + ret[retind++] = ch; + continue; + } +#if 0 + else if MBTEST((tflags & LEX_INCASE) && ch == close && close == ')') + tflags &= ~LEX_INCASE; /* XXX */ +#endif + else if MBTEST(ch == close && (tflags & LEX_INCASE) == 0) /* ending delimiter */ +{ + count--; +/*itrace("parse_comsub:%d: found close: count = %d", line_number, count);*/ +} + else if MBTEST(((flags & P_FIRSTCLOSE) == 0) && (tflags & LEX_INCASE) == 0 && ch == open) /* nested begin */ + count++; + + /* Add this character. */ + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + + /* If we just read the ending character, don't bother continuing. */ + if (count == 0) + break; + + if MBTEST(ch == '\\') /* backslashes */ + tflags |= LEX_PASSNEXT; + + if MBTEST(shellquote (ch)) + { + /* '', ``, or "" inside $(...). */ + push_delimiter (dstack, ch); + if MBTEST((tflags & LEX_WASDOL) && ch == '\'') /* $'...' inside group */ + nestret = parse_matched_pair (ch, ch, ch, &nestlen, P_ALLOWESC|rflags); + else + nestret = parse_matched_pair (ch, ch, ch, &nestlen, rflags); + pop_delimiter (dstack); + CHECK_NESTRET_ERROR (); + + if MBTEST((tflags & LEX_WASDOL) && ch == '\'' && (extended_quote || (rflags & P_DQUOTE) == 0)) { - free (ret); - return &matched_pair_error; + /* Translate $'...' here. 
*/ + ttrans = ansiexpand (nestret, 0, nestlen - 1, &ttranslen); + xfree (nestret); + + if ((rflags & P_DQUOTE) == 0) + { + nestret = sh_single_quote (ttrans); + free (ttrans); + nestlen = strlen (nestret); + } + else + { + nestret = ttrans; + nestlen = ttranslen; + } + retind -= 2; /* back up before the $' */ } - if (nestlen) + else if MBTEST((tflags & LEX_WASDOL) && ch == '"' && (extended_quote || (rflags & P_DQUOTE) == 0)) { - RESIZE_MALLOCED_BUFFER (ret, retind, nestlen, retsize, 64); - strcpy (ret + retind, nestret); - retind += nestlen; + /* Locale expand $"..." here. */ + ttrans = localeexpand (nestret, 0, nestlen - 1, start_lineno, &ttranslen); + xfree (nestret); + + nestret = sh_mkdoublequoted (ttrans, ttranslen, 0); + free (ttrans); + nestlen = ttranslen + 2; + retind -= 2; /* back up before the $" */ } + + APPEND_NESTRET (); FREE (nestret); } - was_dollar = MBTEST(ch == '$'); + else if MBTEST((tflags & LEX_WASDOL) && (ch == '(' || ch == '{' || ch == '[')) /* ) } ] */ + /* check for $(), $[], or ${} inside command substitution. */ + { + if ((tflags & LEX_INCASE) == 0 && open == ch) /* undo previous increment */ + count--; + if (ch == '(') /* ) */ + nestret = parse_comsub (0, '(', ')', &nestlen, (rflags|P_COMMAND) & ~P_DQUOTE); + else if (ch == '{') /* } */ + nestret = parse_matched_pair (0, '{', '}', &nestlen, P_FIRSTCLOSE|rflags); + else if (ch == '[') /* ] */ + nestret = parse_matched_pair (0, '[', ']', &nestlen, rflags); + + CHECK_NESTRET_ERROR (); + APPEND_NESTRET (); + + FREE (nestret); + } + if MBTEST(ch == '$') + tflags |= LEX_WASDOL; + else + tflags &= ~LEX_WASDOL; } + FREE (heredelim); ret[retind] = '\0'; if (lenp) *lenp = retind; +/*itrace("parse_comsub:%d: returning `%s'", line_number, ret);*/ + return ret; +} + +/* XXX - this needs to handle functionality like subst.c:no_longjmp_on_fatal_error; + maybe extract_command_subst should handle it. 
*/ +char * +xparse_dolparen (base, string, indp, flags) + char *base; + char *string; + int *indp; + int flags; +{ + sh_parser_state_t ps; + int orig_ind, nc, sflags; + char *ret, *s, *ep, *ostring; + + /*yydebug = 1;*/ + orig_ind = *indp; + ostring = string; + + sflags = SEVAL_NONINT|SEVAL_NOHIST|SEVAL_NOFREE; + if (flags & SX_NOLONGJMP) + sflags |= SEVAL_NOLONGJMP; + save_parser_state (&ps); + + /*(*/ + parser_state |= PST_CMDSUBST|PST_EOFTOKEN; /* allow instant ')' */ /*(*/ + shell_eof_token = ')'; + parse_string (string, "command substitution", sflags, &ep); + + restore_parser_state (&ps); + reset_parser (); + if (interactive) + token_to_read = 0; + + /* Need to find how many characters parse_and_execute consumed, update + *indp, if flags != 0, copy the portion of the string parsed into RET + and return it. If flags & 1 (EX_NOALLOC) we can return NULL. */ + + /*(*/ + if (ep[-1] != ')') + { +#if DEBUG + if (ep[-1] != '\n') + itrace("xparse_dolparen:%d: ep[-1] != RPAREN (%d), ep = `%s'", line_number, ep[-1], ep); +#endif + while (ep > ostring && ep[-1] == '\n') ep--; + } + + nc = ep - ostring; + *indp = ep - base - 1; + + /*(*/ +#if DEBUG + if (base[*indp] != ')') + itrace("xparse_dolparen:%d: base[%d] != RPAREN (%d), base = `%s'", line_number, *indp, base[*indp], base); +#endif + + if (flags & SX_NOALLOC) + return (char *)NULL; + + if (nc == 0) + { + ret = xmalloc (1); + ret[0] = '\0'; + } + else + ret = substring (ostring, 0, nc - 1); + return ret; } @@ -2758,27 +3607,20 @@ static int parse_dparen (c) int c; { - int cmdtyp, len, sline; - char *wval, *wv2; + int cmdtyp, sline; + char *wval; WORD_DESC *wd; #if defined (ARITH_FOR_COMMAND) if (last_read_token == FOR) { arith_for_lineno = line_number; - cmdtyp = parse_arith_cmd (&wval); + cmdtyp = parse_arith_cmd (&wval, 0); if (cmdtyp == 1) { - /* parse_arith_cmd adds quotes at the beginning and end - of the string it returns; we need to take those out. 
*/ - len = strlen (wval); - wv2 = (char *)xmalloc (len); - strncpy (wv2, wval + 1, len - 2); - wv2[len - 2] = '\0'; - wd = make_word (wv2); + wd = alloc_word_desc (); + wd->word = wval; yylval.word_list = make_word_list (wd, (WORD_LIST *)NULL); - free (wval); - free (wv2); return (ARITH_FOR_EXPRS); } else @@ -2790,13 +3632,14 @@ parse_dparen (c) if (reserved_word_acceptable (last_read_token)) { sline = line_number; - cmdtyp = parse_arith_cmd (&wval); + + cmdtyp = parse_arith_cmd (&wval, 0); if (cmdtyp == 1) /* arithmetic command */ { - wd = make_word (wval); - wd->flags = W_QUOTED; + wd = alloc_word_desc (); + wd->word = wval; + wd->flags = W_QUOTED|W_NOSPLIT|W_NOGLOB|W_DQUOTE; yylval.word_list = make_word_list (wd, (WORD_LIST *)NULL); - free (wval); /* make_word copies it */ return (ARITH_CMD); } else if (cmdtyp == 0) /* nested subshell */ @@ -2820,8 +3663,9 @@ parse_dparen (c) allocated buffer and make *ep point to that buffer. Return -1 on an error, for example EOF. */ static int -parse_arith_cmd (ep) +parse_arith_cmd (ep, adddq) char **ep; + int adddq; { int exp_lineno, rval, c; char *ttok, *tokstr; @@ -2840,20 +3684,28 @@ parse_arith_cmd (ep) tokstr = (char *)xmalloc (ttoklen + 4); - /* (( ... )) -> "..." */ - tokstr[0] = (rval == 1) ? '"' : '('; - strncpy (tokstr + 1, ttok, ttoklen - 1); /* don't copy the final `)' */ - if (rval == 1) + /* if ADDDQ != 0 then (( ... )) -> "..." 
*/ + if (rval == 1 && adddq) /* arith cmd, add double quotes */ { + tokstr[0] = '"'; + strncpy (tokstr + 1, ttok, ttoklen - 1); tokstr[ttoklen] = '"'; tokstr[ttoklen+1] = '\0'; } - else + else if (rval == 1) /* arith cmd, don't add double quotes */ { + strncpy (tokstr, ttok, ttoklen - 1); + tokstr[ttoklen-1] = '\0'; + } + else /* nested subshell */ + { + tokstr[0] = '('; + strncpy (tokstr + 1, ttok, ttoklen - 1); tokstr[ttoklen] = ')'; tokstr[ttoklen+1] = c; tokstr[ttoklen+2] = '\0'; } + *ep = tokstr; FREE (ttok); return rval; @@ -2867,16 +3719,16 @@ cond_error () char *etext; if (EOF_Reached && cond_token != COND_ERROR) /* [[ */ - parser_error (cond_lineno, "unexpected EOF while looking for `]]'"); + parser_error (cond_lineno, _("unexpected EOF while looking for `]]'")); else if (cond_token != COND_ERROR) { if (etext = error_token_from_token (cond_token)) { - parser_error (cond_lineno, "syntax error in conditional expression: unexpected token `%s'", etext); + parser_error (cond_lineno, _("syntax error in conditional expression: unexpected token `%s'"), etext); free (etext); } else - parser_error (cond_lineno, "syntax error in conditional expression"); + parser_error (cond_lineno, _("syntax error in conditional expression")); } } @@ -2919,7 +3771,7 @@ cond_skip_newlines () { while ((cond_token = read_token (READ)) == '\n') { - if (interactive && (bash_input.type == st_stdin || bash_input.type == st_stream)) + if (SHOULD_PROMPT ()) prompt_again (); } return (cond_token); @@ -2954,11 +3806,11 @@ cond_term () dispose_cond_node (term); /* ( */ if (etext = error_token_from_token (cond_token)) { - parser_error (lineno, "unexpected token `%s', expected `)'", etext); + parser_error (lineno, _("unexpected token `%s', expected `)'"), etext); free (etext); } else - parser_error (lineno, "expected `)'"); + parser_error (lineno, _("expected `)'")); COND_RETURN_ERROR (); } term = make_cond_node (COND_EXPR, (WORD_DESC *)NULL, term, (COND_COM *)NULL); @@ -2972,7 +3824,7 @@ 
cond_term () if (term) term->flags |= CMD_INVERT_RETURN; } - else if (tok == WORD && test_unop (yylval.word->word)) + else if (tok == WORD && yylval.word->word[0] == '-' && yylval.word->word[2] == 0 && test_unop (yylval.word->word)) { op = yylval.word; tok = read_token (READ); @@ -2986,11 +3838,11 @@ cond_term () dispose_word (op); if (etext = error_token_from_token (tok)) { - parser_error (line_number, "unexpected argument `%s' to conditional unary operator", etext); + parser_error (line_number, _("unexpected argument `%s' to conditional unary operator"), etext); free (etext); } else - parser_error (line_number, "unexpected argument to conditional unary operator"); + parser_error (line_number, _("unexpected argument to conditional unary operator")); COND_RETURN_ERROR (); } @@ -3005,6 +3857,13 @@ cond_term () tok = read_token (READ); if (tok == WORD && test_binop (yylval.word->word)) op = yylval.word; +#if defined (COND_REGEXP) + else if (tok == WORD && STREQ (yylval.word->word, "=~")) + { + op = yylval.word; + parser_state |= PST_REGEXP; + } +#endif else if (tok == '<' || tok == '>') op = make_word_from_token (tok); /* ( */ /* There should be a check before blindly accepting the `)' that we have @@ -3023,17 +3882,18 @@ cond_term () { if (etext = error_token_from_token (tok)) { - parser_error (line_number, "unexpected token `%s', conditional binary operator expected", etext); + parser_error (line_number, _("unexpected token `%s', conditional binary operator expected"), etext); free (etext); } else - parser_error (line_number, "conditional binary operator expected"); + parser_error (line_number, _("conditional binary operator expected")); dispose_cond_node (tleft); COND_RETURN_ERROR (); } /* rhs */ tok = read_token (READ); + parser_state &= ~PST_REGEXP; if (tok == WORD) { tright = make_cond_node (COND_TERM, yylval.word, (COND_COM *)NULL, (COND_COM *)NULL); @@ -3043,11 +3903,11 @@ cond_term () { if (etext = error_token_from_token (tok)) { - parser_error (line_number, 
"unexpected argument `%s' to conditional binary operator", etext); + parser_error (line_number, _("unexpected argument `%s' to conditional binary operator"), etext); free (etext); } else - parser_error (line_number, "unexpected argument to conditional binary operator"); + parser_error (line_number, _("unexpected argument to conditional binary operator")); dispose_cond_node (tleft); dispose_word (op); COND_RETURN_ERROR (); @@ -3058,14 +3918,14 @@ cond_term () else { if (tok < 256) - parser_error (line_number, "unexpected token `%c' in conditional command", tok); + parser_error (line_number, _("unexpected token `%c' in conditional command"), tok); else if (etext = error_token_from_token (tok)) { - parser_error (line_number, "unexpected token `%s' in conditional command", etext); + parser_error (line_number, _("unexpected token `%s' in conditional command"), etext); free (etext); } else - parser_error (line_number, "unexpected token %d in conditional command", tok); + parser_error (line_number, _("unexpected token %d in conditional command"), tok); COND_RETURN_ERROR (); } return (term); @@ -3096,11 +3956,12 @@ token_is_assignment (t, i) c = t[i]; c1 = t[i+1]; t[i] = '='; t[i+1] = '\0'; - r = assignment (t); + r = assignment (t, (parser_state & PST_COMPASSIGN) != 0); t[i] = c; t[i+1] = c1; return r; } +/* XXX - possible changes here for `+=' */ static int token_is_ident (t, i) char *t; @@ -3133,6 +3994,10 @@ read_token_word (character) /* DOLLAR_PRESENT becomes non-zero if we see a `$'. */ int dollar_present; + /* COMPOUND_ASSIGNMENT becomes non-zero if we are parsing a compound + assignment. */ + int compound_assignment; + /* QUOTED becomes non-zero if we see one of ("), ('), (`), or (\). 
*/ int quoted; @@ -3152,7 +4017,7 @@ read_token_word (character) token_index = 0; all_digit_token = DIGIT (character); - dollar_present = quoted = pass_next_character = 0; + dollar_present = quoted = pass_next_character = compound_assignment = 0; for (;;) { @@ -3162,7 +4027,7 @@ read_token_word (character) if (pass_next_character) { pass_next_character = 0; - goto got_character; + goto got_escaped_character; } cd = current_delimiter (dstack); @@ -3198,7 +4063,7 @@ read_token_word (character) if MBTEST(shellquote (character)) { push_delimiter (dstack, character); - ttok = parse_matched_pair (character, character, character, &ttoklen, 0); + ttok = parse_matched_pair (character, character, character, &ttoklen, (character == '`') ? P_COMMAND : 0); pop_delimiter (dstack); if (ttok == &matched_pair_error) return -1; /* Bail immediately. */ @@ -3214,9 +4079,34 @@ read_token_word (character) goto next_character; } +#ifdef COND_REGEXP + /* When parsing a regexp as a single word inside a conditional command, + we need to special-case characters special to both the shell and + regular expressions. Right now, that is only '(' and '|'. */ /*)*/ + if MBTEST((parser_state & PST_REGEXP) && (character == '(' || character == '|')) /*)*/ + { + if (character == '|') + goto got_character; + + push_delimiter (dstack, character); + ttok = parse_matched_pair (cd, '(', ')', &ttoklen, 0); + pop_delimiter (dstack); + if (ttok == &matched_pair_error) + return -1; /* Bail immediately. */ + RESIZE_MALLOCED_BUFFER (token, token_index, ttoklen + 2, + token_buffer_size, TOKEN_DEFAULT_GROW_SIZE); + token[token_index++] = character; + strcpy (token + token_index, ttok); + token_index += ttoklen; + FREE (ttok); + dollar_present = all_digit_token = 0; + goto next_character; + } +#endif /* COND_REGEXP */ + #ifdef EXTENDED_GLOB /* Parse a ksh-style extended pattern matching specification. 
*/ - if (extended_glob && PATTERN_CHAR (character)) + if MBTEST(extended_glob && PATTERN_CHAR (character)) { peek_char = shell_getc (1); if MBTEST(peek_char == '(') /* ) */ @@ -3261,7 +4151,7 @@ read_token_word (character) history literally rather than causing a possibly- incorrect `;' to be added. ) */ push_delimiter (dstack, peek_char); - ttok = parse_matched_pair (cd, '(', ')', &ttoklen, 0); + ttok = parse_comsub (cd, '(', ')', &ttoklen, P_COMMAND); pop_delimiter (dstack); } else @@ -3297,13 +4187,14 @@ read_token_word (character) { ttrans = ansiexpand (ttok, 0, ttoklen - 1, &ttranslen); free (ttok); + /* Insert the single quotes and correctly quote any embedded single quotes (allowed because P_ALLOWESC was passed to parse_matched_pair). */ ttok = sh_single_quote (ttrans); free (ttrans); + ttranslen = strlen (ttok); ttrans = ttok; - ttranslen = strlen (ttrans); } else { @@ -3312,12 +4203,9 @@ read_token_word (character) free (ttok); /* Add the double quotes back */ - ttok = (char *)xmalloc (ttranslen + 3); - ttok[0] = '"'; - strcpy (ttok + 1, ttrans); - ttok[ttranslen + 1] = '"'; - ttok[ttranslen += 2] = '\0'; + ttok = sh_mkdoublequoted (ttrans, ttranslen, 0); free (ttrans); + ttranslen += 2; ttrans = ttok; } @@ -3353,8 +4241,12 @@ read_token_word (character) } #if defined (ARRAY_VARS) - /* Identify possible array subscript assignment; match [...] */ - else if MBTEST(character == '[' && token_index > 0 && assignment_acceptable (last_read_token) && token_is_ident (token, token_index)) /* ] */ + /* Identify possible array subscript assignment; match [...]. If + parser_state&PST_COMPASSIGN, we need to parse [sub]=words treating + `sub' as if it were enclosed in double quotes. 
*/ + else if MBTEST(character == '[' && /* ] */ + ((token_index > 0 && assignment_acceptable (last_read_token) && token_is_ident (token, token_index)) || + (token_index == 0 && (parser_state&PST_COMPASSIGN)))) { ttok = parse_matched_pair (cd, '[', ']', &ttoklen, 0); if (ttok == &matched_pair_error) @@ -3370,7 +4262,7 @@ read_token_word (character) goto next_character; } /* Identify possible compound array variable assignment. */ - else if MBTEST(character == '=' && token_index > 0 && token_is_assignment (token, token_index)) + else if MBTEST(character == '=' && token_index > 0 && (assignment_acceptable (last_read_token) || (parser_state & PST_ASSIGNOK)) && token_is_assignment (token, token_index)) { peek_char = shell_getc (1); if MBTEST(peek_char == '(') /* ) */ @@ -3391,7 +4283,12 @@ read_token_word (character) token[token_index++] = ')'; FREE (ttok); all_digit_token = 0; + compound_assignment = 1; +#if 1 goto next_character; +#else + goto got_token; /* ksh93 seems to do this */ +#endif } else shell_ungetc (peek_char); @@ -3408,20 +4305,21 @@ read_token_word (character) got_character: - all_digit_token &= DIGIT (character); - dollar_present |= character == '$'; - if (character == CTLESC || character == CTLNUL) token[token_index++] = CTLESC; + got_escaped_character: + + all_digit_token &= DIGIT (character); + dollar_present |= character == '$'; + token[token_index++] = character; RESIZE_MALLOCED_BUFFER (token, token_index, 1, token_buffer_size, TOKEN_DEFAULT_GROW_SIZE); next_character: - if (character == '\n' && interactive && - (bash_input.type == st_stdin || bash_input.type == st_stream)) + if (character == '\n' && SHOULD_PROMPT ()) prompt_again (); /* We want to remove quoted newlines (that is, a \ pair) @@ -3486,27 +4384,48 @@ got_token: if (dollar_present) the_word->flags |= W_HASDOLLAR; if (quoted) - the_word->flags |= W_QUOTED; + the_word->flags |= W_QUOTED; /*(*/ + if (compound_assignment && token[token_index-1] == ')') + the_word->flags |= W_COMPASSIGN; /* 
A word is an assignment if it appears at the beginning of a simple command, or after another assignment word. This is context-dependent, so it cannot be handled in the grammar. */ - if (assignment (token)) + if (assignment (token, (parser_state & PST_COMPASSIGN) != 0)) { the_word->flags |= W_ASSIGNMENT; /* Don't perform word splitting on assignment statements. */ - if (assignment_acceptable (last_read_token)) + if (assignment_acceptable (last_read_token) || (parser_state & PST_COMPASSIGN) != 0) the_word->flags |= W_NOSPLIT; } + if (command_token_position (last_read_token)) + { + struct builtin *b; + b = builtin_address_internal (token, 0); + if (b && (b->flags & ASSIGNMENT_BUILTIN)) + parser_state |= PST_ASSIGNOK; + else if (STREQ (token, "eval") || STREQ (token, "let")) + parser_state |= PST_ASSIGNOK; + } + yylval.word = the_word; result = ((the_word->flags & (W_ASSIGNMENT|W_NOSPLIT)) == (W_ASSIGNMENT|W_NOSPLIT)) ? ASSIGNMENT_WORD : WORD; - if (last_read_token == FUNCTION) + switch (last_read_token) { + case FUNCTION: parser_state |= PST_ALLOWOPNBRC; function_dstart = line_number; + break; + case CASE: + case SELECT: + case FOR: + if (word_top < MAX_CASE_NEST) + word_top++; + word_lineno[word_top] = line_number; + break; } return (result); @@ -3539,14 +4458,21 @@ reserved_word_acceptable (toksym) case IF: case OR_OR: case SEMI_SEMI: + case SEMI_AND: + case SEMI_SEMI_AND: case THEN: case TIME: case TIMEOPT: + case COPROC: case UNTIL: case WHILE: case 0: return 1; default: +#if defined (COPROCESS_SUPPORT) + if (last_read_token == WORD && token_before_that == COPROC) + return 1; +#endif return 0; } } @@ -3597,9 +4523,10 @@ reset_readline_prompt () /* A list of tokens which can be followed by newlines, but not by semi-colons. When concatenating multiple lines of history, the newline separator for such tokens is replaced with a space. 
*/ -static int no_semi_successors[] = { +static const int no_semi_successors[] = { '\n', '{', '(', ')', ';', '&', '|', - CASE, DO, ELSE, IF, SEMI_SEMI, THEN, UNTIL, WHILE, AND_AND, OR_OR, IN, + CASE, DO, ELSE, IF, SEMI_SEMI, SEMI_AND, SEMI_SEMI_AND, THEN, UNTIL, + WHILE, AND_AND, OR_OR, IN, 0 }; @@ -3614,7 +4541,13 @@ history_delimiting_chars () if (dstack.delimiter_depth != 0) return ("\n"); - + + /* We look for current_command_line_count == 2 because we are looking to + add the first line of the body of the here document (the second line + of the command). */ + if (parser_state & PST_HEREDOC) + return (current_command_line_count == 2 ? "\n" : ""); + /* First, handle some special cases. */ /*(*/ /* If we just read `()', assume it's a function definition, and don't @@ -3640,12 +4573,14 @@ history_delimiting_chars () { /* Tricky. `for i\nin ...' should not have a semicolon, but `for i\ndo ...' should. We do what we can. */ - for (i = shell_input_line_index; whitespace(shell_input_line[i]); i++) + for (i = shell_input_line_index; whitespace (shell_input_line[i]); i++) ; if (shell_input_line[i] && shell_input_line[i] == 'i' && shell_input_line[i+1] == 'n') return " "; return ";"; } + else if (two_tokens_ago == CASE && token_before_that == WORD && (parser_state & PST_CASESTMT)) + return " "; for (i = 0; no_semi_successors[i]; i++) { @@ -3664,7 +4599,7 @@ prompt_again () { char *temp_prompt; - if (!interactive) /* XXX */ + if (interactive == 0 || expanding_alias()) /* XXX */ return; ps1_prompt = get_string_value ("PS1"); @@ -3763,7 +4698,7 @@ decode_prompt_string (string) int last_exit_value; #if defined (PROMPT_STRING_DECODE) int result_size, result_index; - int c, n; + int c, n, i; char *temp, octal_string[4]; struct tm *tm; time_t the_time; @@ -3857,7 +4792,11 @@ decode_prompt_string (string) else if (c == 'A') n = strftime (timebuf, sizeof (timebuf), "%H:%M", tm); - timebuf[sizeof(timebuf) - 1] = '\0'; + if (n == 0) + timebuf[0] = '\0'; + else + 
timebuf[sizeof(timebuf) - 1] = '\0'; + temp = savestring (timebuf); goto add_string; @@ -3882,7 +4821,11 @@ decode_prompt_string (string) n = strftime (timebuf, sizeof (timebuf), timefmt, tm); free (timefmt); - timebuf[sizeof(timebuf) - 1] = '\0'; + if (n == 0) + timebuf[0] = '\0'; + else + timebuf[sizeof(timebuf) - 1] = '\0'; + if (promptvars || posixly_correct) /* Make sure that expand_prompt_string is called with a second argument of Q_DOUBLE_QUOTES if we use this @@ -3941,7 +4884,8 @@ decode_prompt_string (string) #define ROOT_PATH(x) ((x)[0] == '/' && (x)[1] == 0) #define DOUBLE_SLASH_ROOT(x) ((x)[0] == '/' && (x)[1] == '/' && (x)[2] == 0) - if (c == 'W') + /* Abbreviate \W as ~ if $PWD == $HOME */ + if (c == 'W' && (((t = get_string_value ("HOME")) == 0) || STREQ (t, t_string) == 0)) { if (ROOT_PATH (t_string) == 0 && DOUBLE_SLASH_ROOT (t_string) == 0) { @@ -3957,6 +4901,7 @@ decode_prompt_string (string) no longer than PATH_MAX - 1 characters. */ strcpy (t_string, polite_directory_format (t_string)); + temp = trim_pathname (t_string, PATH_MAX - 1); /* If we're going to be expanding the prompt string later, quote the directory name. */ if (promptvars || posixly_correct) @@ -4020,10 +4965,18 @@ decode_prompt_string (string) #if defined (READLINE) case '[': case ']': + if (no_line_editing) + { + string++; + break; + } temp = (char *)xmalloc (3); - temp[0] = '\001'; - temp[1] = (c == '[') ? RL_PROMPT_START_IGNORE : RL_PROMPT_END_IGNORE; - temp[2] = '\0'; + n = (c == '[') ? 
RL_PROMPT_START_IGNORE : RL_PROMPT_END_IGNORE; + i = 0; + if (n == CTLESC || n == CTLNUL) + temp[i++] = CTLESC; + temp[i++] = n; + temp[i] = '\0'; goto add_string; #endif /* READLINE */ @@ -4083,7 +5036,7 @@ not_escape: if (promptvars || posixly_correct) { last_exit_value = last_command_exit_value; - list = expand_prompt_string (result, Q_DOUBLE_QUOTES); + list = expand_prompt_string (result, Q_DOUBLE_QUOTES, 0); free (result); result = string_list (list); dispose_words (list); @@ -4119,15 +5072,15 @@ yyerror (msg) } static char * -error_token_from_token (token) - int token; +error_token_from_token (tok) + int tok; { char *t; - if (t = find_token_in_alist (token, word_token_alist, 0)) + if (t = find_token_in_alist (tok, word_token_alist, 0)) return t; - if (t = find_token_in_alist (token, other_token_alist, 0)) + if (t = find_token_in_alist (tok, other_token_alist, 0)) return t; t = (char *)NULL; @@ -4239,7 +5192,7 @@ report_syntax_error (message) parser's complaining about by looking at current_token. */ if (current_token != 0 && EOF_Reached == 0 && (msg = error_token_from_token (current_token))) { - parser_error (line_number, "syntax error near unexpected token `%s'", msg); + parser_error (line_number, _("syntax error near unexpected token `%s'"), msg); free (msg); if (interactive == 0) @@ -4257,7 +5210,7 @@ report_syntax_error (message) msg = error_token_from_text (); if (msg) { - parser_error (line_number, "syntax error near `%s'", msg); + parser_error (line_number, _("syntax error near `%s'"), msg); free (msg); } @@ -4267,7 +5220,7 @@ report_syntax_error (message) } else { - msg = EOF_Reached ? "syntax error: unexpected end of file" : "syntax error"; + msg = EOF_Reached ? _("syntax error: unexpected end of file") : _("syntax error"); parser_error (line_number, "%s", msg); /* When the shell is interactive, this file uses EOF_Reached only for error reporting. 
Other mechanisms are used to @@ -4329,7 +5282,7 @@ handle_eof_input_unit () { if (eof_encountered < eof_encountered_limit) { - fprintf (stderr, "Use \"%s\" to leave the shell.\n", + fprintf (stderr, _("Use \"%s\" to leave the shell.\n"), login_shell ? "logout" : "exit"); eof_encountered++; /* Reset the parsing state. */ @@ -4366,8 +5319,9 @@ static WORD_LIST parse_string_error; /* Take a string and run it through the shell parser, returning the resultant word list. Used by compound array assignment. */ WORD_LIST * -parse_string_to_word_list (s, whom) +parse_string_to_word_list (s, flags, whom) char *s; + int flags; const char *whom; { WORD_LIST *wl; @@ -4399,6 +5353,10 @@ parse_string_to_word_list (s, whom) with_input_from_string (s, whom); wl = (WORD_LIST *)NULL; + + if (flags & 1) + parser_state |= PST_COMPASSIGN|PST_REPARSE; + while ((tok = read_token (READ)) != yacc_EOF) { if (tok == '\n' && *bash_input.location.string == '\0') @@ -4410,7 +5368,7 @@ parse_string_to_word_list (s, whom) line_number = orig_line_number + line_number - 1; orig_current_token = current_token; current_token = tok; - yyerror ((char *)NULL); /* does the right thing */ + yyerror (NULL); /* does the right thing */ current_token = orig_current_token; if (wl) dispose_words (wl); @@ -4436,6 +5394,9 @@ parse_string_to_word_list (s, whom) current_command_line_count = orig_line_count; shell_input_line_terminator = orig_input_terminator; + if (flags & 1) + parser_state &= ~(PST_COMPASSIGN|PST_REPARSE); + if (wl == &parse_string_error) { last_command_exit_value = EXECUTION_FAILURE; @@ -4453,30 +5414,39 @@ parse_compound_assignment (retlenp) int *retlenp; { WORD_LIST *wl, *rl; - int tok, orig_line_number, orig_token_size; + int tok, orig_line_number, orig_token_size, orig_last_token, assignok; char *saved_token, *ret; saved_token = token; orig_token_size = token_buffer_size; orig_line_number = line_number; + orig_last_token = last_read_token; last_read_token = WORD; /* WORD to allow reserved words 
here */ token = (char *)NULL; token_buffer_size = 0; + assignok = parser_state&PST_ASSIGNOK; /* XXX */ + wl = (WORD_LIST *)NULL; /* ( */ + parser_state |= PST_COMPASSIGN; + while ((tok = read_token (READ)) != ')') { if (tok == '\n') /* Allow newlines in compound assignments */ - continue; + { + if (SHOULD_PROMPT ()) + prompt_again (); + continue; + } if (tok != WORD && tok != ASSIGNMENT_WORD) { current_token = tok; /* for error reporting */ if (tok == yacc_EOF) /* ( */ - parser_error (orig_line_number, "unexpected EOF while looking for matching `)'"); + parser_error (orig_line_number, _("unexpected EOF while looking for matching `)'")); else - yyerror ((char *)NULL); /* does the right thing */ + yyerror(NULL); /* does the right thing */ if (wl) dispose_words (wl); wl = &parse_string_error; @@ -4489,6 +5459,8 @@ parse_compound_assignment (retlenp) token = saved_token; token_buffer_size = orig_token_size; + parser_state &= ~PST_COMPASSIGN; + if (wl == &parse_string_error) { last_command_exit_value = EXECUTION_FAILURE; @@ -4499,7 +5471,8 @@ parse_compound_assignment (retlenp) jump_to_top_level (DISCARD); } - last_read_token = WORD; + last_read_token = orig_last_token; /* XXX - was WORD? */ + if (wl) { rl = REVERSE_LIST (wl, WORD_LIST *); @@ -4511,11 +5484,114 @@ parse_compound_assignment (retlenp) if (retlenp) *retlenp = (ret && *ret) ? 
strlen (ret) : 0; + + if (assignok) + parser_state |= PST_ASSIGNOK; + return ret; } /************************************************ * * + * SAVING AND RESTORING PARTIAL PARSE STATE * + * * + ************************************************/ + +sh_parser_state_t * +save_parser_state (ps) + sh_parser_state_t *ps; +{ +#if defined (ARRAY_VARS) + SHELL_VAR *v; +#endif + + if (ps == 0) + ps = (sh_parser_state_t *)xmalloc (sizeof (sh_parser_state_t)); + if (ps == 0) + return ((sh_parser_state_t *)NULL); + + ps->parser_state = parser_state; + ps->token_state = save_token_state (); + + ps->input_line_terminator = shell_input_line_terminator; + ps->eof_encountered = eof_encountered; + + ps->current_command_line_count = current_command_line_count; + +#if defined (HISTORY) + ps->remember_on_history = remember_on_history; +# if defined (BANG_HISTORY) + ps->history_expansion_inhibited = history_expansion_inhibited; +# endif +#endif + + ps->last_command_exit_value = last_command_exit_value; +#if defined (ARRAY_VARS) + v = find_variable ("PIPESTATUS"); + if (v && array_p (v) && array_cell (v)) + ps->pipestatus = array_copy (array_cell (v)); + else + ps->pipestatus = (ARRAY *)NULL; +#endif + + ps->last_shell_builtin = last_shell_builtin; + ps->this_shell_builtin = this_shell_builtin; + + ps->expand_aliases = expand_aliases; + ps->echo_input_at_read = echo_input_at_read; + + return (ps); +} + +void +restore_parser_state (ps) + sh_parser_state_t *ps; +{ +#if defined (ARRAY_VARS) + SHELL_VAR *v; +#endif + + if (ps == 0) + return; + + parser_state = ps->parser_state; + if (ps->token_state) + { + restore_token_state (ps->token_state); + free (ps->token_state); + } + + shell_input_line_terminator = ps->input_line_terminator; + eof_encountered = ps->eof_encountered; + + current_command_line_count = ps->current_command_line_count; + +#if defined (HISTORY) + remember_on_history = ps->remember_on_history; +# if defined (BANG_HISTORY) + history_expansion_inhibited = 
ps->history_expansion_inhibited; +# endif +#endif + + last_command_exit_value = ps->last_command_exit_value; +#if defined (ARRAY_VARS) + v = find_variable ("PIPESTATUS"); + if (v && array_p (v) && array_cell (v)) + { + array_dispose (array_cell (v)); + var_setarray (v, ps->pipestatus); + } +#endif + + last_shell_builtin = ps->last_shell_builtin; + this_shell_builtin = ps->this_shell_builtin; + + expand_aliases = ps->expand_aliases; + echo_input_at_read = ps->echo_input_at_read; +} + +/************************************************ + * * * MULTIBYTE CHARACTER HANDLING * * * ************************************************/ @@ -4524,7 +5600,7 @@ parse_compound_assignment (retlenp) static void set_line_mbstate () { - int i, previ, len; + int i, previ, len, c; mbstate_t mbs, prevs; size_t mbclen; @@ -4539,7 +5615,8 @@ set_line_mbstate () { mbs = prevs; - if (shell_input_line[i] == EOF) + c = shell_input_line[i]; + if (c == EOF) { int j; for (j = i; j < len; j++) @@ -4563,7 +5640,11 @@ set_line_mbstate () } else { - /* mbrlen doesn't return any other values */ + /* XXX - what to do if mbrlen returns 0? (null wide character) */ + int j; + for (j = i; j < len; j++) + shell_input_line_property[j] = 1; + break; } shell_input_line_property[i] = mbclen;