X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=parse.y;h=8461337eed87db6cddd33119740a0af9d24cd112;hb=3185942a5234e26ab13fa02f9c51d340cec514f8;hp=363430e5b60a9c286004fc108c4a11c3a60c07ff;hpb=b80f6443b6b7b620c7272664c66ecb0b120a0998;p=platform%2Fupstream%2Fbash.git diff --git a/parse.y b/parse.y index 363430e..8461337 100644 --- a/parse.y +++ b/parse.y @@ -1,22 +1,22 @@ -/* Yacc grammar for bash. */ +/* parse.y - Yacc grammar for bash. */ -/* Copyright (C) 1989-2004 Free Software Foundation, Inc. +/* Copyright (C) 1989-2009 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell. - Bash is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2, or (at your option) any later - version. + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Bash is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with Bash; see the file LICENSE. If not, write to the Free Software - Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */ + You should have received a copy of the GNU General Public License + along with Bash. If not, see . +*/ %{ #include "config.h" @@ -50,6 +50,7 @@ #include "parser.h" #include "mailcheck.h" #include "test.h" +#include "builtins.h" #include "builtins/common.h" #include "builtins/builtext.h" @@ -114,10 +115,9 @@ extern int extended_glob; extern int eof_encountered; extern int no_line_editing, running_under_emacs; extern int current_command_number; -extern int sourcelevel; +extern int sourcelevel, parse_and_execute_level; extern int posixly_correct; extern int last_command_exit_value; -extern int interrupt_immediately; extern char *shell_name, *current_host_name; extern char *dist_version; extern int patch_level; @@ -148,6 +148,7 @@ static int yy_readline_unget __P((int)); static int yy_string_get __P((void)); static int yy_string_unget __P((int)); +static void rewind_input_string __P((void)); static int yy_stream_get __P((void)); static int yy_stream_unget __P((int)); @@ -170,6 +171,7 @@ static int time_command_acceptable __P((void)); static int special_case_tokens __P((char *)); static int read_token __P((int)); static char *parse_matched_pair __P((int, int, int, int *, int)); +static char *parse_comsub __P((int, int, int, int *, int)); #if defined (ARRAY_VARS) static char *parse_compound_assignment __P((int *)); #endif @@ -205,10 +207,6 @@ static void reset_readline_prompt __P((void)); #endif static void print_prompt __P((void)); -#if defined (HISTORY) -char *history_delimiting_chars __P((void)); -#endif - #if defined (HANDLE_MULTIBYTE) static void set_line_mbstate __P((void)); static char *shell_input_line_property = NULL; @@ -255,6 +253,12 @@ static char *current_decoded_prompt; /* The number of lines read from input while creating the current command. */ int current_command_line_count; +/* The token that currently denotes the end of parse. */ +int shell_eof_token; + +/* The token currently being read. */ +int current_token; + /* Variables to manage the task of reading here documents, because we need to defer the reading until after a complete command has been collected. */ static REDIRECT *redir_stack[10]; @@ -279,6 +283,19 @@ static int function_bstart; /* The line number in a script at which an arithmetic for command starts. */ static int arith_for_lineno; +/* The current parser state. */ +static int parser_state; + +/* The last read token, or NULL. read_token () uses this for context + checking. */ +static int last_read_token; + +/* The token read prior to last_read_token. */ +static int token_before_that; + +/* The token read prior to token_before_that. */ +static int two_tokens_ago; + /* The line number in a script where the word in a `case WORD', `select WORD' or `for WORD' begins. This is a nested command maximum, since the array index is decremented after a case, select, or for command is parsed. */ @@ -310,7 +327,7 @@ static REDIRECTEE redir; in the case that they are preceded by a list_terminator. Members of the second group are for [[...]] commands. Members of the third group are recognized only under special circumstances. */ -%token IF THEN ELSE ELIF FI CASE ESAC FOR SELECT WHILE UNTIL DO DONE FUNCTION +%token IF THEN ELSE ELIF FI CASE ESAC FOR SELECT WHILE UNTIL DO DONE FUNCTION COPROC %token COND_START COND_END COND_ERROR %token IN BANG TIME TIMEOPT @@ -320,8 +337,9 @@ static REDIRECTEE redir; %token ARITH_CMD ARITH_FOR_EXPRS %token COND_CMD %token AND_AND OR_OR GREATER_GREATER LESS_LESS LESS_AND LESS_LESS_LESS -%token GREATER_AND SEMI_SEMI LESS_LESS_MINUS AND_GREATER LESS_GREATER -%token GREATER_BAR +%token GREATER_AND SEMI_SEMI SEMI_AND SEMI_SEMI_AND +%token LESS_LESS_MINUS AND_GREATER AND_GREATER_GREATER LESS_GREATER +%token GREATER_BAR BAR_AND /* The types that the various syntactical units return. */ @@ -332,6 +350,7 @@ static REDIRECTEE redir; %type arith_command %type cond_command %type arith_for_command +%type coproc %type function_def function_body if_command elif_clause subshell %type redirection redirection_list %type simple_command_element @@ -344,7 +363,7 @@ static REDIRECTEE redir; %left '&' ';' '\n' yacc_EOF %left AND_AND OR_OR -%right '|' +%right '|' BAR_AND %% inputunit: simple_list simple_list_terminator @@ -354,6 +373,8 @@ inputunit: simple_list simple_list_terminator global_command = $1; eof_encountered = 0; /* discard_parser_constructs (0); */ + if (parser_state & PST_CMDSUBST) + parser_state |= PST_EOFTOKEN; YYACCEPT; } | '\n' @@ -361,6 +382,8 @@ inputunit: simple_list simple_list_terminator /* Case of regular command, but not a very interesting one. Return a NULL command. */ global_command = (COMMAND *)NULL; + if (parser_state & PST_CMDSUBST) + parser_state |= PST_EOFTOKEN; YYACCEPT; } | error '\n' @@ -369,7 +392,7 @@ inputunit: simple_list simple_list_terminator global_command = (COMMAND *)NULL; eof_encountered = 0; /* discard_parser_constructs (1); */ - if (interactive) + if (interactive && parse_and_execute_level == 0) { YYACCEPT; } @@ -525,6 +548,11 @@ redirection: '>' WORD redir.filename = $2; $$ = make_redirection (1, r_err_and_out, redir); } + | AND_GREATER_GREATER WORD + { + redir.filename = $2; + $$ = make_redirection (1, r_append_err_and_out, redir); + } | NUMBER LESS_GREATER WORD { redir.filename = $3; @@ -598,6 +626,8 @@ command: simple_command } | function_def { $$ = $1; } + | coproc + { $$ = $1; } ; shell_command: for_command @@ -747,7 +777,6 @@ function_def: WORD '(' ')' newline_list function_body { $$ = make_function_def ($2, $4, function_dstart, function_bstart); } ; - function_body: shell_command { $$ = $1; } | shell_command redirection_list @@ -788,6 +817,57 @@ subshell: '(' compound_list ')' } ; +coproc: COPROC shell_command + { + $$ = make_coproc_command ("COPROC", $2); + $$->flags |= CMD_WANT_SUBSHELL|CMD_COPROC_SUBSHELL; + } + | COPROC shell_command redirection_list + { + COMMAND *tc; + + tc = $2; + if (tc->redirects) + { + register REDIRECT *t; + for (t = tc->redirects; t->next; t = t->next) + ; + t->next = $3; + } + else + tc->redirects = $3; + $$ = make_coproc_command ("COPROC", $2); + $$->flags |= CMD_WANT_SUBSHELL|CMD_COPROC_SUBSHELL; + } + | COPROC WORD shell_command + { + $$ = make_coproc_command ($2->word, $3); + $$->flags |= CMD_WANT_SUBSHELL|CMD_COPROC_SUBSHELL; + } + | COPROC WORD shell_command redirection_list + { + COMMAND *tc; + + tc = $3; + if (tc->redirects) + { + register REDIRECT *t; + for (t = tc->redirects; t->next; t = t->next) + ; + t->next = $4; + } + else + tc->redirects = $4; + $$ = make_coproc_command ($2->word, $3); + $$->flags |= CMD_WANT_SUBSHELL|CMD_COPROC_SUBSHELL; + } + | COPROC simple_command + { + $$ = make_coproc_command ("COPROC", clean_simple_command ($2)); + $$->flags |= CMD_WANT_SUBSHELL|CMD_COPROC_SUBSHELL; + } + ; + if_command: IF compound_list THEN compound_list FI { $$ = make_if_command ($2, $4, (COMMAND *)NULL); } | IF compound_list THEN compound_list ELSE compound_list FI @@ -833,8 +913,17 @@ pattern_list: newline_list pattern ')' compound_list ; case_clause_sequence: pattern_list SEMI_SEMI + { $$ = $1; } | case_clause_sequence pattern_list SEMI_SEMI { $2->next = $1; $$ = $2; } + | pattern_list SEMI_AND + { $1->flags |= CASEPAT_FALLTHROUGH; $$ = $1; } + | case_clause_sequence pattern_list SEMI_AND + { $2->flags |= CASEPAT_FALLTHROUGH; $2->next = $1; $$ = $2; } + | pattern_list SEMI_SEMI_AND + { $1->flags |= CASEPAT_TESTNEXT; $$ = $1; } + | case_clause_sequence pattern_list SEMI_SEMI_AND + { $2->flags |= CASEPAT_TESTNEXT; $2->next = $1; $$ = $2; } ; pattern: WORD @@ -921,6 +1010,13 @@ simple_list: simple_list1 $$ = $1; if (need_here_doc) gather_here_documents (); + if ((parser_state & PST_CMDSUBST) && current_token == shell_eof_token) + { + global_command = $1; + eof_encountered = 0; + rewind_input_string (); + YYACCEPT; + } } | simple_list1 '&' { @@ -930,12 +1026,26 @@ simple_list: simple_list1 $$ = command_connect ($1, (COMMAND *)NULL, '&'); if (need_here_doc) gather_here_documents (); + if ((parser_state & PST_CMDSUBST) && current_token == shell_eof_token) + { + global_command = $1; + eof_encountered = 0; + rewind_input_string (); + YYACCEPT; + } } | simple_list1 ';' { $$ = $1; if (need_here_doc) gather_here_documents (); + if ((parser_state & PST_CMDSUBST) && current_token == shell_eof_token) + { + global_command = $1; + eof_encountered = 0; + rewind_input_string (); + YYACCEPT; + } } ; @@ -961,22 +1071,26 @@ pipeline_command: pipeline { $$ = $1; } | BANG pipeline { - $2->flags |= CMD_INVERT_RETURN; + if ($2) + $2->flags |= CMD_INVERT_RETURN; $$ = $2; } | timespec pipeline { - $2->flags |= $1; + if ($2) + $2->flags |= $1; $$ = $2; } | timespec BANG pipeline { - $3->flags |= $1|CMD_INVERT_RETURN; + if ($3) + $3->flags |= $1|CMD_INVERT_RETURN; $$ = $3; } | BANG timespec pipeline { - $3->flags |= $2|CMD_INVERT_RETURN; + if ($3) + $3->flags |= $2|CMD_INVERT_RETURN; $$ = $3; } | timespec list_terminator @@ -999,9 +1113,30 @@ pipeline_command: pipeline ; -pipeline: - pipeline '|' newline_list pipeline +pipeline: pipeline '|' newline_list pipeline { $$ = command_connect ($1, $4, '|'); } + | pipeline BAR_AND newline_list pipeline + { + /* Make cmd1 |& cmd2 equivalent to cmd1 2>&1 | cmd2 */ + COMMAND *tc; + REDIRECTEE rd; + REDIRECT *r; + + tc = $1; + rd.dest = 1; + r = make_redirection (2, r_duplicating_output, rd); + if (tc->redirects) + { + register REDIRECT *t; + for (t = tc->redirects; t->next; t = t->next) + ; + t->next = r; + } + else + tc->redirects = r; + + $$ = command_connect ($1, $4, '|'); + } | command { $$ = $1; } ; @@ -1013,22 +1148,6 @@ timespec: TIME ; %% -/* Possible states for the parser that require it to do special things. */ -#define PST_CASEPAT 0x0001 /* in a case pattern list */ -#define PST_ALEXPNEXT 0x0002 /* expand next word for aliases */ -#define PST_ALLOWOPNBRC 0x0004 /* allow open brace for function def */ -#define PST_NEEDCLOSBRC 0x0008 /* need close brace */ -#define PST_DBLPAREN 0x0010 /* double-paren parsing */ -#define PST_SUBSHELL 0x0020 /* ( ... ) subshell */ -#define PST_CMDSUBST 0x0040 /* $( ... ) command substitution */ -#define PST_CASESTMT 0x0080 /* parsing a case statement */ -#define PST_CONDCMD 0x0100 /* parsing a [[...]] command */ -#define PST_CONDEXPR 0x0200 /* parsing the guts of [[...]] */ -#define PST_ARITHFOR 0x0400 /* parsing an arithmetic for command */ -#define PST_ALEXPAND 0x0800 /* OK to expand aliases - unused */ -#define PST_CMDTOKEN 0x1000 /* command token OK - unused */ -#define PST_COMPASSIGN 0x2000 /* parsing x=(...) compound assignment */ - /* Initial size to allocate for tokens, and the amount to grow them by. */ #define TOKEN_DEFAULT_INITIAL_SIZE 496 @@ -1044,22 +1163,6 @@ timespec: TIME # define expanding_alias() 0 #endif -/* The token currently being read. */ -static int current_token; - -/* The last read token, or NULL. read_token () uses this for context - checking. */ -static int last_read_token; - -/* The token read prior to last_read_token. */ -static int token_before_that; - -/* The token read prior to token_before_that. */ -static int two_tokens_ago; - -/* The current parser state. */ -static int parser_state; - /* Global var is non-zero when end of file has been reached. */ int EOF_Reached = 0; @@ -1206,10 +1309,12 @@ yy_readline_get () old_sigint = (SigHandler *)set_signal_handler (SIGINT, sigint_sighandler); interrupt_immediately++; } + terminate_immediately = 1; current_readline_line = readline (current_readline_prompt ? current_readline_prompt : ""); + terminate_immediately = 0; if (signal_is_ignored (SIGINT) == 0 && old_sigint) { interrupt_immediately--; @@ -1320,6 +1425,33 @@ with_input_from_string (string, name) init_yy_io (yy_string_get, yy_string_unget, st_string, name, location); } +/* Count the number of characters we've consumed from bash_input.location.string + and read into shell_input_line, but have not returned from shell_getc. + That is the true input location. Rewind bash_input.location.string by + that number of characters, so it points to the last character actually + consumed by the parser. */ +static void +rewind_input_string () +{ + int xchars; + + /* number of unconsumed characters in the input -- XXX need to take newlines + into account, e.g., $(...\n) */ + xchars = shell_input_line_len - shell_input_line_index; + if (bash_input.location.string[-1] == '\n') + xchars++; + + /* XXX - how to reflect bash_input.location.string back to string passed to + parse_and_execute or xparse_dolparen? xparse_dolparen needs to know how + far into the string we parsed. parse_and_execute knows where bash_input. + location.string is, and how far from orig_string that is -- that's the + number of characters the command consumed. */ + + /* bash_input.location.string - xchars should be where we parsed to */ + /* need to do more validation on xchars value for sanity -- test cases. */ + bash_input.location.string -= xchars; +} + /* **************************************************************** */ /* */ /* Let input come from STREAM. */ @@ -1339,8 +1471,19 @@ yy_stream_get () result = EOF; if (bash_input.location.file) - result = getc_with_restart (bash_input.location.file); - + { + if (interactive) + { + interrupt_immediately++; + terminate_immediately++; + } + result = getc_with_restart (bash_input.location.file); + if (interactive) + { + interrupt_immediately--; + terminate_immediately--; + } + } return (result); } @@ -1653,11 +1796,11 @@ read_a_line (remove_quoted_newline) pass_next = 0; while (1) { - c = yy_getc (); - /* Allow immediate exit if interrupted during input. */ QUIT; + c = yy_getc (); + /* Ignore null bytes in input. */ if (c == 0) { @@ -1726,10 +1869,27 @@ char * read_secondary_line (remove_quoted_newline) int remove_quoted_newline; { + char *ret; + int n, c; + prompt_string_pointer = &ps2_prompt; if (SHOULD_PROMPT()) prompt_again (); - return (read_a_line (remove_quoted_newline)); + ret = read_a_line (remove_quoted_newline); +#if defined (HISTORY) + if (remember_on_history && (parser_state & PST_HEREDOC)) + { + /* To make adding the the here-document body right, we need to rely + on history_delimiting_chars() returning \n for the first line of + the here-document body and the null string for the second and + subsequent lines, so we avoid double newlines. + current_command_line_count == 2 for the first line of the body. */ + + current_command_line_count++; + maybe_add_history (ret); + } +#endif /* HISTORY */ + return ret; } /* **************************************************************** */ @@ -1768,6 +1928,9 @@ STRING_INT_ALIST word_token_alist[] = { { "[[", COND_START }, { "]]", COND_END }, #endif +#if defined (COPROCESS_SUPPORT) + { "coproc", COPROC }, +#endif { (char *)NULL, 0} }; @@ -1782,11 +1945,15 @@ STRING_INT_ALIST other_token_alist[] = { { "<&", LESS_AND }, { ">&", GREATER_AND }, { ";;", SEMI_SEMI }, + { ";&", SEMI_AND }, + { ";;&", SEMI_SEMI_AND }, { "<<-", LESS_LESS_MINUS }, { "<<<", LESS_LESS_LESS }, { "&>", AND_GREATER }, + { "&>>", AND_GREATER_GREATER }, { "<>", LESS_GREATER }, { ">|", GREATER_BAR }, + { "|&", BAR_AND }, { "EOF", yacc_EOF }, /* Tokens whose value is the character itself */ { ">", '>' }, @@ -1858,10 +2025,15 @@ shell_getc (remove_quoted_newline) register int i; int c; unsigned char uc; - static int mustpop = 0; QUIT; + if (sigwinch_received) + { + sigwinch_received = 0; + get_new_window_size (0, (int *)0, (int *)0); + } + if (eol_ungetc_lookahead) { c = eol_ungetc_lookahead; @@ -2055,14 +2227,6 @@ shell_getc (remove_quoted_newline) if (uc) shell_input_line_index++; - if MBTEST(uc == '\\' && remove_quoted_newline && shell_input_line[shell_input_line_index] == '\n') - { - if (SHOULD_PROMPT ()) - prompt_again (); - line_number++; - goto restart_read; - } - #if defined (ALIAS) || defined (DPAREN_ARITHMETIC) /* If UC is NULL, we have reached the end of the current input string. If pushed_string_list is non-empty, it's time to pop to the previous string @@ -2078,6 +2242,14 @@ shell_getc (remove_quoted_newline) } #endif /* ALIAS || DPAREN_ARITHMETIC */ + if MBTEST(uc == '\\' && remove_quoted_newline && shell_input_line[shell_input_line_index] == '\n') + { + if (SHOULD_PROMPT ()) + prompt_again (); + line_number++; + goto restart_read; + } + if (!uc && shell_input_line_terminator == EOF) return ((shell_input_line_index != 0) ? '\n' : EOF); @@ -2125,8 +2297,8 @@ discard_until (character) } void -execute_prompt_command (command) - char *command; +execute_variable_command (command, vname) + char *command, *vname; { char *last_lastarg; sh_parser_state_t ps; @@ -2136,10 +2308,10 @@ execute_prompt_command (command) if (last_lastarg) last_lastarg = savestring (last_lastarg); - parse_and_execute (savestring (command), "PROMPT_COMMAND", SEVAL_NONINT|SEVAL_NOHIST); + parse_and_execute (savestring (command), vname, SEVAL_NONINT|SEVAL_NOHIST); restore_parser_state (&ps); - bind_variable ("_", last_lastarg); + bind_variable ("_", last_lastarg, 0); FREE (last_lastarg); if (token_to_read == '\n') /* reset_parser was called */ @@ -2186,6 +2358,15 @@ yylex () token_before_that = last_read_token; last_read_token = current_token; current_token = read_token (READ); + + if ((parser_state & PST_EOFTOKEN) && current_token == shell_eof_token) + { + current_token = yacc_EOF; + if (bash_input.type == st_string) + rewind_input_string (); + } + parser_state &= ~PST_EOFTOKEN; + return (current_token); } @@ -2196,10 +2377,14 @@ static int esacs_needed_count; void gather_here_documents () { - int r = 0; + int r; + + r = 0; while (need_here_doc) { - make_here_document (redir_stack[r++]); + parser_state |= PST_HEREDOC; + make_here_document (redir_stack[r++], line_number); + parser_state &= ~PST_HEREDOC; need_here_doc--; } } @@ -2210,7 +2395,7 @@ static int open_brace_count; #define command_token_position(token) \ (((token) == ASSIGNMENT_WORD) || \ - ((token) != SEMI_SEMI && reserved_word_acceptable(token))) + ((token) != SEMI_SEMI && (token) != SEMI_AND && (token) != SEMI_SEMI_AND && reserved_word_acceptable(token))) #define assignment_acceptable(token) \ (command_token_position(token) && ((parser_state & PST_CASEPAT) == 0)) @@ -2539,7 +2724,7 @@ read_token (command) #endif /* ALIAS */ /* Read a single word from input. Start by skipping blanks. */ - while ((character = shell_getc (1)) != EOF && whitespace (character)) + while ((character = shell_getc (1)) != EOF && shellblank (character)) ; if (character == EOF) @@ -2567,9 +2752,14 @@ read_token (command) parser_state &= ~PST_ALEXPNEXT; #endif /* ALIAS */ + parser_state &= ~PST_ASSIGNOK; + return (character); } + if (parser_state & PST_REGEXP) + goto tokword; + /* Shell meta-characters. */ if MBTEST(shellmeta (character) && ((parser_state & PST_DBLPAREN) == 0)) { @@ -2580,6 +2770,8 @@ read_token (command) parser_state &= ~PST_ALEXPNEXT; #endif /* ALIAS */ + parser_state &= ~PST_ASSIGNOK; + peek_char = shell_getc (1); if (character == peek_char) { @@ -2589,9 +2781,9 @@ read_token (command) /* If '<' then we could be at "<<" or at "<<-". We have to look ahead one more character. */ peek_char = shell_getc (1); - if (peek_char == '-') + if MBTEST(peek_char == '-') return (LESS_LESS_MINUS); - else if (peek_char == '<') + else if MBTEST(peek_char == '<') return (LESS_LESS_LESS); else { @@ -2607,7 +2799,15 @@ read_token (command) #if defined (ALIAS) parser_state &= ~PST_ALEXPNEXT; #endif /* ALIAS */ - return (SEMI_SEMI); + + peek_char = shell_getc (1); + if MBTEST(peek_char == '&') + return (SEMI_SEMI_AND); + else + { + shell_ungetc (peek_char); + return (SEMI_SEMI); + } case '&': return (AND_AND); @@ -2633,8 +2833,27 @@ read_token (command) return (LESS_GREATER); else if MBTEST(character == '>' && peek_char == '|') return (GREATER_BAR); - else if MBTEST(peek_char == '>' && character == '&') - return (AND_GREATER); + else if MBTEST(character == '&' && peek_char == '>') + { + peek_char = shell_getc (1); + if MBTEST(peek_char == '>') + return (AND_GREATER_GREATER); + else + { + shell_ungetc (peek_char); + return (AND_GREATER); + } + } + else if MBTEST(character == '|' && peek_char == '&') + return (BAR_AND); + else if MBTEST(character == ';' && peek_char == '&') + { + parser_state |= PST_CASEPAT; +#if defined (ALIAS) + parser_state &= ~PST_ALEXPNEXT; +#endif /* ALIAS */ + return (SEMI_AND); + } shell_ungetc (peek_char); @@ -2674,6 +2893,7 @@ read_token (command) if MBTEST(character == '-' && (last_read_token == LESS_AND || last_read_token == GREATER_AND)) return (character); +tokword: /* Okay, if we got this far, we have to read a word. Read one, and then check it against the known ones. */ result = read_token_word (character); @@ -2687,28 +2907,67 @@ read_token (command) /* * Match a $(...) or other grouping construct. This has to handle embedded * quoted strings ('', ``, "") and nested constructs. It also must handle - * reprompting the user, if necessary, after reading a newline (unless the - * P_NONL flag is passed), and returning correct error values if it reads - * EOF. + * reprompting the user, if necessary, after reading a newline, and returning + * correct error values if it reads EOF. */ #define P_FIRSTCLOSE 0x01 #define P_ALLOWESC 0x02 #define P_DQUOTE 0x04 +#define P_COMMAND 0x08 /* parsing a command, so look for comments */ +#define P_BACKQUOTE 0x10 /* parsing a backquoted command substitution */ + +/* Lexical state while parsing a grouping construct or $(...). */ +#define LEX_WASDOL 0x001 +#define LEX_CKCOMMENT 0x002 +#define LEX_INCOMMENT 0x004 +#define LEX_PASSNEXT 0x008 +#define LEX_RESWDOK 0x010 +#define LEX_CKCASE 0x020 +#define LEX_INCASE 0x040 +#define LEX_INHEREDOC 0x080 +#define LEX_HEREDELIM 0x100 /* reading here-doc delimiter */ +#define LEX_STRIPDOC 0x200 /* <<- strip tabs from here doc delim */ + +#define COMSUB_META(ch) ((ch) == ';' || (ch) == '&' || (ch) == '|') + +#define CHECK_NESTRET_ERROR() \ + do { \ + if (nestret == &matched_pair_error) \ + { \ + free (ret); \ + return &matched_pair_error; \ + } \ + } while (0) + +#define APPEND_NESTRET() \ + do { \ + if (nestlen) \ + { \ + RESIZE_MALLOCED_BUFFER (ret, retind, nestlen, retsize, 64); \ + strcpy (ret + retind, nestret); \ + retind += nestlen; \ + } \ + } while (0) static char matched_pair_error; + static char * parse_matched_pair (qc, open, close, lenp, flags) int qc; /* `"' if this construct is within double quotes */ int open, close; int *lenp, flags; { - int count, ch, was_dollar; - int pass_next_character, nestlen, ttranslen, start_lineno; + int count, ch, tflags; + int nestlen, ttranslen, start_lineno; char *ret, *nestret, *ttrans; int retind, retsize, rflags; +/* itrace("parse_matched_pair: open = %c close = %c flags = %d", open, close, flags); */ count = 1; - pass_next_character = was_dollar = 0; + tflags = 0; + + if ((flags & P_COMMAND) && qc != '`' && qc != '\'' && qc != '"' && (flags & P_DQUOTE) == 0) + tflags |= LEX_CKCOMMENT; /* RFLAGS is the set of flags we want to pass to recursive calls. */ rflags = (qc == '"') ? P_DQUOTE : (flags & P_DQUOTE); @@ -2719,7 +2978,8 @@ parse_matched_pair (qc, open, close, lenp, flags) start_lineno = line_number; while (count) { - ch = shell_getc ((qc != '\'' || (flags & P_ALLOWESC)) && pass_next_character == 0); + ch = shell_getc (qc != '\'' && (tflags & LEX_PASSNEXT) == 0); + if (ch == EOF) { free (ret); @@ -2732,12 +2992,33 @@ parse_matched_pair (qc, open, close, lenp, flags) if (ch == '\n' && SHOULD_PROMPT ()) prompt_again (); - if (pass_next_character) /* last char was backslash */ + /* Don't bother counting parens or doing anything else if in a comment + or part of a case statement */ + if (tflags & LEX_INCOMMENT) { - pass_next_character = 0; + /* Add this character. */ + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + + if (ch == '\n') + tflags &= ~LEX_INCOMMENT; + + continue; + } + + /* Not exactly right yet, should handle shell metacharacters, too. If + any changes are made to this test, make analogous changes to subst.c: + extract_delimited_string(). */ + else if MBTEST((tflags & LEX_CKCOMMENT) && (tflags & LEX_INCOMMENT) == 0 && ch == '#' && (retind == 0 || ret[retind-1] == '\n' || shellblank (ret[retind - 1]))) + tflags |= LEX_INCOMMENT; + + if (tflags & LEX_PASSNEXT) /* last char was backslash */ + { + tflags &= ~LEX_PASSNEXT; if (qc != '\'' && ch == '\n') /* double-quoted \ disappears. */ { - if (retind > 0) retind--; /* swallow previously-added backslash */ + if (retind > 0) + retind--; /* swallow previously-added backslash */ continue; } @@ -2747,6 +3028,16 @@ parse_matched_pair (qc, open, close, lenp, flags) ret[retind++] = ch; continue; } + /* If we're reparsing the input (e.g., from parse_string_to_word_list), + we've already prepended CTLESC to single-quoted results of $'...'. + We may want to do this for other CTLESC-quoted characters in + reparse, too. */ + else if MBTEST((parser_state & PST_REPARSE) && open == '\'' && (ch == CTLESC || ch == CTLNUL)) + { + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + continue; + } else if MBTEST(ch == CTLESC || ch == CTLNUL) /* special shell escapes */ { RESIZE_MALLOCED_BUFFER (ret, retind, 2, retsize, 64); @@ -2756,11 +3047,9 @@ parse_matched_pair (qc, open, close, lenp, flags) } else if MBTEST(ch == close) /* ending delimiter */ count--; -#if 1 /* handle nested ${...} specially. */ - else if MBTEST(open != close && was_dollar && open == '{' && ch == open) /* } */ + else if MBTEST(open != close && (tflags & LEX_WASDOL) && open == '{' && ch == open) /* } */ count++; -#endif else if MBTEST(((flags & P_FIRSTCLOSE) == 0) && ch == open) /* nested begin */ count++; @@ -2768,63 +3057,76 @@ parse_matched_pair (qc, open, close, lenp, flags) RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); ret[retind++] = ch; + /* If we just read the ending character, don't bother continuing. */ + if (count == 0) + break; + if (open == '\'') /* '' inside grouping construct */ { if MBTEST((flags & P_ALLOWESC) && ch == '\\') - pass_next_character++; + tflags |= LEX_PASSNEXT; continue; } if MBTEST(ch == '\\') /* backslashes */ - pass_next_character++; + tflags |= LEX_PASSNEXT; + +#if 0 + /* The big hammer. Single quotes aren't special in double quotes. The + problem is that Posix says the single quotes are semi-special: + within a double-quoted ${...} construct "an even number of + unescaped double-quotes or single-quotes, if any, shall occur." */ + if MBTEST(open == '{' && (flags & P_DQUOTE) && ch == '\'') /* } */ + continue; +#endif + /* Could also check open == '`' if we want to parse grouping constructs + inside old-style command substitution. */ if (open != close) /* a grouping construct */ { if MBTEST(shellquote (ch)) { /* '', ``, or "" inside $(...) or other grouping construct. */ push_delimiter (dstack, ch); - if MBTEST(was_dollar && ch == '\'') /* $'...' inside group */ + if MBTEST((tflags & LEX_WASDOL) && ch == '\'') /* $'...' inside group */ nestret = parse_matched_pair (ch, ch, ch, &nestlen, P_ALLOWESC|rflags); else nestret = parse_matched_pair (ch, ch, ch, &nestlen, rflags); pop_delimiter (dstack); - if (nestret == &matched_pair_error) - { - free (ret); - return &matched_pair_error; - } - if MBTEST(was_dollar && ch == '\'' && (extended_quote || (rflags & P_DQUOTE) == 0)) + CHECK_NESTRET_ERROR (); + + if MBTEST((tflags & LEX_WASDOL) && ch == '\'' && (extended_quote || (rflags & P_DQUOTE) == 0)) { /* Translate $'...' here. */ ttrans = ansiexpand (nestret, 0, nestlen - 1, &ttranslen); xfree (nestret); - nestret = sh_single_quote (ttrans); - free (ttrans); - nestlen = strlen (nestret); + + if ((rflags & P_DQUOTE) == 0) + { + nestret = sh_single_quote (ttrans); + free (ttrans); + nestlen = strlen (nestret); + } + else + { + nestret = ttrans; + nestlen = ttranslen; + } retind -= 2; /* back up before the $' */ } - else if MBTEST(was_dollar && ch == '"' && (extended_quote || (rflags & P_DQUOTE) == 0)) + else if MBTEST((tflags & LEX_WASDOL) && ch == '"' && (extended_quote || (rflags & P_DQUOTE) == 0)) { /* Locale expand $"..." here. */ ttrans = localeexpand (nestret, 0, nestlen - 1, start_lineno, &ttranslen); xfree (nestret); - nestret = (char *)xmalloc (ttranslen + 3); - nestret[0] = '"'; - strcpy (nestret + 1, ttrans); - nestret[ttranslen + 1] = '"'; - nestret[ttranslen += 2] = '\0'; + + nestret = sh_mkdoublequoted (ttrans, ttranslen, 0); free (ttrans); - nestlen = ttranslen; + nestlen = ttranslen + 2; retind -= 2; /* back up before the $" */ } - if (nestlen) - { - RESIZE_MALLOCED_BUFFER (ret, retind, nestlen, retsize, 64); - strcpy (ret + retind, nestret); - retind += nestlen; - } + APPEND_NESTRET (); FREE (nestret); } } @@ -2834,49 +3136,465 @@ parse_matched_pair (qc, open, close, lenp, flags) else if MBTEST(open == '"' && ch == '`') { nestret = parse_matched_pair (0, '`', '`', &nestlen, rflags); - if (nestret == &matched_pair_error) - { - free (ret); - return &matched_pair_error; - } - if (nestlen) - { - RESIZE_MALLOCED_BUFFER (ret, retind, nestlen, retsize, 64); - strcpy (ret + retind, nestret); - retind += nestlen; - } + + CHECK_NESTRET_ERROR (); + APPEND_NESTRET (); + FREE (nestret); } - else if MBTEST(was_dollar && (ch == '(' || ch == '{' || ch == '[')) /* ) } ] */ + else if MBTEST(open != '`' && (tflags & LEX_WASDOL) && (ch == '(' || ch == '{' || ch == '[')) /* ) } ] */ /* check for $(), $[], or ${} inside quoted string. */ { if (open == ch) /* undo previous increment */ count--; if (ch == '(') /* ) */ - nestret = parse_matched_pair (0, '(', ')', &nestlen, rflags); + nestret = parse_comsub (0, '(', ')', &nestlen, (rflags|P_COMMAND) & ~P_DQUOTE); else if (ch == '{') /* } */ nestret = parse_matched_pair (0, '{', '}', &nestlen, P_FIRSTCLOSE|rflags); else if (ch == '[') /* ] */ nestret = parse_matched_pair (0, '[', ']', &nestlen, rflags); - if (nestret == &matched_pair_error) + + CHECK_NESTRET_ERROR (); + APPEND_NESTRET (); + + FREE (nestret); + } + if MBTEST(ch == '$') + tflags |= LEX_WASDOL; + else + tflags &= ~LEX_WASDOL; + } + + ret[retind] = '\0'; + if (lenp) + *lenp = retind; + return ret; +} + +/* Parse a $(...) command substitution. This is messier than I'd like, and + reproduces a lot more of the token-reading code than I'd like. */ +static char * +parse_comsub (qc, open, close, lenp, flags) + int qc; /* `"' if this construct is within double quotes */ + int open, close; + int *lenp, flags; +{ + int count, ch, peekc, tflags, lex_rwlen, lex_firstind; + int nestlen, ttranslen, start_lineno; + char *ret, *nestret, *ttrans, *heredelim; + int retind, retsize, rflags, hdlen; + +/*itrace("parse_comsub: qc = `%c' open = %c close = %c", qc, open, close);*/ + count = 1; + tflags = LEX_RESWDOK; + + if ((flags & P_COMMAND) && qc != '\'' && qc != '"' && (flags & P_DQUOTE) == 0) + tflags |= LEX_CKCASE; + if ((tflags & LEX_CKCASE) && (interactive == 0 || interactive_comments)) + tflags |= LEX_CKCOMMENT; + + /* RFLAGS is the set of flags we want to pass to recursive calls. */ + rflags = (flags & P_DQUOTE); + + ret = (char *)xmalloc (retsize = 64); + retind = 0; + + start_lineno = line_number; + lex_rwlen = 0; + + heredelim = 0; + lex_firstind = -1; + + while (count) + { +comsub_readchar: + ch = shell_getc (qc != '\'' && (tflags & LEX_PASSNEXT) == 0); + + if (ch == EOF) + { +eof_error: + free (ret); + FREE (heredelim); + parser_error (start_lineno, _("unexpected EOF while looking for matching `%c'"), close); + EOF_Reached = 1; /* XXX */ + return (&matched_pair_error); + } + + /* If we hit the end of a line and are reading the contents of a here + document, and it's not the same line that the document starts on, + check for this line being the here doc delimiter. Otherwise, if + we're in a here document, mark the next character as the beginning + of a line. */ + if (ch == '\n') + { + if ((tflags & LEX_HEREDELIM) && heredelim) + { + tflags &= ~LEX_HEREDELIM; + tflags |= LEX_INHEREDOC; + lex_firstind = retind + 1; + } + else if (tflags & LEX_INHEREDOC) + { + int tind; + tind = lex_firstind; + while ((tflags & LEX_STRIPDOC) && ret[tind] == '\t') + tind++; + if (STREQN (ret + tind, heredelim, hdlen)) + { + tflags &= ~(LEX_STRIPDOC|LEX_INHEREDOC); +/*itrace("parse_comsub:%d: found here doc end `%s'", line_number, ret + tind);*/ + lex_firstind = -1; + } + else + lex_firstind = retind + 1; + } + } + + /* Possible reprompting. */ + if (ch == '\n' && SHOULD_PROMPT ()) + prompt_again (); + + /* Don't bother counting parens or doing anything else if in a comment */ + if (tflags & (LEX_INCOMMENT|LEX_INHEREDOC)) + { + /* Add this character. */ + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + + if ((tflags & LEX_INCOMMENT) && ch == '\n') + tflags &= ~LEX_INCOMMENT; + + continue; + } + + /* Skip whitespace */ + if MBTEST(shellblank (ch) && lex_rwlen == 0) + { + /* Add this character. */ + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + continue; + } + + /* Either we are looking for the start of the here-doc delimiter + (lex_firstind == -1) or we are reading one (lex_firstind >= 0). + If this character is a shell break character and we are reading + the delimiter, save it and note that we are now reading a here + document. If we've found the start of the delimiter, note it by + setting lex_firstind. Backslashes can quote shell metacharacters + in here-doc delimiters. */ + if (tflags & LEX_HEREDELIM) + { + if (lex_firstind == -1 && shellbreak (ch) == 0) + lex_firstind = retind; + else if (lex_firstind >= 0 && (tflags & LEX_PASSNEXT) == 0 && shellbreak (ch)) + { + nestret = substring (ret, lex_firstind, retind); + heredelim = string_quote_removal (nestret, 0); + free (nestret); + hdlen = STRLEN(heredelim); +/*itrace("parse_comsub:%d: found here doc delimiter `%s' (%d)", line_number, heredelim, hdlen);*/ + if (ch == '\n') + { + tflags |= LEX_INHEREDOC; + tflags &= ~LEX_HEREDELIM; + lex_firstind = retind + 1; + } + else + lex_firstind = -1; + } + } + + /* Meta-characters that can introduce a reserved word. Not perfect yet. */ + if MBTEST((tflags & LEX_RESWDOK) == 0 && (tflags & LEX_CKCASE) && (tflags & LEX_INCOMMENT) == 0 && shellmeta(ch)) + { + /* Add this character. */ + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + peekc = shell_getc (1); + if (ch == peekc && (ch == '&' || ch == '|' || ch == ';')) /* two-character tokens */ + { + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = peekc; +/*itrace("parse_comsub:%d: set lex_reswordok = 1, ch = `%c'", line_number, ch); */ + tflags |= LEX_RESWDOK; + lex_rwlen = 0; + continue; + } + else if (ch == '\n' || COMSUB_META(ch)) + { + shell_ungetc (peekc); + tflags |= LEX_RESWDOK; +/*itrace("parse_comsub:%d: set lex_reswordok = 1, ch = `%c'", line_number, ch);*/ + lex_rwlen = 0; + continue; + } + else if (ch == EOF) + goto eof_error; + else + { + /* `unget' the character we just added and fall through */ + retind--; + shell_ungetc (peekc); + } + } + + /* If we can read a reserved word, try to read one. */ + if (tflags & LEX_RESWDOK) + { + if MBTEST(islower (ch)) + { + /* Add this character. */ + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + lex_rwlen++; + continue; + } + else if MBTEST(lex_rwlen == 4 && shellbreak (ch)) + { + if (STREQN (ret + retind - 4, "case", 4)) +{ + tflags |= LEX_INCASE; +/*itrace("parse_comsub:%d: found `case', lex_incase -> 1", line_number);*/ +} + else if (STREQN (ret + retind - 4, "esac", 4)) +{ + tflags &= ~LEX_INCASE; +/*itrace("parse_comsub:%d: found `esac', lex_incase -> 0", line_number);*/ +} + tflags &= ~LEX_RESWDOK; + } + else if (shellbreak (ch) == 0) +{ + tflags &= ~LEX_RESWDOK; +/*itrace("parse_comsub:%d: found `%c', lex_reswordok -> 0", line_number, ch);*/ +} + } + + if MBTEST((tflags & LEX_INCOMMENT) == 0 && (tflags & LEX_CKCASE) && ch == '<') + { + /* Add this character. */ + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + peekc = shell_getc (1); + if (peekc == EOF) + goto eof_error; + if (peekc == ch) + { + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = peekc; + peekc = shell_getc (1); + if (peekc == EOF) + goto eof_error; + if (peekc == '-') + { + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = peekc; + tflags |= LEX_STRIPDOC; + } + else + shell_ungetc (peekc); + tflags |= LEX_HEREDELIM; + lex_firstind = -1; + continue; + } + else + ch = peekc; /* fall through and continue XXX - this skips comments if peekc == '#' */ + } + /* Not exactly right yet, should handle shell metacharacters, too. If + any changes are made to this test, make analogous changes to subst.c: + extract_delimited_string(). */ + else if MBTEST((tflags & LEX_CKCOMMENT) && (tflags & LEX_INCOMMENT) == 0 && ch == '#' && (retind == 0 || ret[retind-1] == '\n' || shellblank (ret[retind - 1]))) + tflags |= LEX_INCOMMENT; + + if (tflags & LEX_PASSNEXT) /* last char was backslash */ + { + tflags &= ~LEX_PASSNEXT; + if (qc != '\'' && ch == '\n') /* double-quoted \ disappears. */ + { + if (retind > 0) + retind--; /* swallow previously-added backslash */ + continue; + } + + RESIZE_MALLOCED_BUFFER (ret, retind, 2, retsize, 64); + if MBTEST(ch == CTLESC || ch == CTLNUL) + ret[retind++] = CTLESC; + ret[retind++] = ch; + continue; + } + else if MBTEST(ch == CTLESC || ch == CTLNUL) /* special shell escapes */ + { + RESIZE_MALLOCED_BUFFER (ret, retind, 2, retsize, 64); + ret[retind++] = CTLESC; + ret[retind++] = ch; + continue; + } +#if 0 + else if MBTEST((tflags & LEX_INCASE) && ch == close && close == ')') + tflags &= ~LEX_INCASE; /* XXX */ +#endif + else if MBTEST(ch == close && (tflags & LEX_INCASE) == 0) /* ending delimiter */ +{ + count--; +/*itrace("parse_comsub:%d: found close: count = %d", line_number, count);*/ +} + else if MBTEST(((flags & P_FIRSTCLOSE) == 0) && (tflags & LEX_INCASE) == 0 && ch == open) /* nested begin */ + count++; + + /* Add this character. */ + RESIZE_MALLOCED_BUFFER (ret, retind, 1, retsize, 64); + ret[retind++] = ch; + + /* If we just read the ending character, don't bother continuing. */ + if (count == 0) + break; + + if MBTEST(ch == '\\') /* backslashes */ + tflags |= LEX_PASSNEXT; + + if MBTEST(shellquote (ch)) + { + /* '', ``, or "" inside $(...). */ + push_delimiter (dstack, ch); + if MBTEST((tflags & LEX_WASDOL) && ch == '\'') /* $'...' inside group */ + nestret = parse_matched_pair (ch, ch, ch, &nestlen, P_ALLOWESC|rflags); + else + nestret = parse_matched_pair (ch, ch, ch, &nestlen, rflags); + pop_delimiter (dstack); + CHECK_NESTRET_ERROR (); + + if MBTEST((tflags & LEX_WASDOL) && ch == '\'' && (extended_quote || (rflags & P_DQUOTE) == 0)) { - free (ret); - return &matched_pair_error; + /* Translate $'...' here. */ + ttrans = ansiexpand (nestret, 0, nestlen - 1, &ttranslen); + xfree (nestret); + + if ((rflags & P_DQUOTE) == 0) + { + nestret = sh_single_quote (ttrans); + free (ttrans); + nestlen = strlen (nestret); + } + else + { + nestret = ttrans; + nestlen = ttranslen; + } + retind -= 2; /* back up before the $' */ } - if (nestlen) + else if MBTEST((tflags & LEX_WASDOL) && ch == '"' && (extended_quote || (rflags & P_DQUOTE) == 0)) { - RESIZE_MALLOCED_BUFFER (ret, retind, nestlen, retsize, 64); - strcpy (ret + retind, nestret); - retind += nestlen; + /* Locale expand $"..." here. */ + ttrans = localeexpand (nestret, 0, nestlen - 1, start_lineno, &ttranslen); + xfree (nestret); + + nestret = sh_mkdoublequoted (ttrans, ttranslen, 0); + free (ttrans); + nestlen = ttranslen + 2; + retind -= 2; /* back up before the $" */ } + + APPEND_NESTRET (); FREE (nestret); } - was_dollar = MBTEST(ch == '$'); + else if MBTEST((tflags & LEX_WASDOL) && (ch == '(' || ch == '{' || ch == '[')) /* ) } ] */ + /* check for $(), $[], or ${} inside command substitution. */ + { + if ((tflags & LEX_INCASE) == 0 && open == ch) /* undo previous increment */ + count--; + if (ch == '(') /* ) */ + nestret = parse_comsub (0, '(', ')', &nestlen, (rflags|P_COMMAND) & ~P_DQUOTE); + else if (ch == '{') /* } */ + nestret = parse_matched_pair (0, '{', '}', &nestlen, P_FIRSTCLOSE|rflags); + else if (ch == '[') /* ] */ + nestret = parse_matched_pair (0, '[', ']', &nestlen, rflags); + + CHECK_NESTRET_ERROR (); + APPEND_NESTRET (); + + FREE (nestret); + } + if MBTEST(ch == '$') + tflags |= LEX_WASDOL; + else + tflags &= ~LEX_WASDOL; } + FREE (heredelim); ret[retind] = '\0'; if (lenp) *lenp = retind; +/*itrace("parse_comsub:%d: returning `%s'", line_number, ret);*/ + return ret; +} + +/* XXX - this needs to handle functionality like subst.c:no_longjmp_on_fatal_error; + maybe extract_command_subst should handle it. */ +char * +xparse_dolparen (base, string, indp, flags) + char *base; + char *string; + int *indp; + int flags; +{ + sh_parser_state_t ps; + int orig_ind, nc, sflags; + char *ret, *s, *ep, *ostring; + + /*yydebug = 1;*/ + orig_ind = *indp; + ostring = string; + + sflags = SEVAL_NONINT|SEVAL_NOHIST|SEVAL_NOFREE; + if (flags & SX_NOLONGJMP) + sflags |= SEVAL_NOLONGJMP; + save_parser_state (&ps); + + /*(*/ + parser_state |= PST_CMDSUBST|PST_EOFTOKEN; /* allow instant ')' */ /*(*/ + shell_eof_token = ')'; + parse_string (string, "command substitution", sflags, &ep); + + restore_parser_state (&ps); + reset_parser (); + if (interactive) + token_to_read = 0; + + /* Need to find how many characters parse_and_execute consumed, update + *indp, if flags != 0, copy the portion of the string parsed into RET + and return it. If flags & 1 (EX_NOALLOC) we can return NULL. */ + + /*(*/ + if (ep[-1] != ')') + { +#if DEBUG + if (ep[-1] != '\n') + itrace("xparse_dolparen:%d: ep[-1] != RPAREN (%d), ep = `%s'", line_number, ep[-1], ep); +#endif + while (ep > ostring && ep[-1] == '\n') ep--; + } + + nc = ep - ostring; + *indp = ep - base - 1; + + /*(*/ +#if DEBUG + if (base[*indp] != ')') + itrace("xparse_dolparen:%d: base[%d] != RPAREN (%d), base = `%s'", line_number, *indp, base[*indp], base); +#endif + + if (flags & SX_NOALLOC) + return (char *)NULL; + + if (nc == 0) + { + ret = xmalloc (1); + ret[0] = '\0'; + } + else + ret = substring (ostring, 0, nc - 1); + return ret; } @@ -2889,8 +3607,8 @@ static int parse_dparen (c) int c; { - int cmdtyp, len, sline; - char *wval, *wv2; + int cmdtyp, sline; + char *wval; WORD_DESC *wd; #if defined (ARITH_FOR_COMMAND) @@ -2900,9 +3618,9 @@ parse_dparen (c) cmdtyp = parse_arith_cmd (&wval, 0); if (cmdtyp == 1) { - wd = make_word (wval); + wd = alloc_word_desc (); + wd->word = wval; yylval.word_list = make_word_list (wd, (WORD_LIST *)NULL); - free (wval); return (ARITH_FOR_EXPRS); } else @@ -2914,21 +3632,14 @@ parse_dparen (c) if (reserved_word_acceptable (last_read_token)) { sline = line_number; -#if 0 - cmdtyp = parse_arith_cmd (&wval, 1); -#else + cmdtyp = parse_arith_cmd (&wval, 0); -#endif if (cmdtyp == 1) /* arithmetic command */ { - wd = make_word (wval); -#if 0 - wd->flags = W_QUOTED; -#else - wd->flags = W_QUOTED|W_NOSPLIT|W_NOGLOB; -#endif + wd = alloc_word_desc (); + wd->word = wval; + wd->flags = W_QUOTED|W_NOSPLIT|W_NOGLOB|W_DQUOTE; yylval.word_list = make_word_list (wd, (WORD_LIST *)NULL); - free (wval); /* make_word copies it */ return (ARITH_CMD); } else if (cmdtyp == 0) /* nested subshell */ @@ -3113,7 +3824,7 @@ cond_term () if (term) term->flags |= CMD_INVERT_RETURN; } - else if (tok == WORD && test_unop (yylval.word->word)) + else if (tok == WORD && yylval.word->word[0] == '-' && yylval.word->word[2] == 0 && test_unop (yylval.word->word)) { op = yylval.word; tok = read_token (READ); @@ -3147,8 +3858,11 @@ cond_term () if (tok == WORD && test_binop (yylval.word->word)) op = yylval.word; #if defined (COND_REGEXP) - else if (tok == WORD && STREQ (yylval.word->word,"=~")) - op = yylval.word; + else if (tok == WORD && STREQ (yylval.word->word, "=~")) + { + op = yylval.word; + parser_state |= PST_REGEXP; + } #endif else if (tok == '<' || tok == '>') op = make_word_from_token (tok); /* ( */ @@ -3179,6 +3893,7 @@ cond_term () /* rhs */ tok = read_token (READ); + parser_state &= ~PST_REGEXP; if (tok == WORD) { tright = make_cond_node (COND_TERM, yylval.word, (COND_COM *)NULL, (COND_COM *)NULL); @@ -3246,6 +3961,7 @@ token_is_assignment (t, i) return r; } +/* XXX - possible changes here for `+=' */ static int token_is_ident (t, i) char *t; @@ -3278,6 +3994,10 @@ read_token_word (character) /* DOLLAR_PRESENT becomes non-zero if we see a `$'. */ int dollar_present; + /* COMPOUND_ASSIGNMENT becomes non-zero if we are parsing a compound + assignment. */ + int compound_assignment; + /* QUOTED becomes non-zero if we see one of ("), ('), (`), or (\). */ int quoted; @@ -3297,7 +4017,7 @@ read_token_word (character) token_index = 0; all_digit_token = DIGIT (character); - dollar_present = quoted = pass_next_character = 0; + dollar_present = quoted = pass_next_character = compound_assignment = 0; for (;;) { @@ -3307,7 +4027,7 @@ read_token_word (character) if (pass_next_character) { pass_next_character = 0; - goto got_character; + goto got_escaped_character; } cd = current_delimiter (dstack); @@ -3343,7 +4063,7 @@ read_token_word (character) if MBTEST(shellquote (character)) { push_delimiter (dstack, character); - ttok = parse_matched_pair (character, character, character, &ttoklen, 0); + ttok = parse_matched_pair (character, character, character, &ttoklen, (character == '`') ? P_COMMAND : 0); pop_delimiter (dstack); if (ttok == &matched_pair_error) return -1; /* Bail immediately. */ @@ -3359,9 +4079,34 @@ read_token_word (character) goto next_character; } +#ifdef COND_REGEXP + /* When parsing a regexp as a single word inside a conditional command, + we need to special-case characters special to both the shell and + regular expressions. Right now, that is only '(' and '|'. */ /*)*/ + if MBTEST((parser_state & PST_REGEXP) && (character == '(' || character == '|')) /*)*/ + { + if (character == '|') + goto got_character; + + push_delimiter (dstack, character); + ttok = parse_matched_pair (cd, '(', ')', &ttoklen, 0); + pop_delimiter (dstack); + if (ttok == &matched_pair_error) + return -1; /* Bail immediately. */ + RESIZE_MALLOCED_BUFFER (token, token_index, ttoklen + 2, + token_buffer_size, TOKEN_DEFAULT_GROW_SIZE); + token[token_index++] = character; + strcpy (token + token_index, ttok); + token_index += ttoklen; + FREE (ttok); + dollar_present = all_digit_token = 0; + goto next_character; + } +#endif /* COND_REGEXP */ + #ifdef EXTENDED_GLOB /* Parse a ksh-style extended pattern matching specification. */ - if (extended_glob && PATTERN_CHAR (character)) + if MBTEST(extended_glob && PATTERN_CHAR (character)) { peek_char = shell_getc (1); if MBTEST(peek_char == '(') /* ) */ @@ -3406,7 +4151,7 @@ read_token_word (character) history literally rather than causing a possibly- incorrect `;' to be added. ) */ push_delimiter (dstack, peek_char); - ttok = parse_matched_pair (cd, '(', ')', &ttoklen, 0); + ttok = parse_comsub (cd, '(', ')', &ttoklen, P_COMMAND); pop_delimiter (dstack); } else @@ -3442,13 +4187,14 @@ read_token_word (character) { ttrans = ansiexpand (ttok, 0, ttoklen - 1, &ttranslen); free (ttok); + /* Insert the single quotes and correctly quote any embedded single quotes (allowed because P_ALLOWESC was passed to parse_matched_pair). */ ttok = sh_single_quote (ttrans); free (ttrans); + ttranslen = strlen (ttok); ttrans = ttok; - ttranslen = strlen (ttrans); } else { @@ -3457,12 +4203,9 @@ read_token_word (character) free (ttok); /* Add the double quotes back */ - ttok = (char *)xmalloc (ttranslen + 3); - ttok[0] = '"'; - strcpy (ttok + 1, ttrans); - ttok[ttranslen + 1] = '"'; - ttok[ttranslen += 2] = '\0'; + ttok = sh_mkdoublequoted (ttrans, ttranslen, 0); free (ttrans); + ttranslen += 2; ttrans = ttok; } @@ -3498,8 +4241,12 @@ read_token_word (character) } #if defined (ARRAY_VARS) - /* Identify possible array subscript assignment; match [...] */ - else if MBTEST(character == '[' && token_index > 0 && assignment_acceptable (last_read_token) && token_is_ident (token, token_index)) /* ] */ + /* Identify possible array subscript assignment; match [...]. If + parser_state&PST_COMPASSIGN, we need to parse [sub]=words treating + `sub' as if it were enclosed in double quotes. */ + else if MBTEST(character == '[' && /* ] */ + ((token_index > 0 && assignment_acceptable (last_read_token) && token_is_ident (token, token_index)) || + (token_index == 0 && (parser_state&PST_COMPASSIGN)))) { ttok = parse_matched_pair (cd, '[', ']', &ttoklen, 0); if (ttok == &matched_pair_error) @@ -3515,7 +4262,7 @@ read_token_word (character) goto next_character; } /* Identify possible compound array variable assignment. */ - else if MBTEST(character == '=' && token_index > 0 && token_is_assignment (token, token_index)) + else if MBTEST(character == '=' && token_index > 0 && (assignment_acceptable (last_read_token) || (parser_state & PST_ASSIGNOK)) && token_is_assignment (token, token_index)) { peek_char = shell_getc (1); if MBTEST(peek_char == '(') /* ) */ @@ -3536,7 +4283,12 @@ read_token_word (character) token[token_index++] = ')'; FREE (ttok); all_digit_token = 0; + compound_assignment = 1; +#if 1 goto next_character; +#else + goto got_token; /* ksh93 seems to do this */ +#endif } else shell_ungetc (peek_char); @@ -3553,12 +4305,14 @@ read_token_word (character) got_character: - all_digit_token &= DIGIT (character); - dollar_present |= character == '$'; - if (character == CTLESC || character == CTLNUL) token[token_index++] = CTLESC; + got_escaped_character: + + all_digit_token &= DIGIT (character); + dollar_present |= character == '$'; + token[token_index++] = character; RESIZE_MALLOCED_BUFFER (token, token_index, 1, token_buffer_size, @@ -3630,7 +4384,9 @@ got_token: if (dollar_present) the_word->flags |= W_HASDOLLAR; if (quoted) - the_word->flags |= W_QUOTED; + the_word->flags |= W_QUOTED; /*(*/ + if (compound_assignment && token[token_index-1] == ')') + the_word->flags |= W_COMPASSIGN; /* A word is an assignment if it appears at the beginning of a simple command, or after another assignment word. This is context-dependent, so it cannot be handled in the grammar. */ @@ -3642,6 +4398,16 @@ got_token: the_word->flags |= W_NOSPLIT; } + if (command_token_position (last_read_token)) + { + struct builtin *b; + b = builtin_address_internal (token, 0); + if (b && (b->flags & ASSIGNMENT_BUILTIN)) + parser_state |= PST_ASSIGNOK; + else if (STREQ (token, "eval") || STREQ (token, "let")) + parser_state |= PST_ASSIGNOK; + } + yylval.word = the_word; result = ((the_word->flags & (W_ASSIGNMENT|W_NOSPLIT)) == (W_ASSIGNMENT|W_NOSPLIT)) @@ -3692,14 +4458,21 @@ reserved_word_acceptable (toksym) case IF: case OR_OR: case SEMI_SEMI: + case SEMI_AND: + case SEMI_SEMI_AND: case THEN: case TIME: case TIMEOPT: + case COPROC: case UNTIL: case WHILE: case 0: return 1; default: +#if defined (COPROCESS_SUPPORT) + if (last_read_token == WORD && token_before_that == COPROC) + return 1; +#endif return 0; } } @@ -3750,9 +4523,10 @@ reset_readline_prompt () /* A list of tokens which can be followed by newlines, but not by semi-colons. When concatenating multiple lines of history, the newline separator for such tokens is replaced with a space. */ -static int no_semi_successors[] = { +static const int no_semi_successors[] = { '\n', '{', '(', ')', ';', '&', '|', - CASE, DO, ELSE, IF, SEMI_SEMI, THEN, UNTIL, WHILE, AND_AND, OR_OR, IN, + CASE, DO, ELSE, IF, SEMI_SEMI, SEMI_AND, SEMI_SEMI_AND, THEN, UNTIL, + WHILE, AND_AND, OR_OR, IN, 0 }; @@ -3767,7 +4541,13 @@ history_delimiting_chars () if (dstack.delimiter_depth != 0) return ("\n"); - + + /* We look for current_command_line_count == 2 because we are looking to + add the first line of the body of the here document (the second line + of the command). */ + if (parser_state & PST_HEREDOC) + return (current_command_line_count == 2 ? "\n" : ""); + /* First, handle some special cases. */ /*(*/ /* If we just read `()', assume it's a function definition, and don't @@ -3793,12 +4573,14 @@ history_delimiting_chars () { /* Tricky. `for i\nin ...' should not have a semicolon, but `for i\ndo ...' should. We do what we can. */ - for (i = shell_input_line_index; whitespace(shell_input_line[i]); i++) + for (i = shell_input_line_index; whitespace (shell_input_line[i]); i++) ; if (shell_input_line[i] && shell_input_line[i] == 'i' && shell_input_line[i+1] == 'n') return " "; return ";"; } + else if (two_tokens_ago == CASE && token_before_that == WORD && (parser_state & PST_CASESTMT)) + return " "; for (i = 0; no_semi_successors[i]; i++) { @@ -3916,7 +4698,7 @@ decode_prompt_string (string) int last_exit_value; #if defined (PROMPT_STRING_DECODE) int result_size, result_index; - int c, n; + int c, n, i; char *temp, octal_string[4]; struct tm *tm; time_t the_time; @@ -4078,7 +4860,7 @@ decode_prompt_string (string) case 'W': { /* Use the value of PWD because it is much more efficient. */ - char t_string[PATH_MAX], *t; + char t_string[PATH_MAX]; int tlen; temp = get_string_value ("PWD"); @@ -4103,7 +4885,7 @@ decode_prompt_string (string) #define ROOT_PATH(x) ((x)[0] == '/' && (x)[1] == 0) #define DOUBLE_SLASH_ROOT(x) ((x)[0] == '/' && (x)[1] == '/' && (x)[2] == 0) /* Abbreviate \W as ~ if $PWD == $HOME */ - if (c == 'W' && (((t = get_string_value ("HOME")) == 0) || STREQ (t, temp) == 0)) + if (c == 'W' && (((t = get_string_value ("HOME")) == 0) || STREQ (t, t_string) == 0)) { if (ROOT_PATH (t_string) == 0 && DOUBLE_SLASH_ROOT (t_string) == 0) { @@ -4119,6 +4901,7 @@ decode_prompt_string (string) no longer than PATH_MAX - 1 characters. */ strcpy (t_string, polite_directory_format (t_string)); + temp = trim_pathname (t_string, PATH_MAX - 1); /* If we're going to be expanding the prompt string later, quote the directory name. */ if (promptvars || posixly_correct) @@ -4188,9 +4971,12 @@ decode_prompt_string (string) break; } temp = (char *)xmalloc (3); - temp[0] = '\001'; - temp[1] = (c == '[') ? RL_PROMPT_START_IGNORE : RL_PROMPT_END_IGNORE; - temp[2] = '\0'; + n = (c == '[') ? RL_PROMPT_START_IGNORE : RL_PROMPT_END_IGNORE; + i = 0; + if (n == CTLESC || n == CTLNUL) + temp[i++] = CTLESC; + temp[i++] = n; + temp[i] = '\0'; goto add_string; #endif /* READLINE */ @@ -4250,7 +5036,7 @@ not_escape: if (promptvars || posixly_correct) { last_exit_value = last_command_exit_value; - list = expand_prompt_string (result, Q_DOUBLE_QUOTES); + list = expand_prompt_string (result, Q_DOUBLE_QUOTES, 0); free (result); result = string_list (list); dispose_words (list); @@ -4286,15 +5072,15 @@ yyerror (msg) } static char * -error_token_from_token (token) - int token; +error_token_from_token (tok) + int tok; { char *t; - if (t = find_token_in_alist (token, word_token_alist, 0)) + if (t = find_token_in_alist (tok, word_token_alist, 0)) return t; - if (t = find_token_in_alist (token, other_token_alist, 0)) + if (t = find_token_in_alist (tok, other_token_alist, 0)) return t; t = (char *)NULL; @@ -4569,7 +5355,7 @@ parse_string_to_word_list (s, flags, whom) wl = (WORD_LIST *)NULL; if (flags & 1) - parser_state |= PST_COMPASSIGN; + parser_state |= PST_COMPASSIGN|PST_REPARSE; while ((tok = read_token (READ)) != yacc_EOF) { @@ -4582,7 +5368,7 @@ parse_string_to_word_list (s, flags, whom) line_number = orig_line_number + line_number - 1; orig_current_token = current_token; current_token = tok; - yyerror ((char *)NULL); /* does the right thing */ + yyerror (NULL); /* does the right thing */ current_token = orig_current_token; if (wl) dispose_words (wl); @@ -4609,7 +5395,7 @@ parse_string_to_word_list (s, flags, whom) shell_input_line_terminator = orig_input_terminator; if (flags & 1) - parser_state &= ~PST_COMPASSIGN; + parser_state &= ~(PST_COMPASSIGN|PST_REPARSE); if (wl == &parse_string_error) { @@ -4628,18 +5414,21 @@ parse_compound_assignment (retlenp) int *retlenp; { WORD_LIST *wl, *rl; - int tok, orig_line_number, orig_token_size; + int tok, orig_line_number, orig_token_size, orig_last_token, assignok; char *saved_token, *ret; saved_token = token; orig_token_size = token_buffer_size; orig_line_number = line_number; + orig_last_token = last_read_token; last_read_token = WORD; /* WORD to allow reserved words here */ token = (char *)NULL; token_buffer_size = 0; + assignok = parser_state&PST_ASSIGNOK; /* XXX */ + wl = (WORD_LIST *)NULL; /* ( */ parser_state |= PST_COMPASSIGN; @@ -4657,7 +5446,7 @@ parse_compound_assignment (retlenp) if (tok == yacc_EOF) /* ( */ parser_error (orig_line_number, _("unexpected EOF while looking for matching `)'")); else - yyerror ((char *)NULL); /* does the right thing */ + yyerror(NULL); /* does the right thing */ if (wl) dispose_words (wl); wl = &parse_string_error; @@ -4682,7 +5471,8 @@ parse_compound_assignment (retlenp) jump_to_top_level (DISCARD); } - last_read_token = WORD; + last_read_token = orig_last_token; /* XXX - was WORD? */ + if (wl) { rl = REVERSE_LIST (wl, WORD_LIST *); @@ -4694,6 +5484,10 @@ parse_compound_assignment (retlenp) if (retlenp) *retlenp = (ret && *ret) ? strlen (ret) : 0; + + if (assignok) + parser_state |= PST_ASSIGNOK; + return ret; } @@ -4712,7 +5506,7 @@ save_parser_state (ps) #endif if (ps == 0) - ps = xmalloc (sizeof (sh_parser_state_t)); + ps = (sh_parser_state_t *)xmalloc (sizeof (sh_parser_state_t)); if (ps == 0) return ((sh_parser_state_t *)NULL);