From 3328ab5af72319f76fe9be3910a8e07d38b14de2 Mon Sep 17 00:00:00 2001 From: Father Chrysostomos Date: Wed, 29 Aug 2012 12:35:49 -0700 Subject: [PATCH] Finish fixing here-docs in re-evals MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This commit fixes here-docs in single-line re-evals in files (as opposed to evals) and here-docs in single-line quote-like operators inside re-evals. In both cases, the here-doc parser has to look into an outer lexing scope to find the here-doc body. And in both cases it was stomping on PL_linestr (the current line buffer) while PL_sublex_info.re_eval_start was pointing to an offset in that buffer. (re_eval_start is used to construct the string to include in the regexp’s stringification once the lexer reaches the end of the re-eval.) Fixing this entails moving re_eval_start and re_eval_str to PL_parser->lex_shared, making the pre-localised values visible. This is so that the code that peeks into an outer linestr buffer to steal the here-doc body can set up re_eval_str in the right scope. (re_eval_str is used to store the re-eval text when the here- oc parser has no choice but to modify linestr; see also commit db4442662555874019.) It also entails making the stream-based parser (i.e., that reads from an input stream) leave PL_linestr alone, instead of clobbering it and then reconstructing part of it afterwards. --- parser.h | 2 ++ perl.h | 2 -- t/base/lex.t | 14 ++++++++++- toke.c | 77 +++++++++++++++++++++++++++++++++++------------------------- 4 files changed, 60 insertions(+), 35 deletions(-) diff --git a/parser.h b/parser.h index 95083d6..97e016d 100644 --- a/parser.h +++ b/parser.h @@ -27,6 +27,8 @@ typedef struct yy_lexshared { struct yy_lexshared *ls_prev; SV *ls_linestr; /* mirrors PL_parser->linestr */ char *ls_bufptr; /* mirrors PL_parser->bufptr */ + char *re_eval_start; /* start of "(?{..." text */ + SV *re_eval_str; /* "(?{...})" text */ line_t herelines; /* number of lines in here-doc */ } LEXSHARED; diff --git a/perl.h b/perl.h index 7f907df..f42849c 100644 --- a/perl.h +++ b/perl.h @@ -3456,8 +3456,6 @@ struct _sublex_info { U8 super_state; /* lexer state to save */ U16 sub_inwhat; /* "lex_inwhat" to use */ OP *sub_op; /* "lex_op" to use */ - char *re_eval_start;/* start of "(?{..." text */ - SV *re_eval_str; /* "(?{...})" text */ SV *repl; /* replacement of s/// or y/// */ }; diff --git a/t/base/lex.t b/t/base/lex.t index 4c4981d..9868d4c 100644 --- a/t/base/lex.t +++ b/t/base/lex.t @@ -1,6 +1,6 @@ #!./perl -print "1..66\n"; +print "1..68\n"; $x = 'x'; @@ -326,3 +326,15 @@ END eval 'print qq ;ok 66 - eval ending with semicolon\n;' or print "not ok 66 - eval ending with semicolon\n"; + +print "not " unless qr/(?{<linestart - buf; last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; - re_eval_start_pos = PL_sublex_info.re_eval_start ? - PL_sublex_info.re_eval_start - buf : 0; + re_eval_start_pos = PL_parser->lex_shared->re_eval_start ? + PL_parser->lex_shared->re_eval_start - buf : 0; buf = sv_grow(linestr, len); @@ -944,8 +944,8 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len) PL_parser->last_uni = buf + last_uni_pos; if (PL_parser->last_lop) PL_parser->last_lop = buf + last_lop_pos; - if (PL_sublex_info.re_eval_start) - PL_sublex_info.re_eval_start = buf + re_eval_start_pos; + if (PL_parser->lex_shared->re_eval_start) + PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos; return buf; } @@ -2471,8 +2471,6 @@ S_sublex_push(pTHX) SAVEI32(PL_lex_starts); SAVEI8(PL_lex_state); SAVESPTR(PL_lex_repl); - SAVEPPTR(PL_sublex_info.re_eval_start); - SAVESPTR(PL_sublex_info.re_eval_str); SAVEVPTR(PL_lex_inpat); SAVEI16(PL_lex_inwhat); SAVECOPLINE(PL_curcop); @@ -2499,8 +2497,6 @@ S_sublex_push(pTHX) PL_lex_repl = PL_sublex_info.repl; PL_lex_stuff = NULL; PL_sublex_info.repl = NULL; - PL_sublex_info.re_eval_start = NULL; - PL_sublex_info.re_eval_str = NULL; PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); @@ -4683,7 +4679,7 @@ Perl_yylex(pTHX) } /* Convert (?{...}) and friends to 'do {...}' */ if (PL_lex_inpat && *PL_bufptr == '(') { - PL_sublex_info.re_eval_start = PL_bufptr; + PL_parser->lex_shared->re_eval_start = PL_bufptr; PL_bufptr += 2; if (*PL_bufptr != '{') PL_bufptr++; @@ -4742,28 +4738,30 @@ Perl_yylex(pTHX) re_eval_str. If the here-doc body’s length equals the previous value of re_eval_start, re_eval_start will now be null. So check re_eval_str as well. */ - if (PL_sublex_info.re_eval_start || PL_sublex_info.re_eval_str) { + if (PL_parser->lex_shared->re_eval_start + || PL_parser->lex_shared->re_eval_str) { SV *sv; if (*PL_bufptr != ')') Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'"); PL_bufptr++; /* having compiled a (?{..}) expression, return the original * text too, as a const */ - if (PL_sublex_info.re_eval_str) { - sv = PL_sublex_info.re_eval_str; - PL_sublex_info.re_eval_str = NULL; - SvCUR_set(sv, PL_bufptr - PL_sublex_info.re_eval_start); + if (PL_parser->lex_shared->re_eval_str) { + sv = PL_parser->lex_shared->re_eval_str; + PL_parser->lex_shared->re_eval_str = NULL; + SvCUR_set(sv, + PL_bufptr - PL_parser->lex_shared->re_eval_start); SvPV_shrink_to_cur(sv); } - else sv = newSVpvn(PL_sublex_info.re_eval_start, - PL_bufptr - PL_sublex_info.re_eval_start); + else sv = newSVpvn(PL_parser->lex_shared->re_eval_start, + PL_bufptr - PL_parser->lex_shared->re_eval_start); start_force(PL_curforce); /* XXX probably need a CURMAD(something) here */ NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, sv); force_next(THING); - PL_sublex_info.re_eval_start = NULL; + PL_parser->lex_shared->re_eval_start = NULL; PL_expect = XTERM; return REPORT(','); } @@ -9680,14 +9678,24 @@ S_scan_heredoc(pTHX_ register char *s) CopLINE_set(PL_curcop, (line_t)PL_multi_start-1); missingterm(PL_tokenbuf + 1); } + sv_setpvn(herewas,bufptr,d-bufptr+1); + sv_setpvn(tmpstr,d+1,s-d); + s += len - 1; + /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we + check shared->re_eval_str. */ + if (shared->re_eval_start || shared->re_eval_str) { + /* Set aside the rest of the regexp */ + if (!shared->re_eval_str) + shared->re_eval_str = + newSVpvn(shared->re_eval_start, + SvEND(linestr) - shared->re_eval_start); + shared->re_eval_start -= s-d; + } if (CxTYPE(cx) == CXt_EVAL && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL && cx->blk_eval.cur_text == linestr) { cx->blk_eval.cur_text = newSVsv(linestr); SvSCREAM_on(cx->blk_eval.cur_text); } - sv_setpvn(herewas,bufptr,d-bufptr+1); - sv_setpvn(tmpstr,d+1,s-d); - s += len - 1; sv_catpvn(herewas,s,bufend-s); Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char); SvCUR_set(linestr, @@ -9730,14 +9738,14 @@ S_scan_heredoc(pTHX_ register char *s) d points to the newline before the body of the heredoc. */ /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we - check PL_sublex_info.re_eval_str. */ - if (PL_sublex_info.re_eval_start || PL_sublex_info.re_eval_str) { + check shared->re_eval_str. */ + if (shared->re_eval_start || shared->re_eval_str) { /* Set aside the rest of the regexp */ - if (!PL_sublex_info.re_eval_str) - PL_sublex_info.re_eval_str = - newSVpvn(PL_sublex_info.re_eval_start, - PL_bufend - PL_sublex_info.re_eval_start); - PL_sublex_info.re_eval_start -= s-d; + if (!shared->re_eval_str) + shared->re_eval_str = + newSVpvn(shared->re_eval_start, + PL_bufend - shared->re_eval_start); + shared->re_eval_start -= s-d; } if (CxTYPE(cx) == CXt_EVAL && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL && cx->blk_eval.cur_text == PL_linestr) { @@ -9752,10 +9760,15 @@ S_scan_heredoc(pTHX_ register char *s) } else { + SV *linestr_save; streaming: sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */ term = PL_tokenbuf[1]; len--; + linestr_save = PL_linestr; /* must restore this afterwards */ + d = s - SvCUR(herewas) - 1; /* s gets set to this afterwards */ + PL_linestr = newSVpvs(""); + PL_bufptr = PL_bufend = s = SvPVX(PL_linestr); while (s >= PL_bufend) { /* multiple line string? */ #ifdef PERL_MAD if (PL_madskills) { @@ -9773,6 +9786,7 @@ S_scan_heredoc(pTHX_ register char *s) && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) { SvREFCNT_dec(herewas); SvREFCNT_dec(tmpstr); + SvREFCNT_dec(linestr_save); CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1); missingterm(PL_tokenbuf + 1); } @@ -9804,12 +9818,11 @@ S_scan_heredoc(pTHX_ register char *s) PL_bufend[-1] = '\n'; #endif if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) { - STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr); - *(SvPVX(PL_linestr) + off ) = ' '; - lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1); - sv_catsv(PL_linestr,herewas); + SvREFCNT_dec(PL_linestr); + PL_linestr = linestr_save; + PL_linestart = SvPVX(linestr_save); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); - s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */ + s = d; } else { s = PL_bufend; -- 2.7.4