From 074b1c594a0c131c2fee2e237282c7fc3bc00586 Mon Sep 17 00:00:00 2001 From: Father Chrysostomos Date: Wed, 29 Aug 2012 20:37:44 -0700 Subject: [PATCH] toke.c:scan_heredoc: less pointer fiddling; one less SV MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The loop for reading lines of input to find the end of a here-doc has always checked to see whether the cursor (s) was at the end of the current buffer: while (s >= PL_bufend) { /* multiple line string? */ (Actually, when it was added in perl 3.000, it was in scanstr and that loop was not specific to here-docs, but also applied to multi-line strings.) The code inside the loop ends up fiddling with s by setting it explicitly to the end of the buffer or the end of the here-doc marker, minus one to make sure it does not coincide with the end of the buffer. This doesn’t make any sense, and it makes the rest of this function more complicated. Because the loop used to be outside the else block, it was also reached for a here-doc inside a string eval, but the code for that ensured the condition for the while loop was never true. Since the while loop set s to one less than it needed to be set to, in order to break out of it, it had to have s++ just after the loop. That s++ was reached also by the eval code, which, consequently, had to adjust its value of s. That adjustment actually took place farther up in the function, where the herewas SV was assigned to. (herewas contains the text after the here-doc marker to the end of the line.) The beginning of herewas would point to the last character of the here-doc marker inside an eval, so that subtracting SvCUR(herewas) from the buffer end would result in an adjusted pointer. herewas is currently not actually used, except for the length. Until recently, the text inside it would be copied back into PL_linestr to recreate where the lexer needed to continue (because PL_linestr was being clobbered). That no longer happens. 
So we can get rid of herewas altogether. Since it is in an else block, the stream-based parser does not need to fiddle pointers to exit the loop. It can just break explicitly. So the s++ can also go, requiring changes (and simplifications) to the eval code. The comment about it being a multiline string is irrelevant and can go, too. It dates from when that line was actually in scanstr and applied to quoted strings containing line breaks. --- toke.c | 44 ++++++++++++-------------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/toke.c b/toke.c index 02f226a..8ac754e 100644 --- a/toke.c +++ b/toke.c @@ -9502,12 +9502,11 @@ STATIC char * S_scan_heredoc(pTHX_ register char *s) { dVAR; - SV *herewas; I32 op_type = OP_SCALAR; I32 len; SV *tmpstr; char term; - const char *found_newline = 0; + char *found_newline = 0; char *d; char *e; char *peek; @@ -9590,17 +9589,8 @@ S_scan_heredoc(pTHX_ register char *s) s = olds; } #endif - if ((infile && !PL_lex_inwhat) - || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s))) { - herewas = newSVpvn(s,PL_bufend-s); - } - else { -#ifdef PERL_MAD - herewas = newSVpvn(s-1,found_newline-s+1); -#else - s--; - herewas = newSVpvn(s,found_newline-s); -#endif + if (!infile || PL_lex_inwhat) { + found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s); } #ifdef PERL_MAD if (PL_madskills) { @@ -9611,13 +9601,9 @@ S_scan_heredoc(pTHX_ register char *s) PL_thisstuff = newSVpvn(tstart, s - tstart); } #endif - s += SvCUR(herewas); #ifdef PERL_MAD stuffstart = s - SvPVX(PL_linestr); - - if (found_newline) - s--; #endif tmpstr = newSV_type(SVt_PVIV); @@ -9641,8 +9627,7 @@ S_scan_heredoc(pTHX_ register char *s) */ SV *linestr; char *bufptr, *bufend; - char * const olds = s - SvCUR(herewas); - char * const real_olds = s; + char * const olds = s; PERL_CONTEXT * const cx = &cxstack[cxstack_ix]; do { shared = shared->ls_prev; @@ -9656,7 +9641,7 @@ S_scan_heredoc(pTHX_ register char *s) most lexing scope. 
In a file, shared->ls_linestr at that level is just one line, so there is no body to steal. */ if (infile && !shared->ls_prev) { - s = real_olds; + s = olds; goto streaming; } } while (!(s = (char *)memchr( @@ -9698,12 +9683,11 @@ S_scan_heredoc(pTHX_ register char *s) SvCUR(linestr) - (s-d)); s = olds; - goto retval; } else if (!infile || found_newline) { - char * const olds = s - SvCUR(herewas); + char * const olds = s; PERL_CONTEXT * const cx = &cxstack[cxstack_ix]; - d = s; + d = s = found_newline ? found_newline : PL_bufend; while (s < PL_bufend && (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) { if (*s++ == '\n') @@ -9758,10 +9742,10 @@ S_scan_heredoc(pTHX_ register char *s) term = PL_tokenbuf[1]; len--; linestr_save = PL_linestr; /* must restore this afterwards */ - d = s - SvCUR(herewas) - 1; /* s gets set to this afterwards */ + d = s; /* and this */ PL_linestr = newSVpvs(""); - PL_bufptr = PL_bufend = s = SvPVX(PL_linestr); - while (s >= PL_bufend) { /* multiple line string? */ + PL_bufend = SvPVX(PL_linestr); + while (1) { #ifdef PERL_MAD if (PL_madskills) { tstart = SvPVX(PL_linestr) + stuffstart; @@ -9771,7 +9755,7 @@ S_scan_heredoc(pTHX_ register char *s) PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart); } #endif - PL_bufptr = s; + PL_bufptr = PL_bufend; CopLINE_set(PL_curcop, PL_multi_start + shared->herelines); if (!lex_next_chunk(LEX_NO_TERM) @@ -9812,20 +9796,17 @@ S_scan_heredoc(pTHX_ register char *s) PL_linestart = SvPVX(linestr_save); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); s = d; + break; } else { - s = PL_bufend; sv_catsv(tmpstr,PL_linestr); } } } - s++; -retval: PL_multi_end = CopLINE(PL_curcop); if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { SvPV_shrink_to_cur(tmpstr); } - SvREFCNT_dec(herewas); if (!IN_BYTES) { if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) SvUTF8_on(tmpstr); @@ -9837,7 +9818,6 @@ retval: return s; interminable: - SvREFCNT_dec(herewas); SvREFCNT_dec(tmpstr); CopLINE_set(PL_curcop, 
(line_t)PL_multi_start - 1); missingterm(PL_tokenbuf + 1); -- 2.7.4