From 60ac52eb5d5157fbe18e603a2d72ef6249b62083 Mon Sep 17 00:00:00 2001 From: Father Chrysostomos Date: Wed, 4 Jul 2012 00:17:55 -0700 Subject: [PATCH] Fix our sub with proto MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit yylex must emit exactly one token each time it is called. Some- times yylex needs to parse several tokens at once. That’s what the various force functions are for. But that is also what PL_pending_ident is for. The various force_next, force_word, force_ident, etc., functions keep a stack of tokens (PL_nextval/PL_nexttype) that yylex will check imme- diately when called. PL_pending_ident is used to track a single identifier that yylex will hand off to S_pending_ident to handle. S_pending_ident is the only piece of code for resolving an identi- fier that could be lexical but could also be a package variable. force_ident assumes it is looking for a package variable. force_* takes precedence over PL_pending_ident. All this means that, if an identifier needs to be looked up in the pad on the next yylex invocation, it has to use PL_pending_ident, and the force_* functions cannot be used at the same time. Not realising that, when I made ‘our sub foo’ store the sub in the pad I also made ‘our sub foo ($)’ into a syntax error, because it was being parsed as ‘our sub ($) foo’ (the prototype being ‘forced’); i.e., the pending tokens were being pulled out of the ‘queue’ in the wrong order. (I put queue in quotes, because one queue and one unre- lated buffer together don’t exactly count as ‘a queue’.) Changing PL_pending_ident to have precedence over the force stack breaks ext/XS-APItest/t/swaptwostmts.t, because the statement-parsing interface does not localise PL_pending_ident. It could be changed to do that, but I don’t think it is the right solution. Having two separate pending token mechanisms makes things need- lessly fragile. This commit eliminates the PL_pending_ident mechanism and modifies S_pending_ident (renaming it in the process to S_force_ident_maybe_lex) to work with the force mechanism. I was going to merge it with force_ident, but the two make incompatible assumptions that just complicate the code if merged. S_pending_ident needs the sigil in the same string buffer, to pass to the pad inter- face. force_ident needs to be able to work without a sigil present. So now we only have one queue for pending tokens and the order is more predictable. --- embed.fnc | 1 + parser.h | 2 +- proto.h | 1 + sv.c | 1 - t/cmd/lexsub.t | 6 +++- toke.c | 95 +++++++++++++++++++++++++++++----------------------------- 6 files changed, 55 insertions(+), 51 deletions(-) diff --git a/embed.fnc b/embed.fnc index fa36a18..b2f1887 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2148,6 +2148,7 @@ s |U8* |add_utf16_textfilter|NN U8 *const s|bool reversed s |void |checkcomma |NN const char *s|NN const char *name \ |NN const char *what s |void |force_ident |NN const char *s|int kind +so |void |force_ident_maybe_lex|char pit s |void |incline |NN const char *s s |int |intuit_method |NN char *s|NULLOK GV *gv|NULLOK CV *cv s |int |intuit_more |NN char *s diff --git a/parser.h b/parser.h index 97e016d..05735bf 100644 --- a/parser.h +++ b/parser.h @@ -70,8 +70,8 @@ typedef struct yy_parser { I32 multi_end; /* last line of multi-line string */ char multi_open; /* delimiter of said string */ char multi_close; /* delimiter of said string */ - char pending_ident; /* pending identifier lookup */ bool preambled; + /*** 8-bit hole ***/ I32 lex_allbrackets;/* (), [], {}, ?: bracket count */ SUBLEXINFO sublex_info; LEXSHARED *lex_shared; diff --git a/proto.h b/proto.h index c2c858c..e6cba14 100644 --- a/proto.h +++ b/proto.h @@ -7050,6 +7050,7 @@ STATIC void S_force_ident(pTHX_ const char *s, int kind) #define PERL_ARGS_ASSERT_FORCE_IDENT \ assert(s) +STATIC void S_force_ident_maybe_lex(pTHX_ char pit); STATIC void S_force_next(pTHX_ I32 type); STATIC char* S_force_strict_version(pTHX_ char *s) __attribute__nonnull__(pTHX_1); diff --git a/sv.c b/sv.c index 2bf9e8f..63523dd 100644 --- a/sv.c +++ b/sv.c @@ -11231,7 +11231,6 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param) parser->multi_open = proto->multi_open; parser->multi_start = proto->multi_start; parser->multi_end = proto->multi_end; - parser->pending_ident = proto->pending_ident; parser->preambled = proto->preambled; parser->sublex_info = proto->sublex_info; /* XXX not quite right */ parser->linestr = sv_dup_inc(proto->linestr, param); diff --git a/t/cmd/lexsub.t b/t/cmd/lexsub.t index 02bb71c..c21190b 100644 --- a/t/cmd/lexsub.t +++ b/t/cmd/lexsub.t @@ -7,7 +7,7 @@ BEGIN { *bar::is = *is; } no warnings 'deprecated'; -plan 19; +plan 20; { our sub foo { 42 } @@ -74,3 +74,7 @@ sub bar::c { 43 } is eval { ::d },'d42', 'our sub foo; applies to subsequent sub foo {}'; } } +{ + our sub e ($); + is prototype "::e", '$', 'our sub with proto'; +} diff --git a/toke.c b/toke.c index 6912863..e258bf6 100644 --- a/toke.c +++ b/toke.c @@ -66,7 +66,6 @@ Individual members of C have their own documentation. #define PL_multi_start (PL_parser->multi_start) #define PL_multi_open (PL_parser->multi_open) #define PL_multi_close (PL_parser->multi_close) -#define PL_pending_ident (PL_parser->pending_ident) #define PL_preambled (PL_parser->preambled) #define PL_sublex_info (PL_parser->sublex_info) #define PL_linestr (PL_parser->linestr) @@ -111,10 +110,8 @@ Individual members of C have their own documentation. # define PL_nextval (PL_parser->nextval) #endif -/* This can't be done with embed.fnc, because struct yy_parser contains a - member named pending_ident, which clashes with the generated #define */ -static int -S_pending_ident(pTHX); +#define force_ident_maybe_lex(p) \ + (PL_bufptr = s, S_force_ident_maybe_lex(aTHX_ p)) static const char ident_too_long[] = "Identifier too long"; @@ -4183,10 +4180,6 @@ Perl_madlex(pTHX) PL_thiswhite = 0; PL_thismad = 0; - /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */ - if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident) - return S_pending_ident(aTHX); - /* previous token ate up our whitespace? */ if (!PL_lasttoke && PL_nextwhite) { PL_thiswhite = PL_nextwhite; @@ -4451,11 +4444,6 @@ Perl_yylex(pTHX) pv_display(tmp, s, strlen(s), 0, 60)); SvREFCNT_dec(tmp); } ); - /* check if there's an identifier for us to look at */ - if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident) - return REPORT(S_pending_ident(aTHX)); - - /* no identifier pending identification */ switch (PL_lex_state) { #ifdef COMMENTARY @@ -5503,7 +5491,8 @@ Perl_yylex(pTHX) if (!PL_tokenbuf[1]) { PREREF('%'); } - PL_pending_ident = '%'; + PL_expect = XOPERATOR; + force_ident_maybe_lex('%'); TERM('%'); case '^': @@ -5992,7 +5981,7 @@ Perl_yylex(pTHX) sizeof PL_tokenbuf - 1, TRUE); if (PL_tokenbuf[1]) { PL_expect = XOPERATOR; - PL_pending_ident = '&'; + force_ident_maybe_lex('&'); } else PREREF('&'); @@ -6228,7 +6217,7 @@ Perl_yylex(pTHX) if (!PL_tokenbuf[1]) PREREF(DOLSHARP); PL_expect = XOPERATOR; - PL_pending_ident = '#'; + force_ident_maybe_lex('#'); TOKEN(DOLSHARP); } @@ -6346,7 +6335,7 @@ Perl_yylex(pTHX) PL_expect = XTERM; /* print $fh <<"EOF" */ } } - PL_pending_ident = '$'; + force_ident_maybe_lex('$'); TOKEN('$'); case '@': @@ -6383,7 +6372,8 @@ Perl_yylex(pTHX) } } } - PL_pending_ident = '@'; + PL_expect = XOPERATOR; + force_ident_maybe_lex('@'); TERM('@'); case '/': /* may be division, defined-or, or pattern */ @@ -7429,7 +7419,7 @@ Perl_yylex(pTHX) if (len && !keyword(PL_tokenbuf + 1, len, 0)) { d = SKIPSPACE1(d); if (*d == '(') { - PL_pending_ident = '&'; + force_ident_maybe_lex('&'); s = d; } } @@ -8252,14 +8242,21 @@ Perl_yylex(pTHX) SvUTF8_on(PL_subname); have_name = TRUE; + if (key == KEY_our) { + *PL_tokenbuf = '&'; + Copy(tmpbuf, PL_tokenbuf+1, len, char); + PL_tokenbuf[len+1] = '\0'; + } + #ifdef PERL_MAD - if (key != KEY_our) { - start_force(0); - CURMAD('X', nametoke); - CURMAD('_', tmpwhite); + start_force(0); + CURMAD('X', nametoke); + CURMAD('_', tmpwhite); + if (key == KEY_our) + force_ident_maybe_lex('&'); + else (void) force_word(PL_oldbufptr + tboffset, WORD, FALSE, TRUE, TRUE); - } s = SKIPSPACE2(d,tmpwhite); #else @@ -8422,9 +8419,7 @@ Perl_yylex(pTHX) TOKEN(ANONSUB); } if (key == KEY_our) { - PL_pending_ident = *PL_tokenbuf = '&'; - Copy(tmpbuf, PL_tokenbuf+1, len, char); - PL_tokenbuf[len+1] = '\0'; + force_ident_maybe_lex('&'); } #ifndef PERL_MAD else @@ -8593,21 +8588,18 @@ Perl_yylex(pTHX) #pragma segment Main #endif -static int -S_pending_ident(pTHX) +static void +S_force_ident_maybe_lex(pTHX_ char pit) { dVAR; + OP *o; + int force_type; PADOFFSET tmp = 0; - /* pit holds the identifier we read and pending_ident is reset */ - char pit = PL_pending_ident; const STRLEN tokenbuf_len = strlen(PL_tokenbuf); /* All routes through this function want to know if there is a colon. */ const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len); - PL_pending_ident = 0; - /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */ - DEBUG_T({ PerlIO_printf(Perl_debug_log, - "### Pending identifier '%s'\n", PL_tokenbuf); }); + start_force(PL_curforce); /* if we're in a my(), we can't allow dynamics here. $foo'bar has already been turned into $foo::bar, so @@ -8629,10 +8621,11 @@ S_pending_ident(pTHX) PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf), UTF ? SVf_UTF8 : 0); - pl_yylval.opval = newOP(OP_PADANY, 0); - pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, + o = newOP(OP_PADANY, 0); + o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0); - return PRIVATEREF; + force_type = PRIVATEREF; + goto doforce; } } @@ -8653,8 +8646,8 @@ S_pending_ident(pTHX) SV * const sym = newSVhek(stashname); sv_catpvs(sym, "::"); sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES )); - pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym); - pl_yylval.opval->op_private = OPpCONST_ENTERED; + o = (OP*)newSVOP(OP_CONST, 0, sym); + o->op_private = OPpCONST_ENTERED; gv_fetchsv(sym, (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) @@ -8664,12 +8657,14 @@ S_pending_ident(pTHX) : (PL_tokenbuf[0] == '@') ? SVt_PVAV : (PL_tokenbuf[0] == '&') ? SVt_PVGV : SVt_PVHV)); - return WORD; + force_type = WORD; + goto doforce; } - pl_yylval.opval = newOP(OP_PADANY, 0); - pl_yylval.opval->op_targ = tmp; - return PRIVATEREF; + o = newOP(OP_PADANY, 0); + o->op_targ = tmp; + force_type = PRIVATEREF; + goto doforce; } } @@ -8697,10 +8692,10 @@ S_pending_ident(pTHX) } /* build ops for a bareword */ - pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1, + o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, UTF ? SVf_UTF8 : 0 )); - pl_yylval.opval->op_private = OPpCONST_ENTERED; + o->op_private = OPpCONST_ENTERED; gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1, (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), @@ -8708,7 +8703,11 @@ S_pending_ident(pTHX) : (PL_tokenbuf[0] == '@') ? SVt_PVAV : (PL_tokenbuf[0] == '&') ? SVt_PVGV : SVt_PVHV)); - return WORD; + force_type = WORD; + + doforce: + NEXTVAL_NEXTTOKE.opval = o; + force_next(force_type); } STATIC void -- 2.7.4