From 2cd61cdbd64958437da8294b84109bc8b63ab360 Mon Sep 17 00:00:00 2001 From: Ilya Zakharevich Date: Thu, 9 Jul 1998 14:47:25 -0400 Subject: [PATCH] add patch, along with all the missing bits, and doc tweaks Message-Id: <199807092247.SAA06314@monk.mps.ohio-state.edu> Subject: Re: [PATCH 5.004_71] Secure RE update p4raw-id: //depot/perl@1444 --- ObjXSub.h | 4 + embed.h | 1 + embedvar.h | 6 + ext/Opcode/Opcode.pm | 2 +- ext/re/re.pm | 37 ++- global.sym | 1 + globals.c | 2 + interp.sym | 2 + intrpvar.h | 3 + op.c | 12 +- opcode.h | 643 ++++++++++++++++++++++++++------------------------- opcode.pl | 2 + pp_ctl.c | 12 + pp_proto.h | 1 + regcomp.c | 14 +- sv.c | 5 +- t/op/misc.t | 8 +- t/op/pat.t | 47 +++- t/op/subst.t | 1 - 19 files changed, 454 insertions(+), 349 deletions(-) diff --git a/ObjXSub.h b/ObjXSub.h index 6cb2baa..f8adba8 100644 --- a/ObjXSub.h +++ b/ObjXSub.h @@ -478,6 +478,8 @@ #define regindent pPerl->Perl_regindent #undef reginput #define reginput pPerl->Perl_reginput +#undef reginterp_cnt +#define reginterp_cnt pPerl->Perl_reginterp_cnt #undef reglastparen #define reglastparen pPerl->Perl_reglastparen #undef regnarrate @@ -552,6 +554,8 @@ #define secondgv pPerl->Perl_secondgv #undef seen_zerolen #define seen_zerolen pPerl->Perl_seen_zerolen +#undef seen_evals +#define seen_evals pPerl->Perl_seen_evals #undef sh_path #define sh_path pPerl->Perl_sh_path #undef siggv diff --git a/embed.h b/embed.h index 2543d0b..6e8f5e9 100644 --- a/embed.h +++ b/embed.h @@ -703,6 +703,7 @@ #define pp_refgen Perl_pp_refgen #define pp_regcmaybe Perl_pp_regcmaybe #define pp_regcomp Perl_pp_regcomp +#define pp_regcreset Perl_pp_regcreset #define pp_rename Perl_pp_rename #define pp_repeat Perl_pp_repeat #define pp_require Perl_pp_require diff --git a/embedvar.h b/embedvar.h index 2468aa3..2f69973 100644 --- a/embedvar.h +++ b/embedvar.h @@ -206,6 +206,7 @@ #define regflags (curinterp->Iregflags) #define regindent (curinterp->Iregindent) #define reginput (curinterp->Ireginput) +#define reginterp_cnt (curinterp->Ireginterp_cnt) #define reglastparen (curinterp->Ireglastparen) #define regnarrate (curinterp->Iregnarrate) #define regnaughty (curinterp->Iregnaughty) @@ -229,6 +230,7 @@ #define screamfirst (curinterp->Iscreamfirst) #define screamnext (curinterp->Iscreamnext) #define secondgv (curinterp->Isecondgv) +#define seen_evals (curinterp->Iseen_evals) #define seen_zerolen (curinterp->Iseen_zerolen) #define siggv (curinterp->Isiggv) #define sortcop (curinterp->Isortcop) @@ -384,6 +386,7 @@ #define Iregflags regflags #define Iregindent regindent #define Ireginput reginput +#define Ireginterp_cnt reginterp_cnt #define Ireglastparen reglastparen #define Iregnarrate regnarrate #define Iregnaughty regnaughty @@ -407,6 +410,7 @@ #define Iscreamfirst screamfirst #define Iscreamnext screamnext #define Isecondgv secondgv +#define Iseen_evals seen_evals #define Iseen_zerolen seen_zerolen #define Isiggv siggv #define Isortcop sortcop @@ -624,6 +628,7 @@ #define regflags Perl_regflags #define regindent Perl_regindent #define reginput Perl_reginput +#define reginterp_cnt Perl_reginterp_cnt #define reglastparen Perl_reglastparen #define regnarrate Perl_regnarrate #define regnaughty Perl_regnaughty @@ -647,6 +652,7 @@ #define screamfirst Perl_screamfirst #define screamnext Perl_screamnext #define secondgv Perl_secondgv +#define seen_evals Perl_seen_evals #define seen_zerolen Perl_seen_zerolen #define siggv Perl_siggv #define sortcop Perl_sortcop diff --git a/ext/Opcode/Opcode.pm b/ext/Opcode/Opcode.pm index 717b97f..af1ab1d 100644 --- a/ext/Opcode/Opcode.pm +++ b/ext/Opcode/Opcode.pm @@ -398,7 +398,7 @@ These are a hotchpotch of opcodes still waiting to be considered bless -- could be used to change ownership of objects (reblessing) - pushre regcmaybe regcomp subst substcont + pushre regcmaybe regcreset regcomp subst substcont sprintf prtf -- can core dump diff --git a/ext/re/re.pm b/ext/re/re.pm index 53873fc..a033d97 100644 --- a/ext/re/re.pm +++ b/ext/re/re.pm @@ -11,17 +11,22 @@ re - Perl pragma to alter regular expression behaviour use re 'taint'; ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here + $pat = '(?{ $foo = 1 })'; use re 'eval'; - /foo(?{ $foo = 1 })bar/; # won't fail (when not under -T switch) + /foo${pat}bar/; # won't fail (when not under -T switch) { no re 'taint'; # the default ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here no re 'eval'; # the default - /foo(?{ $foo = 1 })bar/; # disallowed (with or without -T switch) + /foo${pat}bar/; # disallowed (with or without -T switch) } + use re 'debug'; + /^(.*)$/s; # output debugging info + # during compile and run time + =head1 DESCRIPTION When C is in effect, and a tainted string is the target @@ -31,11 +36,29 @@ on tainted data aren't meant to extract safe substrings, but to perform other transformations. When C is in effect, a regex is allowed to contain -C<(?{ ... })> zero-width assertions (which may not be interpolated in -the regex). That is normally disallowed, since it is a potential security -risk. Note that this pragma is ignored when perl detects tainted data, -i.e. evaluation is always disallowed with tainted data. See -L. +C<(?{ ... })> zero-width assertions even if regular expression contains +variable interpolation. That is normally disallowed, since it is a +potential security risk. Note that this pragma is ignored when the regular +expression is obtained from tainted data, i.e. evaluation is always +disallowed with tainted regular expresssions. See L. + +For the purpose of this pragma, interpolation of preexisting regular +expressions is I considered a variable interpolation, thus + + /foo${pat}bar/ + +I allowed if $pat is a preexisting regular expressions, even +if $pat contains C<(?{ ... })> assertions. + +When C is in effect, perl emits debugging messages when +compiling and using regular expressions. The output is the same as that +obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the +B<-Dr> switch. It may be quite voluminous depending on the complexity +of the match. +See L for additional info. + +I is not lexically scoped.> It has +both compile-time and run-time effects. See L. diff --git a/global.sym b/global.sym index cdd10ba..ff97e45 100644 --- a/global.sym +++ b/global.sym @@ -769,6 +769,7 @@ pp_redo pp_ref pp_refgen pp_regcmaybe +pp_regcreset pp_regcomp pp_rename pp_repeat diff --git a/globals.c b/globals.c index 1daf4f1..451b461 100644 --- a/globals.c +++ b/globals.c @@ -60,6 +60,8 @@ #define pp_rcatline CPerlObj::Perl_pp_rcatline #undef pp_regcmaybe #define pp_regcmaybe CPerlObj::Perl_pp_regcmaybe +#undef pp_regcreset +#define pp_regcreset CPerlObj::Perl_pp_regcreset #undef pp_regcomp #define pp_regcomp CPerlObj::Perl_pp_regcomp #undef pp_match diff --git a/interp.sym b/interp.sym index 6270324..687abb7 100644 --- a/interp.sym +++ b/interp.sym @@ -142,6 +142,7 @@ regeol regflags regindent reginput +reginterp_cnt reglastparen regnarrate regnaughty @@ -170,6 +171,7 @@ screamfirst screamnext secondgv seen_zerolen +seen_evals siggv sortcop sortcxix diff --git a/intrpvar.h b/intrpvar.h index 73cc5f3..1239338 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -213,6 +213,7 @@ PERLVAR(Iregflags, U16) /* are we folding, multilining? */ PERLVAR(Iregseen, U32) /* from regcomp.c */ PERLVAR(Iseen_zerolen, I32) /* from regcomp.c */ +PERLVAR(Iseen_evals, I32) /* from regcomp.c */ PERLVAR(Iregcomp_rx, regexp *) /* from regcomp.c */ PERLVAR(Iextralen, I32) /* from regcomp.c */ PERLVAR(Icolorset, int) /* from regcomp.c */ @@ -245,6 +246,8 @@ PERLVARI(Iregcompp, regcomp_t, FUNC_NAME_TO_PTR(pregcomp)) /* Pointer to RE compiler */ PERLVARI(Iregexecp, regexec_t, FUNC_NAME_TO_PTR(regexec_flags)) /* Pointer to RE executer */ +PERLVARI(Ireginterp_cnt, int, 0) /* Whether `Regexp' + was interpolated. */ PERLVARI(Iin_clean_objs,bool, FALSE) /* from sv.c */ PERLVARI(Iin_clean_all, bool, FALSE) /* from sv.c */ diff --git a/op.c b/op.c index fe6d8e8..6897a8b 100644 --- a/op.c +++ b/op.c @@ -2146,19 +2146,23 @@ pmruntime(OP *o, OP *expr, OP *repl) op_free(expr); } else { - if (pm->op_pmflags & PMf_KEEP) - expr = newUNOP(OP_REGCMAYBE,0,expr); + if (pm->op_pmflags & PMf_KEEP || !(hints & HINT_RE_EVAL)) + expr = newUNOP((!(hints & HINT_RE_EVAL) + ? OP_REGCRESET + : OP_REGCMAYBE),0,expr); Newz(1101, rcop, 1, LOGOP); rcop->op_type = OP_REGCOMP; rcop->op_ppaddr = ppaddr[OP_REGCOMP]; rcop->op_first = scalar(expr); - rcop->op_flags |= OPf_KIDS; + rcop->op_flags |= ((hints & HINT_RE_EVAL) + ? (OPf_SPECIAL | OPf_KIDS) + : OPf_KIDS); rcop->op_private = 1; rcop->op_other = o; /* establish postfix order */ - if (pm->op_pmflags & PMf_KEEP) { + if (pm->op_pmflags & PMf_KEEP || !(hints & HINT_RE_EVAL)) { LINKLIST(expr); rcop->op_next = expr; ((UNOP*)expr)->op_first->op_next = (OP*)rcop; diff --git a/opcode.h b/opcode.h index 47dd777..a1e9174 100644 --- a/opcode.h +++ b/opcode.h @@ -33,327 +33,328 @@ typedef enum { OP_READLINE, /* 26 */ OP_RCATLINE, /* 27 */ OP_REGCMAYBE, /* 28 */ - OP_REGCOMP, /* 29 */ - OP_MATCH, /* 30 */ - OP_SUBST, /* 31 */ - OP_SUBSTCONT, /* 32 */ - OP_TRANS, /* 33 */ - OP_SASSIGN, /* 34 */ - OP_AASSIGN, /* 35 */ - OP_CHOP, /* 36 */ - OP_SCHOP, /* 37 */ - OP_CHOMP, /* 38 */ - OP_SCHOMP, /* 39 */ - OP_DEFINED, /* 40 */ - OP_UNDEF, /* 41 */ - OP_STUDY, /* 42 */ - OP_POS, /* 43 */ - OP_PREINC, /* 44 */ - OP_I_PREINC, /* 45 */ - OP_PREDEC, /* 46 */ - OP_I_PREDEC, /* 47 */ - OP_POSTINC, /* 48 */ - OP_I_POSTINC, /* 49 */ - OP_POSTDEC, /* 50 */ - OP_I_POSTDEC, /* 51 */ - OP_POW, /* 52 */ - OP_MULTIPLY, /* 53 */ - OP_I_MULTIPLY, /* 54 */ - OP_DIVIDE, /* 55 */ - OP_I_DIVIDE, /* 56 */ - OP_MODULO, /* 57 */ - OP_I_MODULO, /* 58 */ - OP_REPEAT, /* 59 */ - OP_ADD, /* 60 */ - OP_I_ADD, /* 61 */ - OP_SUBTRACT, /* 62 */ - OP_I_SUBTRACT, /* 63 */ - OP_CONCAT, /* 64 */ - OP_STRINGIFY, /* 65 */ - OP_LEFT_SHIFT, /* 66 */ - OP_RIGHT_SHIFT, /* 67 */ - OP_LT, /* 68 */ - OP_I_LT, /* 69 */ - OP_GT, /* 70 */ - OP_I_GT, /* 71 */ - OP_LE, /* 72 */ - OP_I_LE, /* 73 */ - OP_GE, /* 74 */ - OP_I_GE, /* 75 */ - OP_EQ, /* 76 */ - OP_I_EQ, /* 77 */ - OP_NE, /* 78 */ - OP_I_NE, /* 79 */ - OP_NCMP, /* 80 */ - OP_I_NCMP, /* 81 */ - OP_SLT, /* 82 */ - OP_SGT, /* 83 */ - OP_SLE, /* 84 */ - OP_SGE, /* 85 */ - OP_SEQ, /* 86 */ - OP_SNE, /* 87 */ - OP_SCMP, /* 88 */ - OP_BIT_AND, /* 89 */ - OP_BIT_XOR, /* 90 */ - OP_BIT_OR, /* 91 */ - OP_NEGATE, /* 92 */ - OP_I_NEGATE, /* 93 */ - OP_NOT, /* 94 */ - OP_COMPLEMENT, /* 95 */ - OP_ATAN2, /* 96 */ - OP_SIN, /* 97 */ - OP_COS, /* 98 */ - OP_RAND, /* 99 */ - OP_SRAND, /* 100 */ - OP_EXP, /* 101 */ - OP_LOG, /* 102 */ - OP_SQRT, /* 103 */ - OP_INT, /* 104 */ - OP_HEX, /* 105 */ - OP_OCT, /* 106 */ - OP_ABS, /* 107 */ - OP_LENGTH, /* 108 */ - OP_SUBSTR, /* 109 */ - OP_VEC, /* 110 */ - OP_INDEX, /* 111 */ - OP_RINDEX, /* 112 */ - OP_SPRINTF, /* 113 */ - OP_FORMLINE, /* 114 */ - OP_ORD, /* 115 */ - OP_CHR, /* 116 */ - OP_CRYPT, /* 117 */ - OP_UCFIRST, /* 118 */ - OP_LCFIRST, /* 119 */ - OP_UC, /* 120 */ - OP_LC, /* 121 */ - OP_QUOTEMETA, /* 122 */ - OP_RV2AV, /* 123 */ - OP_AELEMFAST, /* 124 */ - OP_AELEM, /* 125 */ - OP_ASLICE, /* 126 */ - OP_EACH, /* 127 */ - OP_VALUES, /* 128 */ - OP_KEYS, /* 129 */ - OP_DELETE, /* 130 */ - OP_EXISTS, /* 131 */ - OP_RV2HV, /* 132 */ - OP_HELEM, /* 133 */ - OP_HSLICE, /* 134 */ - OP_UNPACK, /* 135 */ - OP_PACK, /* 136 */ - OP_SPLIT, /* 137 */ - OP_JOIN, /* 138 */ - OP_LIST, /* 139 */ - OP_LSLICE, /* 140 */ - OP_ANONLIST, /* 141 */ - OP_ANONHASH, /* 142 */ - OP_SPLICE, /* 143 */ - OP_PUSH, /* 144 */ - OP_POP, /* 145 */ - OP_SHIFT, /* 146 */ - OP_UNSHIFT, /* 147 */ - OP_SORT, /* 148 */ - OP_REVERSE, /* 149 */ - OP_GREPSTART, /* 150 */ - OP_GREPWHILE, /* 151 */ - OP_MAPSTART, /* 152 */ - OP_MAPWHILE, /* 153 */ - OP_RANGE, /* 154 */ - OP_FLIP, /* 155 */ - OP_FLOP, /* 156 */ - OP_AND, /* 157 */ - OP_OR, /* 158 */ - OP_XOR, /* 159 */ - OP_COND_EXPR, /* 160 */ - OP_ANDASSIGN, /* 161 */ - OP_ORASSIGN, /* 162 */ - OP_METHOD, /* 163 */ - OP_ENTERSUB, /* 164 */ - OP_LEAVESUB, /* 165 */ - OP_CALLER, /* 166 */ - OP_WARN, /* 167 */ - OP_DIE, /* 168 */ - OP_RESET, /* 169 */ - OP_LINESEQ, /* 170 */ - OP_NEXTSTATE, /* 171 */ - OP_DBSTATE, /* 172 */ - OP_UNSTACK, /* 173 */ - OP_ENTER, /* 174 */ - OP_LEAVE, /* 175 */ - OP_SCOPE, /* 176 */ - OP_ENTERITER, /* 177 */ - OP_ITER, /* 178 */ - OP_ENTERLOOP, /* 179 */ - OP_LEAVELOOP, /* 180 */ - OP_RETURN, /* 181 */ - OP_LAST, /* 182 */ - OP_NEXT, /* 183 */ - OP_REDO, /* 184 */ - OP_DUMP, /* 185 */ - OP_GOTO, /* 186 */ - OP_EXIT, /* 187 */ - OP_OPEN, /* 188 */ - OP_CLOSE, /* 189 */ - OP_PIPE_OP, /* 190 */ - OP_FILENO, /* 191 */ - OP_UMASK, /* 192 */ - OP_BINMODE, /* 193 */ - OP_TIE, /* 194 */ - OP_UNTIE, /* 195 */ - OP_TIED, /* 196 */ - OP_DBMOPEN, /* 197 */ - OP_DBMCLOSE, /* 198 */ - OP_SSELECT, /* 199 */ - OP_SELECT, /* 200 */ - OP_GETC, /* 201 */ - OP_READ, /* 202 */ - OP_ENTERWRITE, /* 203 */ - OP_LEAVEWRITE, /* 204 */ - OP_PRTF, /* 205 */ - OP_PRINT, /* 206 */ - OP_SYSOPEN, /* 207 */ - OP_SYSSEEK, /* 208 */ - OP_SYSREAD, /* 209 */ - OP_SYSWRITE, /* 210 */ - OP_SEND, /* 211 */ - OP_RECV, /* 212 */ - OP_EOF, /* 213 */ - OP_TELL, /* 214 */ - OP_SEEK, /* 215 */ - OP_TRUNCATE, /* 216 */ - OP_FCNTL, /* 217 */ - OP_IOCTL, /* 218 */ - OP_FLOCK, /* 219 */ - OP_SOCKET, /* 220 */ - OP_SOCKPAIR, /* 221 */ - OP_BIND, /* 222 */ - OP_CONNECT, /* 223 */ - OP_LISTEN, /* 224 */ - OP_ACCEPT, /* 225 */ - OP_SHUTDOWN, /* 226 */ - OP_GSOCKOPT, /* 227 */ - OP_SSOCKOPT, /* 228 */ - OP_GETSOCKNAME, /* 229 */ - OP_GETPEERNAME, /* 230 */ - OP_LSTAT, /* 231 */ - OP_STAT, /* 232 */ - OP_FTRREAD, /* 233 */ - OP_FTRWRITE, /* 234 */ - OP_FTREXEC, /* 235 */ - OP_FTEREAD, /* 236 */ - OP_FTEWRITE, /* 237 */ - OP_FTEEXEC, /* 238 */ - OP_FTIS, /* 239 */ - OP_FTEOWNED, /* 240 */ - OP_FTROWNED, /* 241 */ - OP_FTZERO, /* 242 */ - OP_FTSIZE, /* 243 */ - OP_FTMTIME, /* 244 */ - OP_FTATIME, /* 245 */ - OP_FTCTIME, /* 246 */ - OP_FTSOCK, /* 247 */ - OP_FTCHR, /* 248 */ - OP_FTBLK, /* 249 */ - OP_FTFILE, /* 250 */ - OP_FTDIR, /* 251 */ - OP_FTPIPE, /* 252 */ - OP_FTLINK, /* 253 */ - OP_FTSUID, /* 254 */ - OP_FTSGID, /* 255 */ - OP_FTSVTX, /* 256 */ - OP_FTTTY, /* 257 */ - OP_FTTEXT, /* 258 */ - OP_FTBINARY, /* 259 */ - OP_CHDIR, /* 260 */ - OP_CHOWN, /* 261 */ - OP_CHROOT, /* 262 */ - OP_UNLINK, /* 263 */ - OP_CHMOD, /* 264 */ - OP_UTIME, /* 265 */ - OP_RENAME, /* 266 */ - OP_LINK, /* 267 */ - OP_SYMLINK, /* 268 */ - OP_READLINK, /* 269 */ - OP_MKDIR, /* 270 */ - OP_RMDIR, /* 271 */ - OP_OPEN_DIR, /* 272 */ - OP_READDIR, /* 273 */ - OP_TELLDIR, /* 274 */ - OP_SEEKDIR, /* 275 */ - OP_REWINDDIR, /* 276 */ - OP_CLOSEDIR, /* 277 */ - OP_FORK, /* 278 */ - OP_WAIT, /* 279 */ - OP_WAITPID, /* 280 */ - OP_SYSTEM, /* 281 */ - OP_EXEC, /* 282 */ - OP_KILL, /* 283 */ - OP_GETPPID, /* 284 */ - OP_GETPGRP, /* 285 */ - OP_SETPGRP, /* 286 */ - OP_GETPRIORITY, /* 287 */ - OP_SETPRIORITY, /* 288 */ - OP_TIME, /* 289 */ - OP_TMS, /* 290 */ - OP_LOCALTIME, /* 291 */ - OP_GMTIME, /* 292 */ - OP_ALARM, /* 293 */ - OP_SLEEP, /* 294 */ - OP_SHMGET, /* 295 */ - OP_SHMCTL, /* 296 */ - OP_SHMREAD, /* 297 */ - OP_SHMWRITE, /* 298 */ - OP_MSGGET, /* 299 */ - OP_MSGCTL, /* 300 */ - OP_MSGSND, /* 301 */ - OP_MSGRCV, /* 302 */ - OP_SEMGET, /* 303 */ - OP_SEMCTL, /* 304 */ - OP_SEMOP, /* 305 */ - OP_REQUIRE, /* 306 */ - OP_DOFILE, /* 307 */ - OP_ENTEREVAL, /* 308 */ - OP_LEAVEEVAL, /* 309 */ - OP_ENTERTRY, /* 310 */ - OP_LEAVETRY, /* 311 */ - OP_GHBYNAME, /* 312 */ - OP_GHBYADDR, /* 313 */ - OP_GHOSTENT, /* 314 */ - OP_GNBYNAME, /* 315 */ - OP_GNBYADDR, /* 316 */ - OP_GNETENT, /* 317 */ - OP_GPBYNAME, /* 318 */ - OP_GPBYNUMBER, /* 319 */ - OP_GPROTOENT, /* 320 */ - OP_GSBYNAME, /* 321 */ - OP_GSBYPORT, /* 322 */ - OP_GSERVENT, /* 323 */ - OP_SHOSTENT, /* 324 */ - OP_SNETENT, /* 325 */ - OP_SPROTOENT, /* 326 */ - OP_SSERVENT, /* 327 */ - OP_EHOSTENT, /* 328 */ - OP_ENETENT, /* 329 */ - OP_EPROTOENT, /* 330 */ - OP_ESERVENT, /* 331 */ - OP_GPWNAM, /* 332 */ - OP_GPWUID, /* 333 */ - OP_GPWENT, /* 334 */ - OP_SPWENT, /* 335 */ - OP_EPWENT, /* 336 */ - OP_GGRNAM, /* 337 */ - OP_GGRGID, /* 338 */ - OP_GGRENT, /* 339 */ - OP_SGRENT, /* 340 */ - OP_EGRENT, /* 341 */ - OP_GETLOGIN, /* 342 */ - OP_SYSCALL, /* 343 */ - OP_LOCK, /* 344 */ - OP_THREADSV, /* 345 */ + OP_REGCRESET, /* 29 */ + OP_REGCOMP, /* 30 */ + OP_MATCH, /* 31 */ + OP_SUBST, /* 32 */ + OP_SUBSTCONT, /* 33 */ + OP_TRANS, /* 34 */ + OP_SASSIGN, /* 35 */ + OP_AASSIGN, /* 36 */ + OP_CHOP, /* 37 */ + OP_SCHOP, /* 38 */ + OP_CHOMP, /* 39 */ + OP_SCHOMP, /* 40 */ + OP_DEFINED, /* 41 */ + OP_UNDEF, /* 42 */ + OP_STUDY, /* 43 */ + OP_POS, /* 44 */ + OP_PREINC, /* 45 */ + OP_I_PREINC, /* 46 */ + OP_PREDEC, /* 47 */ + OP_I_PREDEC, /* 48 */ + OP_POSTINC, /* 49 */ + OP_I_POSTINC, /* 50 */ + OP_POSTDEC, /* 51 */ + OP_I_POSTDEC, /* 52 */ + OP_POW, /* 53 */ + OP_MULTIPLY, /* 54 */ + OP_I_MULTIPLY, /* 55 */ + OP_DIVIDE, /* 56 */ + OP_I_DIVIDE, /* 57 */ + OP_MODULO, /* 58 */ + OP_I_MODULO, /* 59 */ + OP_REPEAT, /* 60 */ + OP_ADD, /* 61 */ + OP_I_ADD, /* 62 */ + OP_SUBTRACT, /* 63 */ + OP_I_SUBTRACT, /* 64 */ + OP_CONCAT, /* 65 */ + OP_STRINGIFY, /* 66 */ + OP_LEFT_SHIFT, /* 67 */ + OP_RIGHT_SHIFT, /* 68 */ + OP_LT, /* 69 */ + OP_I_LT, /* 70 */ + OP_GT, /* 71 */ + OP_I_GT, /* 72 */ + OP_LE, /* 73 */ + OP_I_LE, /* 74 */ + OP_GE, /* 75 */ + OP_I_GE, /* 76 */ + OP_EQ, /* 77 */ + OP_I_EQ, /* 78 */ + OP_NE, /* 79 */ + OP_I_NE, /* 80 */ + OP_NCMP, /* 81 */ + OP_I_NCMP, /* 82 */ + OP_SLT, /* 83 */ + OP_SGT, /* 84 */ + OP_SLE, /* 85 */ + OP_SGE, /* 86 */ + OP_SEQ, /* 87 */ + OP_SNE, /* 88 */ + OP_SCMP, /* 89 */ + OP_BIT_AND, /* 90 */ + OP_BIT_XOR, /* 91 */ + OP_BIT_OR, /* 92 */ + OP_NEGATE, /* 93 */ + OP_I_NEGATE, /* 94 */ + OP_NOT, /* 95 */ + OP_COMPLEMENT, /* 96 */ + OP_ATAN2, /* 97 */ + OP_SIN, /* 98 */ + OP_COS, /* 99 */ + OP_RAND, /* 100 */ + OP_SRAND, /* 101 */ + OP_EXP, /* 102 */ + OP_LOG, /* 103 */ + OP_SQRT, /* 104 */ + OP_INT, /* 105 */ + OP_HEX, /* 106 */ + OP_OCT, /* 107 */ + OP_ABS, /* 108 */ + OP_LENGTH, /* 109 */ + OP_SUBSTR, /* 110 */ + OP_VEC, /* 111 */ + OP_INDEX, /* 112 */ + OP_RINDEX, /* 113 */ + OP_SPRINTF, /* 114 */ + OP_FORMLINE, /* 115 */ + OP_ORD, /* 116 */ + OP_CHR, /* 117 */ + OP_CRYPT, /* 118 */ + OP_UCFIRST, /* 119 */ + OP_LCFIRST, /* 120 */ + OP_UC, /* 121 */ + OP_LC, /* 122 */ + OP_QUOTEMETA, /* 123 */ + OP_RV2AV, /* 124 */ + OP_AELEMFAST, /* 125 */ + OP_AELEM, /* 126 */ + OP_ASLICE, /* 127 */ + OP_EACH, /* 128 */ + OP_VALUES, /* 129 */ + OP_KEYS, /* 130 */ + OP_DELETE, /* 131 */ + OP_EXISTS, /* 132 */ + OP_RV2HV, /* 133 */ + OP_HELEM, /* 134 */ + OP_HSLICE, /* 135 */ + OP_UNPACK, /* 136 */ + OP_PACK, /* 137 */ + OP_SPLIT, /* 138 */ + OP_JOIN, /* 139 */ + OP_LIST, /* 140 */ + OP_LSLICE, /* 141 */ + OP_ANONLIST, /* 142 */ + OP_ANONHASH, /* 143 */ + OP_SPLICE, /* 144 */ + OP_PUSH, /* 145 */ + OP_POP, /* 146 */ + OP_SHIFT, /* 147 */ + OP_UNSHIFT, /* 148 */ + OP_SORT, /* 149 */ + OP_REVERSE, /* 150 */ + OP_GREPSTART, /* 151 */ + OP_GREPWHILE, /* 152 */ + OP_MAPSTART, /* 153 */ + OP_MAPWHILE, /* 154 */ + OP_RANGE, /* 155 */ + OP_FLIP, /* 156 */ + OP_FLOP, /* 157 */ + OP_AND, /* 158 */ + OP_OR, /* 159 */ + OP_XOR, /* 160 */ + OP_COND_EXPR, /* 161 */ + OP_ANDASSIGN, /* 162 */ + OP_ORASSIGN, /* 163 */ + OP_METHOD, /* 164 */ + OP_ENTERSUB, /* 165 */ + OP_LEAVESUB, /* 166 */ + OP_CALLER, /* 167 */ + OP_WARN, /* 168 */ + OP_DIE, /* 169 */ + OP_RESET, /* 170 */ + OP_LINESEQ, /* 171 */ + OP_NEXTSTATE, /* 172 */ + OP_DBSTATE, /* 173 */ + OP_UNSTACK, /* 174 */ + OP_ENTER, /* 175 */ + OP_LEAVE, /* 176 */ + OP_SCOPE, /* 177 */ + OP_ENTERITER, /* 178 */ + OP_ITER, /* 179 */ + OP_ENTERLOOP, /* 180 */ + OP_LEAVELOOP, /* 181 */ + OP_RETURN, /* 182 */ + OP_LAST, /* 183 */ + OP_NEXT, /* 184 */ + OP_REDO, /* 185 */ + OP_DUMP, /* 186 */ + OP_GOTO, /* 187 */ + OP_EXIT, /* 188 */ + OP_OPEN, /* 189 */ + OP_CLOSE, /* 190 */ + OP_PIPE_OP, /* 191 */ + OP_FILENO, /* 192 */ + OP_UMASK, /* 193 */ + OP_BINMODE, /* 194 */ + OP_TIE, /* 195 */ + OP_UNTIE, /* 196 */ + OP_TIED, /* 197 */ + OP_DBMOPEN, /* 198 */ + OP_DBMCLOSE, /* 199 */ + OP_SSELECT, /* 200 */ + OP_SELECT, /* 201 */ + OP_GETC, /* 202 */ + OP_READ, /* 203 */ + OP_ENTERWRITE, /* 204 */ + OP_LEAVEWRITE, /* 205 */ + OP_PRTF, /* 206 */ + OP_PRINT, /* 207 */ + OP_SYSOPEN, /* 208 */ + OP_SYSSEEK, /* 209 */ + OP_SYSREAD, /* 210 */ + OP_SYSWRITE, /* 211 */ + OP_SEND, /* 212 */ + OP_RECV, /* 213 */ + OP_EOF, /* 214 */ + OP_TELL, /* 215 */ + OP_SEEK, /* 216 */ + OP_TRUNCATE, /* 217 */ + OP_FCNTL, /* 218 */ + OP_IOCTL, /* 219 */ + OP_FLOCK, /* 220 */ + OP_SOCKET, /* 221 */ + OP_SOCKPAIR, /* 222 */ + OP_BIND, /* 223 */ + OP_CONNECT, /* 224 */ + OP_LISTEN, /* 225 */ + OP_ACCEPT, /* 226 */ + OP_SHUTDOWN, /* 227 */ + OP_GSOCKOPT, /* 228 */ + OP_SSOCKOPT, /* 229 */ + OP_GETSOCKNAME, /* 230 */ + OP_GETPEERNAME, /* 231 */ + OP_LSTAT, /* 232 */ + OP_STAT, /* 233 */ + OP_FTRREAD, /* 234 */ + OP_FTRWRITE, /* 235 */ + OP_FTREXEC, /* 236 */ + OP_FTEREAD, /* 237 */ + OP_FTEWRITE, /* 238 */ + OP_FTEEXEC, /* 239 */ + OP_FTIS, /* 240 */ + OP_FTEOWNED, /* 241 */ + OP_FTROWNED, /* 242 */ + OP_FTZERO, /* 243 */ + OP_FTSIZE, /* 244 */ + OP_FTMTIME, /* 245 */ + OP_FTATIME, /* 246 */ + OP_FTCTIME, /* 247 */ + OP_FTSOCK, /* 248 */ + OP_FTCHR, /* 249 */ + OP_FTBLK, /* 250 */ + OP_FTFILE, /* 251 */ + OP_FTDIR, /* 252 */ + OP_FTPIPE, /* 253 */ + OP_FTLINK, /* 254 */ + OP_FTSUID, /* 255 */ + OP_FTSGID, /* 256 */ + OP_FTSVTX, /* 257 */ + OP_FTTTY, /* 258 */ + OP_FTTEXT, /* 259 */ + OP_FTBINARY, /* 260 */ + OP_CHDIR, /* 261 */ + OP_CHOWN, /* 262 */ + OP_CHROOT, /* 263 */ + OP_UNLINK, /* 264 */ + OP_CHMOD, /* 265 */ + OP_UTIME, /* 266 */ + OP_RENAME, /* 267 */ + OP_LINK, /* 268 */ + OP_SYMLINK, /* 269 */ + OP_READLINK, /* 270 */ + OP_MKDIR, /* 271 */ + OP_RMDIR, /* 272 */ + OP_OPEN_DIR, /* 273 */ + OP_READDIR, /* 274 */ + OP_TELLDIR, /* 275 */ + OP_SEEKDIR, /* 276 */ + OP_REWINDDIR, /* 277 */ + OP_CLOSEDIR, /* 278 */ + OP_FORK, /* 279 */ + OP_WAIT, /* 280 */ + OP_WAITPID, /* 281 */ + OP_SYSTEM, /* 282 */ + OP_EXEC, /* 283 */ + OP_KILL, /* 284 */ + OP_GETPPID, /* 285 */ + OP_GETPGRP, /* 286 */ + OP_SETPGRP, /* 287 */ + OP_GETPRIORITY, /* 288 */ + OP_SETPRIORITY, /* 289 */ + OP_TIME, /* 290 */ + OP_TMS, /* 291 */ + OP_LOCALTIME, /* 292 */ + OP_GMTIME, /* 293 */ + OP_ALARM, /* 294 */ + OP_SLEEP, /* 295 */ + OP_SHMGET, /* 296 */ + OP_SHMCTL, /* 297 */ + OP_SHMREAD, /* 298 */ + OP_SHMWRITE, /* 299 */ + OP_MSGGET, /* 300 */ + OP_MSGCTL, /* 301 */ + OP_MSGSND, /* 302 */ + OP_MSGRCV, /* 303 */ + OP_SEMGET, /* 304 */ + OP_SEMCTL, /* 305 */ + OP_SEMOP, /* 306 */ + OP_REQUIRE, /* 307 */ + OP_DOFILE, /* 308 */ + OP_ENTEREVAL, /* 309 */ + OP_LEAVEEVAL, /* 310 */ + OP_ENTERTRY, /* 311 */ + OP_LEAVETRY, /* 312 */ + OP_GHBYNAME, /* 313 */ + OP_GHBYADDR, /* 314 */ + OP_GHOSTENT, /* 315 */ + OP_GNBYNAME, /* 316 */ + OP_GNBYADDR, /* 317 */ + OP_GNETENT, /* 318 */ + OP_GPBYNAME, /* 319 */ + OP_GPBYNUMBER, /* 320 */ + OP_GPROTOENT, /* 321 */ + OP_GSBYNAME, /* 322 */ + OP_GSBYPORT, /* 323 */ + OP_GSERVENT, /* 324 */ + OP_SHOSTENT, /* 325 */ + OP_SNETENT, /* 326 */ + OP_SPROTOENT, /* 327 */ + OP_SSERVENT, /* 328 */ + OP_EHOSTENT, /* 329 */ + OP_ENETENT, /* 330 */ + OP_EPROTOENT, /* 331 */ + OP_ESERVENT, /* 332 */ + OP_GPWNAM, /* 333 */ + OP_GPWUID, /* 334 */ + OP_GPWENT, /* 335 */ + OP_SPWENT, /* 336 */ + OP_EPWENT, /* 337 */ + OP_GGRNAM, /* 338 */ + OP_GGRGID, /* 339 */ + OP_GGRENT, /* 340 */ + OP_SGRENT, /* 341 */ + OP_EGRENT, /* 342 */ + OP_GETLOGIN, /* 343 */ + OP_SYSCALL, /* 344 */ + OP_LOCK, /* 345 */ + OP_THREADSV, /* 346 */ OP_max } opcode; -#define MAXO 346 +#define MAXO 347 #ifndef DOINIT EXT char *op_name[]; @@ -388,6 +389,7 @@ EXT char *op_name[] = { "readline", "rcatline", "regcmaybe", + "regcreset", "regcomp", "match", "subst", @@ -741,6 +743,7 @@ EXT char *op_desc[] = { "", "append I/O operator", "regexp comp once", + "regexp reset interpolation flag", "regexp compilation", "pattern match", "substitution", @@ -1126,6 +1129,7 @@ OP * pp_glob _((ARGSproto)); OP * pp_readline _((ARGSproto)); OP * pp_rcatline _((ARGSproto)); OP * pp_regcmaybe _((ARGSproto)); +OP * pp_regcreset _((ARGSproto)); OP * pp_regcomp _((ARGSproto)); OP * pp_match _((ARGSproto)); OP * pp_subst _((ARGSproto)); @@ -1481,6 +1485,7 @@ EXT OP * (CPERLscope(*ppaddr)[])(ARGSproto) = { pp_readline, pp_rcatline, pp_regcmaybe, + pp_regcreset, pp_regcomp, pp_match, pp_subst, @@ -1836,6 +1841,7 @@ EXT OP * (CPERLscope(*check)[]) _((OP *op)) = { ck_null, /* readline */ ck_null, /* rcatline */ ck_fun, /* regcmaybe */ + ck_fun, /* regcreset */ ck_null, /* regcomp */ ck_match, /* match */ ck_null, /* subst */ @@ -2190,6 +2196,7 @@ EXT U32 opargs[] = { 0x00000c08, /* readline */ 0x00000c08, /* rcatline */ 0x00001104, /* regcmaybe */ + 0x00001104, /* regcreset */ 0x00001304, /* regcomp */ 0x00000640, /* match */ 0x00001654, /* subst */ diff --git a/opcode.pl b/opcode.pl index fe6e202..f7e4f76 100755 --- a/opcode.pl +++ b/opcode.pl @@ -209,6 +209,7 @@ END close OC or die "Error closing opcode.h: $!"; +unlink "pp_proto.h"; open PP, '>pp_proto.h' or die "Error creating pp_proto.h: $!"; for (@ops) { next if /^i_(pre|post)(inc|dec)$/; @@ -273,6 +274,7 @@ rcatline append I/O operator ck_null t% # Bindable operators. regcmaybe regexp comp once ck_fun s1 S +regcreset regexp reset interpolation flag ck_fun s1 S regcomp regexp compilation ck_null s| S match pattern match ck_match d/ subst substitution ck_null dis/ S diff --git a/pp_ctl.c b/pp_ctl.c index 88ec213..672e0e2 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -67,6 +67,14 @@ PP(pp_regcmaybe) return NORMAL; } +PP(pp_regcreset) +{ + /* XXXX Should store the old value to allow for tie/overload - and + restore in regcomp, where marked with XXXX. */ + reginterp_cnt = 0; + return NORMAL; +} + PP(pp_regcomp) { djSP; @@ -99,9 +107,13 @@ PP(pp_regcomp) ReREFCNT_dec(pm->op_pmregexp); pm->op_pmregexp = Null(REGEXP*); /* crucial if regcomp aborts */ } + if (op->op_flags & OPf_SPECIAL) + reginterp_cnt = I32_MAX; /* Mark as safe. */ pm->op_pmflags = pm->op_pmpermflags; /* reset case sensitivity */ pm->op_pmregexp = CALLREGCOMP(t, t + len, pm); + reginterp_cnt = 0; /* XXXX Be extra paranoid - needed + inside tie/overload accessors. */ } } diff --git a/pp_proto.h b/pp_proto.h index ec98f02..fedb0dd 100644 --- a/pp_proto.h +++ b/pp_proto.h @@ -27,6 +27,7 @@ PPDEF(pp_glob) PPDEF(pp_readline) PPDEF(pp_rcatline) PPDEF(pp_regcmaybe) +PPDEF(pp_regcreset) PPDEF(pp_regcomp) PPDEF(pp_match) PPDEF(pp_subst) diff --git a/regcomp.c b/regcomp.c index 420d2fb..3dd95b8 100644 --- a/regcomp.c +++ b/regcomp.c @@ -720,6 +720,7 @@ pregcomp(char *exp, char *xend, PMOP *pm) regseen = 0; seen_zerolen = *exp == '^' ? -1 : 0; + seen_evals = 0; extralen = 0; /* First pass: determine size, legality. */ @@ -787,6 +788,8 @@ pregcomp(char *exp, char *xend, PMOP *pm) regnaughty = 0; regnpar = 1; regcode = r->program; + /* Store the count of eval-groups for security checks: */ + regcode->next_off = ((seen_evals > U16_MAX) ? U16_MAX : seen_evals); regc((U8)MAGIC, (char*) regcode++); r->data = 0; if (reg(0, &flags) == NULL) @@ -1059,13 +1062,10 @@ reg(I32 paren, I32 *flagp) regcomp_rx->data->data[n+2] = (void*)sop; SvREFCNT_dec(sv); } else { /* First pass */ - if (curcop == &compiling) { - if (!(hints & HINT_RE_EVAL)) - FAIL("Eval-group not allowed, use re 'eval'"); - } - else { - FAIL("Eval-group not allowed at run time"); - } + if (reginterp_cnt < ++seen_evals && curcop != &compiling) + /* No compiled RE interpolated, has runtime + components ===> unsafe. */ + FAIL("Eval-group not allowed at runtime, use re 'eval'"); if (tainted) FAIL("Eval-group in insecure regular expression"); } diff --git a/sv.c b/sv.c index b462127..3fcc2da 100644 --- a/sv.c +++ b/sv.c @@ -1706,9 +1706,9 @@ sv_2pv(register SV *sv, STRLEN *lp) == (SVs_OBJECT|SVs_RMG)) && strEQ(s=HvNAME(SvSTASH(sv)), "Regexp") && (mg = mg_find(sv, 'r'))) { - if (!mg->mg_ptr) { - regexp *re = (regexp *)mg->mg_obj; + regexp *re = (regexp *)mg->mg_obj; + if (!mg->mg_ptr) { mg->mg_len = re->prelen + 4; New(616, mg->mg_ptr, mg->mg_len + 1, char); Copy("(?:", mg->mg_ptr, 3, char); @@ -1716,6 +1716,7 @@ sv_2pv(register SV *sv, STRLEN *lp) mg->mg_ptr[mg->mg_len - 1] = ')'; mg->mg_ptr[mg->mg_len] = 0; } + reginterp_cnt += re->program[0].next_off; *lp = mg->mg_len; return mg->mg_ptr; } diff --git a/t/op/misc.t b/t/op/misc.t index d544df4..449d87c 100755 --- a/t/op/misc.t +++ b/t/op/misc.t @@ -336,18 +336,16 @@ sub foo { local $_ = shift; split; @_ } @x = foo(' x y z '); print "you die joe!\n" unless "@x" eq 'x y z'; ######## -use re 'eval'; /(?{"{"})/ # Check it outside of eval too EXPECT -Sequence (?{...}) not terminated or not {}-balanced at - line 2, within pattern -/(?{"{"})/: Sequence (?{...}) not terminated or not {}-balanced at - line 2. +Sequence (?{...}) not terminated or not {}-balanced at - line 1, within pattern +/(?{"{"})/: Sequence (?{...}) not terminated or not {}-balanced at - line 1. ######## -use re 'eval'; /(?{"{"}})/ # Check it outside of eval too EXPECT Unmatched right bracket at (re_eval 1) line 1, at end of line syntax error at (re_eval 1) line 1, near ""{"}" -Compilation failed in regexp at - line 2. +Compilation failed in regexp at - line 1. ######## BEGIN { @ARGV = qw(a b c) } BEGIN { print "argv <@ARGV>\nbegin <",shift,">\n" } diff --git a/t/op/pat.t b/t/op/pat.t index a4335b0..90623fb 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,14 +6,13 @@ # $RCSfile: pat.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:12 $ -print "1..124\n"; +print "1..130\n"; BEGIN { chdir 't' if -d 't'; @INC = "../lib" if -d "../lib"; } eval 'use Config'; # Defaults assumed if this fails -use re 'eval'; $x = "abc\ndef\n"; @@ -382,11 +381,23 @@ $test++; $code = '{$blah = 45}'; $blah = 12; -eval { /(?$code)/ }; -print "not " unless $@ and $@ =~ /not allowed at run time/ and $blah == 12; +eval { /(?$code)/ }; +print "not " unless $@ and $@ =~ /not allowed at runtime/ and $blah == 12; print "ok $test\n"; $test++; +for $code ('{$blah = 45}','=xx') { + $blah = 12; + $res = eval { "xx" =~ /(?$code)/o }; + if ($code eq '=xx') { + print "#'$@','$res','$blah'\nnot " unless not $@ and $res; + } else { + print "#'$@','$res','$blah'\nnot " unless $@ and $@ =~ /not allowed at runtime/ and $blah == 12; + } + print "ok $test\n"; + $test++; +} + $code = '{$blah = 45}'; $blah = 12; eval "/(?$code)/"; @@ -476,6 +487,34 @@ print "not " unless $1 and /$1/; print "ok $test\n"; $test++; +$a=study/(?{++$b})/; +$b = 7; +/$a$a/; +print "not " unless $b eq '9'; +print "ok $test\n"; +$test++; + +$c="$a"; +/$a$a/; +print "not " unless $b eq '11'; +print "ok $test\n"; +$test++; + +{ + use re "eval"; + /$a$c$a/; + print "not " unless $b eq '14'; + print "ok $test\n"; + $test++; + + no re "eval"; + $match = eval { /$a$c$a/ }; + print "not " + unless $b eq '14' and $@ =~ /Eval-group not allowed/ and not $match; + print "ok $test\n"; + $test++; +} + sub must_warn_pat { my $warn_pat = shift; return sub { print "not " unless $_[0] =~ /$warn_pat/ } diff --git a/t/op/subst.t b/t/op/subst.t index 1323b2d..57a956d 100755 --- a/t/op/subst.t +++ b/t/op/subst.t @@ -280,7 +280,6 @@ $_ = <<'EOL'; EOL $^R = 'junk'; -use re 'eval'; $foo = ' $@%#lowercase $@%# lowercase UPPERCASE$@%#UPPERCASE' . ' $@%#lowercase$@%#lowercase$@%# lowercase lowercase $@%#lowercase' . ' lowercase $@%#MiXeD$@%# '; -- 2.7.4