From f9176b44e50593d8f3446da63d3989558f6d4c20 Mon Sep 17 00:00:00 2001
From: David Mitchell <davem@iabyn.com>
Date: Sat, 18 May 2013 13:25:36 +0100
Subject: [PATCH] find_byclass, regrepeat: remove is_utf8_pat arg

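Remove the is_utf8_pat arg from these two static functions in regexec.c.
Since both of these functions are now passed a valid reginfo pointer, this
info is already available as the is_utf8_pat field of that struct.
Perl_re_intuit_start() now stores is_utf8_pat in its reginfo so that
find_byclass() can read it from there.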
---
 embed.fnc |  6 ++----
 embed.h   |  4 ++--
 proto.h   |  4 ++--
 regexec.c | 35 ++++++++++++++++++-----------------
 4 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index ed32623..b8f85a4 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2072,8 +2072,7 @@ ERs	|I32	|regrepeat	|NN regexp *prog|NN char **startposp \
 				|NN const regnode *p \
 				|NN regmatch_info *const reginfo \
 				|I32 max \
-				|int depth \
-				|bool is_utf8_pat
+				|int depth
 ERs	|I32	|regtry		|NN regmatch_info *reginfo|NN char **startposp
 ERs	|bool	|reginclass	|NULLOK regexp * const prog|NN const regnode * const n|NN const U8 * const p\
 				|bool const utf8_target
@@ -2092,8 +2091,7 @@ ERsn	|U8*	|reghop4	|NN U8 *s|I32 off|NN const U8 *llim \
 ERsn	|U8*	|reghopmaybe3	|NN U8 *s|I32 off|NN const U8 *lim
 ERs	|char*	|find_byclass	|NN regexp * prog|NN const regnode *c \
 				|NN char *s|NN const char *strend \
-				|NULLOK regmatch_info *reginfo \
-				|bool is_utf_pat
+				|NULLOK regmatch_info *reginfo
 Es	|void	|to_utf8_substr	|NN regexp * prog
 Es	|bool	|to_byte_substr	|NN regexp * prog
 ERs	|I32	|reg_check_named_buff_matched	|NN const regexp *rex \
diff --git a/embed.h b/embed.h
index 850d4c3..ff43e10 100644
--- a/embed.h
+++ b/embed.h
@@ -976,7 +976,7 @@
 #  endif
 #  if defined(PERL_IN_REGEXEC_C)
 #define core_regclass_swash(a,b,c,d)	S_core_regclass_swash(aTHX_ a,b,c,d)
-#define find_byclass(a,b,c,d,e,f)	S_find_byclass(aTHX_ a,b,c,d,e,f)
+#define find_byclass(a,b,c,d,e)	S_find_byclass(aTHX_ a,b,c,d,e)
 #define isFOO_lc(a,b)		S_isFOO_lc(aTHX_ a,b)
 #define isFOO_utf8_lc(a,b)	S_isFOO_utf8_lc(aTHX_ a,b)
 #define reg_check_named_buff_matched(a,b)	S_reg_check_named_buff_matched(aTHX_ a,b)
@@ -986,7 +986,7 @@
 #define reghopmaybe3		S_reghopmaybe3
 #define reginclass(a,b,c,d)	S_reginclass(aTHX_ a,b,c,d)
 #define regmatch(a,b,c)		S_regmatch(aTHX_ a,b,c)
-#define regrepeat(a,b,c,d,e,f,g)	S_regrepeat(aTHX_ a,b,c,d,e,f,g)
+#define regrepeat(a,b,c,d,e,f)	S_regrepeat(aTHX_ a,b,c,d,e,f)
 #define regtry(a,b)		S_regtry(aTHX_ a,b)
 #define to_byte_substr(a)	S_to_byte_substr(aTHX_ a)
 #define to_utf8_substr(a)	S_to_utf8_substr(aTHX_ a)
diff --git a/proto.h b/proto.h
index 714e412..806b56c 100644
--- a/proto.h
+++ b/proto.h
@@ -6891,7 +6891,7 @@ STATIC SV*	S_core_regclass_swash(pTHX_ const regexp *prog, const struct regnode
 #define PERL_ARGS_ASSERT_CORE_REGCLASS_SWASH	\
 	assert(node)
 
-STATIC char*	S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, regmatch_info *reginfo, bool is_utf_pat)
+STATIC char*	S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, regmatch_info *reginfo)
 			__attribute__warn_unused_result__
 			__attribute__nonnull__(pTHX_1)
 			__attribute__nonnull__(pTHX_2)
@@ -6956,7 +6956,7 @@ STATIC I32	S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *pro
 #define PERL_ARGS_ASSERT_REGMATCH	\
 	assert(reginfo); assert(startpos); assert(prog)
 
-STATIC I32	S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, regmatch_info *const reginfo, I32 max, int depth, bool is_utf8_pat)
+STATIC I32	S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, regmatch_info *const reginfo, I32 max, int depth)
 			__attribute__warn_unused_result__
 			__attribute__nonnull__(pTHX_1)
 			__attribute__nonnull__(pTHX_2)
diff --git a/regexec.c b/regexec.c
index e7c129b..0b73c72 100644
--- a/regexec.c
+++ b/regexec.c
@@ -653,6 +653,7 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
         strbeg = strpos;
 
     reginfo->strend = strend;
+    reginfo->is_utf8_pat = is_utf8_pat;
     reginfo->intuit = 1;
 
     if (utf8_target) {
@@ -1129,7 +1130,7 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
 
 	t = s;
         s = find_byclass(prog, progi->regstclass, checked_upto, endpos,
-                            reginfo, is_utf8_pat);
+                            reginfo);
 	if (s) {
 	    checked_upto = s;
 	} else {
@@ -1437,7 +1438,7 @@ if ((reginfo->intuit || regtry(reginfo, &s))) \
 
 STATIC char *
 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, 
-    const char *strend, regmatch_info *reginfo, bool is_utf8_pat)
+    const char *strend, regmatch_info *reginfo)
 {
     dVAR;
     const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
@@ -1453,6 +1454,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
     I32 tmp = 1;	/* Scratch variable? */
     const bool utf8_target = PL_reg_match_utf8;
     UV utf8_fold_flags = 0;
+    const bool is_utf8_pat = reginfo->is_utf8_pat;
     bool to_complement = FALSE; /* Invert the result?  Taking the xor of this
                                    with a result inverts that result, as 0^1 =
                                    1 and 1^1 = 0 */
@@ -2464,7 +2466,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
 		     quoted, (int)(strend - s));
 	    }
 	});
-        if (find_byclass(prog, c, s, strend, reginfo, reginfo->is_utf8_pat))
+        if (find_byclass(prog, c, s, strend, reginfo))
 	    goto got_it;
 	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
     }
@@ -5980,7 +5982,7 @@ NULL
                 char *li = locinput;
 		minmod = 0;
 		if (ST.min &&
-                        regrepeat(rex, &li, ST.A, reginfo, ST.min, depth, is_utf8_pat)
+                        regrepeat(rex, &li, ST.A, reginfo, ST.min, depth)
                             < ST.min)
 		    sayNO;
                 SET_locinput(li);
@@ -6017,8 +6019,7 @@ NULL
                 /* avoid taking address of locinput, so it can remain
                  * a register var */
                 char *li = locinput;
-		ST.count = regrepeat(rex, &li, ST.A, reginfo, ST.max, depth,
-                                        is_utf8_pat);
+		ST.count = regrepeat(rex, &li, ST.A, reginfo, ST.max, depth);
 		if (ST.count < ST.min)
 		    sayNO;
                 SET_locinput(li);
@@ -6102,7 +6103,7 @@ NULL
                      * locinput matches */
                     char *li = ST.oldloc;
 		    ST.count += n;
-		    if (regrepeat(rex, &li, ST.A, reginfo, n, depth, is_utf8_pat) < n)
+		    if (regrepeat(rex, &li, ST.A, reginfo, n, depth) < n)
 			sayNO;
                     assert(n == REG_INFTY || locinput == li);
 		}
@@ -6126,7 +6127,7 @@ NULL
 	    /* failed -- move forward one */
             {
                 char *li = locinput;
-                if (!regrepeat(rex, &li, ST.A, reginfo, 1, depth, is_utf8_pat)) {
+                if (!regrepeat(rex, &li, ST.A, reginfo, 1, depth)) {
                     sayNO;
                 }
                 locinput = li;
@@ -6655,7 +6656,7 @@ no_silent:
  */
 STATIC I32
 S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
-            regmatch_info *const reginfo, I32 max, int depth, bool is_utf8_pat)
+            regmatch_info *const reginfo, I32 max, int depth)
 {
     dVAR;
     char *scan;     /* Pointer to current position in target string */
@@ -6734,7 +6735,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
         }
 	break;
     case EXACT:
-        assert(STR_LEN(p) == is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
+        assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
 
 	c = (U8)*STRING(p);
 
@@ -6742,7 +6743,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
          * under UTF-8, or both target and pattern aren't UTF-8.  Note that we
          * can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's
          * true iff it doesn't matter if the argument is in UTF-8 or not */
-        if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! is_utf8_pat)) {
+        if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! reginfo->is_utf8_pat)) {
             if (utf8_target && scan + max < loceol) {
                 /* We didn't adjust <loceol> because is UTF-8, but ok to do so,
                  * since here, to match at all, 1 char == 1 byte */
@@ -6752,7 +6753,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
 		scan++;
 	    }
 	}
-	else if (is_utf8_pat) {
+	else if (reginfo->is_utf8_pat) {
             if (utf8_target) {
                 STRLEN scan_char_len;
 
@@ -6814,25 +6815,25 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
     case EXACTFU_SS:
     case EXACTFU_TRICKYFOLD:
     case EXACTFU:
-	utf8_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
+	utf8_flags = reginfo->is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
 
     do_exactf: {
         int c1, c2;
         U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1];
 
-        assert(STR_LEN(p) == is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
+        assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
 
         if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8,
-                                        is_utf8_pat))
+                                        reginfo->is_utf8_pat))
         {
             if (c1 == CHRTEST_VOID) {
                 /* Use full Unicode fold matching */
                 char *tmpeol = reginfo->strend;
-                STRLEN pat_len = is_utf8_pat ? UTF8SKIP(STRING(p)) : 1;
+                STRLEN pat_len = reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1;
                 while (hardcount < max
                         && foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
                                              STRING(p), NULL, pat_len,
-                                             is_utf8_pat, utf8_flags))
+                                             reginfo->is_utf8_pat, utf8_flags))
                 {
                     scan = tmpeol;
                     tmpeol = reginfo->strend;
-- 
2.7.4