Make the "Switch condition not recognized" error message UTF-8 clean

author Brian Fraser <fraserbn@gmail.com>

Thu, 29 Aug 2013 14:18:55 +0000 (11:18 -0300)

committer Father Chrysostomos <sprout@cpan.org>

Tue, 10 Sep 2013 15:36:13 +0000 (08:36 -0700)
author Brian Fraser <fraserbn@gmail.com>
Thu, 29 Aug 2013 14:18:55 +0000 (11:18 -0300)
committer Father Chrysostomos <sprout@cpan.org>
Tue, 10 Sep 2013 15:36:13 +0000 (08:36 -0700)
diff --git a/regcomp.c b/regcomp.c

index 061d814..9da2849 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -9240,6 +9240,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 else if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
                      /* (?(1)...) */
                     char c;
+                   char *tmp;
                     parno = atoi(RExC_parse++);
  
                     while (isDIGIT(*RExC_parse))
@@ -9247,8 +9248,17 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      ret = reganode(pRExC_state, GROUPP, parno);
  
                   insert_if_check_paren:
-                   if ((c = *nextchar(pRExC_state)) != ')')
+                   if (*(tmp = nextchar(pRExC_state)) != ')') {
+                        if ( UTF ) {
+                        /* Like the name implies, nextchar deals in chars,
+                         * not characters, so if under UTF, undo its work
+                         * and skip over the next character.
+                         */
+                           RExC_parse = tmp;
+                           RExC_parse += UTF8SKIP(RExC_parse);
+                       }
                         vFAIL("Switch condition not recognized");
+                   }
                   insert_if:
                      REGTAIL(pRExC_state, ret, reganode(pRExC_state, IFTHEN, 0));
                      br = regbranch(pRExC_state, &flags, 1,depth+1);
diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t

index 573d661..0835ce7 100644 (file)
--- a/t/re/reg_mesg.t
+++ b/t/re/reg_mesg.t
@@ -223,9 +223,7 @@ my @death_utf8 = mark_as_utf8(
  
   '/ネ(?ネ)ネ/' => 'Sequence (?ネ...) not recognized {#} m/ネ(?ネ{#})ネ/',
  
-# TODO S_nextchar() deals in chars, not in characters, which leaves
-# this broken.
-# '/ネ(?(1ネ))ネ/' => 'Switch condition not recognized {#} m/ネ(?(1ネ{#}))ネ/',
+ '/ネ(?(1ネ))ネ/' => 'Switch condition not recognized {#} m/ネ(?(1ネ{#}))ネ/',
  
   '/(?(1)ネ|y|ヌ)/' => 'Switch (?(condition)... contains too many branches {#} m/(?(1)ネ|y|{#}ヌ)/',
author	Brian Fraser <fraserbn@gmail.com>
	Thu, 29 Aug 2013 14:18:55 +0000 (11:18 -0300)
committer	Father Chrysostomos <sprout@cpan.org>
	Tue, 10 Sep 2013 15:36:13 +0000 (08:36 -0700)
regcomp.c		patch \| blob \| history
t/re/reg_mesg.t		patch \| blob \| history