Add \o{} escape

author Karl Williamson <khw@khw-desktop.(none)>

Thu, 15 Jul 2010 23:28:28 +0000 (17:28 -0600)

committer David Golden <dagolden@cpan.org>

Sun, 18 Jul 2010 01:50:48 +0000 (21:50 -0400)
author Karl Williamson <khw@khw-desktop.(none)>
Thu, 15 Jul 2010 23:28:28 +0000 (17:28 -0600)
committer David Golden <dagolden@cpan.org>
Sun, 18 Jul 2010 01:50:48 +0000 (21:50 -0400)
diff --git a/embed.fnc b/embed.fnc

index 8493dd7..37c7f2b 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -639,6 +639,7 @@ p   |OP*    |localize       |NN OP *o|I32 lex
  ApdR   |I32    |looks_like_number|NN SV *const sv
  Apd    |UV     |grok_bin       |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result
  EXpR   |char   |grok_bslash_c  |const char source|const bool output_warning
+EXpR   |char*  |grok_bslash_o  |NN const char* s|NN UV* uv|NN STRLEN* len|const bool output_warning
  Apd    |UV     |grok_hex       |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result
  Apd    |int    |grok_number    |NN const char *pv|STRLEN len|NULLOK UV *valuep
  ApdR   |bool   |grok_numeric_radix|NN const char **sp|NN const char *send
diff --git a/embed.h b/embed.h

index 8fb3cbe..fffdede 100644 (file)
--- a/embed.h
+++ b/embed.h
@@ -463,6 +463,7 @@
  #define grok_bin               Perl_grok_bin
  #if defined(PERL_CORE) || defined(PERL_EXT)
  #define grok_bslash_c          Perl_grok_bslash_c
+#define grok_bslash_o          Perl_grok_bslash_o
  #endif
  #define grok_hex               Perl_grok_hex
  #define grok_number            Perl_grok_number
@@ -2909,6 +2910,7 @@
  #define grok_bin(a,b,c,d)      Perl_grok_bin(aTHX_ a,b,c,d)
  #if defined(PERL_CORE) || defined(PERL_EXT)
  #define grok_bslash_c(a,b)     Perl_grok_bslash_c(aTHX_ a,b)
+#define grok_bslash_o(a,b,c,d) Perl_grok_bslash_o(aTHX_ a,b,c,d)
  #endif
  #define grok_hex(a,b,c,d)      Perl_grok_hex(aTHX_ a,b,c,d)
  #define grok_number(a,b,c)     Perl_grok_number(aTHX_ a,b,c)
diff --git a/global.sym b/global.sym

index aa61a69..3323815 100644 (file)
--- a/global.sym
+++ b/global.sym
@@ -283,6 +283,7 @@ Perl_vload_module
  Perl_looks_like_number
  Perl_grok_bin
  Perl_grok_bslash_c
+Perl_grok_bslash_o
  Perl_grok_hex
  Perl_grok_number
  Perl_grok_numeric_radix
diff --git a/pod/perl5133delta.pod b/pod/perl5133delta.pod

index 476427e..d4db338 100644 (file)
--- a/pod/perl5133delta.pod
+++ b/pod/perl5133delta.pod
@@ -28,6 +28,17 @@ here, but most should go in the L</Performance Enhancements> section.
  
  [ List each enhancement as a =head2 entry ]
  
+=head2 \o{...}
+
+The escape sequence C<"\o"> in double-quotish contexts is now defined.  It
+must be followed by braces enclosing an octal number of at least one digit.  It
+means the character whose ordinal value is that octal number.  This construct
+allows large octal ordinals beyond the current max of 0777 to be represented.
+It also allows you to specify a character in octal which can safely be
+concatenated with other regex snippets without danger of changing its meaning,
+and one which won't ever be confused with being a backreference to a regex
+capture group.  See L<perlre/Capture groups>
+
  =head2 C<\N{I<name>}> and C<charnames> enhancements
  
  C<\N{}> and C<charnames::vianame> now know about the abbreviated character
@@ -84,7 +95,9 @@ anomalous behavior than their use in all other double-quotish contexts.   Since
  all double-quotish contexts have the same behavior, namely to be equivalent to
  C<\x{100}> - C<\x{1FF}>, with no deprecation warning. Use of these values in the
  command line option C<"-0"> retains the current meaning to slurp input files
-whole; previously, this was documented only for C<"-0777">.
+whole; previously, this was documented only for C<"-0777">.  It is recommended,
+however, because of various ambiguities, to use the new L</\o{...}> construct
+to represent characters in octal.
  
  =head1 Deprecations
  
diff --git a/pod/perldiag.pod b/pod/perldiag.pod

index 9e8a287..9f9fe4b 100644 (file)
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -2510,6 +2510,10 @@ comment) between the C<\N> and the C<{> in a regex with the C</x> modifier.
  This modifier does not change the requirement that the brace immediately follow
  the C<\N>.
  
+=item Missing braces on \o{}
+
+(F) A C<\o> must be followed immediately by a C<{> in double-quotish context.
+
  =item Missing comma after first argument to %s function
  
  (F) While certain functions allow you to specify a filehandle or an
@@ -2978,6 +2982,11 @@ to UTC.  If it's not, define the logical name
  F<SYS$TIMEZONE_DIFFERENTIAL> to translate to the number of seconds which
  need to be added to UTC to get local time.
  
+=item Non-octal character '%c'.  Resolved as "%s"
+
+(W digit)  In parsing an octal numeric constant, a character was unexpectedly
+encountered that isn't octal.  The resulting value is as indicated.
+
  =item Non-string passed as bitmask
  
  (W misc) A number has been passed as a bitmask argument to select().
@@ -3020,6 +3029,11 @@ versions of Perl are likely to eliminate this arbitrary limitation.  In
  the meantime, try using scientific notation (e.g. "1e6" instead of
  "1_000_000").
  
+=item Number with no digits
+
+(F) Perl was looking for a number but found nothing that looked like a number.
+This happens, for example with C<\o{}>, with no number between the braces.
+
  =item Octal number in vector unsupported
  
  (F) Numbers with a leading C<0> are not currently allowed in vectors.
diff --git a/pod/perlre.pod b/pod/perlre.pod

index 5ea15a0..2e00f0b 100644 (file)
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -229,11 +229,11 @@ also work:
   \f          form feed             (FF)
   \a          alarm (bell)          (BEL)
   \e          escape (think troff)  (ESC)
- \033        octal char            (example: ESC)
   \cK         control char          (example: VT)
   \x{}, \x00  character whose ordinal is the given hexadecimal number
   \N{name}    named Unicode character
   \N{U+263D}  Unicode character     (example: FIRST QUARTER MOON)
+ \o{}, \000  character whose ordinal is the given octal number
   \l          lowercase next char (think vi)
   \u          uppercase next char (think vi)
   \L          lowercase till \E (think vi)
diff --git a/pod/perlrebackslash.pod b/pod/perlrebackslash.pod

index 9d246bd..d460f7f 100644 (file)
--- a/pod/perlrebackslash.pod
+++ b/pod/perlrebackslash.pod
@@ -62,7 +62,7 @@ quoted constructs>.
  Those not usable within a bracketed character class (like C<[\da-z]>) are marked
  as C<Not in [].>
  
- \000              Octal escape sequence.
+ \000              Octal escape sequence.  See also \o{}.
   \1                Absolute backreference.  Not in [].
   \a                Alarm or bell.
   \A                Beginning of string.  Not in [].
@@ -86,6 +86,7 @@ as C<Not in [].>
   \n                (Logical) newline character.
   \N                Any character but newline.  Experimental.  Not in [].
   \N{}              Named or numbered (Unicode) character.
+ \o{}              Octal escape sequence.
   \p{}, \pP         Character with the given Unicode property.
   \P{}, \PP         Character without the given Unicode property.
   \Q                Quotemeta till \E.  Not in [].
@@ -207,33 +208,57 @@ match "as is".
  
  =head3 Octal escapes
  
-Octal escapes consist of a backslash followed by three octal digits
-matching the code point of the character you want to use.  (In some contexts,
-two or even one octal digits are also accepted, sometimes with a warning.) This
-allows for 512 characters (C<\000> up to C<\777>) that can be expressed this
-way.  Enough in pre-Unicode days,
-but most Unicode characters cannot be escaped this way.
+There are two forms of octal escapes.  Each is used to specify a character by
+its ordinal, specified in octal notation.
+
+One form, available starting in Perl 5.14 looks like C<\o{...}>, where the dots
+represent one or more octal digits.  It can be used for any Unicode character.
+
+It was introduced to avoid the potential problems with the other form,
+available in all Perls.  That form consists of a backslash followed by three
+octal digits.  One problem with this form is that it can look exactly like an
+old-style backreference (see
+L</Disambiguation rules between old-style octal escapes and backreferences>
+below.)  You can avoid this by making the first of the three digits always a
+zero, but that makes \077 the largest ordinal unambiguously specifiable by this
+form.
+
+In some contexts, a backslash followed by two or even one octal digits may be
+interpreted as an octal escape, sometimes with a warning, and because of some
+bugs, sometimes with surprising results.  Also, if you are creating a regex
+out of smaller snippets concatentated together, and you use fewer than three
+digits, the beginning of one snippet may be interpreted as adding digits to the
+ending of the snippet before it.  See L</Absolute referencing> for more
+discussion and examples of the snippet problem.
  
  Note that a character that is expressed as an octal escape is considered
  as a character without special meaning by the regex engine, and will match
  "as is".
  
-=head4 Examples (assuming an ASCII platform)
+To summarize, the C<\o{}> form is always safe to use, and the other form is
+safe to use for ordinals up through \077 when you use exactly three digits to
+specify them.
  
- $str = "Perl";
- $str =~ /\120/;    # Match, "\120" is "P".
- $str =~ /\120+/;   # Match, "\120" is "P", it is repeated at least once
- $str =~ /P\053/;   # No match, "\053" is "+" and taken literally.
+Mnemonic: I<0>ctal or I<o>ctal.
  
-=head4 Caveat
+=head4 Examples (assuming an ASCII platform)
  
-Octal escapes potentially clash with old-style backreferences (see L</Absolute
-referencing> below). They both consist of a backslash followed by numbers. So
-Perl has to use heuristics to determine whether it is a backreference or an
-octal escape. You can avoid ambiguity by using the C<\g> form for
-backreferences, and by beginning octal escapes with a "0".  (Since octal
-escapes are 3 digits, this latter method works only up to C<\077>.)  In the
-absence of C<\g>, Perl uses the following rules:
+ $str = "Perl";
+ $str =~ /\o{120}/;  # Match, "\120" is "P".
+ $str =~ /\120/;     # Same.
+ $str =~ /\o{120}+/; # Match, "\120" is "P", it's repeated at least once
+ $str =~ /\120+/;    # Same.
+ $str =~ /P\053/;    # No match, "\053" is "+" and taken literally.
+ /\o{23073}/         # Black foreground, white background smiling face.
+ /\o{4801234567}/    # Raises a warning, and yields chr(4)
+
+=head4 Disambiguation rules between old-style octal escapes and backreferences
+
+Octal escapes of the C<\000> form outside of bracketed character classes
+potentially clash with old-style backreferences.  (see L</Absolute referencing>
+below).  They both consist of a backslash followed by numbers.  So Perl has to
+use heuristics to determine whether it is a backreference or an octal escape.
+Perl uses the following rules to disambiguate:
  
  =over 4
  
@@ -258,18 +283,24 @@ the rest are matched as is.
      $pat .= ")" x 999;
   /^($pat)\1000$/;   #  Matches 'aa'; there are 1000 capture groups.
   /^$pat\1000$/;     #  Matches 'a@0'; there are 999 capture groups
-                    #    and \1000 is seen as \100 (a '@') and a '0'.
+                    #    and \1000 is seen as \100 (a '@') and a '0'
  
  =back
  
+You can the force a backreference interpretation always by using the C<\g{...}>
+form.  You can the force an octal interpretation always by using the C<\o{...}>
+form, or for numbers up through \077 (= 63 decimal), by using three digits,
+beginning with a "0".
+
  =head3 Hexadecimal escapes
  
-Hexadecimal escapes start with C<\x> and are then either followed by a
-two digit hexadecimal number, or a hexadecimal number of arbitrary length
-surrounded by curly braces. The hexadecimal number is the code point of
-the character you want to express.
+Like octal escapes, there are two forms of hexadecimal escapes, but both start
+with the same thing, C<\x>.  This is followed by either exactly two hexadecimal
+digits forming a number, or a hexadecimal number of arbitrary length surrounded
+by curly braces. The hexadecimal number is the code point of the character you
+want to express.
  
-Note that a character that is expressed as a hexadecimal escape is considered
+Note that a character that is expressed as one of these escapes is considered
  as a character without special meaning by the regex engine, and will match
  "as is".
  
diff --git a/pod/perlretut.pod b/pod/perlretut.pod

index eae266a..f218717 100644 (file)
--- a/pod/perlretut.pod
+++ b/pod/perlretut.pod
@@ -184,7 +184,8 @@ bytes.  Here are some examples of escapes:
      "1000\t2000" =~ m(0\t2)   # matches
      "1000\n2000" =~ /0\n20/   # matches
      "1000\t2000" =~ /\000\t2/ # doesn't match, "0" ne "\000"
-    "cat"   =~ /\143\x61\x74/ # matches in ASCII, but a weird way to spell cat
+    "cat"   =~ /\o{143}\x61\x74/ # matches in ASCII, but a weird way
+                                 # to spell cat
  
  If you've been around Perl a while, all this talk of escape sequences
  may seem familiar.  Similar escape sequences are used in double-quoted
@@ -1876,9 +1877,9 @@ much about Perl's internal representation of strings.  But they do need
  to know 1) how to represent Unicode characters in a regexp and 2) that
  a matching operation will treat the string to be searched as a sequence
  of characters, not bytes.  The answer to 1) is that Unicode characters
-greater than C<chr(255)> are represented using the C<\x{hex}> notation,
-because the \0 octal and \x hex (without curly braces) don't go further
-than 255.
+greater than C<chr(255)> are represented using the C<\x{hex}> notation, because
+\x hex (without curly braces) doesn't go further than 255.  (Starting in Perl
+5.14) if you're an octal fan, you can also use C<\o{oct}>.
  
      /\x{263a}/;  # match a Unicode smiley face :)
  
diff --git a/proto.h b/proto.h

index 6a5110e..1fc1180 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -1633,6 +1633,14 @@ PERL_CALLCONV UV Perl_grok_bin(pTHX_ const char* start, STRLEN* len_p, I32* flag
  PERL_CALLCONV char     Perl_grok_bslash_c(pTHX_ const char source, const bool output_warning)
                         __attribute__warn_unused_result__;
  
+PERL_CALLCONV char*    Perl_grok_bslash_o(pTHX_ const char* s, UV* uv, STRLEN* len, const bool output_warning)
+                       __attribute__warn_unused_result__
+                       __attribute__nonnull__(pTHX_1)
+                       __attribute__nonnull__(pTHX_2)
+                       __attribute__nonnull__(pTHX_3);
+#define PERL_ARGS_ASSERT_GROK_BSLASH_O \
+       assert(s); assert(uv); assert(len)
+
  PERL_CALLCONV UV       Perl_grok_hex(pTHX_ const char* start, STRLEN* len_p, I32* flags, NV *result)
                         __attribute__nonnull__(pTHX_1)
                         __attribute__nonnull__(pTHX_2)
diff --git a/regcomp.c b/regcomp.c

index 72af569..74f996b 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -7362,6 +7362,7 @@ tryagain:
             register UV ender;
             register char *p;
             char *s;
+           char *error_msg;
             STRLEN foldlen;
             U8 tmpbuf[UTF8_MAXBYTES_CASE+1], *foldbuf;
  
@@ -7462,6 +7463,26 @@ tryagain:
                           ender = ASCII_TO_NATIVE('\007');
                         p++;
                         break;
+                   case 'o':
+                       {
+                           STRLEN brace_len = len;
+                           if ((error_msg = grok_bslash_o(p,
+                                                          &ender,
+                                                          &brace_len,
+                                                          SIZE_ONLY))
+                               != NULL)
+                           {
+                               vFAIL(error_msg);
+                           }
+                           p += brace_len;
+                           if (PL_encoding && ender < 0x100) {
+                               goto recode_encoding;
+                           }
+                           if (ender > 0xff) {
+                               RExC_utf8 = 1;
+                           }
+                           break;
+                       }
                     case 'x':
                         if (*++p == '{') {
                             char* const e = strchr(p, '}');
@@ -7971,6 +7992,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
  
  parseit:
      while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != ']') {
+       char* error_msg;
  
      charclassloop:
  
@@ -8077,6 +8099,21 @@ parseit:
             case 'b':   value = '\b';                   break;
             case 'e':   value = ASCII_TO_NATIVE('\033');break;
             case 'a':   value = ASCII_TO_NATIVE('\007');break;
+           case 'o':
+               RExC_parse--;   /* function expects to be pointed at the 'o' */
+               if ((error_msg = grok_bslash_o(RExC_parse,
+                                              &value,
+                                              &numlen,
+                                              SIZE_ONLY))
+                   != NULL)
+               {
+                   vFAIL(error_msg);
+               }
+               RExC_parse += numlen;
+               if (PL_encoding && value < 0x100) {
+                   goto recode_encoding;
+               }
+               break;
             case 'x':
                 if (*RExC_parse == '{') {
                      I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
diff --git a/t/lib/warnings/regcomp b/t/lib/warnings/regcomp

index f85aa44..3f80ccc 100644 (file)
--- a/t/lib/warnings/regcomp
+++ b/t/lib/warnings/regcomp
@@ -207,3 +207,33 @@ Useless (?-c) - don't use /gc modifier in regex; marked by <-- HERE in m/(?o-c <
  Useless (?o) - use /o modifier in regex; marked by <-- HERE in m/(?o <-- HERE gc)/ at - line 12.
  Useless (?g) - use /g modifier in regex; marked by <-- HERE in m/(?og <-- HERE c)/ at - line 12.
  Useless (?c) - use /gc modifier in regex; marked by <-- HERE in m/(?ogc <-- HERE )/ at - line 12.
+########
+# regcomp.c [S_regatom]
+$a = qr/\o{/;
+EXPECT
+Missing right brace on \o{ in regex; marked by <-- HERE in m/\ <-- HERE o{/ at - line 2.
+########
+# regcomp.c [S_regatom]
+$a = qr/\o/;
+EXPECT
+Missing braces on \o{} in regex; marked by <-- HERE in m/\ <-- HERE o/ at - line 2.
+########
+# regcomp.c [S_regatom]
+$a = qr/\o{}/;
+EXPECT
+Number with no digits in regex; marked by <-- HERE in m/\ <-- HERE o{}/ at - line 2.
+########
+# regcomp.c [S_regclass]
+$a = qr/[\o{]/;
+EXPECT
+Missing right brace on \o{ in regex; marked by <-- HERE in m/[\ <-- HERE o{]/ at - line 2.
+########
+# regcomp.c [S_regclass]
+$a = qr/[\o]/;
+EXPECT
+Missing braces on \o{} in regex; marked by <-- HERE in m/[\ <-- HERE o]/ at - line 2.
+########
+# regcomp.c [S_regclass]
+$a = qr/[\o{}]/;
+EXPECT
+Number with no digits in regex; marked by <-- HERE in m/[\ <-- HERE o{}]/ at - line 2.
diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke

index 4bb131f..076270c 100644 (file)
--- a/t/lib/warnings/toke
+++ b/t/lib/warnings/toke
@@ -966,3 +966,26 @@ Use of := for an empty attribute list is deprecated at - line 36.
  Use of := for an empty attribute list is deprecated at - line 38.
  Use of := for an empty attribute list is deprecated at - line 41.
  Use of := for an empty attribute list is deprecated at - line 42.
+########
+# toke.c
+use warnings 'syntax' ;
+my $a = "\o";
+my $a = "\o{";
+my $a = "\o{}";
+no warnings 'syntax' ;
+my $a = "\o";
+my $a = "\o{";
+my $a = "\o{}";
+EXPECT
+Missing braces on \o{} at - line 3, within string
+Missing right brace on \o{ at - line 4, within string
+Number with no digits at - line 5, within string
+BEGIN not safe after errors--compilation aborted at - line 6.
+########
+# toke.c
+use warnings 'digit' ;
+my $a = "\o{1238456}";
+no warnings 'digit' ;
+my $a = "\o{1238456}";
+EXPECT
+Non-octal character '8'.  Resolved as "\o{123}" at - line 3.
diff --git a/t/op/qq.t b/t/op/qq.t

index 7b75b9b..0136608 100644 (file)
--- a/t/op/qq.t
+++ b/t/op/qq.t
@@ -5,7 +5,7 @@ BEGIN {
      @INC = '../lib';
  }
  
-print q(1..26
+print q(1..29
  );
  
  # This is() function is written to avoid ""
@@ -64,6 +64,9 @@ is ("\x{10FFFD}", chr 1114109);
  is ("\400", chr 0x100);
  is ("\600", chr 0x180);
  is ("\777", chr 0x1FF);
+is ("a\o{120}b", "a" . chr(0x50) . "b");
+is ("a\o{400}b", "a" . chr(0x100) . "b");
+is ("a\o{1000}b", "a" . chr(0x200) . "b");
  
  # These kludged tests should change when we remove the temporary fatal error
  # in util.c for "\c{".  And, the warning there should probably not be
diff --git a/t/re/re_tests b/t/re/re_tests

index fc29fb6..36a2f4c 100644 (file)
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1460,5 +1460,11 @@ abc\N{def        -       c       -       \\N{NAME} must be resolved by the lexer
  [a\400]        \x{100} y       $&      \x{100}
  [b\600]        \x{180} y       $&      \x{180}
  [c\777]        \x{1FF} y       $&      \x{1FF}
+\o{120}        \x{50}  y       $&      \x{50}
+\o{400}        \x{100} y       $&      \x{100}
+\o{1000}       \x{200} y       $&      \x{200}
+[a\o{120}]     \x{50}  y       $&      \x{50}
+[a\o{400}]     \x{100} y       $&      \x{100}
+[a\o{1000}]    \x{200} y       $&      \x{200}
  
  # vim: softtabstop=0 noexpandtab
diff --git a/toke.c b/toke.c

index b7b33e8..75fb327 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -2879,6 +2879,20 @@ S_scan_const(pTHX_ char *start)
                 }
                 goto NUM_ESCAPE_INSERT;
  
+           /* eg. \o{24} indicates the octal constant \024 */
+           case 'o':
+               {
+                   STRLEN len;
+
+                   char* error = grok_bslash_o(s, &uv, &len, 1);
+                   s += len;
+                   if (error) {
+                       yyerror(error);
+                       continue;
+                   }
+                   goto NUM_ESCAPE_INSERT;
+               }
+
             /* eg. \x24 indicates the hex constant 0x24 */
             case 'x':
                 ++s;
diff --git a/util.c b/util.c

index b3b385e..6fdc653 100644 (file)
--- a/util.c
+++ b/util.c
@@ -3904,7 +3904,7 @@ Perl_report_evil_fh(pTHX_ const GV *gv, const IO *io, I32 op)
  char
  Perl_grok_bslash_c(pTHX_ const char source, const bool output_warning)
  {
-    
+
      U8 result;
  
      if (! isASCII(source)) {
@@ -3935,6 +3935,72 @@ Perl_grok_bslash_c(pTHX_ const char source, const bool output_warning)
      return result;
  }
  
+char *
+Perl_grok_bslash_o(pTHX_ const char *s, UV *uv, STRLEN *len, const bool output_warning)
+{
+
+/*  Documentation to be supplied when interface nailed down finally
+ *  This returns NULL on success, otherwise a pointer to an internal constant
+ *  error message.  On input:
+ *     s   points to a string that begins with o, and the previous character was
+ *         a backslash.
+ *     uv  points to a UV that will hold the output value
+ *     len will point to the next character in the string past the end of this
+ *         construct
+ *     output_warning says whether to output any warning messages, or suppress
+ *         them
+ */
+    char* e;
+    STRLEN numbers_len;
+    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+               | PERL_SCAN_DISALLOW_PREFIX
+               /* XXX Until the message is improved in grok_oct, handle errors
+                * ourselves */
+               | PERL_SCAN_SILENT_ILLDIGIT;
+
+    PERL_ARGS_ASSERT_GROK_BSLASH_O;
+
+
+    assert(*s == 'o');
+    s++;
+
+    if (*s != '{') {
+       *len = 1;       /* Move past the o */
+       return "Missing braces on \\o{}";
+    }
+
+    e = strchr(s, '}');
+    if (!e) {
+       *len = 2;       /* Move past the o{ */
+       return "Missing right brace on \\o{";
+    }
+
+    /* Return past the '}' no matter what is inside the braces */
+    *len = e - s + 2;  /* 2 = 1 for the o + 1 for the '}' */
+
+    s++;    /* Point to first digit */
+
+    numbers_len = e - s;
+    if (numbers_len == 0) {
+       return "Number with no digits";
+    }
+
+    *uv = NATIVE_TO_UNI(grok_oct(s, &numbers_len, &flags, NULL));
+    /* Note that if has non-octal, will ignore everything starting with that up
+     * to the '}' */
+
+    if (output_warning && numbers_len != (STRLEN) (e - s)) {
+       Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
+       /* diag_listed_as: Non-octal character '%c'.  Resolved as "%s" */
+                      "Non-octal character '%c'.  Resolved as \"\\o{%.*s}\"",
+                      *(s + numbers_len),
+                      (int) numbers_len,
+                      s);
+    }
+
+    return NULL;
+}
+
  /* To workaround core dumps from the uninitialised tm_zone we get the
   * system to give us a reasonable struct to copy.  This fix means that
   * strftime uses the tm_zone and tm_gmtoff values returned by
author	Karl Williamson <khw@khw-desktop.(none)>
	Thu, 15 Jul 2010 23:28:28 +0000 (17:28 -0600)
committer	David Golden <dagolden@cpan.org>
	Sun, 18 Jul 2010 01:50:48 +0000 (21:50 -0400)
embed.fnc		patch \| blob \| history
embed.h		patch \| blob \| history
global.sym		patch \| blob \| history
pod/perl5133delta.pod		patch \| blob \| history
pod/perldiag.pod		patch \| blob \| history
pod/perlre.pod		patch \| blob \| history
pod/perlrebackslash.pod		patch \| blob \| history
pod/perlretut.pod		patch \| blob \| history
proto.h		patch \| blob \| history
regcomp.c		patch \| blob \| history
t/lib/warnings/regcomp		patch \| blob \| history
t/lib/warnings/toke		patch \| blob \| history
t/op/qq.t		patch \| blob \| history
t/re/re_tests		patch \| blob \| history
toke.c		patch \| blob \| history
util.c		patch \| blob \| history