Revert the parts of #3926 that outlawed character ranges

author Jarkko Hietaniemi <jhi@iki.fi>

Tue, 12 Oct 1999 15:30:05 +0000 (15:30 +0000)

committer Jarkko Hietaniemi <jhi@iki.fi>

Tue, 12 Oct 1999 15:30:05 +0000 (15:30 +0000)
author Jarkko Hietaniemi <jhi@iki.fi>
Tue, 12 Oct 1999 15:30:05 +0000 (15:30 +0000)
committer Jarkko Hietaniemi <jhi@iki.fi>
Tue, 12 Oct 1999 15:30:05 +0000 (15:30 +0000)
diff --git a/pod/perldiag.pod b/pod/perldiag.pod

index d0f1be8..fb5c7e6 100644 (file)
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -1681,8 +1681,7 @@ by Perl or by a user-supplied handler.  See L<attributes>.
  =item invalid [] range in regexp
  
  (F) The range specified in a character class had a minimum character
-greater than the maximum character, or the range didn't start/end with
-a literal character.  See L<perlre>.
+greater than the maximum character.  See L<perlre>.
  
  =item Invalid conversion in %s: "%s"
  
diff --git a/pod/perlre.pod b/pod/perlre.pod

index 9a06305..1610254 100644 (file)
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -185,8 +185,9 @@ Use C<\w+> to match a string of Perl-identifier characters (which isn't
  the same as matching an English word).  If C<use locale> is in effect, the
  list of alphabetic characters generated by C<\w> is taken from the
  current locale.  See L<perllocale>.  You may use C<\w>, C<\W>, C<\s>, C<\S>,
-C<\d>, and C<\D> within character classes (though not as either end of
-a range).  See L<utf8> for details about C<\pP>, C<\PP>, and C<\X>.
+C<\d>, and C<\D> within character classes, but if you try to use them
+as endpoints of a range, no range is formed; the "-" is understood literally.
+See L<utf8> for details about C<\pP>, C<\PP>, and C<\X>.
  
  The POSIX character class syntax
  
@@ -940,6 +941,9 @@ at the start or end of the list, or escape it with a backslash.  (The
  following all specify the same class of three characters: C<[-az]>,
  C<[az-]>, and C<[a\-z]>.  All are different from C<[a-z]>, which
  specifies a class containing twenty-six characters.)
+Also, if you try to use the character classes C<\w>, C<\W>, C<\s>,
+C<\S>, C<\d>, or C<\D> as endpoints of a range, no range is formed;
+the "-" is understood literally.
  
  Note also that the whole range idea is rather unportable between
  character sets--and even within character sets they may cause results
diff --git a/regcomp.c b/regcomp.c

index 99423e1..02dca51 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -2364,8 +2364,10 @@ S_regclass(pTHX)
             }
         }
         if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) {
-           if (range)
-               FAIL("invalid [] range in regexp"); /* [a-\w], [a-[:word:]] */
+           if (range) {
+               ANYOF_BITMAP_SET(opnd, lastvalue);
+               ANYOF_BITMAP_SET(opnd, '-');
+           }
             switch (namedclass) {
             case ANYOF_ALNUM:
                 if (LOC)
@@ -2608,6 +2610,8 @@ S_regclass(pTHX)
                 ANYOF_FLAGS(opnd) |= ANYOF_CLASS;
             continue;
         }
+       if (range && namedclass > OOB_NAMEDCLASS)
+           range = 0; /* [a-\d], [a-[:digit:]], not a true range. */
         if (range) {
             if (lastvalue > value)
                 FAIL("invalid [] range in regexp"); /* [b-a] */
@@ -2617,8 +2621,6 @@ S_regclass(pTHX)
             lastvalue = value;
             if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
                 PL_regcomp_parse[1] != ']') {
-               if (namedclass > OOB_NAMEDCLASS)
-                   FAIL("invalid [] range in regexp"); /* [\w-a] */
                 PL_regcomp_parse++;
                 range = 1;
                 continue;       /* do it next time */
@@ -2777,9 +2779,10 @@ S_regclassutf8(pTHX)
             }
         }
         if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) {
-           if (range)
-               FAIL("invalid [] range in regexp"); /* [a-\w], [a-[:word:]] */
-           switch (namedclass) {
+           if (range) /* [a-\d], [a-[:digit:]] */
+                Perl_sv_catpvf(aTHX_ listsv, /* 0x002D is Unicode for '-' */
+                              "%04"UVxf"\n%002D\n", (UV)lastvalue);
+           switch (namedclass) {
             case ANYOF_ALNUM:
                 Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n");        break;
             case ANYOF_NALNUM:
@@ -2835,6 +2838,8 @@ S_regclassutf8(pTHX)
             }
             continue;
         }
+       if (range && namedclass > OOB_NAMEDCLASS)
+           range = 0; /* [a-\d], [a-[:digit:]], not a true range. */
          if (range) {
             if (lastvalue > value)
                 FAIL("invalid [] range in regexp"); /* [b-a] */
@@ -2846,8 +2851,6 @@ S_regclassutf8(pTHX)
             lastvalue = value;
             if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
                 PL_regcomp_parse[1] != ']') {
-               if (namedclass > OOB_NAMEDCLASS)
-                   FAIL("invalid [] range in regexp"); /* [\w-a] */
                 PL_regcomp_parse++;
                 range = 1;
                 continue;       /* do it next time */
diff --git a/t/op/re_tests b/t/op/re_tests

index 695672d..974bec5 100644 (file)
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -735,8 +735,10 @@ foo.bart   foo.bart        y       -       -
  .[X](.+)+[X][X]        bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n       -       -
  .[X][X](.+)+[X]        bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n       -       -
  tt+$   xxxtt   y       -       -
-[a-\w] -       c       -       /[a-\w]/: invalid [] range in regexp
-[\w-z] -       c       -       /[\w-z]/: invalid [] range in regexp
-[0-[:digit:]]  -       c       -       /[0-[:digit:]]/: invalid [] range in regexp
-[[:digit:]-9]  -       c       -       /[[:digit:]-9]/: invalid [] range in regexp
+([a-\d]+)      za-9z   y       $1      a-9
+([\d-\s]+)     a0- z   y       $1      0- 
+([\d-z]+)      a0-za   y       $1      0-z
+([a-[:digit:]]+)       za-9z   y       $1      a-9
+([[:digit:]-[:alpha:]]+)       =0-z=   y       $1      0-z
+([[:digit:]-z]+)       =0-z=   y       $1      0-z
  \GX.*X aaaXbX  n       -       -
author	Jarkko Hietaniemi <jhi@iki.fi>
	Tue, 12 Oct 1999 15:30:05 +0000 (15:30 +0000)
committer	Jarkko Hietaniemi <jhi@iki.fi>
	Tue, 12 Oct 1999 15:30:05 +0000 (15:30 +0000)
pod/perldiag.pod		patch \| blob \| history
pod/perlre.pod		patch \| blob \| history
regcomp.c		patch \| blob \| history
t/op/re_tests		patch \| blob \| history