regcomp.c: white space only

author Karl Williamson <public@khwilliamson.com>

Mon, 28 Feb 2011 16:25:03 +0000 (09:25 -0700)

committer Karl Williamson <public@khwilliamson.com>

Mon, 28 Feb 2011 16:38:15 +0000 (09:38 -0700)
author Karl Williamson <public@khwilliamson.com>
Mon, 28 Feb 2011 16:25:03 +0000 (09:25 -0700)
committer Karl Williamson <public@khwilliamson.com>
Mon, 28 Feb 2011 16:38:15 +0000 (09:38 -0700)
diff --git a/regcomp.c b/regcomp.c

index d48ac48..58c3a97 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -10056,166 +10056,170 @@ parseit:
      if (FOLD && nonbitmap) {
         UV i;
  
-           HV* fold_intersection;
-           UV* fold_list;
-
-           /* This is a list of all the characters that participate in folds
-            * (except marks, etc in multi-char folds */
-           if (! PL_utf8_foldable) {
-               SV* swash = swash_init("utf8", "Cased", &PL_sv_undef, 1, 0);
-               PL_utf8_foldable = _swash_to_invlist(swash);
-           }
-
-           /* This is a hash that for a particular fold gives all characters
-            * that are involved in it */
-           if (! PL_utf8_foldclosures) {
-
-               /* If we were unable to find any folds, then we likely won't be
-                * able to find the closures.  So just create an empty list.
-                * Folding will effectively be restricted to the non-Unicode
-                * rules hard-coded into Perl.  (This case happens legitimately
-                * during compilation of Perl itself before the Unicode tables
-                * are generated) */
-               if (invlist_len(PL_utf8_foldable) == 0) {
-                   PL_utf8_foldclosures = _new_invlist(0);
-               } else {
-                   /* If the folds haven't been read in, call a fold function
-                    * to force that */
-                   if (! PL_utf8_tofold) {
-                       U8 dummy[UTF8_MAXBYTES+1];
-                       STRLEN dummy_len;
-                       to_utf8_fold((U8*) "A", dummy, &dummy_len);
-                   }
-                   PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
+       HV* fold_intersection;
+       UV* fold_list;
+
+       /* This is a list of all the characters that participate in folds
+           * (except marks, etc in multi-char folds */
+       if (! PL_utf8_foldable) {
+           SV* swash = swash_init("utf8", "Cased", &PL_sv_undef, 1, 0);
+           PL_utf8_foldable = _swash_to_invlist(swash);
+       }
+
+       /* This is a hash that for a particular fold gives all characters
+           * that are involved in it */
+       if (! PL_utf8_foldclosures) {
+
+           /* If we were unable to find any folds, then we likely won't be
+            * able to find the closures.  So just create an empty list.
+            * Folding will effectively be restricted to the non-Unicode rules
+            * hard-coded into Perl.  (This case happens legitimately during
+            * compilation of Perl itself before the Unicode tables are
+            * generated) */
+           if (invlist_len(PL_utf8_foldable) == 0) {
+               PL_utf8_foldclosures = _new_invlist(0);
+           } else {
+               /* If the folds haven't been read in, call a fold function
+                   * to force that */
+               if (! PL_utf8_tofold) {
+                   U8 dummy[UTF8_MAXBYTES+1];
+                   STRLEN dummy_len;
+                   to_utf8_fold((U8*) "A", dummy, &dummy_len);
                 }
+               PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
             }
+       }
+
+       /* Only the characters in this class that participate in folds need
+           * be checked.  Get the intersection of this class and all the
+           * possible characters that are foldable.  This can quickly narrow
+           * down a large class */
+       fold_intersection = invlist_intersection(PL_utf8_foldable, nonbitmap);
+
+       /* Now look at the foldable characters in this class individually */
+       fold_list = invlist_array(fold_intersection);
+       for (i = 0; i < invlist_len(fold_intersection); i++) {
+           UV j;
+
+           /* The next entry is the beginning of the range that is in the
+            * class */
+           UV start = fold_list[i++];
+
+
+           /* The next entry is the beginning of the next range, which
+               * isn't in the class, so the end of the current range is one
+               * less than that */
+           UV end = fold_list[i] - 1;
  
-           /* Only the characters in this class that participate in folds need
-            * be checked.  Get the intersection of this class and all the
-            * possible characters that are foldable.  This can quickly narrow
-            * down a large class */
-           fold_intersection = invlist_intersection(PL_utf8_foldable, nonbitmap);
-
-           /* Now look at the foldable characters in this class individually */
-           fold_list = invlist_array(fold_intersection);
-           for (i = 0; i < invlist_len(fold_intersection); i++) {
-               UV j;
-
-               /* The next entry is the beginning of the range that is in the
-                * class */
-               UV start = fold_list[i++];
-
-
-               /* The next entry is the beginning of the next range, which
-                * isn't in the class, so the end of the current range is one
-                * less than that */
-               UV end = fold_list[i] - 1;
-
-               /* Look at every character in the range */
-               for (j = start; j <= end; j++) {
-
-                   /* Get its fold */
-                   U8 foldbuf[UTF8_MAXBYTES_CASE+1];
-                   STRLEN foldlen;
-                   const UV f = to_uni_fold(j, foldbuf, &foldlen);
-
-                   if (foldlen > (STRLEN)UNISKIP(f)) {
-
-                       /* Any multicharacter foldings (disallowed in
-                        * lookbehind patterns) require the following
-                        * transform: [ABCDEF] -> (?:[ABCabcDEFd]|pq|rst) where
-                        * E folds into "pq" and F folds into "rst", all other
-                        * characters fold to single characters.  We save away
-                        * these multicharacter foldings, to be later saved as
-                        * part of the additional "s" data. */
-                       if (! RExC_in_lookbehind) {
-                           U8* loc = foldbuf;
-                           U8* e = foldbuf + foldlen;
-
-                           /* If any of the folded characters of this are in
-                            * the Latin1 range, tell the regex engine that
-                            * this can match a non-utf8 target string.  The
-                            * only multi-byte fold whose source is in the
-                            * Latin1 range (U+00DF) applies only when the
-                            * target string is utf8, or under unicode rules */
-                           if (j > 255 || AT_LEAST_UNI_SEMANTICS) {
-                               while (loc < e) {
-
-                                   /* Can't mix ascii with non- under /aa */
-                                   if (MORE_ASCII_RESTRICTED
-                                       && (isASCII(*loc) != isASCII(j)))
-                                   {
+           /* Look at every character in the range */
+           for (j = start; j <= end; j++) {
+
+               /* Get its fold */
+               U8 foldbuf[UTF8_MAXBYTES_CASE+1];
+               STRLEN foldlen;
+               const UV f = to_uni_fold(j, foldbuf, &foldlen);
+
+               if (foldlen > (STRLEN)UNISKIP(f)) {
+
+                   /* Any multicharacter foldings (disallowed in
+                       * lookbehind patterns) require the following
+                       * transform: [ABCDEF] -> (?:[ABCabcDEFd]|pq|rst) where
+                       * E folds into "pq" and F folds into "rst", all other
+                       * characters fold to single characters.  We save away
+                       * these multicharacter foldings, to be later saved as
+                       * part of the additional "s" data. */
+                   if (! RExC_in_lookbehind) {
+                       U8* loc = foldbuf;
+                       U8* e = foldbuf + foldlen;
+
+                       /* If any of the folded characters of this are in
+                           * the Latin1 range, tell the regex engine that
+                           * this can match a non-utf8 target string.  The
+                           * only multi-byte fold whose source is in the
+                           * Latin1 range (U+00DF) applies only when the
+                           * target string is utf8, or under unicode rules */
+                       if (j > 255 || AT_LEAST_UNI_SEMANTICS) {
+                           while (loc < e) {
+
+                               /* Can't mix ascii with non- under /aa */
+                               if (MORE_ASCII_RESTRICTED
+                                   && (isASCII(*loc) != isASCII(j)))
+                               {
+                                   goto end_multi_fold;
+                               }
+                               if (UTF8_IS_INVARIANT(*loc)
+                                   || UTF8_IS_DOWNGRADEABLE_START(*loc))
+                               {
+                                   /* Can't mix above and below 256 under
+                                       * LOC */
+                                   if (LOC) {
                                         goto end_multi_fold;
                                     }
-                                   if (UTF8_IS_INVARIANT(*loc)
-                                       || UTF8_IS_DOWNGRADEABLE_START(*loc))
-                                   {
-                                       /* Can't mix above and below 256 under
-                                        * LOC */
-                                       if (LOC) {
-                                           goto end_multi_fold;
-                                       }
-                                       ANYOF_FLAGS(ret)
-                                               |= ANYOF_NONBITMAP_NON_UTF8;
-                                       break;
-                                   }
-                                   loc += UTF8SKIP(loc);
+                                   ANYOF_FLAGS(ret)
+                                           |= ANYOF_NONBITMAP_NON_UTF8;
+                                   break;
                                 }
+                               loc += UTF8SKIP(loc);
                             }
-
-                           add_alternate(&unicode_alternate, foldbuf, foldlen);
-                       end_multi_fold: ;
                         }
-                   }
-                   else {
-                       /* Single character fold.  Add everything in its fold
-                        * closure to the list that this node should match */
-                       SV** listp;
-
-                       /* The fold closures data structure is a hash with the
-                        * keys being every character that is folded to, like
-                        * 'k', and the values each an array of everything that
-                        * folds to its key.  e.g. [ 'k', 'K', KELVIN_SIGN ] */
-                       if ((listp = hv_fetch(PL_utf8_foldclosures,
-                                     (char *) foldbuf, foldlen, FALSE)))
-                       {
-                           AV* list = (AV*) *listp;
-                           IV k;
-                           for (k = 0; k <= av_len(list); k++) {
-                               SV** c_p = av_fetch(list, k, FALSE);
-                               UV c;
-                               if (c_p == NULL) {
-                                   Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
-                               }
-                               c = SvUV(*c_p);
  
-                               /* /aa doesn't allow folds between ASCII and
-                                * non-; /l doesn't allow them between above
-                                * and below 256 */
-                               if ((MORE_ASCII_RESTRICTED && (isASCII(c) != isASCII(j)))
-                                    || (LOC && ((c < 256) != (j < 256))))
-                               {
-                                   continue;
-                               }
+                       add_alternate(&unicode_alternate, foldbuf, foldlen);
+                   end_multi_fold: ;
+                   }
+               }
+               else {
+                   /* Single character fold.  Add everything in its fold
+                       * closure to the list that this node should match */
+                   SV** listp;
+
+                   /* The fold closures data structure is a hash with the
+                       * keys being every character that is folded to, like
+                       * 'k', and the values each an array of everything that
+                       * folds to its key.  e.g. [ 'k', 'K', KELVIN_SIGN ] */
+                   if ((listp = hv_fetch(PL_utf8_foldclosures,
+                                   (char *) foldbuf, foldlen, FALSE)))
+                   {
+                       AV* list = (AV*) *listp;
+                       IV k;
+                       for (k = 0; k <= av_len(list); k++) {
+                           SV** c_p = av_fetch(list, k, FALSE);
+                           UV c;
+                           if (c_p == NULL) {
+                               Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
+                           }
+                           c = SvUV(*c_p);
+
+                           /* /aa doesn't allow folds between ASCII and
+                               * non-; /l doesn't allow them between above
+                               * and below 256 */
+                           if ((MORE_ASCII_RESTRICTED
+                                && (isASCII(c) != isASCII(j)))
+                                   || (LOC && ((c < 256) != (j < 256))))
+                           {
+                               continue;
+                           }
  
-                               if (c < 256 && AT_LEAST_UNI_SEMANTICS) {
-                                   stored += set_regclass_bit(pRExC_state, ret, (U8) c, &l1_fold_invlist, &unicode_alternate);
-                               }
-                                   /* It may be that the code point is already
-                                    * in this range or already in the bitmap,
-                                    * in which case we need do nothing */
-                               else if ((c < start || c > end)
-                                        && (c > 255
-                                            || ! ANYOF_BITMAP_TEST(ret, c)))
-                               {
-                                   nonbitmap = add_cp_to_invlist(nonbitmap, c);
-                               }
+                           if (c < 256 && AT_LEAST_UNI_SEMANTICS) {
+                               stored += set_regclass_bit(pRExC_state,
+                                       ret,
+                                       (U8) c,
+                                       &l1_fold_invlist, &unicode_alternate);
+                           }
+                               /* It may be that the code point is already
+                                   * in this range or already in the bitmap,
+                                   * in which case we need do nothing */
+                           else if ((c < start || c > end)
+                                       && (c > 255
+                                           || ! ANYOF_BITMAP_TEST(ret, c)))
+                           {
+                               nonbitmap = add_cp_to_invlist(nonbitmap, c);
                             }
                         }
                     }
                 }
             }
-           invlist_destroy(fold_intersection);
+       }
+       invlist_destroy(fold_intersection);
      }
  
      /* Combine the two lists into one. */
author	Karl Williamson <public@khwilliamson.com>
	Mon, 28 Feb 2011 16:25:03 +0000 (09:25 -0700)
committer	Karl Williamson <public@khwilliamson.com>
	Mon, 28 Feb 2011 16:38:15 +0000 (09:38 -0700)