regcomp.c: Revise inversion list API

author Karl Williamson <public@khwilliamson.com>

Fri, 27 May 2011 22:43:28 +0000 (16:43 -0600)

committer Karl Williamson <public@khwilliamson.com>

Sun, 3 Jul 2011 20:05:45 +0000 (14:05 -0600)
author Karl Williamson <public@khwilliamson.com>
Fri, 27 May 2011 22:43:28 +0000 (16:43 -0600)
committer Karl Williamson <public@khwilliamson.com>
Sun, 3 Jul 2011 20:05:45 +0000 (14:05 -0600)
diff --git a/embed.fnc b/embed.fnc

index 1057bfc93cc310a3d1d1dd36346319d3cd386ee1..bee3ef887099d5b17bccd3ea8acf91eebb581065 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1310,13 +1310,13 @@ EsMR    |SV*    |add_range_to_invlist   |NULLOK SV* invlist|const UV start|const UV end
  EiMR   |UV*    |invlist_array  |NN SV* const invlist
  EiM    |void   |invlist_destroy        |NN SV* const invlist
  EsM    |void   |invlist_extend    |NN SV* const invlist|const UV len
-EsMR   |SV*    |invlist_intersection   |NN SV* const a|NN SV* const b
+EsM    |void   |invlist_intersection   |NN SV* const a|NN SV* const b|NN SV** i
  EiMR   |UV     |invlist_len    |NN SV* const invlist
  EiMR   |UV     |invlist_max    |NN SV* const invlist
  EiM    |void   |invlist_set_len        |NN SV* const invlist|const UV len
  EiM    |void   |invlist_set_max        |NN SV* const invlist|const UV max
  EiM    |void   |invlist_trim   |NN SV* const invlist
-EsMR   |SV*    |invlist_union  |NN SV* const a|NN SV* const b
+EsM    |void   |invlist_union  |NN SV* const a|NN SV* const b|NN SV** output
  #endif
  Ap     |void   |taint_env
  Ap     |void   |taint_proper   |NULLOK const char* f|NN const char *const s
diff --git a/embed.h b/embed.h

index dd759a82550dce94fbded87b7f14ab2384bd165d..ae09513fe5c91db21119feb11c032eb5c4b75578 100644 (file)
--- a/embed.h
+++ b/embed.h
@@ -878,13 +878,13 @@
  #define invlist_array(a)       S_invlist_array(aTHX_ a)
  #define invlist_destroy(a)     S_invlist_destroy(aTHX_ a)
  #define invlist_extend(a,b)    S_invlist_extend(aTHX_ a,b)
-#define invlist_intersection(a,b)      S_invlist_intersection(aTHX_ a,b)
+#define invlist_intersection(a,b,c)    S_invlist_intersection(aTHX_ a,b,c)
  #define invlist_len(a)         S_invlist_len(aTHX_ a)
  #define invlist_max(a)         S_invlist_max(aTHX_ a)
  #define invlist_set_len(a,b)   S_invlist_set_len(aTHX_ a,b)
  #define invlist_set_max(a,b)   S_invlist_set_max(aTHX_ a,b)
  #define invlist_trim(a)                S_invlist_trim(aTHX_ a)
-#define invlist_union(a,b)     S_invlist_union(aTHX_ a,b)
+#define invlist_union(a,b,c)   S_invlist_union(aTHX_ a,b,c)
  #define join_exact(a,b,c,d,e,f)        S_join_exact(aTHX_ a,b,c,d,e,f)
  #define make_trie(a,b,c,d,e,f,g,h)     S_make_trie(aTHX_ a,b,c,d,e,f,g,h)
  #define make_trie_failtable(a,b,c,d)   S_make_trie_failtable(aTHX_ a,b,c,d)
diff --git a/proto.h b/proto.h

index 3a97fa848d30d5f05e58cd6a28a1ae6225e1c095..a93a7b07c65dab1578bd2c72c730b006cfa48055 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -6057,12 +6057,12 @@ STATIC void     S_invlist_extend(pTHX_ SV* const invlist, const UV len)
  #define PERL_ARGS_ASSERT_INVLIST_EXTEND        \
         assert(invlist)
  
-STATIC SV*     S_invlist_intersection(pTHX_ SV* const a, SV* const b)
-                       __attribute__warn_unused_result__
+STATIC void    S_invlist_intersection(pTHX_ SV* const a, SV* const b, SV** i)
                         __attribute__nonnull__(pTHX_1)
-                       __attribute__nonnull__(pTHX_2);
+                       __attribute__nonnull__(pTHX_2)
+                       __attribute__nonnull__(pTHX_3);
  #define PERL_ARGS_ASSERT_INVLIST_INTERSECTION  \
-       assert(a); assert(b)
+       assert(a); assert(b); assert(i)
  
  PERL_STATIC_INLINE UV  S_invlist_len(pTHX_ SV* const invlist)
                         __attribute__warn_unused_result__
@@ -6091,12 +6091,12 @@ PERL_STATIC_INLINE void S_invlist_trim(pTHX_ SV* const invlist)
  #define PERL_ARGS_ASSERT_INVLIST_TRIM  \
         assert(invlist)
  
-STATIC SV*     S_invlist_union(pTHX_ SV* const a, SV* const b)
-                       __attribute__warn_unused_result__
+STATIC void    S_invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
                         __attribute__nonnull__(pTHX_1)
-                       __attribute__nonnull__(pTHX_2);
+                       __attribute__nonnull__(pTHX_2)
+                       __attribute__nonnull__(pTHX_3);
  #define PERL_ARGS_ASSERT_INVLIST_UNION \
-       assert(a); assert(b)
+       assert(a); assert(b); assert(output)
  
  STATIC U32     S_join_exact(pTHX_ struct RExC_state_t *pRExC_state, regnode *scan, I32 *min, U32 flags, regnode *val, U32 depth)
                         __attribute__nonnull__(pTHX_1)
diff --git a/regcomp.c b/regcomp.c

index 8475d9dac957da509ff0ad86d542983b8d86f6dd..b256203ae6d4aa0c77fbda84eaf00ad97c70ac45 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -6023,10 +6023,12 @@ Perl__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV
  }
  #endif
  
-STATIC SV*
-S_invlist_union(pTHX_ SV* const a, SV* const b)
+STATIC void
+S_invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
  {
-    /* Return a new inversion list which is the union of two inversion lists.
+    /* Take the union of two inversion lists and point 'result' to it.  If
+     * 'result' on input points to one of the two lists, the reference count to
+     * that list will be decremented.
       * The basis for this comes from "Unicode Demystified" Chapter 13 by
       * Richard Gillam, published by Addison-Wesley, and explained at some
       * length there.  The preface says to incorporate its examples into your
@@ -6037,7 +6039,8 @@ S_invlist_union(pTHX_ SV* const a, SV* const b)
       * XXX A potential performance improvement is to keep track as we go along
       * if only one of the inputs contributes to the result, meaning the other
       * is a subset of that one.  In that case, we can skip the final copy and
-     * return the larger of the input lists */
+     * return the larger of the input lists, but then outside code might need
+     * to keep track of whether to free the input list or not */
  
      UV* array_a = invlist_array(a);   /* a's array */
      UV* array_b = invlist_array(b);
@@ -6171,17 +6174,25 @@ S_invlist_union(pTHX_ SV* const a, SV* const b)
         }
      }
  
-    return u;
+    /*  We may be removing a reference to one of the inputs */
+    if (&a == output || &b == output) {
+       SvREFCNT_dec(*output);
+    }
+
+    *output = u;
+    return;
  }
  
-STATIC SV*
-S_invlist_intersection(pTHX_ SV* const a, SV* const b)
+STATIC void
+S_invlist_intersection(pTHX_ SV* const a, SV* const b, SV** i)
  {
-    /* Return the intersection of two inversion lists.  The basis for this
-     * comes from "Unicode Demystified" Chapter 13 by Richard Gillam, published
-     * by Addison-Wesley, and explained at some length there.  The preface says
-     * to incorporate its examples into your code at your own risk.  In fact,
-     * it had bugs
+    /* Take the intersection of two inversion lists and point 'i' to it.  If
+     * 'i' on input points to one of the two lists, the reference count to that
+     * list will be decremented.
+     * The basis for this comes from "Unicode Demystified" Chapter 13 by
+     * Richard Gillam, published by Addison-Wesley, and explained at some
+     * length there.  The preface says to incorporate its examples into your
+     * code at your own risk.  In fact, it had bugs
       *
       * The algorithm is like a merge sort, and is essentially the same as the
       * union above
@@ -6309,7 +6320,13 @@ S_invlist_intersection(pTHX_ SV* const a, SV* const b)
         }
      }
  
-    return r;
+    /*  We may be removing a reference to one of the inputs */
+    if (&a == i || &b == i) {
+       SvREFCNT_dec(*i);
+    }
+
+    *i = r;
+    return;
  }
  
  STATIC SV*
@@ -6347,7 +6364,7 @@ S_add_range_to_invlist(pTHX_ SV* invlist, const UV start, const UV end)
      range_invlist = _new_invlist(2);
      _append_range_to_invlist(range_invlist, start, end);
  
-    added_invlist = invlist_union(invlist, range_invlist);
+    invlist_union(invlist, range_invlist, &added_invlist);
  
      /* The passed in list can be freed, as well as our temporary */
      invlist_destroy(range_invlist);
@@ -10076,7 +10093,7 @@ parseit:
             * be checked.  Get the intersection of this class and all the
             * possible characters that are foldable.  This can quickly narrow
             * down a large class */
-       fold_intersection = invlist_intersection(PL_utf8_foldable, nonbitmap);
+       invlist_intersection(PL_utf8_foldable, nonbitmap, &fold_intersection);
  
         /* Now look at the foldable characters in this class individually */
         fold_list = invlist_array(fold_intersection);
@@ -10220,9 +10237,7 @@ parseit:
      /* Combine the two lists into one. */
      if (l1_fold_invlist) {
         if (nonbitmap) {
-           SV* temp = invlist_union(nonbitmap, l1_fold_invlist);
-           invlist_destroy(nonbitmap);
-           nonbitmap = temp;
+           invlist_union(nonbitmap, l1_fold_invlist, &nonbitmap);
             invlist_destroy(l1_fold_invlist);
         }
         else {
author	Karl Williamson <public@khwilliamson.com>
	Fri, 27 May 2011 22:43:28 +0000 (16:43 -0600)
committer	Karl Williamson <public@khwilliamson.com>
	Sun, 3 Jul 2011 20:05:45 +0000 (14:05 -0600)
embed.fnc		patch \| blob \| history
embed.h		patch \| blob \| history
proto.h		patch \| blob \| history
regcomp.c		patch \| blob \| history