regex.h: Remove virtual class _Automaton.
authorTim Shen <timshen91@gmail.com>
Sun, 20 Oct 2013 10:07:29 +0000 (10:07 +0000)
committerTim Shen <timshen@gcc.gnu.org>
Sun, 20 Oct 2013 10:07:29 +0000 (10:07 +0000)
2013-10-20  Tim Shen  <timshen91@gmail.com>

* include/bits/regex.h: Remove virtual class _Automaton.
* include/bits/regex_automaton.h: Likewise.
* include/bits/regex.tcc: Adjust comment for policy changing.
* include/bits/regex_executor.h: Update comments of complexity.
* include/bits/regex_executor.tcc: Adjust executor choosing
policy. Now DFS executor is the default one.
* testsuite/util/testsuite_regex.h (regex_match_debug,
regex_search_debug): Adjust for policy changing.

From-SVN: r203875

libstdc++-v3/ChangeLog
libstdc++-v3/include/bits/regex.h
libstdc++-v3/include/bits/regex.tcc
libstdc++-v3/include/bits/regex_automaton.h
libstdc++-v3/include/bits/regex_executor.h
libstdc++-v3/include/bits/regex_executor.tcc
libstdc++-v3/testsuite/util/testsuite_regex.h

index 7717da0..0957953 100644 (file)
@@ -1,3 +1,14 @@
+2013-10-20  Tim Shen  <timshen91@gmail.com>
+
+       * include/bits/regex.h: Remove virtual class _Automaton.
+       * include/bits/regex_automaton.h: Likewise.
+       * include/bits/regex.tcc: Adjust comment for policy changing.
+       * include/bits/regex_executor.h: Update comments of complexity.
+       * include/bits/regex_executor.tcc: Adjust executor choosing
+       policy. Now DFS executor is the default one.
+       * testsuite/util/testsuite_regex.h (regex_match_debug,
+       regex_search_debug): Adjust for policy changing.
+
 2013-10-20  Chris Jefferson  <chris@bubblescope.net>
            Paolo Carlini  <paolo.carlini@oracle.com>
 
index 5d1a8f4..32d38b4 100644 (file)
@@ -727,7 +727,7 @@ _GLIBCXX_END_NAMESPACE_VERSION
 #endif
 
     protected:
-      typedef std::shared_ptr<__detail::_Automaton<_Ch_type, _Rx_traits>>
+      typedef std::shared_ptr<__detail::_NFA<_Ch_type, _Rx_traits>>
        _AutomatonPtr;
 
       template<typename _BiIter, typename _Alloc,
index b3b5314..7028480 100644 (file)
@@ -38,8 +38,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   // Result of merging regex_match and regex_search.
   //
-  // __policy now can be _S_auto(auto dispatch by checking back-references)
-  // and _S_force_dfs(just use _DFSExecutor).
+  // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
+  // the other one if possible, for test purpose).
   //
   // That __match_mode is true means regex_match, else regex_search.
   template<typename _BiIter, typename _Alloc,
index cb94499..4fb5556 100644 (file)
@@ -104,31 +104,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
     };
 
-  /// Base class for, um, automata.  Could be an NFA or a DFA.  Your choice.
-  template<typename _CharT, typename _TraitsT>
-    class _Automaton
-    {
-    public:
-      typedef size_t _SizeT;
-
-    public:
-      virtual
-      ~_Automaton()
-      { }
-
-      virtual _SizeT
-      _M_sub_count() const = 0;
-
-#ifdef _GLIBCXX_DEBUG
-      virtual std::ostream&
-      _M_dot(std::ostream& __ostr) const = 0;
-#endif
-    };
-
   template<typename _CharT, typename _TraitsT>
     class _NFA
-    : public _Automaton<_CharT, _TraitsT>,
-      public std::vector<_State<_CharT, _TraitsT>>
+    : public std::vector<_State<_CharT, _TraitsT>>
     {
     public:
       typedef _State<_CharT, _TraitsT>            _StateT;
index 018b649..6d9a83e 100644 (file)
@@ -179,8 +179,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // TODO: This approach is exponentially slow for certain input.
   //       Try to compile the NFA to a DFA.
   //
-  // Time complexity: exponential
-  // Space complexity: O(__end - __begin)
+  // Time complexity: o(match_length), O(2^(_M_nfa->size()))
+  // Space complexity: \theta(match_results.size() + match_length)
   template<typename _BiIter, typename _Alloc,
     typename _CharT, typename _TraitsT>
     class _DFSExecutor
@@ -200,16 +200,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                   const _RegexT&  __re,
                   _FlagT          __flags)
       : _BaseT(__begin, __end, __results, __re, __flags),
-      _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
-            (__re._M_automaton)),
-      _M_start_state(_M_nfa._M_start())
+      _M_nfa(__re._M_automaton), _M_start_state(_M_nfa->_M_start())
       { }
 
     private:
       void
       _M_init(_BiIter __cur)
       {
-       _M_cur_results.resize(_M_nfa._M_sub_count() + 2);
+       _M_cur_results.resize(_M_nfa->_M_sub_count() + 2);
        this->_M_current = __cur;
       }
 
@@ -235,9 +233,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       }
 
       // To record current solution.
-      _ResultsVec     _M_cur_results;
-      const _NFAT&    _M_nfa;
-      _StateIdT       _M_start_state;
+      std::shared_ptr<_NFAT> _M_nfa;
+      _ResultsVec            _M_cur_results;
+      _StateIdT              _M_start_state;
     };
 
   // Like the DFS approach, it try every possible state transition; Unlike DFS,
@@ -251,8 +249,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // matching head. When states transit, solutions will be compared and
   // deduplicated(based on which greedy mode we have).
   //
-  // Time complexity: O((__end - __begin) * _M_nfa.size())
-  // Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
+  // Time complexity: o(match_length * (quantifier_number
+  //                                    + match_results.size())
+  //                  O(match_length * _M_nfa->size()
+  //                    * (quantifier_number + match_results.size())
+  // Space complexity: o(quantifier_number + match_results.size())
+  //                   O(_M_nfa->size()
+  //                     * (quantifier_number + match_results.size())
   template<typename _BiIter, typename _Alloc,
     typename _CharT, typename _TraitsT>
     class _BFSExecutor
@@ -382,11 +385,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                   const _RegexT&  __re,
                   _FlagT          __flags)
       : _BaseT(__begin, __end, __results, __re, __flags),
-      _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
-            (__re._M_automaton)),
-      _M_match_stack(_M_nfa.size()),
-      _M_stack(_M_nfa.size()),
-      _M_start_state(_M_nfa._M_start())
+      _M_nfa(__re._M_automaton), _M_match_stack(_M_nfa->size()),
+      _M_stack(_M_nfa->size()), _M_start_state(_M_nfa->_M_start())
       { }
 
     private:
@@ -398,7 +398,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        _ResultsVec& __res(this->_M_results);
        _M_covered[this->_M_start_state] =
          _ResultsPtr(new _ResultsEntry(__res.size(),
-                                       _M_nfa._M_quant_count));
+                                       _M_nfa->_M_quant_count));
        _M_stack._M_push(this->_M_start_state);
       }
 
@@ -428,7 +428,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                                                        this->_M_flags));
       }
 
-      const _NFAT&                     _M_nfa;
+      std::shared_ptr<_NFAT>           _M_nfa;
       std::map<_StateIdT, _ResultsPtr> _M_covered;
       _TodoList                        _M_match_stack;
       _TodoList                        _M_stack;
index b875573..7c084ad 100644 (file)
  *  Do not attempt to use it directly. @headername{regex}
  */
 
+// See below __get_executor to get what this is talking about. The default
+// value 1 indicated a conservative optimization without giving up worst case
+// performance.
+#ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
+#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1
+#endif
+
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 namespace __detail
@@ -60,7 +67,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _M_dfs(_StateIdT __i)
     {
       auto& __current = this->_M_current;
-      const auto& __state = _M_nfa[__i];
+      const auto& __state = (*_M_nfa)[__i];
       bool __ret = false;
       switch (__state._M_opcode)
        {
@@ -216,7 +223,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        {
          auto __u = _M_stack._M_pop();
          _GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u));
-         const auto& __state = _M_nfa[__u];
+         const auto& __state = (*_M_nfa)[__u];
 
          // Can be implemented using method, but there will be too many
          // arguments. I would use macro function before C++11, but lambda is
@@ -314,7 +321,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       while (!_M_match_stack._M_empty())
        {
          auto __u = _M_match_stack._M_pop();
-         const auto& __state = _M_nfa[__u];
+         const auto& __state = (*_M_nfa)[__u];
          auto& __cu = _M_covered[__u];
          if (__state._M_matches(*this->_M_current)
              && (__next.count(__state._M_next) == 0
@@ -333,7 +340,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _M_includes_some()
     {
       bool __succ = false;
-      for (auto __u : _M_nfa._M_final_states())
+      for (auto __u : _M_nfa->_M_final_states())
        if (_M_covered.count(__u))
          {
            __succ = true;
@@ -380,8 +387,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
   enum class _RegexExecutorPolicy : int
-    { _S_auto, _S_force_dfs };
+    { _S_auto, _S_alternate };
 
+  // This function decide which executor to use under given circumstances.
+  // The _S_auto policy now is the following: if a NFA has no back-references
+  // and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT quantifiers
+  // (*, +, ?), the _BFSExecutor will be used, other wise _DFSExecutor. This is
+  // because _DFSExecutor has a exponential upper bound, but better best-case
+  // performace. Meanwhile, _BFSExecutor can effectively prevent from
+  // exponential-long time matching (which must contains many quantifiers), but
+  // it's slower in average.
+  //
+  // For simple regex, _BFSExecutor could be 2 or more times slower than
+  // _DFSExecutor.
+  //
+  // Of course, _BFSExecutor cannot handle back-references.
   template<typename _BiIter, typename _Alloc,
     typename _CharT, typename _TraitsT,
     _RegexExecutorPolicy __policy>
@@ -396,12 +416,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        _ExecutorPtr;
       typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT;
       typedef _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _BFSExecutorT;
-      auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
-       (__re._M_automaton);
-      if (__policy == _RegexExecutorPolicy::_S_force_dfs
-         || (__policy == _RegexExecutorPolicy::_S_auto && __p->_M_has_backref))
-       return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags));
-      return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags));
+      if (!__re._M_automaton->_M_has_backref
+         && (__policy == _RegexExecutorPolicy::_S_alternate
+             || __re._M_automaton->_M_quant_count
+               > _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT))
+       return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags));
+      return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags));
     }
 
 _GLIBCXX_END_NAMESPACE_VERSION
index 17be78d..bc0f3d5 100644 (file)
@@ -148,7 +148,7 @@ namespace __gnu_test
        (__s, __e, __m, __re, __flags);
       match_results<_Bi_iter, _Alloc> __mm;
       auto __res2 = __regex_algo_impl<_Bi_iter, _Alloc, _Ch_type, _Rx_traits,
-          _RegexExecutorPolicy::_S_force_dfs, true>
+          _RegexExecutorPolicy::_S_alternate, true>
        (__s, __e, __mm, __re, __flags);
       if (__res1 == __res2 && __m == __mm)
        return __res1;
@@ -234,7 +234,7 @@ namespace __gnu_test
         (__s, __e, __m, __re, __flags);
       match_results<_Bi_iter, _Alloc> __mm;
       auto __res2 = __regex_algo_impl<_Bi_iter, _Alloc, _Ch_type, _Rx_traits,
-          _RegexExecutorPolicy::_S_force_dfs, false>
+          _RegexExecutorPolicy::_S_alternate, false>
         (__s, __e, __mm, __re, __flags);
       if (__res1 == __res2 && __m == __mm)
         return __res1;