Reimplment matcher using Depth-first search(backtracking).
authorTim Shen <timshen91@gmail.com>
Wed, 24 Jul 2013 14:39:54 +0000 (14:39 +0000)
committerTim Shen <timshen@gcc.gnu.org>
Wed, 24 Jul 2013 14:39:54 +0000 (14:39 +0000)
2013-07-24  Tim Shen  <timshen91@gmail.com>

Reimplment matcher using Depth-first search(backtracking).
PR libstdc++/53622
PR libstdc++/57173
* include/bits/regex.h: regex_match() and regex_search().
* include/bits/regex_cursor.h: Fix _M_set_pos().
* include/bits/regex_grep_matcher.h: add _M_dfs_match().
* include/bits/regex_grep_matcher.tcc: Implement it.
* testsuite/28_regex/algorithms/regex_match/extended/string_group_01.cc:
New.
* testsuite/28_regex/algorithms/regex_match/extended/string_group_02.cc:
New.
* testsuite/28_regex/algorithms/regex_search/basic/string_01.cc:
Remove xfail.

From-SVN: r201213

libstdc++-v3/ChangeLog
libstdc++-v3/include/bits/regex.h
libstdc++-v3/include/bits/regex_cursor.h
libstdc++-v3/include/bits/regex_grep_matcher.h
libstdc++-v3/include/bits/regex_grep_matcher.tcc
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc [new file with mode: 0644]
libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc [new file with mode: 0644]
libstdc++-v3/testsuite/28_regex/algorithms/regex_search/basic/string_01.cc

index ab21abe..45dc945 100644 (file)
@@ -1,3 +1,19 @@
+2013-07-24  Tim Shen  <timshen91@gmail.com>
+
+       Reimplment matcher using Depth-first search(backtracking).
+       PR libstdc++/53622
+       PR libstdc++/57173
+       * include/bits/regex.h: regex_match() and regex_search().
+       * include/bits/regex_cursor.h: Fix _M_set_pos().
+       * include/bits/regex_grep_matcher.h: add _M_dfs_match().
+       * include/bits/regex_grep_matcher.tcc: Implement it.
+       * testsuite/28_regex/algorithms/regex_match/extended/string_group_01.cc:
+       New.
+       * testsuite/28_regex/algorithms/regex_match/extended/string_group_02.cc:
+       New.
+       * testsuite/28_regex/algorithms/regex_search/basic/string_01.cc:
+       Remove xfail.
+
 2013-07-23  Tim Shen  <timshen91@gmail.com>
 
        Implement regex_iterator and regex_token_iterator.
index 9848f71..64a1807 100644 (file)
@@ -2185,8 +2185,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       __detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
       __detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
       __detail::_Grep_matcher __matcher(__cs, __r, __a, __flags);
-      __matcher._M_match();
-      return __m[0].matched;
+      return __matcher._M_dfs_match();
     }
 
   /**
@@ -2338,8 +2337,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
         {
           __detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last);
           __detail::_Grep_matcher __matcher(__curs, __r, __a, __flags);
-          __matcher._M_search_from_first();
-          if (__m[0].matched)
+          if (__matcher._M_dfs_search_from_first())
             {
               __r._M_set_range(__m.size(),
                                __detail::_SpecializedCursor<_Bi_iter>
index 44e9d1d..444d07a 100644 (file)
@@ -45,6 +45,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   {
     virtual ~_PatternCursor() { };
     virtual void _M_next() = 0;
+    virtual void _M_prev() = 0;
     virtual bool _M_at_end() const = 0;
   };
 
@@ -66,6 +67,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_next()
       { ++_M_c; }
 
+      void
+      _M_prev()
+      { --_M_c; }
+
       _FwdIterT
       _M_pos() const
       { return _M_c; }
index da9264d..c402c78 100644 (file)
@@ -107,7 +107,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (__j == 0)
        _M_results.at(__i).first = __c._M_pos();
       else
-       _M_results.at(__i).second = __c._M_pos()+1;
+        _M_results.at(__i).second = __c._M_pos();
     }
 
   /// A stack of states used in evaluating the NFA.
@@ -127,9 +127,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_pattern(__p), _M_results(__r)
     { }
 
-    void _M_match();
+    void
+    _M_match();
+
+    void
+    _M_search_from_first();
 
-    void _M_search_from_first();
+    bool
+    _M_dfs_match()
+    { return _M_dfs<true>(_M_nfa->_M_start()); }
+
+    bool
+    _M_dfs_search_from_first()
+    { return _M_dfs<false>(_M_nfa->_M_start()); }
 
   private:
     _StateSet
@@ -141,6 +151,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _StateSet
     _M_e_closure(_StateStack& __stack, const _StateSet& __s);
 
+    template<bool __match_mode>
+      bool
+      _M_dfs(_StateIdT __i);
+
     const std::shared_ptr<_Nfa>        _M_nfa;
     _PatternCursor&                    _M_pattern;
     _Results&                          _M_results;
index 2d4a9a6..46b9817 100644 (file)
@@ -103,6 +103,56 @@ namespace __detail
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
+  // _M_dfs() take a state, along with current string cursor(_M_pattern),
+  // trying to match current state with current character.
+  // Only _S_opcode_match will consume a character.
+  // TODO: This is too slow. Try to compile the NFA to a DFA.
+  template<bool __match_mode>
+    bool _Grep_matcher::
+    _M_dfs(_StateIdT __i)
+    {
+      if (__i == _S_invalid_state_id)
+        // This is not that certain. Need deeper investigate.
+        return false;
+      const auto& __state = (*_M_nfa)[__i];
+      bool __ret = false;
+      switch (__state._M_opcode)
+        {
+        case _S_opcode_alternative:
+          // Greedy mode by default. For non-greedy mode,
+          // swap _M_alt and _M_next.
+          __ret = _M_dfs<__match_mode>(__state._M_alt)
+            || _M_dfs<__match_mode>(__state._M_next);
+          break;
+        case _S_opcode_subexpr_begin:
+          __state._M_tagger(_M_pattern, _M_results);
+          __ret = _M_dfs<__match_mode>(__state._M_next);
+          break;
+        case _S_opcode_subexpr_end:
+          __state._M_tagger(_M_pattern, _M_results);
+          __ret = _M_dfs<__match_mode>(__state._M_next);
+          _M_results._M_set_matched(__state._M_subexpr, __ret);
+          break;
+        case _S_opcode_match:
+          if (!_M_pattern._M_at_end() && __state._M_matches(_M_pattern))
+            {
+              _M_pattern._M_next();
+              __ret = _M_dfs<__match_mode>(__state._M_next);
+              _M_pattern._M_prev();
+            }
+          break;
+        case _S_opcode_accept:
+          if (__match_mode)
+            __ret = _M_pattern._M_at_end();
+          else
+            __ret = true;
+          break;
+        default:
+          _GLIBCXX_DEBUG_ASSERT( false );
+        }
+      return __ret;
+    }
+
   inline void _Grep_matcher::
   _M_match()
   {
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc
new file mode 100644 (file)
index 0000000..383ed05
--- /dev/null
@@ -0,0 +1,52 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-07-23  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests Extended grouping against a std::string target.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+// libstdc++/53622
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  std::regex  re("zxcv/(one.*)abc", std::regex::extended);
+  std::string target("zxcv/onetwoabc");
+  std::smatch m;
+
+  VERIFY( std::regex_search(target, m, re) );
+  VERIFY( m.size() == 2 );
+  VERIFY( m[0].matched == true );
+  VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
+  VERIFY( m[1].matched == true );
+  VERIFY( std::string(m[1].first, m[1].second) == "onetwo" );
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc
new file mode 100644 (file)
index 0000000..3031c43
--- /dev/null
@@ -0,0 +1,50 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-07-23  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.3 regex_search
+// Tests Extended against a std::string target.
+
+#include <regex>
+#include <testsuite_hooks.h>
+#include <iostream>
+
+// libstdc++/57173
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  std::regex  re("/asdf(/.*)", std::regex::extended);
+  std::string target("/asdf/qwerty");
+  std::smatch m;
+
+  VERIFY( std::regex_match(target, m, re) );
+  VERIFY( m.size() == 2 );
+  VERIFY( std::string(m[1].first, m[1].second) == "/qwerty");
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}
index f2a7f21..ee487f1 100644 (file)
@@ -1,5 +1,4 @@
 // { dg-options "-std=gnu++11" }
-// { dg-do run { xfail *-*-* } }
 
 //
 // 2013-07-17  Tim Shen <timshen91@gmail.com>
@@ -32,7 +31,7 @@ test01()
 {
   bool test __attribute__((unused)) = true;
 
-  std::regex  re("as(df)", std::regex::basic);
+  std::regex  re("as\\(df\\)", std::regex::basic);
   std::string target("xxasdfyy");
   std::smatch m;