Implement C++17 string searchers.
author Ville Voutilainen <ville.voutilainen@gmail.com>
Mon, 12 Sep 2016 15:48:07 +0000 (18:48 +0300)
committer Ville Voutilainen <ville@gcc.gnu.org>
Mon, 12 Sep 2016 15:48:07 +0000 (18:48 +0300)
* include/std/functional: (unordered_map, vector): New includes
in C++17 mode.
(array, bits/stl_algo.h): Likewise.
(default_searcher, __boyer_moore_map_base): New.
(__boyer_moore_array_base, __is_std_equal_to): Likewise.
(__boyer_moore_base_t, boyer_moore_searcher): Likewise.
(boyer_moore_horspool_searcher, make_default_searcher): Likewise.
(make_boyer_moore_searcher): Likewise.
(make_boyer_moore_horspool_searcher): Likewise.
* testsuite/20_util/function_objects/searchers.cc: New.

From-SVN: r240093

libstdc++-v3/ChangeLog
libstdc++-v3/include/std/functional
libstdc++-v3/testsuite/20_util/function_objects/searchers.cc [new file with mode: 0644]

index 7d71749..594cf13 100644 (file)
@@ -1,3 +1,17 @@
+2016-09-12  Ville Voutilainen  <ville.voutilainen@gmail.com>
+
+       Implement C++17 string searchers.
+       * include/std/functional: (unordered_map, vector): New includes
+       in C++17 mode.
+       (array, bits/stl_algo.h): Likewise.
+       (default_searcher, __boyer_moore_map_base): New.
+       (__boyer_moore_array_base, __is_std_equal_to): Likewise.
+       (__boyer_moore_base_t, boyer_moore_searcher): Likewise.
+       (boyer_moore_horspool_searcher, make_default_searcher): Likewise.
+       (make_boyer_moore_searcher): Likewise.
+       (make_boyer_moore_horspool_searcher): Likewise.
+       * testsuite/20_util/function_objects/searchers.cc: New.
+
 2016-09-12  Matthew Wahab  <matthew.wahab@arm.com>
 
        * testsuite/22_locale/codecvt/codecvt_utf16/requirements/1.cc:
index 05d4282..3f0c1a8 100644 (file)
 #include <bits/functional_hash.h>
 #include <bits/invoke.h>
 
+#if __cplusplus > 201402L
+#include <unordered_map>
+#include <vector>
+#include <array>
+#include <bits/stl_algo.h>
+#endif
+
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
@@ -2197,6 +2204,308 @@ _GLIBCXX_MEM_FN_TRAITS(&&, false_type, true_type)
       return _Not_fn<std::decay_t<_Fn>>{std::forward<_Fn>(__fn)};
     }
 
+  // Searchers
+
+  template<typename _ForwardIterator1, typename _BinaryPredicate = equal_to<>>
+    class default_searcher
+    {
+    public:
+      default_searcher(_ForwardIterator1 __pat_first,
+                      _ForwardIterator1 __pat_last,
+                      _BinaryPredicate __pred = _BinaryPredicate())
+      : _M_m(__pat_first, __pat_last, std::move(__pred))
+      { }
+
+      template<typename _ForwardIterator2>
+       _ForwardIterator2
+       operator()(_ForwardIterator2 __first, _ForwardIterator2 __last) const
+       {
+         return std::search(__first, __last,
+                            std::get<0>(_M_m), std::get<1>(_M_m),
+                            std::get<2>(_M_m));
+       }
+
+    private:
+      std::tuple<_ForwardIterator1, _ForwardIterator1, _BinaryPredicate> _M_m;
+    };
+
+  template<typename _Key, typename _Tp, typename _Hash, typename _Pred>
+    struct __boyer_moore_map_base
+    {
+      template<typename _RAIter>
+       __boyer_moore_map_base(_RAIter __pat, size_t __patlen,
+                              _Hash&& __hf, _Pred&& __pred)
+       : _M_bad_char{ __patlen, std::move(__hf), std::move(__pred) }
+       {
+         if (__patlen > 0)
+           for (__diff_type __i = 0; __i < __patlen - 1; ++__i)
+             _M_bad_char[__pat[__i]] = __patlen - 1 - __i;
+       }
+
+      using __diff_type = _Tp;
+
+      __diff_type
+      _M_lookup(_Key __key, __diff_type __not_found) const
+      {
+       auto __iter = _M_bad_char.find(__key);
+       if (__iter == _M_bad_char.end())
+         return __not_found;
+       return __iter->second;
+      }
+
+      _Pred
+      _M_pred() const { return _M_bad_char.key_eq(); }
+
+      std::unordered_map<_Key, _Tp, _Hash, _Pred> _M_bad_char;
+    };
+
+  template<typename _Tp, size_t _Len, typename _Pred>
+    struct __boyer_moore_array_base
+    {
+      template<typename _RAIter, typename _Unused>
+       __boyer_moore_array_base(_RAIter __pat, size_t __patlen,
+                                _Unused&&, _Pred&& __pred)
+       : _M_bad_char{ std::array<_Tp, _Len>{}, std::move(__pred) }
+       {
+         std::get<0>(_M_bad_char).fill(__patlen);
+         if (__patlen > 0)
+           for (__diff_type __i = 0; __i < __patlen - 1; ++__i)
+             {
+               auto __ch = __pat[__i];
+               using _UCh = std::make_unsigned_t<decltype(__ch)>;
+               auto __uch = static_cast<_UCh>(__ch);
+               std::get<0>(_M_bad_char)[__uch] = __patlen - 1 - __i;
+             }
+       }
+
+      using __diff_type = _Tp;
+
+      template<typename _Key>
+       __diff_type
+       _M_lookup(_Key __key, __diff_type __not_found) const
+       {
+         auto __ukey = static_cast<std::make_unsigned_t<_Key>>(__key);
+         if (__ukey >= _Len)
+           return __not_found;
+         return std::get<0>(_M_bad_char)[__ukey];
+       }
+
+      const _Pred&
+      _M_pred() const { return std::get<1>(_M_bad_char); }
+
+      std::tuple<std::array<_Tp, _Len>, _Pred> _M_bad_char;
+    };
+
+  template<typename _Pred>
+    struct __is_std_equal_to : std::false_type { };
+
+  template<>
+    struct __is_std_equal_to<std::equal_to<void>> : std::true_type { };
+
+  // Use __boyer_moore_array_base when pattern consists of narrow characters
+  // and uses std::equal_to as the predicate.
+  template<typename _RAIter, typename _Hash, typename _Pred,
+           typename _Val = typename iterator_traits<_RAIter>::value_type,
+          typename _Diff = typename iterator_traits<_RAIter>::difference_type>
+    using __boyer_moore_base_t
+      = std::conditional_t<sizeof(_Val) == 1 && is_integral<_Val>::value
+                          && __is_std_equal_to<_Pred>::value,
+                          __boyer_moore_array_base<_Diff, 256, _Pred>,
+                          __boyer_moore_map_base<_Val, _Diff, _Hash, _Pred>>;
+
+  template<typename _RAIter, typename _Hash
+            = std::hash<typename std::iterator_traits<_RAIter>::value_type>,
+          typename _BinaryPredicate = std::equal_to<>>
+    class boyer_moore_searcher
+    : __boyer_moore_base_t<_RAIter, _Hash, _BinaryPredicate>
+    {
+      using _Base = __boyer_moore_base_t<_RAIter, _Hash, _BinaryPredicate>;
+      using typename _Base::__diff_type;
+
+    public:
+      boyer_moore_searcher(_RAIter __pat_first, _RAIter __pat_last,
+                          _Hash __hf = _Hash(),
+                          _BinaryPredicate __pred = _BinaryPredicate());
+
+      template<typename _RandomAccessIterator2>
+       _RandomAccessIterator2
+       operator()(_RandomAccessIterator2 __first,
+                  _RandomAccessIterator2 __last) const;
+
+    private:
+      bool
+      _M_is_prefix(_RAIter __word, __diff_type __len,
+                  __diff_type __pos)
+      { // true if __word[__pos, __len) compares equal to the start of __word
+       const auto& __pred = this->_M_pred();
+       __diff_type __suffixlen = __len - __pos;
+       for (__diff_type __i = 0; __i < __suffixlen; ++__i)
+         if (!__pred(__word[__i], __word[__pos + __i]))
+           return false;
+       return true;
+      }
+
+      __diff_type
+      _M_suffix_length(_RAIter __word, __diff_type __len,
+                      __diff_type __pos)
+      { // matching elements going back from __pos and __len - 1, at most __pos
+       const auto& __pred = this->_M_pred();
+       __diff_type __i = 0;
+       while (__i < __pos // check the cap before calling the predicate
+              && __pred(__word[__pos - __i], __word[__len - 1 - __i]))
+         {
+           ++__i;
+         }
+       return __i;
+      }
+
+      template<typename _Tp>
+       __diff_type
+       _M_bad_char_shift(_Tp __c) const // falls back to the pattern length
+       { return this->_M_lookup(__c, _M_pat_end - _M_pat); }
+
+      _RAIter _M_pat;
+      _RAIter _M_pat_end;
+      std::vector<__diff_type> _M_good_suffix;
+    };
+
+  template<typename _RAIter, typename _Hash
+            = std::hash<typename std::iterator_traits<_RAIter>::value_type>,
+          typename _BinaryPredicate = std::equal_to<>>
+    class boyer_moore_horspool_searcher
+    : __boyer_moore_base_t<_RAIter, _Hash, _BinaryPredicate>
+    {
+      using _Base = __boyer_moore_base_t<_RAIter, _Hash, _BinaryPredicate>;
+      using typename _Base::__diff_type;
+
+    public:
+      boyer_moore_horspool_searcher(_RAIter __pat,
+                                   _RAIter __pat_end,
+                                   _Hash __hf = _Hash(),
+                                   _BinaryPredicate __pred
+                                   = _BinaryPredicate())
+      : _Base(__pat, __pat_end - __pat, std::move(__hf), std::move(__pred)),
+       _M_pat(__pat), _M_pat_end(__pat_end)
+      { }
+
+      template<typename _RandomAccessIterator2>
+       _RandomAccessIterator2
+       operator()(_RandomAccessIterator2 __first,
+                  _RandomAccessIterator2 __last) const
+       {
+         const auto& __pred = this->_M_pred();
+         auto __patlen = _M_pat_end - _M_pat;
+         if (__patlen == 0)
+           return __first;
+         auto __len = __last - __first;
+         while (__len >= __patlen)
+           {
+             for (auto __scan = __patlen - 1;
+                  __pred(__first[__scan], _M_pat[__scan]); --__scan)
+               if (__scan == 0)
+                 return __first;
+             auto __shift = _M_bad_char_shift(__first[__patlen - 1]);
+             __len -= __shift;
+             __first += __shift;
+           }
+         return __last;
+       }
+
+    private:
+      template<typename _Tp>
+       __diff_type
+       _M_bad_char_shift(_Tp __c) const
+       { return this->_M_lookup(__c, _M_pat_end - _M_pat); }
+
+      _RAIter _M_pat;
+      _RAIter _M_pat_end;
+    };
+
+  /// Generator function for default_searcher (template arguments deduced)
+  template<typename _ForwardIterator,
+          typename _BinaryPredicate = std::equal_to<>>
+    inline default_searcher<_ForwardIterator, _BinaryPredicate>
+    make_default_searcher(_ForwardIterator __pat_first,
+                         _ForwardIterator __pat_last,
+                         _BinaryPredicate __pred = _BinaryPredicate())
+    { return { __pat_first, __pat_last, std::move(__pred) }; }
+
+  /// Generator function for boyer_moore_searcher
+  template<typename _RAIter, typename _Hash
+            = std::hash<typename std::iterator_traits<_RAIter>::value_type>,
+          typename _BinaryPredicate = equal_to<>>
+    inline boyer_moore_searcher<_RAIter, _Hash, _BinaryPredicate>
+    make_boyer_moore_searcher(_RAIter __pat_first, _RAIter __pat_last,
+                             _Hash __hf = _Hash(),
+                             _BinaryPredicate __pred = _BinaryPredicate())
+    { return { __pat_first, __pat_last, std::move(__hf), std::move(__pred) }; }
+
+  /// Generator function for boyer_moore_horspool_searcher
+  template<typename _RAIter, typename _Hash
+            = std::hash<typename std::iterator_traits<_RAIter>::value_type>,
+          typename _BinaryPredicate = equal_to<>>
+    inline boyer_moore_horspool_searcher<_RAIter, _Hash, _BinaryPredicate>
+    make_boyer_moore_horspool_searcher(_RAIter __pat_first, _RAIter __pat_last,
+                                      _Hash __hf = _Hash(),
+                                      _BinaryPredicate __pred
+                                      = _BinaryPredicate())
+    { return { __pat_first, __pat_last, std::move(__hf), std::move(__pred) }; }
+
+  template<typename _RAIter, typename _Hash, typename _BinaryPredicate>
+    boyer_moore_searcher<_RAIter, _Hash, _BinaryPredicate>::
+    boyer_moore_searcher(_RAIter __pat, _RAIter __pat_end,
+                        _Hash __hf, _BinaryPredicate __pred)
+    : _Base(__pat, __pat_end - __pat, std::move(__hf), std::move(__pred)),
+      _M_pat(__pat), _M_pat_end(__pat_end), _M_good_suffix(__pat_end - __pat)
+    { // Fills _M_good_suffix; the bad-character table is built by _Base.
+      auto __patlen = __pat_end - __pat;
+      if (__patlen == 0) // empty pattern: nothing to precompute
+       return;
+      __diff_type __last_prefix = __patlen - 1;
+      for (__diff_type __p = __patlen - 1; __p >= 0; --__p)
+       { // pass 1: shifts derived from _M_is_prefix
+         if (_M_is_prefix(__pat, __patlen, __p + 1))
+           __last_prefix = __p + 1;
+         _M_good_suffix[__p] = __last_prefix + (__patlen - 1 - __p);
+       }
+      for (__diff_type __p = 0; __p < __patlen - 1; ++__p)
+       { // pass 2: __pred was handed to _Base via std::move; use the stored one
+         auto __slen = _M_suffix_length(__pat, __patlen, __p);
+         auto __pos = __patlen - 1 - __slen;
+         if (!this->_M_pred()(__pat[__p - __slen], __pat[__pos]))
+           _M_good_suffix[__pos] = __patlen - 1 - __p + __slen;
+       }
+    }
+
+  template<typename _RAIter, typename _Hash, typename _BinaryPredicate>
+  template<typename _RandomAccessIterator2>
+    _RandomAccessIterator2
+    boyer_moore_searcher<_RAIter, _Hash, _BinaryPredicate>::
+    operator()(_RandomAccessIterator2 __first,
+              _RandomAccessIterator2 __last) const
+    {
+      auto __patlen = _M_pat_end - _M_pat;
+      if (__patlen == 0)
+       return __first;
+      const auto& __pred = this->_M_pred();
+      __diff_type __i = __patlen - 1;
+      auto __stringlen = __last - __first;
+      while (__i < __stringlen)
+       {
+         __diff_type __j = __patlen - 1;
+         while (__j >= 0 && __pred(__first[__i], _M_pat[__j]))
+           {
+             --__i;
+             --__j;
+           }
+         if (__j < 0)
+           return __first + __i + 1;
+         __i += std::max(_M_bad_char_shift(__first[__i]),
+                         _M_good_suffix[__j]);
+       }
+      return __last;
+    }
+
 #endif
 
 _GLIBCXX_END_NAMESPACE_VERSION
diff --git a/libstdc++-v3/testsuite/20_util/function_objects/searchers.cc b/libstdc++-v3/testsuite/20_util/function_objects/searchers.cc
new file mode 100644 (file)
index 0000000..62a636a
--- /dev/null
@@ -0,0 +1,138 @@
+// Copyright (C) 2014-2016 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-options "-std=gnu++17" }
+
+#include <functional>
+#include <cstring>
+#ifdef _GLIBCXX_USE_WCHAR_T
+# include <cwchar>
+#endif
+#include <algorithm>
+#include <testsuite_hooks.h>
+
+using std::make_default_searcher;
+using std::make_boyer_moore_searcher;
+using std::make_boyer_moore_horspool_searcher;
+
+void
+test01()
+{
+  const char s[] = { 'a', (char)-97, 'a', '\0' };
+  const char* needles[] = {
+    s, "", "a", "aa", "aaa", "ab", "cd", "abcd", "abcdabcd", "abcabcd"
+  };
+  const char* haystacks[] = {
+    s, "", "a", "aa", "aaa", "ab", "cd", "abcd", "abcdabcd", "abcabcd",
+    "aaaaaaa", "aabaa", "aaacab", "cdabcdab", "abcdabcd", "xyzabcdxyz"
+  };
+
+  for (auto n : needles)
+  {
+    auto ne = n + std::strlen(n);
+    auto d = make_default_searcher(n, ne);
+    auto bm = make_boyer_moore_searcher(n, ne);
+    auto bmh = make_boyer_moore_horspool_searcher(n, ne);
+    for (auto h : haystacks)
+    {
+      auto he = h + std::strlen(h);
+      auto res = std::search(h, he, n, ne);
+      auto d_res = d(h, he);
+      VERIFY( d_res == res );
+      auto bm_res = bm(h, he);
+      VERIFY( bm_res == res );
+      auto bmh_res = bmh(h, he);
+      VERIFY( bmh_res == res );
+    }
+  }
+}
+
+void
+test02()
+{
+#ifdef _GLIBCXX_USE_WCHAR_T
+  const wchar_t s[] = { L'a', (wchar_t)-97, L'a', L'\0' };
+  const wchar_t* needles[] = {
+    s, L"", L"a", L"aa", L"aaa", L"ab", L"cd", L"abcd", L"abcdabcd", L"abcabcd"
+  };
+  const wchar_t* haystacks[] = {
+    s, L"", L"a", L"aa", L"aaa", L"ab", L"cd", L"abcd", L"abcdabcd", L"abcabcd",
+    L"aaaaaaa", L"aabaa", L"aaacab", L"cdabcdab", L"abcdabcd", L"xyzabcdxyz"
+  };
+
+  for (auto n : needles)
+  {
+    auto ne = n + std::wcslen(n);
+    auto d = make_default_searcher(n, ne);
+    auto bm = make_boyer_moore_searcher(n, ne);
+    auto bmh = make_boyer_moore_horspool_searcher(n, ne);
+    for (auto h : haystacks)
+    {
+      auto he = h + std::wcslen(h);
+      auto res = std::search(h, he, n, ne);
+      auto d_res = d(h, he);
+      VERIFY( d_res == res );
+      auto bm_res = bm(h, he);
+      VERIFY( bm_res == res );
+      auto bmh_res = bmh(h, he);
+      VERIFY( bmh_res == res );
+    }
+  }
+#endif
+}
+
+void
+test03()
+{
+  // custom predicate
+  struct
+  {
+    static unsigned char
+    norm(unsigned char c) { return std::isalnum(c) ? c : '#'; }
+
+    // equality
+    bool operator()(char l, char r) const { return norm(l) == norm(r); }
+
+    // hash
+    std::size_t operator()(char c) const { return std::hash<char>{}(norm(c)); }
+  } eq;
+
+  const char* needle = " foo 123 ";
+  const char* haystack = "*****foo*123******";
+  const char* ne = needle + std::strlen(needle);
+  const char* he = haystack + std::strlen(haystack);
+
+  auto d = make_default_searcher(needle, ne, eq);
+  auto bm = make_boyer_moore_searcher(needle, ne, eq, eq);
+  auto bmh = make_boyer_moore_horspool_searcher(needle, ne, eq, eq);
+
+  auto res = std::search(haystack, he, needle, ne, eq);
+  auto d_res = d(haystack, he);
+  VERIFY( d_res == res );
+  auto bm_res = bm(haystack, he);
+  VERIFY( bm_res == res );
+  auto bmh_res = bmh(haystack, he);
+  VERIFY( bmh_res == res );
+}
+
+int
+main()
+{
+  test01();
+  test02();
+  test03();
+}