+2013-08-07 Tim Shen <timshen91@gmail.com>
+
+ * include/Makefile.am: Adjust to new files.
+ * include/Makefile.in: Regenerate.
+ * include/bits/regex.h: Adjust to new interfaces.
+ * include/bits/regex_automaton.h: New.
+ * include/bits/regex_automaton.tcc: New.
+ * include/bits/regex_compiler.h: Adjust to new files.
+ * include/bits/regex_compiler.tcc: New.
+ * include/bits/regex_constants.h: Remove trailing whitespace.
+ * include/bits/regex_error.h: Likewise.
+ * include/bits/regex_executor.h: New.
+ * include/bits/regex_executor.tcc: New.
+ * include/std/regex: Adjust to new files.
+ * testsuite/28_regex/algorithms/regex_match/extended/
+ string_dispatch_01.cc: Adjust to new interfaces.
+
2013-08-07 Paolo Carlini <paolo.carlini@oracle.com>
* include/ext/atomicity.h: Add #pragma GCC system_header.
${bits_srcdir}/random.tcc \
${bits_srcdir}/range_access.h \
${bits_srcdir}/regex.h \
- ${bits_srcdir}/regex_compiler.h \
${bits_srcdir}/regex_constants.h \
- ${bits_srcdir}/regex_cursor.h \
${bits_srcdir}/regex_error.h \
- ${bits_srcdir}/regex_grep_matcher.h \
- ${bits_srcdir}/regex_grep_matcher.tcc \
- ${bits_srcdir}/regex_nfa.h \
- ${bits_srcdir}/regex_nfa.tcc \
+ ${bits_srcdir}/regex_automaton.h \
+ ${bits_srcdir}/regex_automaton.tcc \
+ ${bits_srcdir}/regex_compiler.h \
+ ${bits_srcdir}/regex_compiler.tcc \
+ ${bits_srcdir}/regex_executor.h \
+ ${bits_srcdir}/regex_executor.tcc \
${bits_srcdir}/stream_iterator.h \
${bits_srcdir}/streambuf_iterator.h \
${bits_srcdir}/shared_ptr.h \
${bits_srcdir}/random.tcc \
${bits_srcdir}/range_access.h \
${bits_srcdir}/regex.h \
- ${bits_srcdir}/regex_compiler.h \
${bits_srcdir}/regex_constants.h \
- ${bits_srcdir}/regex_cursor.h \
${bits_srcdir}/regex_error.h \
- ${bits_srcdir}/regex_grep_matcher.h \
- ${bits_srcdir}/regex_grep_matcher.tcc \
- ${bits_srcdir}/regex_nfa.h \
- ${bits_srcdir}/regex_nfa.tcc \
+ ${bits_srcdir}/regex_automaton.h \
+ ${bits_srcdir}/regex_automaton.tcc \
+ ${bits_srcdir}/regex_compiler.h \
+ ${bits_srcdir}/regex_compiler.tcc \
+ ${bits_srcdir}/regex_executor.h \
+ ${bits_srcdir}/regex_executor.tcc \
${bits_srcdir}/stream_iterator.h \
${bits_srcdir}/streambuf_iterator.h \
${bits_srcdir}/shared_ptr.h \
/**
* @brief Class regex_traits. Describes aspects of a regular expression.
*
- * A regular expression traits class that satisfies the requirements of
+ * A regular expression traits class that satisfies the requirements of
* section [28.7].
*
* The class %regex is parameterized around a set of related types and
_BaseType _M_base;
unsigned char _M_extended;
static constexpr unsigned char _S_under = 1 << 0;
- // FIXME: _S_blank should be removed in the future, when locale's complete.
+ // FIXME: _S_blank should be removed in the future,
+ // when locale's complete.
static constexpr unsigned char _S_blank = 1 << 1;
static constexpr unsigned char _S_valid_mask = 0x3;
* @brief Constructs a default traits object.
*/
regex_traits() { }
-
+
/**
* @brief Gives the length of a C-style string starting at @p __p.
*
char_type
translate(char_type __c) const
{ return __c; }
-
+
/**
* @brief Translates a character into a case-insensitive equivalent.
*
*/
char_type
translate_nocase(char_type __c) const
- {
+ {
typedef std::ctype<char_type> __ctype_type;
const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
- return __fctyp.tolower(__c);
+ return __fctyp.tolower(__c);
}
-
+
/**
* @brief Gets a sort key for a character sequence.
*
*
* Effects: if typeid(use_facet<collate<_Ch_type> >) ==
* typeid(collate_byname<_Ch_type>) and the form of the sort key
- * returned by collate_byname<_Ch_type>::transform(__first, __last)
+ * returned by collate_byname<_Ch_type>::transform(__first, __last)
* is known and can be converted into a primary sort key
* then returns that key, otherwise returns an empty string.
*
*
* @param __first beginning of the collation element name.
* @param __last one-past-the-end of the collation element name.
- *
+ *
* @returns a sequence of one or more characters that represents the
* collating element consisting of the character sequence designated by
* the iterator range [__first, __last). Returns an empty string if the
* @param __ch a character representing a digit.
* @param __radix the radix if the numeric conversion (limited to 8, 10,
* or 16).
- *
+ *
* @returns the value represented by the digit __ch in base radix if the
* character __ch is a valid digit in base radix; otherwise returns -1.
*/
int
value(_Ch_type __ch, int __radix) const;
-
+
/**
* @brief Imbues the regex_traits object with a copy of a new locale.
*
std::swap(_M_locale, __loc);
return __loc;
}
-
+
/**
* @brief Gets a copy of the current locale in use by the regex_traits
* object.
locale_type
getloc() const
{ return _M_locale; }
-
+
protected:
locale_type _M_locale;
};
if (__s == __it->first)
{
if (__icase
- && ((__it->second & (ctype_base::lower | ctype_base::upper)) != 0))
+ && ((__it->second
+ & (ctype_base::lower | ctype_base::upper)) != 0))
return ctype_base::alpha;
return __it->second;
}
{
typedef std::ctype<char_type> __ctype_type;
const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
-
+
return __fctyp.is(__f._M_base, __c)
// [[:w:]]
|| ((__f._M_extended & _RegexMask::_S_under)
* character sequence.
*/
basic_regex()
- : _M_flags(ECMAScript),
- _M_automaton(__detail::__compile<const _Ch_type*, _Rx_traits>(0, 0,
- _M_traits, _M_flags))
+ : _M_flags(ECMAScript), _M_automaton(nullptr)
{ }
/**
*/
explicit
basic_regex(const _Ch_type* __p, flag_type __f = ECMAScript)
- : _M_flags(__f),
- _M_automaton(__detail::__compile(__p, __p + _Rx_traits::length(__p),
- _M_traits, _M_flags))
+ : basic_regex(__p, __p + _Rx_traits::length(__p), __f)
{ }
/**
*
* @throws regex_error if @p __p is not a valid regular expression.
*/
- basic_regex(const _Ch_type* __p, std::size_t __len, flag_type __f)
- : _M_flags(__f),
- _M_automaton(__detail::__compile(__p, __p + __len, _M_traits, _M_flags))
+ basic_regex(const _Ch_type* __p,
+ std::size_t __len, flag_type __f = ECMAScript)
+ : basic_regex(__p, __p + __len, __f)
{ }
/**
*
* @param __rhs A @p regex object.
*/
- basic_regex(const basic_regex& __rhs)
- : _M_flags(__rhs._M_flags), _M_traits(__rhs._M_traits),
- _M_automaton(__rhs._M_automaton)
- { }
+ basic_regex(const basic_regex& __rhs) = default;
/**
* @brief Move-constructs a basic regular expression.
*/
template<typename _Ch_traits, typename _Ch_alloc>
explicit
- basic_regex(const std::basic_string<_Ch_type, _Ch_traits,
+ basic_regex(const std::basic_string<_Ch_type, _Ch_traits,
_Ch_alloc>& __s,
flag_type __f = ECMAScript)
- : _M_flags(__f),
- _M_automaton(__detail::__compile(__s.begin(), __s.end(),
- _M_traits, _M_flags))
+ : basic_regex(__s.begin(), __s.end(), __f)
{ }
/**
* expression.
*/
template<typename _InputIterator>
- basic_regex(_InputIterator __first, _InputIterator __last,
+ basic_regex(_InputIterator __first, _InputIterator __last,
flag_type __f = ECMAScript)
: _M_flags(__f),
- _M_automaton(__detail::__compile(__first, __last, _M_traits, _M_flags))
+ _M_automaton(__detail::_Compiler<_InputIterator, _Ch_type, _Rx_traits>
+ (__first, __last, _M_traits, _M_flags)._M_get_nfa())
{ }
/**
*/
basic_regex(initializer_list<_Ch_type> __l,
flag_type __f = ECMAScript)
- : _M_flags(__f),
- _M_automaton(__detail::__compile(__l.begin(), __l.end(),
- _M_traits, _M_flags))
+ : basic_regex(__l.begin(), __l.end(), __f)
{ }
/**
*/
~basic_regex()
{ }
-
+
/**
* @brief Assigns one regular expression to another.
*/
basic_regex&
operator=(const _Ch_type* __p)
{ return this->assign(__p, flags()); }
-
+
/**
* @brief Replaces a regular expression with a new one constructed from
* a string.
this->swap(__tmp);
return *this;
}
-
+
/**
* @brief The move-assignment operator.
*
{ return this->assign(string_type(__p, __len), __flags); }
/**
- * @brief Assigns a new regular expression to a regex object from a
+ * @brief Assigns a new regular expression to a regex object from a
* string containing a regular expression pattern.
*
* @param __s A string containing a regular expression pattern.
basic_regex&
assign(const basic_string<_Ch_type, _Ch_typeraits, _Alloc>& __s,
flag_type __flags = ECMAScript)
- {
+ {
basic_regex __tmp(__s, __flags);
this->swap(__tmp);
return *this;
unsigned int
mark_count() const
- { return _M_automaton->_M_sub_count() - 1; }
+ {
+   // A default-constructed regex holds no automaton (_M_automaton is
+   // nullptr), so there is nothing to dereference and no marked
+   // sub-expression to report.
+   if (_M_automaton == nullptr)
+     return 0;
+   return _M_automaton->_M_sub_count() - 1;
+ }
-
+
/**
* @brief Gets the flags used to construct the regular expression
* or in the last call to assign().
flag_type
flags() const
{ return _M_flags; }
-
+
// [7.8.5] locale
/**
* @brief Imbues the regular expression object with the given locale.
locale_type
imbue(locale_type __loc)
{ return _M_traits.imbue(__loc); }
-
+
/**
* @brief Gets the locale currently imbued in the regular expression
* object.
locale_type
getloc() const
{ return _M_traits.getloc(); }
-
+
// [7.8.6] swap
/**
* @brief Swaps the contents of two regular expression objects.
_M_dot(std::ostream& __ostr)
{ _M_automaton->_M_dot(__ostr); }
#endif
-
- const __detail::_AutomatonPtr&
- _M_get_automaton() const
- { return _M_automaton; }
protected:
- flag_type _M_flags;
- _Rx_traits _M_traits;
- __detail::_AutomatonPtr _M_automaton;
+ typedef std::shared_ptr<__detail::_Automaton<_Ch_type, _Rx_traits>>
+ _AutomatonPtr;
+
+ template<typename _BiIter, typename _Alloc,
+ typename _CharT, typename _TraitsT>
+ friend std::unique_ptr<
+ __detail::_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
+ __detail::__get_executor(_BiIter,
+ _BiIter,
+ match_results<_BiIter, _Alloc>&,
+ const basic_regex<_CharT, _TraitsT>&,
+ regex_constants::match_flag_type);
+
+ template<typename _B, typename _A, typename _C, typename _R>
+ friend bool
+ regex_match(_B, _B,
+ match_results<_B, _A>&,
+ const basic_regex<_C, _R>&,
+ regex_constants::match_flag_type);
+
+ template<typename _B, typename _A, typename _C, typename _R>
+ friend bool
+ regex_search(_B, _B,
+ match_results<_B, _A>&,
+ const basic_regex<_C, _R>&,
+ regex_constants::match_flag_type);
+
+ flag_type _M_flags;
+ _Rx_traits _M_traits;
+ _AutomatonPtr _M_automaton;
};
-
+
/** @brief Standard regular expressions. */
typedef basic_regex<char> regex;
typedef std::basic_string<value_type> string_type;
bool matched;
-
+
constexpr sub_match() : matched() { }
/**
? string_type(this->first, this->second)
: string_type();
}
-
+
/**
* @brief Gets the matching sequence as a string.
*
? string_type(this->first, this->second)
: string_type();
}
-
+
/**
* @brief Compares this and another matched sequence.
*
int
compare(const string_type& __s) const
{ return this->str().compare(__s); }
-
+
/**
* @brief Compares this sub_match to a C-style string.
*
compare(const value_type* __s) const
{ return this->str().compare(__s); }
};
-
-
+
+
/** @brief Standard regex submatch over a C-style null-terminated string. */
typedef sub_match<const char*> csub_match;
#endif
// [7.9.2] sub_match non-member operators
-
+
/**
* @brief Tests the equivalence of two regular expression submatches.
* @param __lhs First regular expression submatch.
typedef std::basic_string<char_type> string_type;
//@}
-
+
public:
/**
* @name 28.10.1 Construction, Copying, and Destruction
*/
~match_results()
{ }
-
+
//@}
// 28.10.2, state:
size_type __size = _Base_type::size();
return (__size && _Base_type::operator[](0).matched) ? __size - 2 : 0;
}
-
+
size_type
max_size() const
{ return _Base_type::max_size(); }
bool
empty() const
{ return size() == 0; }
-
+
//@}
/**
string_type
str(size_type __sub = 0) const
{ return (*this)[__sub].str(); }
-
+
/**
* @brief Gets a %sub_match reference for the match or submatch.
* @param __sub indicates the submatch.
*/
const_reference
operator[](size_type __sub) const
- {
+ {
_GLIBCXX_DEBUG_ASSERT( ready() );
return __sub < size()
? _Base_type::operator[](__sub)
const_iterator
begin() const
{ return _Base_type::begin(); }
-
+
/**
* @brief Gets an iterator to the start of the %sub_match collection.
*/
const_iterator
end() const
{ return !empty() ? _Base_type::end() - 2 : _Base_type::end(); }
-
+
/**
* @brief Gets an iterator to one-past-the-end of the collection.
*/
*/
string_type
format(const char_type* __fmt,
- match_flag_type __flags = regex_constants::format_default) const
+ match_flag_type __flags = regex_constants::format_default) const
{
string_type __result;
format(std::back_inserter(__result),
+ __fmt,
__fmt + char_traits<char_type>::length(__fmt),
__flags);
return __result;
}
- //@}
+ //@}
/**
* @name 10.5 Allocator
*/
- //@{
+ //@{
/**
* @brief Gets a copy of the allocator.
allocator_type
get_allocator() const
{ return _Base_type::get_allocator(); }
-
- //@}
+
+ //@}
/**
* @name 10.6 Swap
*/
- //@{
+ //@{
/**
* @brief Swaps the contents of two match_results.
void
swap(match_results& __that)
{ _Base_type::swap(__that); }
- //@}
-
+ //@}
+
private:
- friend class __detail::_SpecializedResults<_Bi_iter, _Alloc>;
+ template<typename, typename, typename, typename>
+ friend class __detail::_Executor;
+
+ template<typename, typename, typename, typename>
+ friend class __detail::_DFSExecutor;
+
+ template<typename, typename, typename, typename>
+ friend class __detail::_BFSExecutor;
+
+ template<typename _B, typename _A, typename _Ch_type, typename _Rx_traits>
+ friend bool
+ regex_match(_B, _B, match_results<_B, _A>&,
+ const basic_regex<_Ch_type,
+ _Rx_traits>&,
+ regex_constants::match_flag_type);
+
+ template<typename _B, typename _A, typename _Ch_type, typename _Rx_traits>
+ friend bool
+ regex_search(_B, _B, match_results<_B, _A>&,
+ const basic_regex<_Ch_type,
+ _Rx_traits>&,
+ regex_constants::match_flag_type);
};
-
+
typedef match_results<const char*> cmatch;
typedef match_results<string::const_iterator> smatch;
#ifdef _GLIBCXX_USE_WCHAR_T
regex_constants::match_flag_type __flags
= regex_constants::match_default)
{
- __detail::_AutomatonPtr __a = __re._M_get_automaton();
- __detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
- __detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
- __detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
- return __a->_M_get_matcher(__cs, __r, __a, __flags)->_M_match();
+      // A regex without an automaton (default-constructed) matches nothing.
+      if (__re._M_automaton == nullptr)
+        return false;
+      if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match())
+        {
+          // Walk the underlying sub_match vector by reference.  Iterating
+          // __m itself by value would only modify copies, leaving the
+          // unmatched sub-expressions untouched.
+          typename match_results<_Bi_iter, _Alloc>::_Base_type& __res = __m;
+          for (auto& __sub : __res)
+            if (!__sub.matched)
+              __sub.first = __sub.second = __e;
+          // Slots size() and size()+1 are filled last: an unmatched, empty
+          // range at __s and an unmatched, empty range at __e.
+          __m.at(__m.size()).matched = false;
+          __m.at(__m.size()).first = __s;
+          __m.at(__m.size()).second = __s;
+          __m.at(__m.size()+1).matched = false;
+          __m.at(__m.size()+1).first = __e;
+          __m.at(__m.size()+1).second = __e;
+          return true;
+        }
+      return false;
}
/**
const basic_regex<_Ch_type, _Rx_traits>& __re,
regex_constants::match_flag_type __flags
= regex_constants::match_default)
- {
+ {
match_results<_Bi_iter> __what;
return regex_match(__first, __last, __what, __re, __flags);
}
typename _Alloc, typename _Ch_type, typename _Rx_traits>
inline bool
regex_match(const basic_string<_Ch_type, _Ch_traits, _Ch_alloc>& __s,
- match_results<typename basic_string<_Ch_type,
+ match_results<typename basic_string<_Ch_type,
_Ch_traits, _Ch_alloc>::const_iterator, _Alloc>& __m,
const basic_regex<_Ch_type, _Rx_traits>& __re,
regex_constants::match_flag_type __flags
regex_constants::match_flag_type __flags
= regex_constants::match_default)
{
- __detail::_AutomatonPtr __a = __re._M_get_automaton();
- __detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
- __detail::_SpecializedCursor<_Bi_iter> __cs(__first, __last);
- __detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
+      // A regex without an automaton (default-constructed) matches nothing.
+      if (__re._M_automaton == nullptr)
+        return false;
+      // Mutable view of the sub_match storage.  Iterating __m by value
+      // would only modify copies of the sub_match objects.
+      typename match_results<_Bi_iter, _Alloc>::_Base_type& __res = __m;
+      // Try every start position in turn and stop at the first hit.
+      // NOTE(review): no attempt is made at __cur == __last, so an empty
+      // match at the very end (or in an empty range) is never reported --
+      // confirm whether that is intended.
for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo?
- {
- __detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last);
- auto __matcher = __a->_M_get_matcher(__curs, __r, __a, __flags);
- if (__matcher->_M_search_from_first())
- {
- __r._M_set_range(__m.size(),
- __detail::_SpecializedCursor<_Bi_iter>
- {__first, __m[0].first});
- __r._M_set_range(__m.size()+1,
- __detail::_SpecializedCursor<_Bi_iter>
- {__m[0].second, __last});
- __r._M_set_matched(__m.size(),
- __m.prefix().first != __m.prefix().second);
- __r._M_set_matched(__m.size()+1,
- __m.suffix().first != __m.suffix().second);
- return true;
- }
- }
+        if (__detail::__get_executor(__cur, __last, __m, __re, __flags)
+            ->_M_search_from_first())
+          {
+            // Unmatched sub-expressions become empty ranges at __last.
+            for (auto& __sub : __res)
+              if (!__sub.matched)
+                __sub.first = __sub.second = __last;
+            // Slot size() spans [__first, start of match) and slot
+            // size()+1 spans [end of match, __last).
+            __m.at(__m.size()).first = __first;
+            __m.at(__m.size()).second = __m[0].first;
+            __m.at(__m.size()+1).first = __m[0].second;
+            __m.at(__m.size()+1).second = __last;
+            // Each of the two is flagged as matched only when non-empty.
+            __m.at(__m.size()).matched =
+              (__m.prefix().first != __m.prefix().second);
+            __m.at(__m.size()+1).matched =
+              (__m.suffix().first != __m.suffix().second);
+            return true;
+          }
return false;
}
// std [28.12] Class template regex_iterator
/**
- * An iterator adaptor that will provide repeated calls of regex_search over
+ * An iterator adaptor that will provide repeated calls of regex_search over
* a range until no more matches remain.
*/
template<typename _Bi_iter,
regex_iterator()
: _M_match()
{ }
-
+
/**
* Constructs a %regex_iterator...
* @param __a [IN] The start of a text range to search.
* Copy constructs a %regex_iterator.
*/
regex_iterator(const regex_iterator& __rhs) = default;
-
+
/**
* @brief Assigns one %regex_iterator to another.
*/
regex_iterator&
operator=(const regex_iterator& __rhs) = default;
-
+
/**
* @brief Tests the equivalence of two regex iterators.
*/
bool
operator==(const regex_iterator& __rhs) const;
-
+
/**
* @brief Tests the inequivalence of two regex iterators.
*/
bool
operator!=(const regex_iterator& __rhs) const
{ return !(*this == __rhs); }
-
+
/**
* @brief Dereferences a %regex_iterator.
*/
const value_type&
operator*() const
{ return _M_match; }
-
+
/**
* @brief Selects a %regex_iterator member.
*/
const value_type*
operator->() const
{ return &_M_match; }
-
+
/**
* @brief Increments a %regex_iterator.
*/
regex_iterator&
operator++();
-
+
/**
* @brief Postincrements a %regex_iterator.
*/
++(*this);
return __tmp;
}
-
+
private:
_Bi_iter _M_begin;
_Bi_iter _M_end;
}
return *this;
}
-
+
typedef regex_iterator<const char*> cregex_iterator;
typedef regex_iterator<string::const_iterator> sregex_iterator;
#ifdef _GLIBCXX_USE_WCHAR_T
typedef const value_type* pointer;
typedef const value_type& reference;
typedef std::forward_iterator_tag iterator_category;
-
+
public:
/**
* @brief Default constructs a %regex_token_iterator.
- *
+ *
* A default-constructed %regex_token_iterator is a singular iterator
* that will compare equal to the one-past-the-end value for any
* iterator of the same type.
regex_token_iterator()
: _M_position(), _M_result(nullptr), _M_suffix(), _M_n(0), _M_subs()
{ }
-
+
/**
* Constructs a %regex_token_iterator...
* @param __a [IN] The start of the text to search.
/** @brief Token iterator for standard wide-character strings. */
typedef regex_token_iterator<wstring::const_iterator> wsregex_token_iterator;
#endif
-
+
//@} // group regex
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace
--- /dev/null
+// class template regex -*- C++ -*-
+
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+/**
+ * @file bits/regex_automaton.h
+ * This is an internal header file, included by other library headers.
+ * Do not attempt to use it directly. @headername{regex}
+ */
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+namespace __detail
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+ /**
+ * @defgroup regex-detail Base and Implementation Classes
+ * @ingroup regex
+ * @{
+ */
+
+ // Identifier of a state; the _NFA insert functions return the index of
+ // the state they just appended to the state vector.
+ typedef int _StateIdT;
+ // Ordered collection of state identifiers.
+ typedef std::set<_StateIdT> _StateSet;
+ // Sentinel meaning "no state"; used to mark transitions that have not
+ // been linked to a successor.
+ static const _StateIdT _S_invalid_state_id = -1;
+
+ // Callable taking one character and returning bool; stored in states
+ // whose opcode is _S_opcode_match.
+ template<typename _CharT>
+ using _Matcher = std::function<bool (_CharT)>;
+
+ /// Operation codes that define the type of transitions within the base NFA
+ /// that represents the regular expression.
+ enum _Opcode
+ {
+ _S_opcode_unknown = 0,
+ // Fork: the state has two successors, _M_next and _M_alt.
+ _S_opcode_alternative = 1,
+ // Sub-expression boundaries; the state carries its index in _M_subexpr.
+ _S_opcode_subexpr_begin = 4,
+ _S_opcode_subexpr_end = 5,
+ // The state carries a character predicate in _M_matches.
+ _S_opcode_match = 100,
+ // Accepting state, as appended by _NFA::_M_insert_accept().
+ _S_opcode_accept = 255
+ };
+
+  /// A single node of the automaton.  Which payload member is meaningful
+  /// depends on _M_opcode (see the per-member comments).
+  template<typename _CharT, typename _TraitsT>
+    class _State
+    {
+    public:
+      typedef int              _OpcodeT;
+      typedef _Matcher<_CharT> _MatcherT;
+
+      _OpcodeT     _M_opcode;    // type of outgoing transition
+      _StateIdT    _M_next;      // outgoing transition
+      _StateIdT    _M_alt;       // for _S_opcode_alternative
+      unsigned int _M_subexpr;   // for _S_opcode_subexpr_*
+      _MatcherT    _M_matches;   // for _S_opcode_match
+
+      // Every constructor initializes all scalar members.  States are
+      // copied when stored in a container, and copying an indeterminate
+      // value is undefined behaviour.
+
+      /// Builds a state that carries nothing but its opcode.
+      explicit _State(_OpcodeT __opcode)
+      : _M_opcode(__opcode), _M_next(_S_invalid_state_id),
+        _M_alt(_S_invalid_state_id), _M_subexpr(0)
+      { }
+
+      /// Builds a _S_opcode_match state guarded by the predicate @p __m.
+      _State(const _MatcherT& __m)
+      : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id),
+        _M_alt(_S_invalid_state_id), _M_subexpr(0), _M_matches(__m)
+      { }
+
+      /// Builds a state that carries a sub-expression index.
+      _State(_OpcodeT __opcode, unsigned __index)
+      : _M_opcode(__opcode), _M_next(_S_invalid_state_id),
+        _M_alt(_S_invalid_state_id), _M_subexpr(__index)
+      { }
+
+      /// Builds a _S_opcode_alternative state with both successors set.
+      _State(_StateIdT __next, _StateIdT __alt)
+      : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt),
+        _M_subexpr(0)
+      { }
+
+#ifdef _GLIBCXX_DEBUG
+      // Writes a textual description of this state to the stream.
+      std::ostream&
+      _M_print(std::ostream& __ostr) const;
+
+      // Prints graphviz dot commands for state.
+      std::ostream&
+      _M_dot(std::ostream& __ostr, _StateIdT __id) const;
+#endif
+    };
+
+  /// Base class for, um, automata.  Could be an NFA or a DFA.  Your choice.
+  ///
+  /// This is an abstract interface, so it has a virtual destructor:
+  /// destroying a derived automaton through a pointer to this base is
+  /// then well defined.
+  template<typename _CharT, typename _TraitsT>
+    class _Automaton
+    {
+    public:
+      typedef unsigned int _SizeT;
+
+    public:
+      virtual
+      ~_Automaton() = default;
+
+      /// Returns the number of sub-expressions tracked by the automaton.
+      virtual _SizeT
+      _M_sub_count() const = 0;
+
+#ifdef _GLIBCXX_DEBUG
+      /// Writes a graphviz description of the automaton to @p __ostr.
+      virtual std::ostream&
+      _M_dot(std::ostream& __ostr) const = 0;
+#endif
+    };
+
+  /// The automaton as a vector of states, together with the bookkeeping
+  /// used while states are being appended.  Each _M_insert_* function
+  /// appends one state and returns its index.
+  template<typename _CharT, typename _TraitsT>
+    class _NFA
+    : public _Automaton<_CharT, _TraitsT>,
+      public std::vector<_State<_CharT, _TraitsT>>
+    {
+    public:
+      typedef _State<_CharT, _TraitsT>            _StateT;
+      typedef const _Matcher<_CharT>&             _MatcherT;
+      typedef unsigned int                        _SizeT;
+      typedef regex_constants::syntax_option_type _FlagT;
+
+      _NFA(_FlagT __f)
+      : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
+        _M_has_backref(false)
+      { }
+
+      // Syntax flags given at construction.
+      _FlagT
+      _M_options() const
+      { return _M_flags; }
+
+      // Index of the start state.
+      _StateIdT
+      _M_start() const
+      { return _M_start_state; }
+
+      // Indices of every accept state appended so far.
+      const _StateSet&
+      _M_final_states() const
+      { return _M_accepting_states; }
+
+      // Number of sub-expressions opened so far.
+      _SizeT
+      _M_sub_count() const
+      { return _M_subexpr_count; }
+
+      // Appends an accept state and records it as final.
+      _StateIdT
+      _M_insert_accept()
+      {
+        this->push_back(_StateT(_S_opcode_accept));
+        const _StateIdT __accept = this->size() - 1;
+        _M_accepting_states.insert(__accept);
+        return __accept;
+      }
+
+      // Appends a fork state with successors __next and __alt.
+      _StateIdT
+      _M_insert_alt(_StateIdT __next, _StateIdT __alt)
+      {
+        _StateT __fork(__next, __alt);
+        this->push_back(__fork);
+        return this->size() - 1;
+      }
+
+      // Appends a state guarded by the predicate __m.
+      _StateIdT
+      _M_insert_matcher(_MatcherT __m)
+      {
+        _StateT __guarded(__m);
+        this->push_back(__guarded);
+        return this->size() - 1;
+      }
+
+      // Opens a new sub-expression: takes the next index, remembers it on
+      // the parenthesis stack and appends the begin marker.
+      _StateIdT
+      _M_insert_subexpr_begin()
+      {
+        const _SizeT __index = _M_subexpr_count;
+        ++_M_subexpr_count;
+        _M_paren_stack.push(__index);
+        this->push_back(_StateT(_S_opcode_subexpr_begin, __index));
+        return this->size() - 1;
+      }
+
+      // Closes the innermost open sub-expression: appends the end marker
+      // with the index on top of the parenthesis stack, then pops it.
+      _StateIdT
+      _M_insert_subexpr_end()
+      {
+        this->push_back(_StateT(_S_opcode_subexpr_end,
+                                _M_paren_stack.top()));
+        _M_paren_stack.pop();
+        return this->size() - 1;
+      }
+
+      // Records whether the expression contains a back-reference.
+      void
+      _M_set_backref(bool __b)
+      { _M_has_backref = __b; }
+
+#ifdef _GLIBCXX_DEBUG
+      // Writes a graphviz description of all states.
+      std::ostream&
+      _M_dot(std::ostream& __ostr) const;
+#endif
+
+      _FlagT                   _M_flags;
+      _StateIdT                _M_start_state;
+      _StateSet                _M_accepting_states;
+      _SizeT                   _M_subexpr_count;
+      bool                     _M_has_backref;
+      std::stack<unsigned int> _M_paren_stack;
+    };
+
+  /// A fragment of an NFA under construction: the id of its first state
+  /// and up to two "end" ids that still have to be linked to whatever
+  /// follows.  The states themselves live in the referenced NFA.
+  template<typename _CharT, typename _TraitsT>
+    class _StateSeq
+    {
+    public:
+      typedef _NFA<_CharT, _TraitsT> _RegexT;
+
+    public:
+      // Single-node sequence: __s is both the start and the first end;
+      // __e, if given, is the second end.
+      _StateSeq(_RegexT& __ss, _StateIdT __s,
+                _StateIdT __e = _S_invalid_state_id)
+      : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
+      { }
+
+      // Split sequence built from two other sequences: a new alternative
+      // state branches to both, and each contributes one end.
+      _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
+      : _M_nfa(__e1._M_nfa),
+        _M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
+        _M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
+      { }
+
+      // Split sequence built from one sequence and a state id: a new
+      // alternative state branches to __id and to the start of __e.
+      _StateSeq(const _StateSeq& __e, _StateIdT __id)
+      : _M_nfa(__e._M_nfa),
+        _M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
+        _M_end1(__id), _M_end2(__e._M_end1)
+      { }
+
+      // Member-wise copy; the copy refers to the same NFA.
+      _StateSeq(const _StateSeq& __rhs) = default;
+
+      _StateSeq& operator=(const _StateSeq& __rhs);
+
+      // Id of the first state of the sequence.
+      _StateIdT
+      _M_front() const
+      { return _M_start; }
+
+      // Extends a sequence by one.
+      void
+      _M_push_back(_StateIdT __id);
+
+      // Extends and maybe joins a sequence.
+      void
+      _M_append(_StateIdT __id);
+
+      void
+      _M_append(_StateSeq& __rhs);
+
+      // Clones an entire sequence.
+      _StateIdT
+      _M_clone();
+
+    private:
+      _RegexT&  _M_nfa;
+      _StateIdT _M_start;
+      _StateIdT _M_end1;
+      _StateIdT _M_end2;
+    };
+
+ //@} regex-detail
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace __detail
+} // namespace std
+
+#include <bits/regex_automaton.tcc>
--- /dev/null
+// class template regex -*- C++ -*-
+
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+/**
+ * @file bits/regex_automaton.tcc
+ * This is an internal header file, included by other library headers.
+ * Do not attempt to use it directly. @headername{regex}
+ */
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+namespace __detail
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+#ifdef _GLIBCXX_DEBUG
+  // Writes a one-line textual description of this state to __ostr and
+  // returns the stream.  The parameter uses a reserved (double-underscore)
+  // name, like _M_dot below, so a user macro cannot clash with it.
+  template<typename _CharT, typename _TraitsT>
+    std::ostream& _State<_CharT, _TraitsT>::
+    _M_print(std::ostream& __ostr) const
+    {
+      switch (_M_opcode)
+      {
+        case _S_opcode_alternative:
+          __ostr << "alt next=" << _M_next << " alt=" << _M_alt;
+          break;
+        case _S_opcode_subexpr_begin:
+          __ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr;
+          break;
+        case _S_opcode_subexpr_end:
+          __ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr;
+          break;
+        case _S_opcode_match:
+          __ostr << "match next=" << _M_next;
+          break;
+        case _S_opcode_accept:
+          __ostr << "accept next=" << _M_next;
+          break;
+        default:
+          // Any opcode not listed above.
+          __ostr << "unknown next=" << _M_next;
+          break;
+      }
+      return __ostr;
+    }
+
+  // Emits the graphviz node for this state (labelled with its id and a
+  // short opcode tag) followed by one edge per outgoing transition.
+  template<typename _CharT, typename _TraitsT>
+    std::ostream& _State<_CharT, _TraitsT>::
+    _M_dot(std::ostream& __ostr, _StateIdT __id) const
+    {
+      // Every node statement starts the same way; only the tag differs.
+      __ostr << __id << " [label=\"" << __id;
+      switch (_M_opcode)
+      {
+        case _S_opcode_alternative:
+          __ostr << "\\nALT\"];\n";
+          __ostr << __id << " -> " << _M_next
+                 << " [label=\"epsilon\", tailport=\"s\"];\n";
+          __ostr << __id << " -> " << _M_alt
+                 << " [label=\"epsilon\", tailport=\"n\"];\n";
+          break;
+        case _S_opcode_subexpr_begin:
+          __ostr << "\\nSBEGIN " << _M_subexpr << "\"];\n";
+          __ostr << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
+          break;
+        case _S_opcode_subexpr_end:
+          __ostr << "\\nSEND " << _M_subexpr << "\"];\n";
+          __ostr << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
+          break;
+        case _S_opcode_match:
+          __ostr << "\\nMATCH\"];\n";
+          __ostr << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
+          break;
+        case _S_opcode_accept:
+          // Accept states have no outgoing edge.
+          __ostr << "\\nACC\"];\n" ;
+          break;
+        default:
+          __ostr << "\\nUNK\"];\n";
+          __ostr << __id << " -> " << _M_next << " [label=\"?\"];\n";
+          break;
+      }
+      return __ostr;
+    }
+
+  // Emits the whole automaton as a graphviz digraph: a fixed header, the
+  // dot output of each state in index order, and the closing brace.
+  template<typename _CharT, typename _TraitsT>
+    std::ostream& _NFA<_CharT, _TraitsT>::
+    _M_dot(std::ostream& __ostr) const
+    {
+      __ostr << "digraph _Nfa {\n"
+             << " rankdir=LR;\n";
+      unsigned int __index = 0;
+      for (const auto& __state : *this)
+        {
+          __state._M_dot(__ostr, __index);
+          ++__index;
+        }
+      __ostr << "}\n";
+      return __ostr;
+    }
+#endif
+
+  // Copies the start and end ids from __rhs.  The NFA reference is left
+  // alone: a reference member cannot be re-seated.
+  template<typename _CharT, typename _TraitsT>
+    _StateSeq<_CharT, _TraitsT>& _StateSeq<_CharT, _TraitsT>::
+    operator=(const _StateSeq& __rhs)
+    {
+      const _StateIdT __start = __rhs._M_start;
+      const _StateIdT __end1 = __rhs._M_end1;
+      const _StateIdT __end2 = __rhs._M_end2;
+      _M_start = __start;
+      _M_end1 = __end1;
+      _M_end2 = __end2;
+      return *this;
+    }
+
+  // Makes __id the new first end of the sequence; if there was a previous
+  // first end, its outgoing transition is pointed at __id.
+  template<typename _CharT, typename _TraitsT>
+    void _StateSeq<_CharT, _TraitsT>::
+    _M_push_back(_StateIdT __id)
+    {
+      const _StateIdT __old_tail = _M_end1;
+      _M_end1 = __id;
+      if (__old_tail == _S_invalid_state_id)
+        return;
+      _M_nfa[__old_tail]._M_next = __id;
+    }
+
+  // Links every open end of the sequence to __id and makes __id the only
+  // end.  The second end, if set, is handled first and then cleared.
+  template<typename _CharT, typename _TraitsT>
+    void _StateSeq<_CharT, _TraitsT>::
+    _M_append(_StateIdT __id)
+    {
+      if (_M_end2 != _S_invalid_state_id)
+        {
+          auto& __open = _M_nfa[_M_end2];
+          // When both ends name the same state, the second end stands for
+          // that state's alternative branch.
+          (_M_end2 == _M_end1 ? __open._M_alt : __open._M_next) = __id;
+          _M_end2 = _S_invalid_state_id;
+        }
+      const _StateIdT __old_tail = _M_end1;
+      if (__old_tail != _S_invalid_state_id)
+        _M_nfa[__old_tail]._M_next = __id;
+      _M_end1 = __id;
+    }
+
+  // Concatenates __rhs after this sequence: every open end is linked to
+  // the start of __rhs, and the ends of __rhs become the ends of the
+  // combined sequence.
+  template<typename _CharT, typename _TraitsT>
+    void _StateSeq<_CharT, _TraitsT>::
+    _M_append(_StateSeq& __rhs)
+    {
+      const _StateIdT __head = __rhs._M_start;
+      if (_M_end2 != _S_invalid_state_id)
+        {
+          auto& __open = _M_nfa[_M_end2];
+          // When both ends name the same state, the second end stands for
+          // that state's alternative branch.
+          (_M_end2 == _M_end1 ? __open._M_alt : __open._M_next) = __head;
+          _M_end2 = _S_invalid_state_id;
+        }
+      // Take over the second end of __rhs only if it has one.
+      if (__rhs._M_end2 != _S_invalid_state_id)
+        _M_end2 = __rhs._M_end2;
+      if (_M_end1 != _S_invalid_state_id)
+        _M_nfa[_M_end1]._M_next = __head;
+      _M_end1 = __rhs._M_end1;
+    }
+
+ // @todo implement this function.
+ // Placeholder: nothing is cloned and the NFA is not modified.  The
+ // returned value is always 0, which is a valid state index rather than
+ // _S_invalid_state_id, so callers cannot detect the missing
+ // implementation from the result.
+ template<typename _CharT, typename _TraitsT>
+ _StateIdT _StateSeq<_CharT, _TraitsT>::
+ _M_clone()
+ { return 0; }
+
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace __detail
+} // namespace
* @{
*/
- /// Base class for scanner.
- struct _Scanner_base
- {
- typedef unsigned int _StateT;
+  /// Matches a character range (bracket expression)
+  ///
+  /// Collects single characters, ranges and character classes through the
+  /// _M_add_* / _M_make_range members; operator() is defined elsewhere.
+  template<typename _CharT, typename _TraitsT>
+    struct _BracketMatcher
+    {
+      typedef typename _TraitsT::char_class_type  _CharClassT;
+      typedef typename _TraitsT::string_type      _StringT;
+      typedef regex_constants::syntax_option_type _FlagT;
+
+      // Mem-initializers are listed in member declaration order, which is
+      // the order in which they actually run.
+      explicit
+      _BracketMatcher(bool __is_non_matching,
+                      const _TraitsT& __t,
+                      _FlagT __flags)
+      : _M_traits(__t), _M_flags(__flags),
+        _M_is_non_matching(__is_non_matching), _M_class_set(0)
+      { }
+
+      bool
+      operator()(_CharT) const;
+
+      // Adds a single character.  With the collate flag the character is
+      // first translated (case-insensitively under icase).
+      void
+      _M_add_char(_CharT __c)
+      {
+        if (_M_flags & regex_constants::collate)
+          {
+            if (_M_is_icase())
+              _M_char_set.push_back(_M_traits.translate_nocase(__c));
+            else
+              _M_char_set.push_back(_M_traits.translate(__c));
+          }
+        else
+          _M_char_set.push_back(__c);
+      }
+
+      // Adds the collating element named __s; throws error_collate when
+      // the traits do not know the name.
+      void
+      _M_add_collating_element(const _StringT& __s)
+      {
+        // data()/size() give the same pointer range as &*begin()/&*end()
+        // without dereferencing the end iterator.
+        auto __st = _M_traits.lookup_collatename(__s.data(),
+                                                 __s.data() + __s.size());
+        if (__st.empty())
+          __throw_regex_error(regex_constants::error_collate);
+        // TODO: digraph
+        _M_char_set.push_back(__st[0]);
+      }
- static constexpr _StateT _S_state_in_brace = 1 << 0;
- static constexpr _StateT _S_state_in_bracket = 1 << 1;
+      // Adds an equivalence class: the primary sort key of __s is handed
+      // to _M_add_character_class.
+      void
+      _M_add_equivalence_class(const _StringT& __s)
+      {
+        _M_add_character_class(
+          _M_traits.transform_primary(__s.data(), __s.data() + __s.size()));
+      }
- virtual ~_Scanner_base() { };
- };
+      // Adds the character class named __s; throws error_ctype when the
+      // traits return an empty mask for it.
+      void
+      _M_add_character_class(const _StringT& __s)
+      {
+        auto __st = _M_traits.
+          lookup_classname(__s.data(), __s.data() + __s.size(),
+                           _M_is_icase());
+        if (__st == 0)
+          __throw_regex_error(regex_constants::error_ctype);
+        _M_class_set |= __st;
+      }
+
+      // Adds the range [__l, __r], stored as a pair of sort keys.
+      void
+      _M_make_range(_CharT __l, _CharT __r)
+      { _M_range_set.push_back(make_pair(_M_get_str(__l), _M_get_str(__r))); }
+
+      bool
+      _M_is_icase() const
+      { return _M_flags & regex_constants::icase; }
+
+      // Returns the sort key of the (translated) character __c.
+      _StringT
+      _M_get_str(_CharT __c) const
+      {
+        auto __s = _StringT(1,
+                            _M_is_icase()
+                            ? _M_traits.translate_nocase(__c)
+                            : _M_traits.translate(__c));
+        return _M_traits.transform(__s.begin(), __s.end());
+      }
+
+      _TraitsT                              _M_traits;
+      _FlagT                                _M_flags;
+      bool                                  _M_is_non_matching;
+      std::vector<_CharT>                   _M_char_set;
+      std::vector<pair<_StringT, _StringT>> _M_range_set;
+      _CharClassT                           _M_class_set;
+    };
/**
* @brief struct _Scanner. Scans an input range for regex tokens.
* constructor: different regular expression grammars will interpret
* the same input pattern in syntactically different ways.
*/
- template<typename _InputIterator>
- class _Scanner: public _Scanner_base
+ template<typename _InputIter>
+ class _Scanner
{
public:
- typedef _InputIterator _IteratorT;
- typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
+ typedef unsigned int _StateT;
+ typedef typename std::iterator_traits<_InputIter>::value_type _CharT;
typedef std::basic_string<_CharT> _StringT;
typedef regex_constants::syntax_option_type _FlagT;
typedef const std::ctype<_CharT> _CtypeT;
_S_token_unknown
};
- _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
- std::locale __loc)
+ _Scanner(_InputIter __begin, _InputIter __end,
+ _FlagT __flags, std::locale __loc)
: _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
_M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(0)
{ _M_advance(); }
void
_M_eat_collsymbol();
- _IteratorT _M_current;
- _IteratorT _M_end;
+ static constexpr _StateT _S_state_in_brace = 1 << 0;
+ static constexpr _StateT _S_state_in_bracket = 1 << 1;
+ _InputIter _M_current;
+ _InputIter _M_end;
_FlagT _M_flags;
_CtypeT& _M_ctype;
_TokenT _M_curToken;
_StateT _M_state;
};
- template<typename _InputIterator>
- void
- _Scanner<_InputIterator>::
- _M_advance()
- {
- if (_M_current == _M_end)
- {
- _M_curToken = _S_token_eof;
- return;
- }
-
- _CharT __c = *_M_current;
- if (_M_state & _S_state_in_bracket)
- {
- _M_scan_in_bracket();
- return;
- }
- if (_M_state & _S_state_in_brace)
- {
- _M_scan_in_brace();
- return;
- }
-#if 0
- // TODO: re-enable line anchors when _M_assertion is implemented.
- // See PR libstdc++/47724
- else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
- {
- _M_curToken = _S_token_line_begin;
- ++_M_current;
- return;
- }
- else if (__c == _M_ctype.widen('$'))
- {
- _M_curToken = _S_token_line_end;
- ++_M_current;
- return;
- }
-#endif
- else if (__c == _M_ctype.widen('.'))
- {
- _M_curToken = _S_token_anychar;
- ++_M_current;
- return;
- }
- else if (__c == _M_ctype.widen('*'))
- {
- _M_curToken = _S_token_closure0;
- ++_M_current;
- return;
- }
- else if (__c == _M_ctype.widen('+'))
- {
- _M_curToken = _S_token_closure1;
- ++_M_current;
- return;
- }
- else if (__c == _M_ctype.widen('|'))
- {
- _M_curToken = _S_token_or;
- ++_M_current;
- return;
- }
- else if (__c == _M_ctype.widen('['))
- {
- if (*++_M_current == _M_ctype.widen('^'))
- {
- _M_curToken = _S_token_bracket_inverse_begin;
- ++_M_current;
- }
- else
- _M_curToken = _S_token_bracket_begin;
- _M_state |= _S_state_in_bracket;
- return;
- }
- else if (__c == _M_ctype.widen('\\'))
- {
- _M_eat_escape();
- return;
- }
- else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
- {
- if (__c == _M_ctype.widen('('))
- {
- _M_curToken = _S_token_subexpr_begin;
- ++_M_current;
- return;
- }
- else if (__c == _M_ctype.widen(')'))
- {
- _M_curToken = _S_token_subexpr_end;
- ++_M_current;
- return;
- }
- else if (__c == _M_ctype.widen('{'))
- {
- _M_curToken = _S_token_interval_begin;
- _M_state |= _S_state_in_brace;
- ++_M_current;
- return;
- }
- }
-
- _M_curToken = _S_token_ord_char;
- _M_curValue.assign(1, __c);
- ++_M_current;
- }
-
-
- template<typename _InputIterator>
- void
- _Scanner<_InputIterator>::
- _M_scan_in_brace()
- {
- if (_M_ctype.is(_CtypeT::digit, *_M_current))
- {
- _M_curToken = _S_token_dup_count;
- _M_curValue.assign(1, *_M_current);
- ++_M_current;
- while (_M_current != _M_end
- && _M_ctype.is(_CtypeT::digit, *_M_current))
- {
- _M_curValue += *_M_current;
- ++_M_current;
- }
- return;
- }
- else if (*_M_current == _M_ctype.widen(','))
- {
- _M_curToken = _S_token_comma;
- ++_M_current;
- return;
- }
- if (_M_flags & (regex_constants::basic | regex_constants::grep))
- {
- if (*_M_current == _M_ctype.widen('\\'))
- _M_eat_escape();
- }
- else
- {
- if (*_M_current == _M_ctype.widen('}'))
- {
- _M_curToken = _S_token_interval_end;
- _M_state &= ~_S_state_in_brace;
- ++_M_current;
- return;
- }
- }
- }
-
- template<typename _InputIterator>
- void
- _Scanner<_InputIterator>::
- _M_scan_in_bracket()
- {
- if (*_M_current == _M_ctype.widen('['))
- {
- ++_M_current;
- if (_M_current == _M_end)
- {
- _M_curToken = _S_token_eof;
- return;
- }
-
- if (*_M_current == _M_ctype.widen('.'))
- {
- _M_curToken = _S_token_collsymbol;
- _M_eat_collsymbol();
- return;
- }
- else if (*_M_current == _M_ctype.widen(':'))
- {
- _M_curToken = _S_token_char_class_name;
- _M_eat_charclass();
- return;
- }
- else if (*_M_current == _M_ctype.widen('='))
- {
- _M_curToken = _S_token_equiv_class_name;
- _M_eat_equivclass();
- return;
- }
- }
- else if (*_M_current == _M_ctype.widen('-'))
- {
- _M_curToken = _S_token_dash;
- ++_M_current;
- return;
- }
- else if (*_M_current == _M_ctype.widen(']'))
- {
- _M_curToken = _S_token_bracket_end;
- _M_state &= ~_S_state_in_bracket;
- ++_M_current;
- return;
- }
- else if (*_M_current == _M_ctype.widen('\\'))
- {
- _M_eat_escape();
- return;
- }
- _M_curToken = _S_token_collelem_single;
- _M_curValue.assign(1, *_M_current);
- ++_M_current;
- }
-
- // TODO implement it.
- template<typename _InputIterator>
- void
- _Scanner<_InputIterator>::
- _M_eat_escape()
- {
- ++_M_current;
- if (_M_current == _M_end)
- {
- _M_curToken = _S_token_eof;
- return;
- }
- _CharT __c = *_M_current;
- ++_M_current;
-
- if (__c == _M_ctype.widen('('))
- {
- if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
- {
- _M_curToken = _S_token_ord_char;
- _M_curValue.assign(1, __c);
- }
- else
- _M_curToken = _S_token_subexpr_begin;
- }
- else if (__c == _M_ctype.widen(')'))
- {
- if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
- {
- _M_curToken = _S_token_ord_char;
- _M_curValue.assign(1, __c);
- }
- else
- _M_curToken = _S_token_subexpr_end;
- }
- else if (__c == _M_ctype.widen('{'))
- {
- if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
- {
- _M_curToken = _S_token_ord_char;
- _M_curValue.assign(1, __c);
- }
- else
- {
- _M_curToken = _S_token_interval_begin;
- _M_state |= _S_state_in_brace;
- }
- }
- else if (__c == _M_ctype.widen('}'))
- {
- if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
- {
- _M_curToken = _S_token_ord_char;
- _M_curValue.assign(1, __c);
- }
- else
- {
- if (!(_M_state && _S_state_in_brace))
- __throw_regex_error(regex_constants::error_badbrace);
- _M_state &= ~_S_state_in_brace;
- _M_curToken = _S_token_interval_end;
- }
- }
- else if (__c == _M_ctype.widen('x'))
- {
- ++_M_current;
- if (_M_current == _M_end)
- {
- _M_curToken = _S_token_eof;
- return;
- }
- if (_M_ctype.is(_CtypeT::digit, *_M_current))
- {
- _M_curValue.assign(1, *_M_current);
- ++_M_current;
- if (_M_current == _M_end)
- {
- _M_curToken = _S_token_eof;
- return;
- }
- if (_M_ctype.is(_CtypeT::digit, *_M_current))
- {
- _M_curValue += *_M_current;
- ++_M_current;
- return;
- }
- }
- }
- else if (__c == _M_ctype.widen('^')
- || __c == _M_ctype.widen('.')
- || __c == _M_ctype.widen('*')
- || __c == _M_ctype.widen('$')
- || __c == _M_ctype.widen('\\'))
- {
- _M_curToken = _S_token_ord_char;
- _M_curValue.assign(1, __c);
- }
- else if (_M_ctype.is(_CtypeT::digit, __c))
- {
- _M_curToken = _S_token_backref;
- _M_curValue.assign(1, __c);
- }
- else if (_M_state & _S_state_in_bracket)
- {
- if (__c == _M_ctype.widen('-')
- || __c == _M_ctype.widen('[')
- || __c == _M_ctype.widen(']'))
- {
- _M_curToken = _S_token_ord_char;
- _M_curValue.assign(1, __c);
- }
- else if ((_M_flags & regex_constants::ECMAScript)
- && __c == _M_ctype.widen('b'))
- {
- _M_curToken = _S_token_ord_char;
- _M_curValue.assign(1, _M_ctype.widen(' '));
- }
- else
- __throw_regex_error(regex_constants::error_escape);
- }
- else
- __throw_regex_error(regex_constants::error_escape);
- }
-
- // Eats a character class or throwns an exception.
- // current point to ':' delimiter on entry, char after ']' on return
- template<typename _InputIterator>
- void
- _Scanner<_InputIterator>::
- _M_eat_charclass()
- {
- ++_M_current; // skip ':'
- if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_ctype);
- for (_M_curValue.clear();
- _M_current != _M_end && *_M_current != _M_ctype.widen(':');
- ++_M_current)
- _M_curValue += *_M_current;
- if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_ctype);
- ++_M_current; // skip ':'
- if (*_M_current != _M_ctype.widen(']'))
- __throw_regex_error(regex_constants::error_ctype);
- ++_M_current; // skip ']'
- }
-
-
- template<typename _InputIterator>
- void
- _Scanner<_InputIterator>::
- _M_eat_equivclass()
- {
- ++_M_current; // skip '='
- if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_collate);
- for (_M_curValue.clear();
- _M_current != _M_end && *_M_current != _M_ctype.widen('=');
- ++_M_current)
- _M_curValue += *_M_current;
- if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_collate);
- ++_M_current; // skip '='
- if (*_M_current != _M_ctype.widen(']'))
- __throw_regex_error(regex_constants::error_collate);
- ++_M_current; // skip ']'
- }
-
-
- template<typename _InputIterator>
- void
- _Scanner<_InputIterator>::
- _M_eat_collsymbol()
- {
- ++_M_current; // skip '.'
- if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_collate);
- for (_M_curValue.clear();
- _M_current != _M_end && *_M_current != _M_ctype.widen('.');
- ++_M_current)
- _M_curValue += *_M_current;
- if (_M_current == _M_end)
- __throw_regex_error(regex_constants::error_collate);
- ++_M_current; // skip '.'
- if (*_M_current != _M_ctype.widen(']'))
- __throw_regex_error(regex_constants::error_collate);
- ++_M_current; // skip ']'
- }
-
-#ifdef _GLIBCXX_DEBUG
- template<typename _InputIterator>
- std::ostream&
- _Scanner<_InputIterator>::
- _M_print(std::ostream& ostr)
- {
- switch (_M_curToken)
- {
- case _S_token_anychar:
- ostr << "any-character\n";
- break;
- case _S_token_backref:
- ostr << "backref\n";
- break;
- case _S_token_bracket_begin:
- ostr << "bracket-begin\n";
- break;
- case _S_token_bracket_inverse_begin:
- ostr << "bracket-inverse-begin\n";
- break;
- case _S_token_bracket_end:
- ostr << "bracket-end\n";
- break;
- case _S_token_char_class_name:
- ostr << "char-class-name \"" << _M_curValue << "\"\n";
- break;
- case _S_token_closure0:
- ostr << "closure0\n";
- break;
- case _S_token_closure1:
- ostr << "closure1\n";
- break;
- case _S_token_collelem_multi:
- ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
- break;
- case _S_token_collelem_single:
- ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
- break;
- case _S_token_collsymbol:
- ostr << "collsymbol \"" << _M_curValue << "\"\n";
- break;
- case _S_token_comma:
- ostr << "comma\n";
- break;
- case _S_token_dash:
- ostr << "dash\n";
- break;
- case _S_token_dup_count:
- ostr << "dup count: " << _M_curValue << "\n";
- break;
- case _S_token_eof:
- ostr << "EOF\n";
- break;
- case _S_token_equiv_class_name:
- ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
- break;
- case _S_token_interval_begin:
- ostr << "interval begin\n";
- break;
- case _S_token_interval_end:
- ostr << "interval end\n";
- break;
- case _S_token_line_begin:
- ostr << "line begin\n";
- break;
- case _S_token_line_end:
- ostr << "line end\n";
- break;
- case _S_token_opt:
- ostr << "opt\n";
- break;
- case _S_token_or:
- ostr << "or\n";
- break;
- case _S_token_ord_char:
- ostr << "ordinary character: \"" << _M_value() << "\"\n";
- break;
- case _S_token_subexpr_begin:
- ostr << "subexpr begin\n";
- break;
- case _S_token_subexpr_end:
- ostr << "subexpr end\n";
- break;
- case _S_token_word_begin:
- ostr << "word begin\n";
- break;
- case _S_token_word_end:
- ostr << "word end\n";
- break;
- case _S_token_unknown:
- ostr << "-- unknown token --\n";
- break;
- default:
- _GLIBCXX_DEBUG_ASSERT(false);
- }
- return ostr;
- }
-#endif
-
/// Builds an NFA from an input iterator interval.
- template<typename _InIter, typename _TraitsT>
+ template<typename _InputIter, typename _CharT, typename _TraitsT>
class _Compiler
{
public:
- typedef _InIter _IterT;
- typedef typename std::iterator_traits<_InIter>::value_type _CharT;
- typedef std::basic_string<_CharT> _StringT;
- typedef regex_constants::syntax_option_type _FlagT;
+ typedef typename _TraitsT::string_type _StringT;
+ typedef _NFA<_CharT, _TraitsT> _RegexT;
+ typedef regex_constants::syntax_option_type _FlagT;
- _Compiler(const _InIter& __b, const _InIter& __e,
- _TraitsT& __traits, _FlagT __flags);
+ _Compiler(_InputIter __b, _InputIter __e,
+ const _TraitsT& __traits, _FlagT __flags);
- const _Nfa&
- _M_nfa() const
- { return _M_state_store; }
+ std::shared_ptr<_RegexT>
+ _M_get_nfa() const
+ { return std::shared_ptr<_RegexT>(new _RegexT(_M_state_store)); }
private:
- typedef _Scanner<_InIter> _ScannerT;
- typedef typename _ScannerT::_TokenT _TokenT;
- typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
- typedef _BracketMatcher<_InIter, _TraitsT> _BMatcherT;
+ typedef _Scanner<_InputIter> _ScannerT;
+ typedef typename _ScannerT::_TokenT _TokenT;
+ typedef _StateSeq<_CharT, _TraitsT> _StateSeqT;
+ typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT;
+ typedef _BracketMatcher<_CharT, _TraitsT> _BMatcherT;
// accepts a specific token or returns false.
bool
int
_M_cur_int_value(int __radix);
- _TraitsT& _M_traits;
- _ScannerT _M_scanner;
- _StringT _M_cur_value;
- _Nfa _M_state_store;
- _StackT _M_stack;
- _FlagT _M_flags;
+ const _TraitsT& _M_traits;
+ _ScannerT _M_scanner;
+ _StringT _M_cur_value;
+ _RegexT _M_state_store;
+ _StackT _M_stack;
+ _FlagT _M_flags;
};
- template<typename _InIter, typename _TraitsT>
- _Compiler<_InIter, _TraitsT>::
- _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
- _Compiler<_InIter, _TraitsT>::_FlagT __flags)
- : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
- _M_state_store(__flags), _M_flags(__flags)
- {
- typedef _StartTagger<_InIter, _TraitsT> _Start;
- typedef _EndTagger<_InIter, _TraitsT> _End;
-
- _StateSeq __r(_M_state_store,
- _M_state_store._M_insert_subexpr_begin(_Start(0)));
- _M_disjunction();
- if (!_M_stack.empty())
- {
- __r._M_append(_M_stack.top());
- _M_stack.pop();
- }
- __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
- __r._M_append(_M_state_store._M_insert_accept());
- }
-
- template<typename _InIter, typename _TraitsT>
- bool
- _Compiler<_InIter, _TraitsT>::
- _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
- {
- if (token == _M_scanner._M_token())
- {
- _M_cur_value = _M_scanner._M_value();
- _M_scanner._M_advance();
- return true;
- }
- return false;
- }
-
- template<typename _InIter, typename _TraitsT>
- void
- _Compiler<_InIter, _TraitsT>::
- _M_disjunction()
- {
- this->_M_alternative();
- if (_M_match_token(_ScannerT::_S_token_or))
- {
- _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
- this->_M_disjunction();
- _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
- _M_stack.push(_StateSeq(__alt1, __alt2));
- }
- }
-
- template<typename _InIter, typename _TraitsT>
- void
- _Compiler<_InIter, _TraitsT>::
- _M_alternative()
- {
- if (this->_M_term())
- {
- _StateSeq __re = _M_stack.top(); _M_stack.pop();
- this->_M_alternative();
- if (!_M_stack.empty())
- {
- __re._M_append(_M_stack.top());
- _M_stack.pop();
- }
- _M_stack.push(__re);
- }
- }
-
- template<typename _InIter, typename _TraitsT>
- bool
- _Compiler<_InIter, _TraitsT>::
- _M_term()
- {
- if (this->_M_assertion())
- return true;
- if (this->_M_atom())
- {
- this->_M_quantifier();
- return true;
- }
- return false;
- }
-
- template<typename _InIter, typename _TraitsT>
- bool
- _Compiler<_InIter, _TraitsT>::
- _M_assertion()
- {
- if (_M_match_token(_ScannerT::_S_token_line_begin))
- {
- // __m.push(_Matcher::_S_opcode_line_begin);
- return true;
- }
- if (_M_match_token(_ScannerT::_S_token_line_end))
- {
- // __m.push(_Matcher::_S_opcode_line_end);
- return true;
- }
- if (_M_match_token(_ScannerT::_S_token_word_begin))
- {
- // __m.push(_Matcher::_S_opcode_word_begin);
- return true;
- }
- if (_M_match_token(_ScannerT::_S_token_word_end))
- {
- // __m.push(_Matcher::_S_opcode_word_end);
- return true;
- }
- return false;
- }
-
- template<typename _InIter, typename _TraitsT>
- void
- _Compiler<_InIter, _TraitsT>::
- _M_quantifier()
- {
- if (_M_match_token(_ScannerT::_S_token_closure0))
- {
- if (_M_stack.empty())
- __throw_regex_error(regex_constants::error_badrepeat);
- _StateSeq __r(_M_stack.top(), -1);
- __r._M_append(__r._M_front());
- _M_stack.pop();
- _M_stack.push(__r);
- return;
- }
- if (_M_match_token(_ScannerT::_S_token_closure1))
- {
- if (_M_stack.empty())
- __throw_regex_error(regex_constants::error_badrepeat);
- _StateSeq __r(_M_state_store,
- _M_state_store.
- _M_insert_alt(_S_invalid_state_id,
- _M_stack.top()._M_front()));
- _M_stack.top()._M_append(__r);
- return;
- }
- if (_M_match_token(_ScannerT::_S_token_opt))
- {
- if (_M_stack.empty())
- __throw_regex_error(regex_constants::error_badrepeat);
- _StateSeq __r(_M_stack.top(), -1);
- _M_stack.pop();
- _M_stack.push(__r);
- return;
- }
- if (_M_match_token(_ScannerT::_S_token_interval_begin))
- {
- if (_M_stack.empty())
- __throw_regex_error(regex_constants::error_badrepeat);
- if (!_M_match_token(_ScannerT::_S_token_dup_count))
- __throw_regex_error(regex_constants::error_badbrace);
- _StateSeq __r(_M_stack.top());
- int __min_rep = _M_cur_int_value(10);
- for (int __i = 1; __i < __min_rep; ++__i)
- _M_stack.top()._M_append(__r._M_clone());
- if (_M_match_token(_ScannerT::_S_token_comma))
- if (_M_match_token(_ScannerT::_S_token_dup_count))
- {
- int __n = _M_cur_int_value(10) - __min_rep;
- if (__n < 0)
- __throw_regex_error(regex_constants::error_badbrace);
- for (int __i = 0; __i < __n; ++__i)
- {
- _StateSeq __r(_M_state_store,
- _M_state_store.
- _M_insert_alt(_S_invalid_state_id,
- _M_stack.top()._M_front()));
- _M_stack.top()._M_append(__r);
- }
- }
- else
- {
- _StateSeq __r(_M_stack.top(), -1);
- __r._M_push_back(__r._M_front());
- _M_stack.pop();
- _M_stack.push(__r);
- }
- if (!_M_match_token(_ScannerT::_S_token_interval_end))
- __throw_regex_error(regex_constants::error_brace);
- return;
- }
- }
-
- template<typename _InIter, typename _TraitsT>
- bool
- _Compiler<_InIter, _TraitsT>::
- _M_atom()
- {
- typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
- typedef _StartTagger<_InIter, _TraitsT> _Start;
- typedef _EndTagger<_InIter, _TraitsT> _End;
-
- if (_M_match_token(_ScannerT::_S_token_anychar))
- {
- _M_stack.push(_StateSeq(_M_state_store,
- _M_state_store._M_insert_matcher
- (_AnyMatcher)));
- return true;
- }
- if (_M_match_token(_ScannerT::_S_token_ord_char))
- {
- _M_stack.push(_StateSeq(_M_state_store,
- _M_state_store._M_insert_matcher
- (_CMatcher(_M_cur_value[0], _M_flags, _M_traits))));
- return true;
- }
- if (_M_match_token(_ScannerT::_S_token_backref))
- {
- // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
- _M_state_store._M_set_back_ref(true);
- //return true;
- }
- if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
- {
- int __mark = _M_state_store._M_sub_count();
- _StateSeq __r(_M_state_store,
- _M_state_store.
- _M_insert_subexpr_begin(_Start(__mark)));
- this->_M_disjunction();
- if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
- __throw_regex_error(regex_constants::error_paren);
- if (!_M_stack.empty())
- {
- __r._M_append(_M_stack.top());
- _M_stack.pop();
- }
- __r._M_append(_M_state_store._M_insert_subexpr_end
- (__mark, _End(__mark)));
- _M_stack.push(__r);
- return true;
- }
- return _M_bracket_expression();
- }
-
- template<typename _InIter, typename _TraitsT>
- bool
- _Compiler<_InIter, _TraitsT>::
- _M_bracket_expression()
- {
- bool __inverse =
- _M_match_token(_ScannerT::_S_token_bracket_inverse_begin);
- if (!(__inverse || _M_match_token(_ScannerT::_S_token_bracket_begin)))
- return false;
- _BMatcherT __matcher( __inverse, _M_flags, _M_traits);
- // special case: only if _not_ chr first after
- // '[' or '[^' or if ECMAscript
- if (!_M_bracket_list(__matcher) // list is empty
- && !(_M_flags & regex_constants::ECMAScript))
- __throw_regex_error(regex_constants::error_brack);
- _M_stack.push(_StateSeq(_M_state_store,
- _M_state_store._M_insert_matcher(__matcher)));
- return true;
- }
-
- template<typename _InIter, typename _TraitsT>
- bool // list is non-empty
- _Compiler<_InIter, _TraitsT>::
- _M_bracket_list(_BMatcherT& __matcher)
- {
- if (_M_match_token(_ScannerT::_S_token_bracket_end))
- return false;
- _M_expression_term(__matcher);
- _M_bracket_list(__matcher);
- return true;
- }
-
- template<typename _InIter, typename _TraitsT>
- void
- _Compiler<_InIter, _TraitsT>::
- _M_expression_term(_BMatcherT& __matcher)
- {
- if (_M_match_token(_ScannerT::_S_token_collsymbol))
- {
- __matcher._M_add_collating_element(_M_cur_value);
- return;
- }
- if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
- {
- __matcher._M_add_equivalence_class(_M_cur_value);
- return;
- }
- if (_M_match_token(_ScannerT::_S_token_char_class_name))
- {
- __matcher._M_add_character_class(_M_cur_value);
- return;
- }
- if (_M_match_token(_ScannerT::_S_token_collelem_single)) // [a
- {
- auto __ch = _M_cur_value[0];
- if (_M_match_token(_ScannerT::_S_token_dash)) // [a-
- {
- // If the dash is the last character in the bracket expression,
- // it is not special.
- if (_M_scanner._M_token() == _ScannerT::_S_token_bracket_end)
- __matcher._M_add_char(_M_cur_value[0]); // [a-] <=> [a\-]
- else // [a-z]
- {
- if (!_M_match_token(_ScannerT::_S_token_collelem_single))
- __throw_regex_error(regex_constants::error_range);
- __matcher._M_make_range(__ch, _M_cur_value[0]);
- }
- }
- else // [a]
- __matcher._M_add_char(__ch);
- return;
- }
- __throw_regex_error(regex_constants::error_brack);
- }
-
- template<typename _InIter, typename _TraitsT>
- int
- _Compiler<_InIter, _TraitsT>::
- _M_cur_int_value(int __radix)
- {
- int __v = 0;
- for (typename _StringT::size_type __i = 0;
- __i < _M_cur_value.length(); ++__i)
- __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
- return __v;
- }
-
- template<typename _InIter, typename _TraitsT>
- _AutomatonPtr
- __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
- regex_constants::syntax_option_type __f)
- { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
- __f)._M_nfa())); }
-
//@} regex-detail
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail
} // namespace std
+
+#include <bits/regex_compiler.tcc>
--- /dev/null
+// class template regex -*- C++ -*-
+
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+/**
+ * @file bits/regex_compiler.tcc
+ * This is an internal header file, included by other library headers.
+ * Do not attempt to use it directly. @headername{regex}
+ */
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+namespace __detail
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+  /// Scans the next token of the pattern into _M_curToken, and into
+  /// _M_curValue for ordinary characters.  At the end of the input the
+  /// token is _S_token_eof.  While inside a bracket or brace expression
+  /// the work is delegated to the matching _M_scan_in_* helper.
+  template<typename _BiIter>
+    void
+    _Scanner<_BiIter>::
+    _M_advance()
+    {
+      if (_M_current == _M_end)
+        {
+          _M_curToken = _S_token_eof;
+          return;
+        }
+
+      _CharT __c = *_M_current;
+      if (_M_state & _S_state_in_bracket)
+        {
+          _M_scan_in_bracket();
+          return;
+        }
+      if (_M_state & _S_state_in_brace)
+        {
+          _M_scan_in_brace();
+          return;
+        }
+#if 0
+      // TODO: re-enable line anchors when _M_assertion is implemented.
+      // See PR libstdc++/47724
+      else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
+        {
+          _M_curToken = _S_token_line_begin;
+          ++_M_current;
+          return;
+        }
+      else if (__c == _M_ctype.widen('$'))
+        {
+          _M_curToken = _S_token_line_end;
+          ++_M_current;
+          return;
+        }
+#endif
+      else if (__c == _M_ctype.widen('.'))
+        {
+          _M_curToken = _S_token_anychar;
+          ++_M_current;
+          return;
+        }
+      else if (__c == _M_ctype.widen('*'))
+        {
+          _M_curToken = _S_token_closure0;
+          ++_M_current;
+          return;
+        }
+      else if (__c == _M_ctype.widen('+'))
+        {
+          _M_curToken = _S_token_closure1;
+          ++_M_current;
+          return;
+        }
+      else if (__c == _M_ctype.widen('|'))
+        {
+          _M_curToken = _S_token_or;
+          ++_M_current;
+          return;
+        }
+      else if (__c == _M_ctype.widen('['))
+        {
+          ++_M_current;
+          // The pattern may end right after '['.  Do not dereference the
+          // end iterator in that case; the next call reports _S_token_eof.
+          if (_M_current != _M_end && *_M_current == _M_ctype.widen('^'))
+            {
+              _M_curToken = _S_token_bracket_inverse_begin;
+              ++_M_current;
+            }
+          else
+            _M_curToken = _S_token_bracket_begin;
+          _M_state |= _S_state_in_bracket;
+          return;
+        }
+      else if (__c == _M_ctype.widen('\\'))
+        {
+          _M_eat_escape();
+          return;
+        }
+      else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
+        {
+          // Outside the basic/grep grammars these characters are special
+          // without a preceding backslash.
+          if (__c == _M_ctype.widen('('))
+            {
+              _M_curToken = _S_token_subexpr_begin;
+              ++_M_current;
+              return;
+            }
+          else if (__c == _M_ctype.widen(')'))
+            {
+              _M_curToken = _S_token_subexpr_end;
+              ++_M_current;
+              return;
+            }
+          else if (__c == _M_ctype.widen('{'))
+            {
+              _M_curToken = _S_token_interval_begin;
+              _M_state |= _S_state_in_brace;
+              ++_M_current;
+              return;
+            }
+        }
+
+      // Anything not recognised above is an ordinary character.
+      _M_curToken = _S_token_ord_char;
+      _M_curValue.assign(1, __c);
+      ++_M_current;
+    }
+
+  /// Scans one token inside an interval expression: a run of digits, a
+  /// comma, or the closing delimiter.  Any other character leaves the
+  /// current token and position untouched.
+  template<typename _BiIter>
+    void
+    _Scanner<_BiIter>::
+    _M_scan_in_brace()
+    {
+      // A repetition count: collect every consecutive digit.
+      if (_M_ctype.is(_CtypeT::digit, *_M_current))
+        {
+          _M_curToken = _S_token_dup_count;
+          _M_curValue.assign(1, *_M_current);
+          for (++_M_current;
+               _M_current != _M_end
+                 && _M_ctype.is(_CtypeT::digit, *_M_current);
+               ++_M_current)
+            _M_curValue += *_M_current;
+          return;
+        }
+
+      if (*_M_current == _M_ctype.widen(','))
+        {
+          _M_curToken = _S_token_comma;
+          ++_M_current;
+          return;
+        }
+
+      // In the basic/grep grammars the interval is closed by an escape
+      // sequence, which _M_eat_escape() handles.
+      if (_M_flags & (regex_constants::basic | regex_constants::grep))
+        {
+          if (*_M_current == _M_ctype.widen('\\'))
+            _M_eat_escape();
+          return;
+        }
+
+      if (*_M_current == _M_ctype.widen('}'))
+        {
+          _M_curToken = _S_token_interval_end;
+          _M_state &= ~_S_state_in_brace;
+          ++_M_current;
+        }
+    }
+
+  /// Scans one token inside a bracket expression.
+  template<typename _BiIter>
+    void
+    _Scanner<_BiIter>::
+    _M_scan_in_bracket()
+    {
+      if (*_M_current != _M_ctype.widen('['))
+        {
+          if (*_M_current == _M_ctype.widen('-'))
+            {
+              _M_curToken = _S_token_dash;
+              ++_M_current;
+              return;
+            }
+          if (*_M_current == _M_ctype.widen(']'))
+            {
+              _M_curToken = _S_token_bracket_end;
+              _M_state &= ~_S_state_in_bracket;
+              ++_M_current;
+              return;
+            }
+          if (*_M_current == _M_ctype.widen('\\'))
+            {
+              _M_eat_escape();
+              return;
+            }
+        }
+      else
+        {
+          // A nested '[' may open "[.", "[:" or "[=".
+          ++_M_current;
+          if (_M_current == _M_end)
+            {
+              _M_curToken = _S_token_eof;
+              return;
+            }
+
+          const _CharT __opener = *_M_current;
+          if (__opener == _M_ctype.widen('.'))
+            {
+              _M_curToken = _S_token_collsymbol;
+              _M_eat_collsymbol();
+              return;
+            }
+          if (__opener == _M_ctype.widen(':'))
+            {
+              _M_curToken = _S_token_char_class_name;
+              _M_eat_charclass();
+              return;
+            }
+          if (__opener == _M_ctype.widen('='))
+            {
+              _M_curToken = _S_token_equiv_class_name;
+              _M_eat_equivclass();
+              return;
+            }
+          // Otherwise fall through: the character following the '[' is
+          // reported as a single collating element.
+        }
+
+      _M_curToken = _S_token_collelem_single;
+      _M_curValue.assign(1, *_M_current);
+      ++_M_current;
+    }
+
+  // TODO Complete it.
+  /// Scans an escape sequence.  On entry _M_current points at the
+  /// backslash.  Sets _M_curToken (and _M_curValue where a value applies),
+  /// or throws regex_error(error_escape) for an unrecognised escape.
+  /// A backslash at the very end of the input yields _S_token_eof.
+  template<typename _BiIter>
+    void
+    _Scanner<_BiIter>::
+    _M_eat_escape()
+    {
+      ++_M_current;
+      if (_M_current == _M_end)
+        {
+          _M_curToken = _S_token_eof;
+          return;
+        }
+      _CharT __c = *_M_current;
+      ++_M_current;
+
+      // For ( ) { } an escape means "ordinary character" outside the
+      // basic/grep grammars and "special token" inside them.
+      if (__c == _M_ctype.widen('('))
+        {
+          if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
+            {
+              _M_curToken = _S_token_ord_char;
+              _M_curValue.assign(1, __c);
+            }
+          else
+            _M_curToken = _S_token_subexpr_begin;
+        }
+      else if (__c == _M_ctype.widen(')'))
+        {
+          if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
+            {
+              _M_curToken = _S_token_ord_char;
+              _M_curValue.assign(1, __c);
+            }
+          else
+            _M_curToken = _S_token_subexpr_end;
+        }
+      else if (__c == _M_ctype.widen('{'))
+        {
+          if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
+            {
+              _M_curToken = _S_token_ord_char;
+              _M_curValue.assign(1, __c);
+            }
+          else
+            {
+              _M_curToken = _S_token_interval_begin;
+              _M_state |= _S_state_in_brace;
+            }
+        }
+      else if (__c == _M_ctype.widen('}'))
+        {
+          if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
+            {
+              _M_curToken = _S_token_ord_char;
+              _M_curValue.assign(1, __c);
+            }
+          else
+            {
+              // Bit test, not logical AND: an interval can only be closed
+              // while the in-brace state bit is set.
+              if (!(_M_state & _S_state_in_brace))
+                __throw_regex_error(regex_constants::error_badbrace);
+              _M_state &= ~_S_state_in_brace;
+              _M_curToken = _S_token_interval_end;
+            }
+        }
+      else if (__c == _M_ctype.widen('x'))
+        {
+          // NOTE(review): this branch advances once more although the
+          // position is already past the 'x', and it never assigns
+          // _M_curToken on the non-eof paths.  Presumably unfinished (see
+          // the TODO above); confirm before relying on "\x" escapes.
+          ++_M_current;
+          if (_M_current == _M_end)
+            {
+              _M_curToken = _S_token_eof;
+              return;
+            }
+          if (_M_ctype.is(_CtypeT::digit, *_M_current))
+            {
+              _M_curValue.assign(1, *_M_current);
+              ++_M_current;
+              if (_M_current == _M_end)
+                {
+                  _M_curToken = _S_token_eof;
+                  return;
+                }
+              if (_M_ctype.is(_CtypeT::digit, *_M_current))
+                {
+                  _M_curValue += *_M_current;
+                  ++_M_current;
+                  return;
+                }
+            }
+        }
+      else if (__c == _M_ctype.widen('^')
+               || __c == _M_ctype.widen('.')
+               || __c == _M_ctype.widen('*')
+               || __c == _M_ctype.widen('$')
+               || __c == _M_ctype.widen('\\'))
+        {
+          _M_curToken = _S_token_ord_char;
+          _M_curValue.assign(1, __c);
+        }
+      else if (_M_ctype.is(_CtypeT::digit, __c))
+        {
+          _M_curToken = _S_token_backref;
+          _M_curValue.assign(1, __c);
+        }
+      else if (_M_state & _S_state_in_bracket)
+        {
+          if (__c == _M_ctype.widen('-')
+              || __c == _M_ctype.widen('[')
+              || __c == _M_ctype.widen(']'))
+            {
+              _M_curToken = _S_token_ord_char;
+              _M_curValue.assign(1, __c);
+            }
+          else if ((_M_flags & regex_constants::ECMAScript)
+                   && __c == _M_ctype.widen('b'))
+            {
+              _M_curToken = _S_token_ord_char;
+              _M_curValue.assign(1, _M_ctype.widen(' '));
+            }
+          else
+            __throw_regex_error(regex_constants::error_escape);
+        }
+      else
+        __throw_regex_error(regex_constants::error_escape);
+    }
+
+  // Eats a character class name or throws regex_error(error_ctype).
+  // _M_current points to the ':' delimiter on entry and to the character
+  // after ']' on return; the class name is left in _M_curValue.
+  template<typename _BiIter>
+    void
+    _Scanner<_BiIter>::
+    _M_eat_charclass()
+    {
+      ++_M_current; // skip ':'
+      if (_M_current == _M_end)
+        __throw_regex_error(regex_constants::error_ctype);
+      for (_M_curValue.clear();
+           _M_current != _M_end && *_M_current != _M_ctype.widen(':');
+           ++_M_current)
+        _M_curValue += *_M_current;
+      if (_M_current == _M_end)
+        __throw_regex_error(regex_constants::error_ctype);
+      ++_M_current; // skip ':'
+      // The input may end right after the closing ':'; check for the end
+      // before looking at the expected ']'.
+      if (_M_current == _M_end || *_M_current != _M_ctype.widen(']'))
+        __throw_regex_error(regex_constants::error_ctype);
+      ++_M_current; // skip ']'
+    }
+
+
+  // Eats an equivalence class ("[=name=]") or throws an exception.
+  // _M_current points to the opening '=' delimiter on entry and to the
+  // character after the closing ']' on return.  The name is left in
+  // _M_curValue.  Throws error_collate if the input ends early or the
+  // closing "=]" is malformed.
+  template<typename _BiIter>
+    void
+    _Scanner<_BiIter>::
+    _M_eat_equivclass()
+    {
+      ++_M_current; // skip '='
+      if (_M_current == _M_end)
+        __throw_regex_error(regex_constants::error_collate);
+      for (_M_curValue.clear();
+           _M_current != _M_end && *_M_current != _M_ctype.widen('=');
+           ++_M_current)
+        _M_curValue += *_M_current;
+      if (_M_current == _M_end)
+        __throw_regex_error(regex_constants::error_collate);
+      ++_M_current; // skip '='
+      // The pattern may end right after the second '='; never dereference
+      // the end iterator.
+      if (_M_current == _M_end || *_M_current != _M_ctype.widen(']'))
+        __throw_regex_error(regex_constants::error_collate);
+      ++_M_current; // skip ']'
+    }
+
+
+  // Eats a collating symbol ("[.name.]") or throws an exception.
+  // _M_current points to the opening '.' delimiter on entry and to the
+  // character after the closing ']' on return.  The name is left in
+  // _M_curValue.  Throws error_collate if the input ends early or the
+  // closing ".]" is malformed.
+  template<typename _BiIter>
+    void
+    _Scanner<_BiIter>::
+    _M_eat_collsymbol()
+    {
+      ++_M_current; // skip '.'
+      if (_M_current == _M_end)
+        __throw_regex_error(regex_constants::error_collate);
+      for (_M_curValue.clear();
+           _M_current != _M_end && *_M_current != _M_ctype.widen('.');
+           ++_M_current)
+        _M_curValue += *_M_current;
+      if (_M_current == _M_end)
+        __throw_regex_error(regex_constants::error_collate);
+      ++_M_current; // skip '.'
+      // The pattern may end right after the second '.'; never dereference
+      // the end iterator.
+      if (_M_current == _M_end || *_M_current != _M_ctype.widen(']'))
+        __throw_regex_error(regex_constants::error_collate);
+      ++_M_current; // skip ']'
+    }
+
+#ifdef _GLIBCXX_DEBUG
+  // Debug helper: writes a one-line, human-readable description of the
+  // current token (and its value, for tokens that carry one) to __ostr
+  // and returns the stream.  Asserts on a token kind it does not know.
+  template<typename _BiIter>
+    std::ostream&
+    _Scanner<_BiIter>::
+    _M_print(std::ostream& __ostr)
+    {
+      switch (_M_curToken)
+      {
+      case _S_token_anychar:
+        __ostr << "any-character\n";
+        break;
+      case _S_token_backref:
+        __ostr << "backref\n";
+        break;
+      case _S_token_bracket_begin:
+        __ostr << "bracket-begin\n";
+        break;
+      case _S_token_bracket_inverse_begin:
+        __ostr << "bracket-inverse-begin\n";
+        break;
+      case _S_token_bracket_end:
+        __ostr << "bracket-end\n";
+        break;
+      case _S_token_char_class_name:
+        __ostr << "char-class-name \"" << _M_curValue << "\"\n";
+        break;
+      case _S_token_closure0:
+        __ostr << "closure0\n";
+        break;
+      case _S_token_closure1:
+        __ostr << "closure1\n";
+        break;
+      case _S_token_collelem_multi:
+        __ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
+        break;
+      case _S_token_collelem_single:
+        __ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
+        break;
+      case _S_token_collsymbol:
+        __ostr << "collsymbol \"" << _M_curValue << "\"\n";
+        break;
+      case _S_token_comma:
+        __ostr << "comma\n";
+        break;
+      case _S_token_dash:
+        __ostr << "dash\n";
+        break;
+      case _S_token_dup_count:
+        __ostr << "dup count: " << _M_curValue << "\n";
+        break;
+      case _S_token_eof:
+        __ostr << "EOF\n";
+        break;
+      case _S_token_equiv_class_name:
+        __ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
+        break;
+      case _S_token_interval_begin:
+        __ostr << "interval begin\n";
+        break;
+      case _S_token_interval_end:
+        __ostr << "interval end\n";
+        break;
+      case _S_token_line_begin:
+        __ostr << "line begin\n";
+        break;
+      case _S_token_line_end:
+        __ostr << "line end\n";
+        break;
+      case _S_token_opt:
+        __ostr << "opt\n";
+        break;
+      case _S_token_or:
+        __ostr << "or\n";
+        break;
+      case _S_token_ord_char:
+        __ostr << "ordinary character: \"" << _M_value() << "\"\n";
+        break;
+      case _S_token_subexpr_begin:
+        __ostr << "subexpr begin\n";
+        break;
+      case _S_token_subexpr_end:
+        __ostr << "subexpr end\n";
+        break;
+      case _S_token_word_begin:
+        __ostr << "word begin\n";
+        break;
+      case _S_token_word_end:
+        __ostr << "word end\n";
+        break;
+      case _S_token_unknown:
+        __ostr << "-- unknown token --\n";
+        break;
+      default:
+        _GLIBCXX_DEBUG_ASSERT(false);
+      }
+      return __ostr;
+    }
+#endif
+
+  // Compiles the pattern [__b, __e) into _M_state_store.  The sequence
+  // is built as: subexpr-begin state, the parsed disjunction (if it left
+  // anything on the stack), subexpr-end state, accept state.
+  template<typename _InputIter, typename _CharT, typename _TraitsT>
+    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _Compiler(_InputIter __b, _InputIter __e,
+              const _TraitsT& __traits, _FlagT __flags)
+    : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
+      _M_state_store(__flags), _M_flags(__flags)
+    {
+      _StateSeqT __whole(_M_state_store,
+                         _M_state_store._M_insert_subexpr_begin());
+
+      // Parse the pattern; the result, if any, is on top of _M_stack.
+      _M_disjunction();
+      if (!_M_stack.empty())
+        {
+          __whole._M_append(_M_stack.top());
+          _M_stack.pop();
+        }
+
+      __whole._M_append(_M_state_store._M_insert_subexpr_end());
+      __whole._M_append(_M_state_store._M_insert_accept());
+    }
+
+  // If the scanner's current token equals __token, saves the token's
+  // value in _M_cur_value, advances the scanner and returns true.
+  // Otherwise leaves everything untouched and returns false.
+  template<typename _InputIter, typename _CharT, typename _TraitsT>
+    bool
+    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _M_match_token(_Compiler<_InputIter, _CharT, _TraitsT>::_TokenT __token)
+    {
+      if (__token == _M_scanner._M_token())
+        {
+          _M_cur_value = _M_scanner._M_value();
+          _M_scanner._M_advance();
+          return true;
+        }
+      return false;
+    }
+
+  // Parses  alternative ( '|' disjunction )?  and, when an '|' is
+  // present, replaces the two operand sequences on _M_stack with one
+  // sequence built from both.
+  // NOTE(review): both top() calls assume each operand pushed a
+  // sequence; an empty alternative would leave the stack empty -- confirm.
+  template<typename _InputIter, typename _CharT, typename _TraitsT>
+    void
+    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _M_disjunction()
+    {
+      this->_M_alternative();
+      if (!_M_match_token(_ScannerT::_S_token_or))
+        return;
+
+      _StateSeqT __lhs = _M_stack.top();
+      _M_stack.pop();
+
+      this->_M_disjunction();
+
+      _StateSeqT __rhs = _M_stack.top();
+      _M_stack.pop();
+
+      _M_stack.push(_StateSeqT(__lhs, __rhs));
+    }
+
+  // Parses a (possibly empty) run of terms.  Each term's sequence is
+  // popped, the rest of the alternative is parsed recursively, and the
+  // two are concatenated and pushed back as one sequence.
+  template<typename _InputIter, typename _CharT, typename _TraitsT>
+    void
+    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _M_alternative()
+    {
+      if (!this->_M_term())
+        return;
+
+      _StateSeqT __head = _M_stack.top();
+      _M_stack.pop();
+
+      this->_M_alternative();
+      if (!_M_stack.empty())
+        {
+          // The recursive call produced a tail; chain it after the head.
+          __head._M_append(_M_stack.top());
+          _M_stack.pop();
+        }
+      _M_stack.push(__head);
+    }
+
+  // Parses one term: either an assertion, or an atom followed by an
+  // optional quantifier.  Returns true if a term was consumed.
+  template<typename _InputIter, typename _CharT, typename _TraitsT>
+    bool
+    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _M_term()
+    {
+      if (this->_M_assertion())
+        return true;
+      if (!this->_M_atom())
+        return false;
+      this->_M_quantifier();
+      return true;
+    }
+
+  // Consumes one assertion token (line begin/end, word begin/end) if
+  // present and returns true; returns false otherwise.  The tokens are
+  // tried in that order and the first match wins.
+  // No state is emitted for the assertion: the commented-out
+  // "__m.push(_Matcher::_S_opcode_*)" calls in each branch were placeholders.
+  template<typename _InputIter, typename _CharT, typename _TraitsT>
+    bool
+    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _M_assertion()
+    {
+      return _M_match_token(_ScannerT::_S_token_line_begin)
+             || _M_match_token(_ScannerT::_S_token_line_end)
+             || _M_match_token(_ScannerT::_S_token_word_begin)
+             || _M_match_token(_ScannerT::_S_token_word_end);
+    }
+
+  // Applies an optional quantifier token to the sequence on top of
+  // _M_stack: closure0, closure1, opt, or an interval
+  // ({n}, {n,} or {n,m}).  Does nothing when the next token is not a
+  // quantifier.
+  // Throws error_badrepeat when a quantifier has no operand on the
+  // stack, error_badbrace for a missing or decreasing count, and
+  // error_brace when the interval is not closed.
+  template<typename _InputIter, typename _CharT, typename _TraitsT>
+    void
+    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _M_quantifier()
+    {
+      if (_M_match_token(_ScannerT::_S_token_closure0))
+        {
+          if (_M_stack.empty())
+            __throw_regex_error(regex_constants::error_badrepeat);
+          _StateSeqT __loop(_M_stack.top(), -1);
+          __loop._M_append(__loop._M_front());
+          _M_stack.pop();
+          _M_stack.push(__loop);
+        }
+      else if (_M_match_token(_ScannerT::_S_token_closure1))
+        {
+          if (_M_stack.empty())
+            __throw_regex_error(regex_constants::error_badrepeat);
+          _StateSeqT __again(_M_state_store,
+                             _M_state_store.
+                             _M_insert_alt(_S_invalid_state_id,
+                                           _M_stack.top()._M_front()));
+          _M_stack.top()._M_append(__again);
+        }
+      else if (_M_match_token(_ScannerT::_S_token_opt))
+        {
+          if (_M_stack.empty())
+            __throw_regex_error(regex_constants::error_badrepeat);
+          _StateSeqT __maybe(_M_stack.top(), -1);
+          _M_stack.pop();
+          _M_stack.push(__maybe);
+        }
+      else if (_M_match_token(_ScannerT::_S_token_interval_begin))
+        {
+          if (_M_stack.empty())
+            __throw_regex_error(regex_constants::error_badrepeat);
+          if (!_M_match_token(_ScannerT::_S_token_dup_count))
+            __throw_regex_error(regex_constants::error_badbrace);
+
+          // One copy of the operand is already on the stack; append
+          // clones until the minimum count is reached.
+          _StateSeqT __unit(_M_stack.top());
+          int __min_rep = _M_cur_int_value(10);
+          for (int __i = 1; __i < __min_rep; ++__i)
+            _M_stack.top()._M_append(__unit._M_clone());
+
+          if (_M_match_token(_ScannerT::_S_token_comma))
+            {
+              if (_M_match_token(_ScannerT::_S_token_dup_count))
+                {
+                  // {n,m}: append (m - n) alt states.
+                  int __extra = _M_cur_int_value(10) - __min_rep;
+                  if (__extra < 0)
+                    __throw_regex_error(regex_constants::error_badbrace);
+                  for (int __i = 0; __i < __extra; ++__i)
+                    {
+                      _StateSeqT __opt(_M_state_store,
+                                       _M_state_store.
+                                       _M_insert_alt(_S_invalid_state_id,
+                                                     _M_stack.top()._M_front()));
+                      _M_stack.top()._M_append(__opt);
+                    }
+                }
+              else
+                {
+                  // {n,}: no upper bound given.
+                  _StateSeqT __open(_M_stack.top(), -1);
+                  __open._M_push_back(__open._M_front());
+                  _M_stack.pop();
+                  _M_stack.push(__open);
+                }
+            }
+
+          if (!_M_match_token(_ScannerT::_S_token_interval_end))
+            __throw_regex_error(regex_constants::error_brace);
+        }
+    }
+
+  // Parses one atom and pushes its state sequence on _M_stack:
+  //   - any-char token: a matcher accepting every character;
+  //   - ordinary char:  an equality matcher (case-folded when icase);
+  //   - '(' ... ')':    subexpr-begin, the nested disjunction, subexpr-end;
+  //   - otherwise:      whatever _M_bracket_expression() recognizes.
+  // Returns true if an atom was consumed.  Throws error_paren when a
+  // sub-expression is not closed.
+  template<typename _InputIter, typename _CharT, typename _TraitsT>
+    bool
+    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _M_atom()
+    {
+      if (_M_match_token(_ScannerT::_S_token_anychar))
+        {
+          const static auto&
+            __any_matcher = [](_CharT) -> bool
+            { return true; };
+
+          _M_stack.push(_StateSeqT(_M_state_store,
+                                   _M_state_store._M_insert_matcher
+                                   (__any_matcher)));
+          return true;
+        }
+      if (_M_match_token(_ScannerT::_S_token_ord_char))
+        {
+          auto __c = _M_cur_value[0];
+          __detail::_Matcher<_CharT> __char_matcher;
+          if (_M_flags & regex_constants::icase)
+            {
+              // Fold the pattern character once; the matcher folds each
+              // input character.  The traits object is copied into the
+              // closure.
+              auto __traits = this->_M_traits;
+              __c = __traits.translate_nocase(__c);
+              __char_matcher = [__traits, __c](_CharT __ch) -> bool
+                { return __traits.translate_nocase(__ch) == __c; };
+            }
+          else
+            __char_matcher = [__c](_CharT __ch) -> bool
+              { return __ch == __c; };
+
+          _M_stack.push(_StateSeqT(_M_state_store,
+                                   _M_state_store
+                                   ._M_insert_matcher(__char_matcher)));
+          return true;
+        }
+      if (_M_match_token(_ScannerT::_S_token_backref))
+        {
+          // Back-references only flag the automaton for now; no state is
+          // pushed and control deliberately falls through to the checks
+          // below (the early "return true" is disabled).
+          // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
+          _M_state_store._M_set_backref(true);
+          //return true;
+        }
+      if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
+        {
+          _StateSeqT __r(_M_state_store,
+                         _M_state_store.
+                         _M_insert_subexpr_begin());
+          this->_M_disjunction();
+          if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
+            __throw_regex_error(regex_constants::error_paren);
+          if (!_M_stack.empty())
+            {
+              __r._M_append(_M_stack.top());
+              _M_stack.pop();
+            }
+          __r._M_append(_M_state_store._M_insert_subexpr_end());
+          _M_stack.push(__r);
+          return true;
+        }
+      return _M_bracket_expression();
+    }
+
+  // Parses a bracket expression ("[...]" or "[^...]") and pushes a
+  // matcher state for it.  Returns false, consuming nothing, when the
+  // next token does not open a bracket expression.
+  // Throws error_brack for an empty list unless the ECMAScript flag is
+  // set.
+  template<typename _InputIter, typename _CharT, typename _TraitsT>
+    bool
+    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _M_bracket_expression()
+    {
+      const bool __negated =
+        _M_match_token(_ScannerT::_S_token_bracket_inverse_begin);
+      if (!__negated && !_M_match_token(_ScannerT::_S_token_bracket_begin))
+        return false;
+
+      _BMatcherT __matcher(__negated, _M_traits, _M_flags);
+
+      // special case: only if _not_ chr first after
+      // '[' or '[^' or if ECMAscript
+      const bool __non_empty = _M_bracket_list(__matcher);
+      if (!__non_empty && !(_M_flags & regex_constants::ECMAScript))
+        __throw_regex_error(regex_constants::error_brack);
+
+      _M_stack.push(_StateSeqT(_M_state_store,
+                               _M_state_store._M_insert_matcher(__matcher)));
+      return true;
+    }
+
+  // Consumes expression terms up to and including the closing bracket
+  // token, feeding each one to __matcher.  Returns true if the list is
+  // non-empty, false if the closing bracket came first.
+  template<typename _InputIter, typename _CharT, typename _TraitsT>
+    bool
+    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _M_bracket_list(_BMatcherT& __matcher)
+    {
+      if (_M_match_token(_ScannerT::_S_token_bracket_end))
+        return false;
+      // At least one term is present; keep reading terms until the
+      // closing bracket is consumed.
+      do
+        _M_expression_term(__matcher);
+      while (!_M_match_token(_ScannerT::_S_token_bracket_end));
+      return true;
+    }
+
+  // Parses one element of a bracket list and records it in __matcher:
+  // a collating symbol, an equivalence class, a character class, a
+  // single character, or a range "a-z".
+  // Throws error_range when a range has no valid upper bound and
+  // error_brack when the next token cannot start an expression term.
+  template<typename _InputIter, typename _CharT, typename _TraitsT>
+    void
+    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _M_expression_term(_BMatcherT& __matcher)
+    {
+      if (_M_match_token(_ScannerT::_S_token_collsymbol))
+        {
+          __matcher._M_add_collating_element(_M_cur_value);
+          return;
+        }
+      if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
+        {
+          __matcher._M_add_equivalence_class(_M_cur_value);
+          return;
+        }
+      if (_M_match_token(_ScannerT::_S_token_char_class_name))
+        {
+          __matcher._M_add_character_class(_M_cur_value);
+          return;
+        }
+      if (_M_match_token(_ScannerT::_S_token_collelem_single)) // [a
+        {
+          auto __ch = _M_cur_value[0];
+          if (_M_match_token(_ScannerT::_S_token_dash)) // [a-
+            {
+              // If the dash is the last character in the bracket expression,
+              // it is not special.
+              if (_M_scanner._M_token() == _ScannerT::_S_token_bracket_end)
+                {
+                  // [a-] <=> [a\-]: both the leading character and the
+                  // literal dash belong to the set.  The dash is widened
+                  // explicitly rather than read back from _M_cur_value.
+                  __matcher._M_add_char(__ch);
+                  __matcher._M_add_char
+                    (std::use_facet<std::ctype<_CharT>>(_M_traits.getloc())
+                     .widen('-'));
+                }
+              else // [a-z]
+                {
+                  if (!_M_match_token(_ScannerT::_S_token_collelem_single))
+                    __throw_regex_error(regex_constants::error_range);
+                  __matcher._M_make_range(__ch, _M_cur_value[0]);
+                }
+            }
+          else // [a]
+            __matcher._M_add_char(__ch);
+          return;
+        }
+      __throw_regex_error(regex_constants::error_brack);
+    }
+
+  // Converts _M_cur_value, read as an unsigned number in base __radix,
+  // to an int using _M_traits.value() for each digit.
+  // Throws error_badbrace if a character is not a digit in that radix
+  // or if the number does not fit in an int (the visible callers parse
+  // brace repeat counts), instead of overflowing a signed int.
+  template<typename _InputIter, typename _CharT, typename _TraitsT>
+    int
+    _Compiler<_InputIter, _CharT, _TraitsT>::
+    _M_cur_int_value(int __radix)
+    {
+      int __v = 0;
+      for (typename _StringT::size_type __i = 0;
+           __i < _M_cur_value.length(); ++__i)
+        {
+          const int __digit = _M_traits.value(_M_cur_value[__i], __radix);
+          // Reject before multiplying: __v * __radix + __digit must not
+          // exceed the largest int.
+          if (__digit < 0 || __v > (__INT_MAX__ - __digit) / __radix)
+            __throw_regex_error(regex_constants::error_badbrace);
+          __v = __v * __radix + __digit;
+        }
+      return __v;
+    }
+
+  // Returns whether __ch is accepted by this bracket expression.
+  // A character is in the set when it equals a listed character, when
+  // it belongs to one of the listed character classes, or when it falls
+  // inside one of the listed ranges.  The result is inverted for a
+  // non-matching ("[^...]") expression.
+  template<typename _CharT, typename _TraitsT>
+    bool _BracketMatcher<_CharT, _TraitsT>::
+    operator()(_CharT __ch) const
+    {
+      // Character classes are tested against the untranslated character.
+      auto __oldch = __ch;
+      if (_M_flags & regex_constants::collate)
+        {
+          if (_M_is_icase())
+            __ch = _M_traits.translate_nocase(__ch);
+          else
+            __ch = _M_traits.translate(__ch);
+        }
+
+      bool __ret = false;
+      for (auto __c : _M_char_set)
+        if (__c == __ch)
+          {
+            __ret = true;
+            break;
+          }
+      // Only consult classes and ranges when the character list did not
+      // already match; they could not change a positive result.
+      if (!__ret)
+        {
+          if (_M_traits.isctype(__oldch, _M_class_set))
+            __ret = true;
+          else
+            {
+              _StringT __s = _M_get_str(__ch);
+              for (auto& __it : _M_range_set)
+                if (__it.first <= __s && __s <= __it.second)
+                  {
+                    __ret = true;
+                    break;
+                  }
+            }
+        }
+      if (_M_is_non_matching)
+        __ret = !__ret;
+      return __ret;
+    }
+
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace __detail
+} // namespace
*/
typedef unsigned int syntax_option_type;
- /**
+ /**
* Specifies that the matching of regular expressions against a character
* sequence shall be performed without regard to case.
*/
* identical to syntax_option_type extended, except that C-style escape
* sequences are supported. These sequences are:
* \\\\, \\a, \\b, \\f, \\n, \\r, \\t , \\v, \\', ',
- * and \\ddd (where ddd is one, two, or three octal digits).
+ * and \\ddd (where ddd is one, two, or three octal digits).
*/
constexpr syntax_option_type awk = 1 << _S_awk;
/**
* Specifies that the grammar recognized by the regular expression engine is
* that used by POSIX utility grep when given the -E option in
- * IEEE Std 1003.1-2001. This option is identical to syntax_option_type
+ * IEEE Std 1003.1-2001. This option is identical to syntax_option_type
* extended, except that newlines are treated as whitespace.
*/
constexpr syntax_option_type egrep = 1 << _S_egrep;
* expression shall not match [last, last).
*/
constexpr match_flag_type match_not_eol = 1 << _S_not_eol;
-
+
/**
* The expression \\b is not matched against the sub-sequence
* [first,first).
*/
constexpr match_flag_type match_not_bow = 1 << _S_not_bow;
-
+
/**
* The expression \\b should not be matched against the sub-sequence
* [last,last).
*/
constexpr match_flag_type match_not_eow = 1 << _S_not_eow;
-
+
/**
* If more than one match is possible then any match is an acceptable
* result.
*/
constexpr match_flag_type match_any = 1 << _S_any;
-
+
/**
* The expression does not match an empty sequence.
*/
constexpr match_flag_type match_not_null = 1 << _S_not_null;
-
+
/**
* The expression only matches a sub-sequence that begins at first .
*/
constexpr match_flag_type match_continuous = 1 << _S_continuous;
-
+
/**
* --first is a valid iterator position. When this flag is set then the
* flags match_not_bol and match_not_bow are ignored by the regular
* operations all non-overlapping occurrences of the regular expression
* are located and replaced, and sections of the input that did not match
* the expression are copied unchanged to the output string.
- *
+ *
* Format strings (from ECMA-262 [15.5.4.11]):
* @li $$ The dollar-sign itself ($)
* @li $& The matched substring.
+++ /dev/null
-// class template regex -*- C++ -*-
-
-// Copyright (C) 2010-2013 Free Software Foundation, Inc.
-//
-// This file is part of the GNU ISO C++ Library. This library is free
-// software; you can redistribute it and/or modify it under the
-// terms of the GNU General Public License as published by the
-// Free Software Foundation; either version 3, or (at your option)
-// any later version.
-
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-
-// Under Section 7 of GPL version 3, you are granted additional
-// permissions described in the GCC Runtime Library Exception, version
-// 3.1, as published by the Free Software Foundation.
-
-// You should have received a copy of the GNU General Public License and
-// a copy of the GCC Runtime Library Exception along with this program;
-// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-// <http://www.gnu.org/licenses/>.
-
-/**
- * @file bits/regex_cursor.h
- * This is an internal header file, included by other library headers.
- * Do not attempt to use it directly. @headername{regex}
- */
-
-namespace std _GLIBCXX_VISIBILITY(default)
-{
-namespace __detail
-{
-_GLIBCXX_BEGIN_NAMESPACE_VERSION
-
- /**
- * @defgroup regex-detail Base and Implementation Classes
- * @ingroup regex
- * @{
- */
-
- /// ABC for pattern matching
- struct _PatternCursor
- {
- virtual ~_PatternCursor() { };
- virtual void _M_next() = 0;
- virtual void _M_prev() = 0;
- virtual bool _M_at_end() const = 0;
- };
-
- /// Provides a cursor into the specific target string.
- template<typename _FwdIterT>
- class _SpecializedCursor
- : public _PatternCursor
- {
- public:
- _SpecializedCursor(const _FwdIterT& __b, const _FwdIterT __e)
- : _M_b(__b), _M_c(__b), _M_e(__e)
- { }
-
- typename std::iterator_traits<_FwdIterT>::value_type
- _M_current() const
- { return *_M_c; }
-
- void
- _M_next()
- { ++_M_c; }
-
- void
- _M_prev()
- { --_M_c; }
-
- _FwdIterT
- _M_pos() const
- { return _M_c; }
-
- const _FwdIterT&
- _M_begin() const
- { return _M_b; }
-
- const _FwdIterT&
- _M_end() const
- { return _M_e; }
-
- bool
- _M_at_end() const
- { return _M_c == _M_e; }
-
- private:
- _FwdIterT _M_b;
- _FwdIterT _M_c;
- _FwdIterT _M_e;
- };
-
- // Helper function to create a cursor specialized for an iterator class.
- template<typename _FwdIterT>
- inline _SpecializedCursor<_FwdIterT>
- __cursor(const _FwdIterT& __b, const _FwdIterT __e)
- { return _SpecializedCursor<_FwdIterT>(__b, __e); }
-
- //@} regex-detail
-_GLIBCXX_END_NAMESPACE_VERSION
-} // namespace __detail
-} // namespace
* @name 5.3 Error Types
*/
//@{
-
+
enum error_type
{
_S_error_collate,
--- /dev/null
+// class template regex -*- C++ -*-
+
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+/**
+ * @file bits/regex_executor.h
+ * This is an internal header file, included by other library headers.
+ * Do not attempt to use it directly. @headername{regex}
+ */
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+ template<typename, typename>
+ class basic_regex;
+
+ template<typename, typename>
+ class match_results;
+_GLIBCXX_END_NAMESPACE_VERSION
+
+namespace __detail
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+ /**
+ * @addtogroup regex-detail
+ * @{
+ */
+
+  // Abstract base of the regex executors.  Holds the current position
+  // and end of the target sequence, the match_results to fill and the
+  // match flags.
+  template<typename _BiIter, typename _Alloc,
+           typename _CharT, typename _TraitsT>
+    class _Executor
+    {
+    public:
+      typedef match_results<_BiIter, _Alloc>   _ResultsT;
+      typedef regex_constants::match_flag_type _FlagT;
+
+      virtual
+      ~_Executor()
+      { }
+
+      // Set matched when string exactly match the pattern.
+      virtual bool
+      _M_match() = 0;
+
+      // Set matched when some prefix of the string matches the pattern.
+      virtual bool
+      _M_search_from_first() = 0;
+
+    protected:
+      typedef typename _NFA<_CharT, _TraitsT>::_SizeT _SizeT;
+
+      // Resizes __results to __size + 2 entries and marks every entry
+      // as unmatched.
+      _Executor(_BiIter    __begin,
+                _BiIter    __end,
+                _ResultsT& __results,
+                _FlagT     __flags,
+                _SizeT     __size)
+      : _M_current(__begin), _M_end(__end),
+        _M_results(__results), _M_flags(__flags)
+      {
+        __results.resize(__size + 2);
+        // Reset through at(): iterating by value would only modify
+        // copies, leaving stale "matched" flags in a reused object.
+        for (_SizeT __i = 0; __i < __size + 2; ++__i)
+          __results.at(__i).matched = false;
+      }
+
+      _BiIter    _M_current;
+      _BiIter    _M_end;
+      _ResultsT& _M_results;
+      _FlagT     _M_flags;
+    };
+
+  // Executor that walks the NFA depth-first via the recursive _M_dfs().
+  template<typename _BiIter, typename _Alloc,
+           typename _CharT, typename _TraitsT>
+    class _DFSExecutor
+    : public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
+    {
+    public:
+      using _BaseT    = _Executor<_BiIter, _Alloc, _CharT, _TraitsT>;
+      using _RegexT   = _NFA<_CharT, _TraitsT>;
+      using _ResultsT = typename _BaseT::_ResultsT;
+      using _FlagT    = regex_constants::match_flag_type;
+
+      _DFSExecutor(_BiIter        __begin,
+                   _BiIter        __end,
+                   _ResultsT&     __results,
+                   const _RegexT& __nfa,
+                   _FlagT         __flags)
+      : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
+        _M_nfa(__nfa)
+      { }
+
+      // Whole-sequence match, starting from the NFA start state.
+      bool
+      _M_match()
+      {
+        return _M_dfs<true>(_M_nfa._M_start());
+      }
+
+      // Prefix match, starting from the NFA start state.
+      bool
+      _M_search_from_first()
+      {
+        return _M_dfs<false>(_M_nfa._M_start());
+      }
+
+    private:
+      template<bool __match_mode>
+        bool
+        _M_dfs(_StateIdT __i);
+
+      const _RegexT& _M_nfa;
+    };
+
+  // This is essentially a variant of the Single-Source-Shortest-Path
+  // problem, where the matching result is the final distance and should
+  // be minimized.  Instead of Dijkstra's algorithm, the queue-optimized
+  // (BFS-like) Bellman-Ford algorithm is used:
+  // SPFA(http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm).
+  //
+  // Every entry of _M_covered saves the solution (grouping status) for
+  // one matching head.  When states transfer, solutions are compared
+  // and deduplicated (based on which greedy mode we have).
+  //
+  // Time complexity: O(_M_str_cur.size() * _M_nfa.size())
+  // Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
+  template<typename _BiIter, typename _Alloc,
+           typename _CharT, typename _TraitsT>
+    class _BFSExecutor
+    : public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
+    {
+    public:
+      using _BaseT      = _Executor<_BiIter, _Alloc, _CharT, _TraitsT>;
+      using _RegexT     = _NFA<_CharT, _TraitsT>;
+      using _ResultsT   = typename _BaseT::_ResultsT;
+      using _ResultsPtr = std::unique_ptr<_ResultsT>;
+      using _FlagT      = regex_constants::match_flag_type;
+
+      // Seeds _M_covered with the start state (when there is one) and
+      // expands it through _M_e_closure().
+      _BFSExecutor(_BiIter        __begin,
+                   _BiIter        __end,
+                   _ResultsT&     __results,
+                   const _RegexT& __nfa,
+                   _FlagT         __flags)
+      : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
+        _M_nfa(__nfa)
+      {
+        const auto __start = _M_nfa._M_start();
+        if (__start != _S_invalid_state_id)
+          _M_covered[__start] = _ResultsPtr(new _ResultsT(this->_M_results));
+        _M_e_closure();
+      }
+
+      // Whole-sequence match.
+      bool
+      _M_match()
+      {
+        return _M_main_loop<true>();
+      }
+
+      // Prefix match.
+      bool
+      _M_search_from_first()
+      {
+        return _M_main_loop<false>();
+      }
+
+    private:
+      template<bool __match_mode>
+        bool
+        _M_main_loop();
+
+      void
+      _M_e_closure();
+
+      void
+      _M_move();
+
+      bool
+      _M_match_less_than(_StateIdT __u, _StateIdT __v) const;
+
+      bool
+      _M_includes_some() const;
+
+      // Covered state id -> results recorded for that state.
+      std::map<_StateIdT, _ResultsPtr> _M_covered;
+      const _RegexT&                   _M_nfa;
+    };
+
+ //@} regex-detail
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace __detail
+} // namespace std
+
+#include <bits/regex_executor.tcc>
--- /dev/null
+// class template regex -*- C++ -*-
+
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+/**
+ * @file bits/regex_executor.tcc
+ * This is an internal header file, included by other library headers.
+ * Do not attempt to use it directly. @headername{regex}
+ */
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+namespace __detail
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+  // TODO: This is too slow. Try to compile the NFA to a DFA.
+  //
+  // Recursive depth-first walk of the NFA from state __i at the current
+  // input position.  Returns true if an accept state is reached; in
+  // match mode the whole input must have been consumed as well.
+  // _M_current is restored before returning from a match state, and the
+  // sub-match boundaries are written into _M_results along the way.
+  template<typename _BiIter, typename _Alloc,
+           typename _CharT, typename _TraitsT>
+  template<bool __match_mode>
+    bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
+    _M_dfs(_StateIdT __i)
+    {
+      auto& __current = this->_M_current;
+      auto& __end = this->_M_end;
+      auto& __results = this->_M_results;
+      if (__i == _S_invalid_state_id)
+        // This is not that certain. Needs deeper investigation.
+        return false;
+      const auto& __state = _M_nfa[__i];
+      bool __ret = false;
+      switch (__state._M_opcode)
+        {
+        case _S_opcode_alternative:
+          // Greedy mode by default. For non-greedy mode,
+          // swap _M_alt and _M_next.
+          // TODO: Add greedy mode option.
+          __ret = _M_dfs<__match_mode>(__state._M_alt)
+            || _M_dfs<__match_mode>(__state._M_next);
+          break;
+        case _S_opcode_subexpr_begin:
+          {
+            // Remember the previous start so that a failed branch does
+            // not leave its own position behind after backtracking.
+            auto& __sub = __results.at(__state._M_subexpr);
+            auto __saved = __sub.first;
+            __sub.first = __current;
+            __ret = _M_dfs<__match_mode>(__state._M_next);
+            if (!__ret)
+              __sub.first = __saved;
+          }
+          break;
+        case _S_opcode_subexpr_end:
+          __ret = _M_dfs<__match_mode>(__state._M_next);
+          __results.at(__state._M_subexpr).second = __current;
+          __results.at(__state._M_subexpr).matched = __ret;
+          break;
+        case _S_opcode_match:
+          if (__current != __end && __state._M_matches(*__current))
+            {
+              ++__current;
+              __ret = _M_dfs<__match_mode>(__state._M_next);
+              --__current;
+            }
+          break;
+        case _S_opcode_accept:
+          if (__match_mode)
+            __ret = __current == __end;
+          else
+            __ret = true;
+          break;
+        default:
+          _GLIBCXX_DEBUG_ASSERT(false);
+        }
+      return __ret;
+    }
+
+  // Drives the BFS executor over the input, one character per
+  // iteration: move on the current character, advance, then expand the
+  // epsilon closure.  In search mode (__match_mode == false) it returns
+  // as soon as a final state is covered; otherwise the answer is taken
+  // once the input is exhausted.
+  template<typename _BiIter, typename _Alloc,
+           typename _CharT, typename _TraitsT>
+  template<bool __match_mode>
+    bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
+    _M_main_loop()
+    {
+      for (;;)
+        {
+          if (this->_M_current == this->_M_end)
+            break;
+          if (!__match_mode && _M_includes_some())
+            return true;
+          _M_move();
+          ++this->_M_current;
+          _M_e_closure();
+        }
+      return _M_includes_some();
+    }
+
+  // The SPFA approach.
+  //
+  // Expands _M_covered along the transitions that consume no input.
+  // Every covered state is queued; a dequeued state records sub-match
+  // boundaries where applicable and offers a copy of its results to its
+  // successors, which are queued again whenever they are updated.
+  template<typename _BiIter, typename _Alloc,
+           typename _CharT, typename _TraitsT>
+    void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
+    _M_e_closure()
+    {
+      auto& __pos = this->_M_current;
+      std::queue<_StateIdT> __pending;
+      std::vector<bool> __queued(_M_nfa.size(), false);
+      for (auto& __entry : _M_covered)
+        {
+          __queued[__entry.first] = true;
+          __pending.push(__entry.first);
+        }
+
+      while (!__pending.empty())
+        {
+          auto __u = __pending.front();
+          __pending.pop();
+          __queued[__u] = false;
+          const auto& __state = _M_nfa[__u];
+
+          // Could be a member function, but it would need too many
+          // arguments.
+          auto __relax = [&](_StateIdT __v)
+          {
+            if (__v == _S_invalid_state_id)
+              return;
+            if (!_M_match_less_than(__u, __v))
+              return;
+            _M_covered[__v] = _ResultsPtr(new _ResultsT(*_M_covered[__u]));
+            // If a state is updated, its outgoing neighbors should be
+            // reconsidered too.  Push them to the queue.
+            if (!__queued[__v])
+              {
+                __queued[__v] = true;
+                __pending.push(__v);
+              }
+          };
+
+          switch (__state._M_opcode)
+            {
+            case _S_opcode_alternative:
+              __relax(__state._M_next);
+              __relax(__state._M_alt);
+              break;
+            case _S_opcode_subexpr_begin:
+              {
+                auto& __sub = _M_covered[__u]->at(__state._M_subexpr);
+                __sub.first = __pos;
+              }
+              __relax(__state._M_next);
+              break;
+            case _S_opcode_subexpr_end:
+              {
+                auto& __sub = _M_covered[__u]->at(__state._M_subexpr);
+                __sub.second = __pos;
+                __sub.matched = true;
+              }
+              __relax(__state._M_next);
+              break;
+            case _S_opcode_match:
+              // Consumes input; handled by _M_move().
+              break;
+            case _S_opcode_accept:
+              __relax(__state._M_next);
+              break;
+            default:
+              _GLIBCXX_DEBUG_ASSERT(false);
+            }
+        }
+    }
+
+  // Consumes the character at _M_current: every covered match state
+  // that accepts it hands its results over to its successor state.
+  // _M_covered is then replaced by the set of successors reached.
+  // The caller must ensure _M_current != _M_end.
+  template<typename _BiIter, typename _Alloc,
+           typename _CharT, typename _TraitsT>
+    void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
+    _M_move()
+    {
+      decltype(_M_covered) __next;
+      for (auto& __it : _M_covered)
+        {
+          const auto& __state = _M_nfa[__it.first];
+          // Test the successor id first so that no lookup is done for
+          // an invalid state.
+          if (__state._M_opcode == _S_opcode_match
+              && __state._M_matches(*this->_M_current)
+              && __state._M_next != _S_invalid_state_id
+              && _M_match_less_than(__it.first, __state._M_next))
+            // Qualified to keep argument-dependent lookup from picking
+            // up an unrelated move().
+            __next[__state._M_next] = std::move(__it.second);
+        }
+      _M_covered = std::move(__next);
+    }
+
+  // Decides whether the results recorded for state __u should replace
+  // those of state __v.  Currently this is true exactly when __u is
+  // covered: whether or not __v is already covered makes no difference
+  // yet.
+  template<typename _BiIter, typename _Alloc,
+           typename _CharT, typename _TraitsT>
+    bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
+    _M_match_less_than(_StateIdT __u, _StateIdT __v) const
+    {
+      const bool __u_covered = _M_covered.find(__u) != _M_covered.end();
+      if (!__u_covered)
+        return false;
+      // TODO: Greedy and Non-greedy support
+      // (this is where an already covered __v would be compared).
+      return true;
+    }
+
+  // Returns true if some final state of the NFA is currently covered.
+  // On success the results recorded for the first such state (in
+  // iteration order) are copied into _M_results.  Both containers are
+  // walked in step, comparing keys with operator<.
+  template<typename _BiIter, typename _Alloc,
+           typename _CharT, typename _TraitsT>
+    bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
+    _M_includes_some() const
+    {
+      auto& __finals = _M_nfa._M_final_states();
+      auto __fin = __finals.begin();
+      auto __cov = _M_covered.begin();
+      while (__fin != __finals.end() && __cov != _M_covered.end())
+        {
+          if (*__fin < __cov->first)
+            ++__fin;
+          else if (__cov->first < *__fin)
+            ++__cov;
+          else
+            {
+              // Same state id on both sides: a final state is covered.
+              this->_M_results = *__cov->second;
+              return true;
+            }
+        }
+      return false;
+    }
+
+  // Creates the executor used to run __re over [__b, __e), filling __m.
+  // Both paths currently construct a _DFSExecutor; the back-reference
+  // test is kept as the point where another executor could be selected.
+  template<typename _BiIter, typename _Alloc,
+           typename _CharT, typename _TraitsT>
+    std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
+    __get_executor(_BiIter __b,
+                   _BiIter __e,
+                   match_results<_BiIter, _Alloc>& __m,
+                   const basic_regex<_CharT, _TraitsT>& __re,
+                   regex_constants::match_flag_type __flags)
+    {
+      using _ExecutorPtr
+        = std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>;
+      using _DFSExecutorT = _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>;
+
+      auto __nfa = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
+        (__re._M_automaton);
+      if (__nfa->_M_has_backref)
+        return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__nfa, __flags));
+      return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__nfa, __flags));
+    }
+
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace __detail
+} // namespace
+++ /dev/null
-// class template regex -*- C++ -*-
-
-// Copyright (C) 2010-2013 Free Software Foundation, Inc.
-//
-// This file is part of the GNU ISO C++ Library. This library is free
-// software; you can redistribute it and/or modify it under the
-// terms of the GNU General Public License as published by the
-// Free Software Foundation; either version 3, or (at your option)
-// any later version.
-
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-
-// Under Section 7 of GPL version 3, you are granted additional
-// permissions described in the GCC Runtime Library Exception, version
-// 3.1, as published by the Free Software Foundation.
-
-// You should have received a copy of the GNU General Public License and
-// a copy of the GCC Runtime Library Exception along with this program;
-// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-// <http://www.gnu.org/licenses/>.
-
-/**
- * @file bits/regex_grep_matcher.h
- * This is an internal header file, included by other library headers.
- * Do not attempt to use it directly. @headername{regex}
- */
-
-namespace std _GLIBCXX_VISIBILITY(default)
-{
-_GLIBCXX_BEGIN_NAMESPACE_VERSION
-
- template<typename _BiIter>
- class sub_match;
-
- template<typename _Bi_iter, typename _Allocator>
- class match_results;
-
-_GLIBCXX_END_NAMESPACE_VERSION
-
-namespace __detail
-{
-_GLIBCXX_BEGIN_NAMESPACE_VERSION
-
- /**
- * @defgroup regex-detail Base and Implementation Classes
- * @ingroup regex
- * @{
- */
-
- /// A _Results facade specialized for wrapping a templated match_results.
- template<typename _FwdIterT, typename _Alloc>
- class _SpecializedResults
- : public _Results
- {
- public:
- _SpecializedResults(const _Automaton::_SizeT __size,
- const _SpecializedCursor<_FwdIterT>& __cursor,
- match_results<_FwdIterT, _Alloc>& __m);
-
- ~_SpecializedResults()
- {
- if (_M_managed)
- delete &_M_results;
- }
-
- private:
- _SpecializedResults(const _SpecializedResults& __rhs)
- : _M_results(*new match_results<_FwdIterT, _Alloc>(__rhs._M_results)),
- _M_managed(true)
- { }
-
- public:
- void
- _M_set_pos(int __i, int __j, const _PatternCursor& __pc);
-
- void
- _M_set_range(int __i, const _PatternCursor& __pc)
- {
- typedef const _SpecializedCursor<_FwdIterT>& _CursorT;
- _CursorT __c = static_cast<_CursorT>(__pc);
- _M_results.at(__i).first = __c._M_begin();
- _M_results.at(__i).second = __c._M_end();
- }
-
- void
- _M_set_matched(int __i, bool __is_matched)
- { _M_results.at(__i).matched = __is_matched; }
-
- std::unique_ptr<_Results>
- _M_clone() const
- { return unique_ptr<_Results>(new _SpecializedResults(*this)); }
-
- void
- _M_assign(const _Results& __rhs)
- {
- auto __r = static_cast<const _SpecializedResults*>(&__rhs);
- _M_results = __r->_M_results;
- }
-
- private:
- match_results<_FwdIterT, _Alloc>& _M_results;
- bool _M_managed;
- };
-
- template<typename _FwdIterT, typename _Alloc>
- _SpecializedResults<_FwdIterT, _Alloc>::
- _SpecializedResults(const _Automaton::_SizeT __size,
- const _SpecializedCursor<_FwdIterT>& __cursor,
- match_results<_FwdIterT, _Alloc>& __m)
- : _M_results(__m), _M_managed(false)
- {
- _M_results.clear();
- _M_results.reserve(__size + 2);
- _M_results.resize(__size);
- typename match_results<_FwdIterT, _Alloc>::value_type __sm;
- __sm.first = __sm.second = __cursor._M_begin();
- _M_results.push_back(__sm);
- __sm.first = __sm.second = __cursor._M_end();
- _M_results.push_back(__sm);
- }
-
- template<typename _FwdIterT, typename _Alloc>
- void
- _SpecializedResults<_FwdIterT, _Alloc>::
- _M_set_pos(int __i, int __j, const _PatternCursor& __pc)
- {
- typedef const _SpecializedCursor<_FwdIterT>& _CursorT;
- _CursorT __c = static_cast<_CursorT>(__pc);
- if (__j == 0)
- _M_results.at(__i).first = __c._M_pos();
- else
- _M_results.at(__i).second = __c._M_pos();
- }
-
- /// Executes a regular expression NFA/DFA over a range using a
- /// variant of the parallel execution algorithm featured in the grep
- /// utility, modified to use Laurikari tags.
- class _Grep_matcher
- {
- public:
- _Grep_matcher(_PatternCursor& __p,
- _Results& __r,
- const _AutomatonPtr& __automaton,
- regex_constants::match_flag_type __flags)
- : _M_nfa(static_pointer_cast<_Nfa>(__automaton)),
- _M_str_cur(__p), _M_results(__r)
- { }
-
- virtual
- ~_Grep_matcher()
- { }
-
- // Set matched when string exactly match the pattern.
- virtual bool
- _M_match() = 0;
-
- // Set matched when some prefix of the string matches the pattern.
- virtual bool
- _M_search_from_first() = 0;
-
- protected:
- const std::shared_ptr<_Nfa> _M_nfa;
- _PatternCursor& _M_str_cur;
- _Results& _M_results;
- };
-
- // Time complexity: exponential
- // Space complexity: O(_M_str_cur.size())
- // _M_dfs() take a state, along with current string cursor(_M_str_cur),
- // trying to match current state with current character.
- // Only _S_opcode_match will consume a character.
- class _DFSMatcher
- : public _Grep_matcher
- {
- public:
- _DFSMatcher(_PatternCursor& __p,
- _Results& __r,
- const _AutomatonPtr& __automaton,
- regex_constants::match_flag_type __flags)
- : _Grep_matcher(__p, __r, __automaton, __flags)
- { }
-
- bool
- _M_match()
- { return _M_dfs<true>(_M_nfa->_M_start()); }
-
- bool
- _M_search_from_first()
- { return _M_dfs<false>(_M_nfa->_M_start()); }
-
- private:
- template<bool __match_mode>
- bool
- _M_dfs(_StateIdT __i);
- };
-
- // It's essentially a variant of Single-Source-Shortest-Path problem, where,
- // the matching results is the final distance and should be minimized.
- // Instead of using Dijkstra Algorithm, I pick up the queue-optimizaed
- // (BFS-like) Bellman-Ford algorithm,
- // SPFA(http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm).
- //
- // Every entry of _M_current saves the solution(grouping status) for every
- // matching head. When states transfer, solutions will be compared and
- // deduplicated(based on which greedy mode we have).
- //
- // Time complexity: O(_M_str_cur.size() * _M_nfa.size())
- // Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
- class _BFSMatcher
- : public _Grep_matcher
- {
- public:
- _BFSMatcher(_PatternCursor& __p,
- _Results& __r,
- const _AutomatonPtr& __automaton,
- regex_constants::match_flag_type __flags)
- : _Grep_matcher(__p, __r, __automaton, __flags)
- {
- if (_M_nfa->_M_start() != _S_invalid_state_id)
- _M_current[_M_nfa->_M_start()] = _M_results._M_clone();
- _M_e_closure();
- }
-
- bool
- _M_match()
- { return _M_main_loop<true>(); }
-
- bool
- _M_search_from_first()
- { return _M_main_loop<false>(); }
-
- private:
- template<bool __match_mode>
- bool
- _M_main_loop();
-
- void
- _M_e_closure();
-
- void
- _M_move();
-
- bool
- _M_match_less_than(_StateIdT __u, _StateIdT __v) const;
-
- bool
- _M_includes_some() const;
-
- std::map<_StateIdT, std::unique_ptr<_Results>> _M_current;
- };
-
- //@} regex-detail
-_GLIBCXX_END_NAMESPACE_VERSION
-} // namespace __detail
-} // namespace std
-
-#include <bits/regex_grep_matcher.tcc>
+++ /dev/null
-// class template regex -*- C++ -*-
-
-// Copyright (C) 2010-2013 Free Software Foundation, Inc.
-//
-// This file is part of the GNU ISO C++ Library. This library is free
-// software; you can redistribute it and/or modify it under the
-// terms of the GNU General Public License as published by the
-// Free Software Foundation; either version 3, or (at your option)
-// any later version.
-
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-
-// Under Section 7 of GPL version 3, you are granted additional
-// permissions described in the GCC Runtime Library Exception, version
-// 3.1, as published by the Free Software Foundation.
-
-// You should have received a copy of the GNU General Public License and
-// a copy of the GCC Runtime Library Exception along with this program;
-// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-// <http://www.gnu.org/licenses/>.
-
-/**
- * @file bits/regex_grep_matcher.tcc
- * This is an internal header file, included by other library headers.
- * Do not attempt to use it directly. @headername{regex}
- */
-
-#include <regex>
-
-namespace std _GLIBCXX_VISIBILITY(default)
-{
-namespace __detail
-{
-_GLIBCXX_BEGIN_NAMESPACE_VERSION
-
- // TODO: This is too slow. Try to compile the NFA to a DFA.
- template<bool __match_mode>
- bool _DFSMatcher::
- _M_dfs(_StateIdT __i)
- {
- if (__i == _S_invalid_state_id)
- // This is not that certain. Need deeper investigate.
- return false;
- const auto& __state = (*_M_nfa)[__i];
- bool __ret = false;
- switch (__state._M_opcode)
- {
- case _S_opcode_alternative:
- // Greedy mode by default. For non-greedy mode,
- // swap _M_alt and _M_next.
- // TODO: Add greedy mode option.
- __ret = _M_dfs<__match_mode>(__state._M_alt)
- || _M_dfs<__match_mode>(__state._M_next);
- break;
- case _S_opcode_subexpr_begin:
- __state._M_tagger(_M_str_cur, _M_results);
- __ret = _M_dfs<__match_mode>(__state._M_next);
- break;
- case _S_opcode_subexpr_end:
- __state._M_tagger(_M_str_cur, _M_results);
- __ret = _M_dfs<__match_mode>(__state._M_next);
- _M_results._M_set_matched(__state._M_subexpr, __ret);
- break;
- case _S_opcode_match:
- if (!_M_str_cur._M_at_end() && __state._M_matches(_M_str_cur))
- {
- _M_str_cur._M_next();
- __ret = _M_dfs<__match_mode>(__state._M_next);
- _M_str_cur._M_prev();
- }
- break;
- case _S_opcode_accept:
- if (__match_mode)
- __ret = _M_str_cur._M_at_end();
- else
- __ret = true;
- break;
- default:
- _GLIBCXX_DEBUG_ASSERT(false);
- }
- return __ret;
- }
-
- template<bool __match_mode>
- bool _BFSMatcher::
- _M_main_loop()
- {
- while (!_M_str_cur._M_at_end())
- {
- if (!__match_mode)
- if (_M_includes_some())
- return true;
- _M_move();
- _M_str_cur._M_next();
- _M_e_closure();
- }
- return _M_includes_some();
- }
-
- // The SPFA approach.
- // FIXME: move it to src/c++11 when it's stable, and make it not inlined.
- inline
- void _BFSMatcher::
- _M_e_closure()
- {
- std::queue<_StateIdT> __q;
- std::vector<bool> __in_q(_M_nfa->size(), false);
- for (auto& __it : _M_current)
- {
- __in_q[__it.first] = true;
- __q.push(__it.first);
- }
- while (!__q.empty())
- {
- auto __u = __q.front();
- __q.pop();
- __in_q[__u] = false;
- const auto& __state = (*_M_nfa)[__u];
-
- // Can be implemented using method, but there're too much arguments.
- auto __add_visited_state = [&](_StateIdT __v)
- {
- if (__v == _S_invalid_state_id)
- return;
- if (_M_match_less_than(__u, __v))
- {
- _M_current[__v] = _M_current[__u]->_M_clone();
- // if a state is updated, it's outgoing neighbors should be
- // reconsidered too. Push them to the queue.
- if (!__in_q[__v])
- {
- __in_q[__v] = true;
- __q.push(__v);
- }
- }
- };
-
- switch (__state._M_opcode)
- {
- case _S_opcode_alternative:
- __add_visited_state(__state._M_next);
- __add_visited_state(__state._M_alt);
- break;
- case _S_opcode_subexpr_begin:
- __state._M_tagger(_M_str_cur, *_M_current[__u]);
- __add_visited_state(__state._M_next);
- break;
- case _S_opcode_subexpr_end:
- __state._M_tagger(_M_str_cur, *_M_current[__u]);
- _M_current[__u]->_M_set_matched(__state._M_subexpr, true);
- __add_visited_state(__state._M_next);
- break;
- case _S_opcode_match:
- break;
- case _S_opcode_accept:
- __add_visited_state(__state._M_next);
- break;
- default:
- _GLIBCXX_DEBUG_ASSERT(false);
- }
- }
- }
-
- // FIXME: move it to src/c++11 when it's stable, and make it not inlined.
- inline
- void _BFSMatcher::
- _M_move()
- {
- decltype(_M_current) __next;
- for (auto& __it : _M_current)
- {
- const auto& __state = (*_M_nfa)[__it.first];
- if (__state._M_opcode == _S_opcode_match
- && __state._M_matches(_M_str_cur))
- if (_M_match_less_than(__it.first, __state._M_next)
- && __state._M_next != _S_invalid_state_id)
- __next[__state._M_next] = __it.second->_M_clone();
- }
- _M_current = move(__next);
- }
-
- // FIXME: move it to src/c++11 when it's stable, and make it not inlined.
- inline
- bool _BFSMatcher::
- _M_match_less_than(_StateIdT __u, _StateIdT __v) const
- {
- if (_M_current.count(__u) == 0)
- return false;
- if (_M_current.count(__v) > 0)
- return true;
- // TODO: Greedy and Non-greedy support
- return true;
- }
-
- // FIXME: move it to src/c++11 when it's stable, and make it not inlined.
- inline
- bool _BFSMatcher::
- _M_includes_some() const
- {
- auto& __s = _M_nfa->_M_final_states();
- auto& __t = _M_current;
- if (__s.size() > 0 && __t.size() > 0)
- {
- auto __first = __s.begin();
- auto __second = __t.begin();
- while (__first != __s.end() && __second != __t.end())
- {
- if (*__first < __second->first)
- ++__first;
- else if (__second->first < *__first)
- ++__second;
- else
- {
- _M_results._M_assign(*__second->second);
- return true;
- }
- }
- }
- return false;
- }
-
- // FIXME: move it to src/c++11 when it's stable, and make it not inlined.
- inline
- std::unique_ptr<_Grep_matcher> _Nfa::
- _M_get_matcher(_PatternCursor& __p,
- _Results& __r,
- const _AutomatonPtr& __a,
- regex_constants::match_flag_type __flags)
- {
- if (_M_has_back_ref)
- return unique_ptr<_Grep_matcher>(
- new _DFSMatcher(__p, __r, __a, __flags));
- else
- return unique_ptr<_Grep_matcher>(
- new _BFSMatcher(__p, __r, __a, __flags));
- }
-
-_GLIBCXX_END_NAMESPACE_VERSION
-} // namespace __detail
-} // namespace
+++ /dev/null
-// class template regex -*- C++ -*-
-
-// Copyright (C) 2010-2013 Free Software Foundation, Inc.
-//
-// This file is part of the GNU ISO C++ Library. This library is free
-// software; you can redistribute it and/or modify it under the
-// terms of the GNU General Public License as published by the
-// Free Software Foundation; either version 3, or (at your option)
-// any later version.
-
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-
-// Under Section 7 of GPL version 3, you are granted additional
-// permissions described in the GCC Runtime Library Exception, version
-// 3.1, as published by the Free Software Foundation.
-
-// You should have received a copy of the GNU General Public License and
-// a copy of the GCC Runtime Library Exception along with this program;
-// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-// <http://www.gnu.org/licenses/>.
-
-/**
- * @file bits/regex_nfa.h
- * This is an internal header file, included by other library headers.
- * Do not attempt to use it directly. @headername{regex}
- */
-
-namespace std _GLIBCXX_VISIBILITY(default)
-{
-namespace __detail
-{
-_GLIBCXX_BEGIN_NAMESPACE_VERSION
-
- /**
- * @addtogroup regex-detail
- * @{
- */
-
- /// Provides a generic facade for a templated match_results.
- struct _Results
- {
- virtual
- ~_Results()
- { }
- virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
- virtual void _M_set_matched(int __i, bool __is_matched) = 0;
- virtual std::unique_ptr<_Results> _M_clone() const = 0;
- virtual void _M_assign(const _Results& __rhs) = 0;
- };
-
- class _Grep_matcher;
- class _Automaton;
-
- /// Generic shared pointer to an automaton.
- typedef std::shared_ptr<_Automaton> _AutomatonPtr;
-
- /// Base class for, um, automata. Could be an NFA or a DFA. Your choice.
- class _Automaton
- {
- public:
- typedef unsigned int _SizeT;
-
- public:
- virtual
- ~_Automaton() { }
-
- virtual _SizeT
- _M_sub_count() const = 0;
-
- virtual std::unique_ptr<_Grep_matcher>
- _M_get_matcher(_PatternCursor& __p,
- _Results& __r,
- const _AutomatonPtr& __automaton,
- regex_constants::match_flag_type __flags) = 0;
-
-#ifdef _GLIBCXX_DEBUG
- virtual std::ostream&
- _M_dot(std::ostream& __ostr) const = 0;
-#endif
- };
-
- /// Operation codes that define the type of transitions within the base NFA
- /// that represents the regular expression.
- enum _Opcode
- {
- _S_opcode_unknown = 0,
- _S_opcode_alternative = 1,
- _S_opcode_subexpr_begin = 4,
- _S_opcode_subexpr_end = 5,
- _S_opcode_match = 100,
- _S_opcode_accept = 255
- };
-
- /// Tags current state (for subexpr begin/end).
- typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
-
- /// Start state tag.
- template<typename _FwdIterT, typename _TraitsT>
- struct _StartTagger
- {
- explicit
- _StartTagger(int __i)
- : _M_index(__i)
- { }
-
- void
- operator()(const _PatternCursor& __pc, _Results& __r)
- { __r._M_set_pos(_M_index, 0, __pc); }
-
- int _M_index;
- };
-
- /// End state tag.
- template<typename _FwdIterT, typename _TraitsT>
- struct _EndTagger
- {
- explicit
- _EndTagger(int __i)
- : _M_index(__i)
- { }
-
- void
- operator()(const _PatternCursor& __pc, _Results& __r)
- { __r._M_set_pos(_M_index, 1, __pc); }
-
- int _M_index;
- };
-
- // TODO For now we use an all-in-one comparator. In the future there may be
- // optimizations based on regex_traits::translate and regex_transform.
- template<typename _InIterT, typename _TraitsT>
- struct _Comparator
- {
- typedef regex_constants::syntax_option_type _FlagT;
- typedef typename _TraitsT::char_type _CharT;
- typedef std::basic_string<_CharT> _StringT;
-
- _Comparator(_FlagT __flags, const _TraitsT& __traits)
- : _M_flags(__flags), _M_traits(__traits)
- { }
-
- bool
- _M_equ(_CharT __a, _CharT __b) const;
-
- bool
- _M_le(_CharT __a, _CharT __b) const;
-
- _FlagT _M_flags;
- _TraitsT _M_traits;
- };
-
- /// Indicates if current state matches cursor current.
- typedef std::function<bool (const _PatternCursor&)> _Matcher;
-
- /// Matches any character
- inline bool
- _AnyMatcher(const _PatternCursor&)
- { return true; }
-
- /// Matches a single character
- template<typename _InIterT, typename _TraitsT>
- struct _CharMatcher
- : public _Comparator<_InIterT, _TraitsT>
- {
- typedef _Comparator<_InIterT, _TraitsT> _BaseT;
- typedef typename _TraitsT::char_type _CharT;
- typedef regex_constants::syntax_option_type _FlagT;
-
- explicit
- _CharMatcher(_CharT __c, _FlagT __flags, const _TraitsT& __t)
- : _BaseT(__flags, __t), _M_c(__c)
- { }
-
- bool
- operator()(const _PatternCursor& __pc) const
- {
- typedef const _SpecializedCursor<_InIterT>& _CursorT;
- _CursorT __c = static_cast<_CursorT>(__pc);
- return this->_M_equ(__c._M_current(), _M_c);
- }
-
- _CharT _M_c;
- };
-
- /// Matches a character range (bracket expression)
- template<typename _InIterT, typename _TraitsT>
- struct _BracketMatcher
- : public _Comparator<_InIterT, _TraitsT>
- {
- typedef _Comparator<_InIterT, _TraitsT> _BaseT;
- typedef typename _TraitsT::char_class_type _CharClassT;
- typedef regex_constants::syntax_option_type _FlagT;
- typedef typename _TraitsT::char_type _CharT;
- typedef std::basic_string<_CharT> _StringT;
-
- explicit
- _BracketMatcher(bool __is_non_matching,
- _FlagT __flags,
- const _TraitsT& __t)
- : _BaseT(__flags, __t), _M_flags(__flags), _M_traits(__t),
- _M_is_non_matching(__is_non_matching), _M_class_set(0)
- { }
-
- bool
- operator()(const _PatternCursor& __pc) const;
-
- void
- _M_add_char(_CharT __c)
- { _M_char_set.push_back(__c); }
-
- void
- _M_add_collating_element(const _StringT& __s)
- {
- auto __st = _M_traits.lookup_collatename(&*__s.begin(), &*__s.end());
- if (__st.empty())
- __throw_regex_error(regex_constants::error_collate);
- // TODO: digraph
- _M_char_set.push_back(__st[0]);
- }
-
- void
- _M_add_equivalence_class(const _StringT& __s)
- {
- _M_add_character_class(
- _M_traits.transform_primary(&*__s.begin(), &*__s.end()));
- }
-
- void
- _M_add_character_class(const _StringT& __s)
- {
- auto __st = _M_traits.lookup_classname(
- &*__s.begin(), &*__s.end(), (_M_flags & regex_constants::icase));
- if (__st == 0)
- __throw_regex_error(regex_constants::error_ctype);
- _M_class_set |= __st;
- }
-
- void
- _M_make_range(_CharT __l, _CharT __r)
- {
- if (!this->_M_le(__l, __r))
- __throw_regex_error(regex_constants::error_range);
- _M_range_set.push_back(make_pair(__l, __r));
- }
-
- _FlagT _M_flags;
- _TraitsT _M_traits;
- bool _M_is_non_matching;
- std::vector<_CharT> _M_char_set;
- std::vector<pair<_CharT, _CharT>> _M_range_set;
- _CharClassT _M_class_set;
- };
-
- /// Identifies a state in the NFA.
- typedef int _StateIdT;
-
- /// The special case in which a state identifier is not an index.
- static const _StateIdT _S_invalid_state_id = -1;
-
-
- /**
- * @brief struct _State
- *
- * An individual state in an NFA
- *
- * In this case a "state" is an entry in the NFA definition coupled
- * with its outgoing transition(s). All states have a single outgoing
- * transition, except for accepting states (which have no outgoing
- * transitions) and alt states, which have two outgoing transitions.
- */
- struct _State
- {
- typedef int _OpcodeT;
-
- _OpcodeT _M_opcode; // type of outgoing transition
- _StateIdT _M_next; // outgoing transition
- _StateIdT _M_alt; // for _S_opcode_alternative
- unsigned int _M_subexpr; // for _S_opcode_subexpr_*
- _Tagger _M_tagger; // for _S_opcode_subexpr_*
- _Matcher _M_matches; // for _S_opcode_match
-
- explicit _State(_OpcodeT __opcode)
- : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
- { }
-
- _State(const _Matcher& __m)
- : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m)
- { }
-
- _State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t)
- : _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s),
- _M_tagger(__t)
- { }
-
- _State(_StateIdT __next, _StateIdT __alt)
- : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
- { }
-
-#ifdef _GLIBCXX_DEBUG
- std::ostream&
- _M_print(std::ostream& ostr) const;
-
- // Prints graphviz dot commands for state.
- std::ostream&
- _M_dot(std::ostream& __ostr, _StateIdT __id) const;
-#endif
- };
-
-
- /// The Grep Matcher works on sets of states. Here are sets of states.
- typedef std::set<_StateIdT> _StateSet;
-
- /**
- * @brief struct _Nfa
- *
- * A collection of all states making up an NFA.
- *
- * An NFA is a 4-tuple M = (K, S, s, F), where
- * K is a finite set of states,
- * S is the alphabet of the NFA,
- * s is the initial state,
- * F is a set of final (accepting) states.
- *
- * This NFA class is templated on S, a type that will hold values of the
- * underlying alphabet (without regard to semantics of that alphabet). The
- * other elements of the tuple are generated during construction of the NFA
- * and are available through accessor member functions.
- */
- class _Nfa
- : public _Automaton, public std::vector<_State>
- {
- public:
- typedef _State _StateT;
- typedef unsigned int _SizeT;
- typedef regex_constants::syntax_option_type _FlagT;
-
- _Nfa(_FlagT __f)
- : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
- // TODO: BFS by default. Your choice. Need to be set by the compiler.
- _M_has_back_ref(false)
- { }
-
- ~_Nfa()
- { }
-
- _FlagT
- _M_options() const
- { return _M_flags; }
-
- _StateIdT
- _M_start() const
- { return _M_start_state; }
-
- const _StateSet&
- _M_final_states() const
- { return _M_accepting_states; }
-
- _SizeT
- _M_sub_count() const
- { return _M_subexpr_count; }
-
- _StateIdT
- _M_insert_accept()
- {
- this->push_back(_StateT(_S_opcode_accept));
- _M_accepting_states.insert(this->size()-1);
- return this->size()-1;
- }
-
- _StateIdT
- _M_insert_alt(_StateIdT __next, _StateIdT __alt)
- {
- this->push_back(_StateT(__next, __alt));
- return this->size()-1;
- }
-
- _StateIdT
- _M_insert_matcher(_Matcher __m)
- {
- this->push_back(_StateT(__m));
- return this->size()-1;
- }
-
- _StateIdT
- _M_insert_subexpr_begin(const _Tagger& __t)
- {
- this->push_back(_StateT(_S_opcode_subexpr_begin, _M_subexpr_count++,
- __t));
- return this->size()-1;
- }
-
- _StateIdT
- _M_insert_subexpr_end(unsigned int __i, const _Tagger& __t)
- {
- this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t));
- return this->size()-1;
- }
-
- void
- _M_set_back_ref(bool __b)
- { _M_has_back_ref = __b; }
-
- std::unique_ptr<_Grep_matcher>
- _M_get_matcher(_PatternCursor& __p,
- _Results& __r,
- const _AutomatonPtr& __automaton,
- regex_constants::match_flag_type __flags);
-
-#ifdef _GLIBCXX_DEBUG
- std::ostream&
- _M_dot(std::ostream& __ostr) const;
-#endif
-
- private:
- _FlagT _M_flags;
- _StateIdT _M_start_state;
- _StateSet _M_accepting_states;
- _SizeT _M_subexpr_count;
- bool _M_has_back_ref;
- };
-
- /// Describes a sequence of one or more %_State, its current start
- /// and end(s). This structure contains fragments of an NFA during
- /// construction.
- class _StateSeq
- {
- public:
- // Constructs a single-node sequence
- _StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id)
- : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
- { }
- // Constructs a split sequence from two other sequencces
- _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
- : _M_nfa(__e1._M_nfa),
- _M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
- _M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
- { }
-
- // Constructs a split sequence from a single sequence
- _StateSeq(const _StateSeq& __e, _StateIdT __id)
- : _M_nfa(__e._M_nfa),
- _M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
- _M_end1(__id), _M_end2(__e._M_end1)
- { }
-
- // Constructs a copy of a %_StateSeq
- _StateSeq(const _StateSeq& __rhs)
- : _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
- _M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
- { }
-
-
- _StateSeq& operator=(const _StateSeq& __rhs);
-
- _StateIdT
- _M_front() const
- { return _M_start; }
-
- // Extends a sequence by one.
- void
- _M_push_back(_StateIdT __id);
-
- // Extends and maybe joins a sequence.
- void
- _M_append(_StateIdT __id);
-
- void
- _M_append(_StateSeq& __rhs);
-
- // Clones an entire sequence.
- _StateIdT
- _M_clone();
-
- private:
- _Nfa& _M_nfa;
- _StateIdT _M_start;
- _StateIdT _M_end1;
- _StateIdT _M_end2;
-
- };
-
- //@} regex-detail
-_GLIBCXX_END_NAMESPACE_VERSION
-} // namespace __detail
-} // namespace std
-
-#include <bits/regex_nfa.tcc>
-
+++ /dev/null
-// class template regex -*- C++ -*-
-
-// Copyright (C) 2010-2013 Free Software Foundation, Inc.
-//
-// This file is part of the GNU ISO C++ Library. This library is free
-// software; you can redistribute it and/or modify it under the
-// terms of the GNU General Public License as published by the
-// Free Software Foundation; either version 3, or (at your option)
-// any later version.
-
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-
-// Under Section 7 of GPL version 3, you are granted additional
-// permissions described in the GCC Runtime Library Exception, version
-// 3.1, as published by the Free Software Foundation.
-
-// You should have received a copy of the GNU General Public License and
-// a copy of the GCC Runtime Library Exception along with this program;
-// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-// <http://www.gnu.org/licenses/>.
-
-/**
- * @file bits/regex_nfa.tcc
- * This is an internal header file, included by other library headers.
- * Do not attempt to use it directly. @headername{regex}
- */
-#include <regex>
-
-namespace std _GLIBCXX_VISIBILITY(default)
-{
-namespace __detail
-{
-_GLIBCXX_BEGIN_NAMESPACE_VERSION
-
- template<typename _InIterT, typename _TraitsT>
- bool _BracketMatcher<_InIterT, _TraitsT>::
- operator()(const _PatternCursor& __pc) const
- {
- typedef const _SpecializedCursor<_InIterT>& _CursorT;
- _CursorT __c = static_cast<_CursorT>(__pc);
- _CharT __ch = __c._M_current();
- bool __ret = false;
- for (auto __c : _M_char_set)
- if (this->_M_equ(__c, __ch))
- {
- __ret = true;
- break;
- }
- if (!__ret && _M_traits.isctype(__ch, _M_class_set))
- __ret = true;
- else
- {
- for (auto& __it : _M_range_set)
- if (this->_M_le(__it.first, __ch) && this->_M_le(__ch, __it.second))
- {
- __ret = true;
- break;
- }
- }
- if (_M_is_non_matching)
- __ret = !__ret;
- return __ret;
- }
-
- template<typename _InIterT, typename _TraitsT>
- bool _Comparator<_InIterT, _TraitsT>::
- _M_equ(_CharT __a, _CharT __b) const
- {
- if (_M_flags & regex_constants::icase)
- return _M_traits.translate_nocase(__a)
- == _M_traits.translate_nocase(__b);
- if (_M_flags & regex_constants::collate)
- return _M_traits.translate(__a) == _M_traits.translate(__b);
- return __a == __b;
- }
-
- template<typename _InIterT, typename _TraitsT>
- bool _Comparator<_InIterT, _TraitsT>::
- _M_le(_CharT __a, _CharT __b) const
- {
- _StringT __str1 = _StringT(1,
- _M_flags & regex_constants::icase
- ? _M_traits.translate_nocase(__a)
- : _M_traits.translate(__a));
- _StringT __str2 = _StringT(1,
- _M_flags & regex_constants::icase
- ? _M_traits.translate_nocase(__b)
- : _M_traits.translate(__b));
- return _M_traits.transform(__str1.begin(), __str1.end())
- <= _M_traits.transform(__str2.begin(), __str2.end());
- }
-
-#ifdef _GLIBCXX_DEBUG
-inline std::ostream& _State::
-_M_print(std::ostream& ostr) const
-{
- switch (_M_opcode)
- {
- case _S_opcode_alternative:
- ostr << "alt next=" << _M_next << " alt=" << _M_alt;
- break;
- case _S_opcode_subexpr_begin:
- ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr;
- break;
- case _S_opcode_subexpr_end:
- ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr;
- break;
- case _S_opcode_match:
- ostr << "match next=" << _M_next;
- break;
- case _S_opcode_accept:
- ostr << "accept next=" << _M_next;
- break;
- default:
- ostr << "unknown next=" << _M_next;
- break;
- }
- return ostr;
-}
-
-// Prints graphviz dot commands for state.
-inline std::ostream& _State::
-_M_dot(std::ostream& __ostr, _StateIdT __id) const
-{
- switch (_M_opcode)
- {
- case _S_opcode_alternative:
- __ostr << __id << " [label=\"" << __id << "\\nALT\"];\n"
- << __id << " -> " << _M_next
- << " [label=\"epsilon\", tailport=\"s\"];\n"
- << __id << " -> " << _M_alt
- << " [label=\"epsilon\", tailport=\"n\"];\n";
- break;
- case _S_opcode_subexpr_begin:
- __ostr << __id << " [label=\"" << __id << "\\nSBEGIN "
- << _M_subexpr << "\"];\n"
- << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
- break;
- case _S_opcode_subexpr_end:
- __ostr << __id << " [label=\"" << __id << "\\nSEND "
- << _M_subexpr << "\"];\n"
- << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
- break;
- case _S_opcode_match:
- __ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n"
- << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
- break;
- case _S_opcode_accept:
- __ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ;
- break;
- default:
- __ostr << __id << " [label=\"" << __id << "\\nUNK\"];\n"
- << __id << " -> " << _M_next << " [label=\"?\"];\n";
- break;
- }
- return __ostr;
-}
-
-inline std::ostream& _Nfa::
-_M_dot(std::ostream& __ostr) const
-{
- __ostr << "digraph _Nfa {\n"
- << " rankdir=LR;\n";
- for (unsigned int __i = 0; __i < this->size(); ++__i)
- { this->at(__i)._M_dot(__ostr, __i); }
- __ostr << "}\n";
- return __ostr;
-}
-#endif
-
-inline _StateSeq& _StateSeq::
-operator=(const _StateSeq& __rhs)
-{
- _M_start = __rhs._M_start;
- _M_end1 = __rhs._M_end1;
- _M_end2 = __rhs._M_end2;
- return *this;
-}
-
-inline void _StateSeq::
-_M_push_back(_StateIdT __id)
-{
- if (_M_end1 != _S_invalid_state_id)
- _M_nfa[_M_end1]._M_next = __id;
- _M_end1 = __id;
-}
-
-inline void _StateSeq::
-_M_append(_StateIdT __id)
-{
- if (_M_end2 != _S_invalid_state_id)
- {
- if (_M_end2 == _M_end1)
- _M_nfa[_M_end2]._M_alt = __id;
- else
- _M_nfa[_M_end2]._M_next = __id;
- _M_end2 = _S_invalid_state_id;
- }
- if (_M_end1 != _S_invalid_state_id)
- _M_nfa[_M_end1]._M_next = __id;
- _M_end1 = __id;
-}
-
-inline void _StateSeq::
-_M_append(_StateSeq& __rhs)
-{
- if (_M_end2 != _S_invalid_state_id)
- {
- if (_M_end2 == _M_end1)
- _M_nfa[_M_end2]._M_alt = __rhs._M_start;
- else
- _M_nfa[_M_end2]._M_next = __rhs._M_start;
- _M_end2 = _S_invalid_state_id;
- }
- if (__rhs._M_end2 != _S_invalid_state_id)
- _M_end2 = __rhs._M_end2;
- if (_M_end1 != _S_invalid_state_id)
- _M_nfa[_M_end1]._M_next = __rhs._M_start;
- _M_end1 = __rhs._M_end1;
-}
-
-// @todo implement this function.
-inline _StateIdT _StateSeq::
-_M_clone()
-{ return 0; }
-
-_GLIBCXX_END_NAMESPACE_VERSION
-} // namespace __detail
-} // namespace
#include <utility>
#include <vector>
-#include <bits/range_access.h>
#include <bits/regex_constants.h>
#include <bits/regex_error.h>
-#include <bits/regex_cursor.h>
-#include <bits/regex_nfa.h>
+#include <bits/regex_automaton.h>
#include <bits/regex_compiler.h>
-#include <bits/regex_grep_matcher.h>
+#include <bits/regex_executor.h>
#include <bits/regex.h>
#endif // C++11
regex_constants::match_flag_type __flags
= regex_constants::match_default)
{
- __detail::_AutomatonPtr __a = __re._M_get_automaton();
- __detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
- __detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
- __detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
- VERIFY( dynamic_cast<__detail::_DFSMatcher *>(
- &*__a->_M_get_matcher(__cs, __r, __a, __flags)) != nullptr );
+ VERIFY( (dynamic_cast
+ <__detail::_DFSExecutor<_Bi_iter, _Alloc, _Ch_type, _Rx_traits>*>
+ (&*__detail::__get_executor(__s, __e, __m, __re, __flags))
+ != nullptr) );
}
void