+2013-09-14 Tim Shen <timshen91@gmail.com>
+
+ * include/bits/regex.h (regex_match<>, regex_search<>):
+ Change regex_executor caller. Now use their return value instead
+ of checking __m[0].matched to find out if it's successful.
+ (regex_search<>): Move the search logic to regex_executor.
+ * include/bits/regex_automaton.h: Add some new _Opcode. Refactor
+ _NFA::_M_insert_*.
+ * include/bits/regex_automaton.tcc: Add DEBUG dump for new
+ _Opcode. Refactor _NFA::_M_insert_*.
+ * include/bits/regex_compiler.h (_Compiler<>::_M_get_nfa):
+ Use make_shared instead of constructing by hand.
+ * include/bits/regex_compiler.tcc: Implement _Compiler<>::_M_assertion.
+ * include/bits/regex_constants.h: Fix indentation and line breaking.
+ * include/bits/regex_executor.h: Add _ResultsEntry to support
+ greedy/ungreedy mode. Move regex_search logic here.
+ * include/bits/regex_executor.tcc: Implement assertions and
+ greedy/ungreedy matching.
+ * include/bits/regex_scanner.h: Add a new token _S_token_ungreedy.
+ * include/bits/regex_scanner.tcc: Parse a new token _S_token_ungreedy.
+ * testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc: New.
+ * testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc: New.
+ * testsuite/28_regex/algorithms/regex_search/ecma/string_01.cc:
+ Fix comment.
+
2013-09-13 Paolo Carlini <paolo.carlini@oracle.com>
PR libstdc++/58415
template<typename, typename, typename, typename>
friend class __detail::_BFSExecutor;
- template<typename _Bp, typename _Ap, typename _Ch_type, typename _Rx_traits>
+ template<typename _Bp, typename _Ap,
+ typename _Ch_type, typename _Rx_traits>
friend bool
regex_match(_Bp, _Bp, match_results<_Bp, _Ap>&,
const basic_regex<_Ch_type,
_Rx_traits>&,
regex_constants::match_flag_type);
- template<typename _Bp, typename _Ap, typename _Ch_type, typename _Rx_traits>
+ template<typename _Bp, typename _Ap,
+ typename _Ch_type, typename _Rx_traits>
friend bool
regex_search(_Bp, _Bp, match_results<_Bp, _Ap>&,
const basic_regex<_Ch_type,
{
if (__re._M_automaton == nullptr)
return false;
- __detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match();
- if (__m.size() > 0 && __m[0].matched)
+ if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match())
{
for (auto __it : __m)
if (!__it.matched)
{
if (__re._M_automaton == nullptr)
return false;
- auto __cur = __first;
- // Continue when __cur == __last
- do
+ if (__detail::__get_executor(__first, __last, __m, __re, __flags)
+ ->_M_search())
{
- __detail::__get_executor(__cur, __last, __m, __re, __flags)
- ->_M_search_from_first();
- if (__m.size() > 0 && __m[0].matched)
- {
- for (auto __it : __m)
- if (!__it.matched)
- __it.first = __it.second = __last;
- __m.at(__m.size()).first = __first;
- __m.at(__m.size()).second = __m[0].first;
- __m.at(__m.size()+1).first = __m[0].second;
- __m.at(__m.size()+1).second = __last;
- __m.at(__m.size()).matched =
- (__m.prefix().first != __m.prefix().second);
- __m.at(__m.size()+1).matched =
- (__m.suffix().first != __m.suffix().second);
- return true;
- }
+ for (auto __it : __m)
+ if (!__it.matched)
+ __it.first = __it.second = __last;
+ __m.at(__m.size()).first = __first;
+ __m.at(__m.size()).second = __m[0].first;
+ __m.at(__m.size()+1).first = __m[0].second;
+ __m.at(__m.size()+1).second = __last;
+ __m.at(__m.size()).matched =
+ (__m.prefix().first != __m.prefix().second);
+ __m.at(__m.size()+1).matched =
+ (__m.suffix().first != __m.suffix().second);
+ return true;
}
- while (__cur++ != __last);
return false;
}
/// that represents the regular expression.
enum _Opcode
{
- _S_opcode_unknown = 0,
- _S_opcode_alternative = 1,
- _S_opcode_backref = 2,
- _S_opcode_subexpr_begin = 4,
- _S_opcode_subexpr_end = 5,
- _S_opcode_dummy = 6,
- _S_opcode_match = 100,
- _S_opcode_accept = 255
+ _S_opcode_unknown,
+ _S_opcode_alternative,
+ _S_opcode_backref,
+ _S_opcode_line_begin_assertion,
+ _S_opcode_line_end_assertion,
+ _S_opcode_word_boundry,
+ _S_opcode_subexpr_lookahead,
+ _S_opcode_subexpr_begin,
+ _S_opcode_subexpr_end,
+ _S_opcode_dummy,
+ _S_opcode_match,
+ _S_opcode_accept,
};
template<typename _CharT, typename _TraitsT>
_StateIdT _M_next; // outgoing transition
union // Since they are mutually exclusive.
{
- _StateIdT _M_alt; // for _S_opcode_alternative
unsigned int _M_subexpr; // for _S_opcode_subexpr_*
unsigned int _M_backref_index; // for _S_opcode_backref
+ struct
+ {
+ // for _S_opcode_alternative.
+ _StateIdT _M_quant_index;
+ // for _S_opcode_alternative or _S_opcode_subexpr_lookahead
+ _StateIdT _M_alt;
+ // for _S_opcode_word_boundry or _S_opcode_subexpr_lookahead or
+ // quantifiers(ungreedy if set true)
+ bool _M_neg;
+ };
};
- _MatcherT _M_matches; // for _S_opcode_match
+ _MatcherT _M_matches; // for _S_opcode_match
explicit _State(_OpcodeT __opcode)
: _M_opcode(__opcode), _M_next(_S_invalid_state_id)
{ }
- _State(const _MatcherT& __m)
- : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id),
- _M_matches(__m)
- { }
-
- _State(_OpcodeT __opcode, unsigned __index)
- : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
- {
- if (__opcode == _S_opcode_subexpr_begin
- || __opcode == _S_opcode_subexpr_end)
- _M_subexpr = __index;
- else if (__opcode == _S_opcode_backref)
- _M_backref_index = __index;
- }
-
- _State(_StateIdT __next, _StateIdT __alt)
- : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
- { }
-
#ifdef _GLIBCXX_DEBUG
std::ostream&
_M_print(std::ostream& ostr) const;
_NFA(_FlagT __f)
: _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
- _M_has_backref(false)
+ _M_has_backref(false), _M_quant_count(0)
{ }
_FlagT
_StateIdT
_M_insert_accept()
{
- this->push_back(_StateT(_S_opcode_accept));
- _M_accepting_states.insert(this->size()-1);
- return this->size()-1;
+ auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
+ _M_accepting_states.insert(__ret);
+ return __ret;
}
_StateIdT
- _M_insert_alt(_StateIdT __next, _StateIdT __alt)
+ _M_insert_alt(_StateIdT __next, _StateIdT __alt, bool __neg)
{
- this->push_back(_StateT(__next, __alt));
- return this->size()-1;
+ _StateT __tmp(_S_opcode_alternative);
+ // It labels every quantifier to make greedy comparison easier in BFS
+ // approach.
+ __tmp._M_quant_index = _M_quant_count++;
+ __tmp._M_next = __next;
+ __tmp._M_alt = __alt;
+ __tmp._M_neg = __neg;
+ return _M_insert_state(__tmp);
}
_StateIdT
_M_insert_matcher(_MatcherT __m)
{
- this->push_back(_StateT(__m));
- return this->size()-1;
+ _StateT __tmp(_S_opcode_match);
+ __tmp._M_matches = __m;
+ return _M_insert_state(__tmp);
}
_StateIdT
{
auto __id = _M_subexpr_count++;
_M_paren_stack.push_back(__id);
- this->push_back(_StateT(_S_opcode_subexpr_begin, __id));
- return this->size()-1;
+ _StateT __tmp(_S_opcode_subexpr_begin);
+ __tmp._M_subexpr = __id;
+ return _M_insert_state(__tmp);
}
_StateIdT
_M_insert_subexpr_end()
{
- this->push_back(_StateT(_S_opcode_subexpr_end, _M_paren_stack.back()));
+ _StateT __tmp(_S_opcode_subexpr_end);
+ __tmp._M_subexpr = _M_paren_stack.back();
_M_paren_stack.pop_back();
- return this->size()-1;
+ return _M_insert_state(__tmp);
}
_StateIdT
_M_insert_backref(unsigned int __index);
_StateIdT
- _M_insert_dummy()
+ _M_insert_line_begin()
+ { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion)); }
+
+ _StateIdT
+ _M_insert_line_end()
+ { return _M_insert_state(_StateT(_S_opcode_line_end_assertion)); }
+
+ _StateIdT
+ _M_insert_word_bound(bool __neg)
{
- this->push_back(_StateT(_S_opcode_dummy));
- return this->size()-1;
+ _StateT __tmp(_S_opcode_word_boundry);
+ __tmp._M_neg = __neg;
+ return _M_insert_state(__tmp);
}
_StateIdT
+ _M_insert_lookahead(_StateIdT __alt, bool __neg)
+ {
+ _StateT __tmp(_S_opcode_subexpr_lookahead);
+ __tmp._M_alt = __alt;
+ __tmp._M_neg = __neg;
+ return _M_insert_state(__tmp);
+ }
+
+ _StateIdT
+ _M_insert_dummy()
+ { return _M_insert_state(_StateT(_S_opcode_dummy)); }
+
+ _StateIdT
_M_insert_state(_StateT __s)
{
this->push_back(__s);
_FlagT _M_flags;
_StateIdT _M_start_state;
_SizeT _M_subexpr_count;
+ _SizeT _M_quant_count;
bool _M_has_backref;
};
<< __id << " -> " << _M_alt
<< " [label=\"epsilon\", tailport=\"n\"];\n";
break;
+ case _S_opcode_backref:
+ __ostr << __id << " [label=\"" << __id << "\\nBACKREF "
+ << _M_subexpr << "\"];\n"
+ << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
+ break;
+ case _S_opcode_line_begin_assertion:
+ __ostr << __id << " [label=\"" << __id << "\\nLINE_BEGIN \"];\n"
+ << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
+ break;
+ case _S_opcode_line_end_assertion:
+ __ostr << __id << " [label=\"" << __id << "\\nLINE_END \"];\n"
+ << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
+ break;
+ case _S_opcode_word_boundry:
+ __ostr << __id << " [label=\"" << __id << "\\nWORD_BOUNDRY "
+ << _M_neg << "\"];\n"
+ << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
+ break;
+ case _S_opcode_subexpr_lookahead:
+ __ostr << __id << " [label=\"" << __id << "\\nLOOK_AHEAD\"];\n"
+ << __id << " -> " << _M_next
+ << " [label=\"epsilon\", tailport=\"s\"];\n"
+ << __id << " -> " << _M_alt
+ << " [label=\"<assert>\", tailport=\"n\"];\n";
+ break;
case _S_opcode_subexpr_begin:
__ostr << __id << " [label=\"" << __id << "\\nSBEGIN "
<< _M_subexpr << "\"];\n"
<< _M_subexpr << "\"];\n"
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
break;
- case _S_opcode_backref:
- __ostr << __id << " [label=\"" << __id << "\\nBACKREF "
- << _M_subexpr << "\"];\n"
- << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
+ case _S_opcode_dummy:
break;
case _S_opcode_match:
__ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n"
case _S_opcode_accept:
__ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ;
break;
- case _S_opcode_dummy:
- break;
default:
_GLIBCXX_DEBUG_ASSERT(false);
break;
if (__index == __it)
__throw_regex_error(regex_constants::error_backref);
_M_has_backref = true;
- this->push_back(_StateT(_S_opcode_backref, __index));
- return this->size()-1;
+ _StateT __tmp(_S_opcode_backref);
+ __tmp._M_backref_index = __index;
+ return _M_insert_state(__tmp);
}
template<typename _CharT, typename _TraitsT>
for (auto& __it : *this)
{
while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode
- == _S_opcode_dummy)
+ == _S_opcode_dummy)
__it._M_next = (*this)[__it._M_next]._M_next;
if (__it._M_opcode == _S_opcode_alternative)
while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode
std::shared_ptr<_RegexT>
_M_get_nfa() const
- { return std::shared_ptr<_RegexT>(new _RegexT(_M_nfa)); }
+ { return make_shared<_RegexT>(_M_nfa); }
private:
typedef _Scanner<_FwdIter> _ScannerT;
__alt2._M_append(__end);
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_alt(__alt1._M_start,
- __alt2._M_start),
+ __alt2._M_start, false),
__end));
}
}
return false;
}
- // TODO Implement it.
template<typename _FwdIter, typename _CharT, typename _TraitsT>
bool
_Compiler<_FwdIter, _CharT, _TraitsT>::
_M_assertion()
{
- // temporary place holders.
if (_M_match_token(_ScannerT::_S_token_line_begin))
- _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
+ _M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
+ _M_insert_line_begin()));
else if (_M_match_token(_ScannerT::_S_token_line_end))
- _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
+ _M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
+ _M_insert_line_end()));
else if (_M_match_token(_ScannerT::_S_token_word_bound))
- _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
- else if (_M_match_token(_ScannerT::_S_token_neg_word_bound))
- _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
+ // _M_value[0] == 'n' means it's negative, say "not word boundary".
+ _M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
+ _M_insert_word_bound(_M_value[0] == 'n')));
else if (_M_match_token(_ScannerT::_S_token_subexpr_lookahead_begin))
- _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
- else if (_M_match_token(_ScannerT::_S_token_subexpr_neg_lookahead_begin))
- _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
+ {
+ auto __neg = _M_value[0] == 'n';
+ this->_M_disjunction();
+ if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
+ __throw_regex_error(regex_constants::error_paren);
+ auto __tmp = _M_pop();
+ __tmp._M_append(_M_nfa._M_insert_accept());
+ _M_stack.push(
+ _StateSeqT(
+ _M_nfa,
+ _M_nfa._M_insert_lookahead(__tmp._M_start, __neg)));
+ }
else
return false;
return true;
_Compiler<_FwdIter, _CharT, _TraitsT>::
_M_quantifier()
{
- if (_M_match_token(_ScannerT::_S_token_closure0))
+ bool __neg = regex_constants::ECMAScript;
+ auto __init = [this, &__neg]()
{
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat);
+ __neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
+ };
+ if (_M_match_token(_ScannerT::_S_token_closure0))
+ {
+ __init();
auto __e = _M_pop();
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id,
- __e._M_start));
+ __e._M_start, __neg));
__e._M_append(__r);
_M_stack.push(__r);
}
else if (_M_match_token(_ScannerT::_S_token_closure1))
{
- if (_M_stack.empty())
- __throw_regex_error(regex_constants::error_badrepeat);
+ __init();
auto __e = _M_pop();
- __e._M_append(_M_nfa._M_insert_alt(_S_invalid_state_id, __e._M_start));
+ __e._M_append(_M_nfa._M_insert_alt(_S_invalid_state_id, __e._M_start,
+ __neg));
_M_stack.push(__e);
}
else if (_M_match_token(_ScannerT::_S_token_opt))
{
- if (_M_stack.empty())
- __throw_regex_error(regex_constants::error_badrepeat);
+ __init();
auto __e = _M_pop();
auto __end = _M_nfa._M_insert_dummy();
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id,
- __e._M_start));
+ __e._M_start, __neg));
__e._M_append(__end);
__r._M_append(__end);
_M_stack.push(__r);
}
else if (_M_match_token(_ScannerT::_S_token_interval_begin))
{
- if (_M_stack.empty())
- __throw_regex_error(regex_constants::error_badrepeat);
+ __init();
if (!_M_match_token(_ScannerT::_S_token_dup_count))
__throw_regex_error(regex_constants::error_badbrace);
_StateSeqT __r(_M_pop());
if (_M_match_token(_ScannerT::_S_token_comma))
if (_M_match_token(_ScannerT::_S_token_dup_count)) // {3,7}
{
- int __n = _M_cur_int_value(10) - __min_rep;
- if (__n < 0)
- __throw_regex_error(regex_constants::error_badbrace);
- auto __end = _M_nfa._M_insert_dummy();
- for (int __i = 0; __i < __n; ++__i)
- {
+ int __n = _M_cur_int_value(10) - __min_rep;
+ if (__n < 0)
+ __throw_regex_error(regex_constants::error_badbrace);
+ auto __end = _M_nfa._M_insert_dummy();
+ for (int __i = 0; __i < __n; ++__i)
+ {
auto __tmp = __r._M_clone();
- __e._M_append(_StateSeqT(_M_nfa, _M_nfa.
- _M_insert_alt(__tmp._M_start, __end), __tmp._M_end));
- }
+ __e._M_append
+ (_StateSeqT(_M_nfa,
+ _M_nfa._M_insert_alt(__tmp._M_start,
+ __end, __neg),
+ __tmp._M_end));
+ }
__e._M_append(__end);
}
else // {3,}
{
auto __tmp = __r._M_clone();
- _StateSeqT __s(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id,
- __tmp._M_start));
+ _StateSeqT __s(_M_nfa,
+ _M_nfa._M_insert_alt(_S_invalid_state_id,
+ __tmp._M_start, __neg));
__tmp._M_append(__s);
__e._M_append(__s);
}
* %set.
*/
enum syntax_option_type : unsigned int
- {
- /**
- * Specifies that the matching of regular expressions against a character
- * sequence shall be performed without regard to case.
- */
- icase = 1 << _S_icase,
-
- /**
- * Specifies that when a regular expression is matched against a character
- * container sequence, no sub-expression matches are to be stored in the
- * supplied match_results structure.
- */
- nosubs = 1 << _S_nosubs,
-
- /**
- * Specifies that the regular expression engine should pay more attention to
- * the speed with which regular expressions are matched, and less to the
- * speed with which regular expression objects are constructed. Otherwise
- * it has no detectable effect on the program output.
- */
- optimize = 1 << _S_optimize,
-
- /**
- * Specifies that character ranges of the form [a-b] should be locale
- * sensitive.
- */
- collate = 1 << _S_collate,
-
- /**
- * Specifies that the grammar recognized by the regular expression engine is
- * that used by ECMAScript in ECMA-262 [Ecma International, ECMAScript
- * Language Specification, Standard Ecma-262, third edition, 1999], as
- * modified in section [28.13]. This grammar is similar to that defined
- * in the PERL scripting language but extended with elements found in the
- * POSIX regular expression grammar.
- */
- ECMAScript = 1 << _S_ECMAScript,
-
- /**
- * Specifies that the grammar recognized by the regular expression engine is
- * that used by POSIX basic regular expressions in IEEE Std 1003.1-2001,
- * Portable Operating System Interface (POSIX), Base Definitions and
- * Headers, Section 9, Regular Expressions [IEEE, Information Technology --
- * Portable Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
- */
- basic = 1 << _S_basic,
-
- /**
- * Specifies that the grammar recognized by the regular expression engine is
- * that used by POSIX extended regular expressions in IEEE Std 1003.1-2001,
- * Portable Operating System Interface (POSIX), Base Definitions and Headers,
- * Section 9, Regular Expressions.
- */
- extended = 1 << _S_extended,
-
- /**
- * Specifies that the grammar recognized by the regular expression engine is
- * that used by POSIX utility awk in IEEE Std 1003.1-2001. This option is
- * identical to syntax_option_type extended, except that C-style escape
- * sequences are supported. These sequences are:
- * \\\\, \\a, \\b, \\f, \\n, \\r, \\t , \\v, \\&apos,, &apos,,
- * and \\ddd (where ddd is one, two, or three octal digits).
- */
- awk = 1 << _S_awk,
-
- /**
- * Specifies that the grammar recognized by the regular expression engine is
- * that used by POSIX utility grep in IEEE Std 1003.1-2001. This option is
- * identical to syntax_option_type basic, except that newlines are treated
- * as whitespace.
- */
- grep = 1 << _S_grep,
-
- /**
- * Specifies that the grammar recognized by the regular expression engine is
- * that used by POSIX utility grep when given the -E option in
- * IEEE Std 1003.1-2001. This option is identical to syntax_option_type
- * extended, except that newlines are treated as whitespace.
- */
- egrep = 1 << _S_egrep,
- };
+ {
+ /**
+ * Specifies that the matching of regular expressions against a character
+ * sequence shall be performed without regard to case.
+ */
+ icase = 1 << _S_icase,
+
+ /**
+ * Specifies that when a regular expression is matched against a character
+ * container sequence, no sub-expression matches are to be stored in the
+ * supplied match_results structure.
+ */
+ nosubs = 1 << _S_nosubs,
+
+ /**
+ * Specifies that the regular expression engine should pay more attention to
+ * the speed with which regular expressions are matched, and less to the
+ * speed with which regular expression objects are constructed. Otherwise
+ * it has no detectable effect on the program output.
+ */
+ optimize = 1 << _S_optimize,
+
+ /**
+ * Specifies that character ranges of the form [a-b] should be locale
+ * sensitive.
+ */
+ collate = 1 << _S_collate,
+
+ /**
+ * Specifies that the grammar recognized by the regular expression engine is
+ * that used by ECMAScript in ECMA-262 [Ecma International, ECMAScript
+ * Language Specification, Standard Ecma-262, third edition, 1999], as
+ * modified in section [28.13]. This grammar is similar to that defined
+ * in the PERL scripting language but extended with elements found in the
+ * POSIX regular expression grammar.
+ */
+ ECMAScript = 1 << _S_ECMAScript,
+
+ /**
+ * Specifies that the grammar recognized by the regular expression engine is
+ * that used by POSIX basic regular expressions in IEEE Std 1003.1-2001,
+ * Portable Operating System Interface (POSIX), Base Definitions and
+ * Headers, Section 9, Regular Expressions [IEEE, Information Technology --
+ * Portable Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
+ */
+ basic = 1 << _S_basic,
+
+ /**
+ * Specifies that the grammar recognized by the regular expression engine is
+ * that used by POSIX extended regular expressions in IEEE Std 1003.1-2001,
+ * Portable Operating System Interface (POSIX), Base Definitions and
+ * Headers, Section 9, Regular Expressions.
+ */
+ extended = 1 << _S_extended,
+
+ /**
+ * Specifies that the grammar recognized by the regular expression engine is
+ * that used by POSIX utility awk in IEEE Std 1003.1-2001. This option is
+ * identical to syntax_option_type extended, except that C-style escape
+ * sequences are supported. These sequences are:
+ * \\\\, \\a, \\b, \\f, \\n, \\r, \\t , \\v, \\&apos,, &apos,,
+ * and \\ddd (where ddd is one, two, or three octal digits).
+ */
+ awk = 1 << _S_awk,
+
+ /**
+ * Specifies that the grammar recognized by the regular expression engine is
+ * that used by POSIX utility grep in IEEE Std 1003.1-2001. This option is
+ * identical to syntax_option_type basic, except that newlines are treated
+ * as whitespace.
+ */
+ grep = 1 << _S_grep,
+
+ /**
+ * Specifies that the grammar recognized by the regular expression engine is
+ * that used by POSIX utility grep when given the -E option in
+ * IEEE Std 1003.1-2001. This option is identical to syntax_option_type
+ * extended, except that newlines are treated as whitespace.
+ */
+ egrep = 1 << _S_egrep,
+ };
constexpr inline syntax_option_type
operator&(syntax_option_type __a, syntax_option_type __b)
{ }
// Set matched when string exactly match the pattern.
- virtual void
+ virtual bool
_M_match() = 0;
// Set matched when some prefix of the string matches the pattern.
- virtual void
- _M_search_from_first() = 0;
+ virtual bool
+ _M_search() = 0;
protected:
typedef typename _NFA<_CharT, _TraitsT>::_SizeT _SizeT;
- _Executor(_BiIter __begin,
- _BiIter __end,
- _ResultsT& __results,
- _FlagT __flags,
- _SizeT __size)
- : _M_current(__begin), _M_end(__end), _M_results(__results),
- _M_flags(__flags)
+ typedef typename _TraitsT::char_class_type _ClassT;
+
+ _Executor(_BiIter __begin,
+ _BiIter __end,
+ _ResultsT& __results,
+ _FlagT __flags,
+ _SizeT __size,
+ const _TraitsT& __traits)
+ : _M_current(__begin), _M_begin(__begin), _M_end(__end),
+ _M_results(__results), _M_flags(__flags), _M_traits(__traits)
{
__size += 2;
_M_results.resize(__size);
- for (auto __i = 0; __i < __size; __i++)
+ for (_SizeT __i = 0; __i < __size; ++__i)
_M_results[__i].matched = false;
}
- _BiIter _M_current;
- _BiIter _M_end;
- _ResultsVec& _M_results;
- _FlagT _M_flags;
+ bool
+ _M_is_word(_CharT __ch)
+ {
+ static const _CharT __s = 'w';
+ return _M_traits.isctype(__ch,
+ _M_traits.lookup_classname(&__s, &__s+1));
+ }
+
+ _BiIter _M_current;
+ const _BiIter _M_begin;
+ const _BiIter _M_end;
+ _ResultsVec& _M_results;
+ const _TraitsT& _M_traits;
+ _FlagT _M_flags;
};
// A _DFSExecutor perform a DFS on given NFA and input string. At the very
const _RegexT& __nfa,
const _TraitsT& __traits,
_FlagT __flags)
- : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
- _M_traits(__traits), _M_nfa(__nfa), _M_results_ret(this->_M_results)
+ : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count(),
+ __traits),
+ _M_traits(__traits), _M_nfa(__nfa), _M_cur_results(this->_M_results),
+ _M_start_state(__nfa._M_start())
{ }
- void
+ bool
_M_match()
- { _M_dfs<true>(_M_nfa._M_start()); }
+ {
+ this->_M_current = this->_M_begin;
+ return _M_dfs<true>(_M_start_state);
+ }
- void
+ bool
_M_search_from_first()
- { _M_dfs<false>(_M_nfa._M_start()); }
+ {
+ this->_M_current = this->_M_begin;
+ return _M_dfs<false>(_M_start_state);
+ }
+
+ bool
+ _M_search()
+ {
+ auto __cur = this->_M_begin;
+ do
+ {
+ this->_M_current = __cur;
+ if (_M_dfs<false>(_M_start_state))
+ return true;
+ }
+ // Continue when __cur == _M_end
+ while (__cur++ != this->_M_end);
+ return false;
+ }
private:
template<bool __match_mode>
bool
_M_dfs(_StateIdT __i);
- _ResultsVec _M_results_ret;
+ // To record current solution.
+ _ResultsVec _M_cur_results;
const _TraitsT& _M_traits;
const _RegexT& _M_nfa;
+ _StateIdT _M_start_state;
};
// Like the DFS approach, it try every possible state transition; Unlike DFS,
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
typedef _NFA<_CharT, _TraitsT> _RegexT;
typedef typename _BaseT::_ResultsT _ResultsT;
- typedef typename _BaseT::_ResultsVec _ResultsVec;
- typedef std::unique_ptr<_ResultsVec> _ResultsPtr;
+ // Here's a solution for greedy/ungreedy mode in the BFS approach. We need
+ // to carefully work out how to compare two conflicting matching states.
+ //
+ // A matching state is a pair (where, when); `where` is an NFA node; `when`
+ // is a _BiIter indicating which char is the next one to be matched. Two
+ // matching states conflict when they have equivalent `where` and
+ // `when`.
+ //
+ // We then need to drop one and keep the other, because at most one of
+ // them can be the final optimal solution. Which one survives is decided
+ // by the greedy policy.
+ //
+ // The definition of `greedy`:
+ // Take the sequence of quantifiers in the NFA, sorted by their start
+ // positions. Each matching state maintains a vector of the same length
+ // as that sequence, recording the repeat count of every quantifier. To
+ // compare two matching states, we lexicographically compare these two
+ // vectors. To win the comparison (to survive), a matching state needs
+ // larger counts for greedy quantifiers and smaller counts for ungreedy
+ // quantifiers.
+ //
+ // In the implementation, we record negative numbers for greedy
+ // quantifiers and positive numbers for ungreedy ones, so a plain
+ // operator<() based on lexicographical_compare gives the answer.
+ //
+ // When the two vectors are equal, the `where`, `when` and quantifier
+ // counts are all identical, which indicates the same answer, so just
+ // return false.
+ struct _ResultsEntry
+ : private _BaseT::_ResultsVec
+ {
+ public:
+ _ResultsEntry(unsigned int __res_sz, unsigned int __sz)
+ : _BaseT::_ResultsVec(__res_sz), _M_quant_keys(__sz)
+ { }
+
+ sub_match<_BiIter>&
+ operator[](unsigned int __idx)
+ { return this->_BaseT::_ResultsVec::operator[](__idx); }
+
+ bool
+ operator<(const _ResultsEntry& __rhs) const
+ {
+ _GLIBCXX_DEBUG_ASSERT(_M_quant_keys.size()
+ == __rhs._M_quant_keys.size());
+ return lexicographical_compare(_M_quant_keys.begin(),
+ _M_quant_keys.end(),
+ __rhs._M_quant_keys.begin(),
+ __rhs._M_quant_keys.end());
+ }
+
+ void
+ _M_inc(unsigned int __idx, bool __neg)
+ { _M_quant_keys[__idx] += __neg ? 1 : -1; }
+
+ typename _BaseT::_ResultsVec
+ _M_get()
+ { return *this; }
+
+ public:
+ std::vector<int> _M_quant_keys;
+ };
+
+ typedef std::unique_ptr<_ResultsEntry> _ResultsPtr;
typedef regex_constants::match_flag_type _FlagT;
- _BFSExecutor(_BiIter __begin,
- _BiIter __end,
- _ResultsT& __results,
- const _RegexT& __nfa,
- _FlagT __flags)
- : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
- _M_nfa(__nfa)
- {
- if (_M_nfa._M_start() != _S_invalid_state_id)
- _M_covered[_M_nfa._M_start()] =
- _ResultsPtr(new _ResultsVec(this->_M_results));
- _M_e_closure();
- }
+ _BFSExecutor(_BiIter __begin,
+ _BiIter __end,
+ _ResultsT& __results,
+ const _RegexT& __nfa,
+ const _TraitsT& __traits,
+ _FlagT __flags)
+ : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count(),
+ __traits),
+ _M_nfa(__nfa),
+ _M_cur_results(nullptr),
+ _M_start_state(__nfa._M_start())
+ { }
- void
+ bool
_M_match()
- { _M_main_loop<true>(); }
+ {
+ _M_init(this->_M_begin);
+ return _M_main_loop<true>();
+ }
- void
+ bool
_M_search_from_first()
- { _M_main_loop<false>(); }
+ {
+ _M_init(this->_M_begin);
+ return _M_main_loop<false>();
+ }
+
+ bool
+ _M_search()
+ {
+ auto __cur = this->_M_begin;
+ do
+ {
+ _M_init(__cur);
+ if (_M_main_loop<false>())
+ return true;
+ }
+ // Continue when __cur == _M_end
+ while (__cur++ != this->_M_end);
+ return false;
+ }
private:
+ void
+ _M_init(_BiIter __cur)
+ {
+ _GLIBCXX_DEBUG_ASSERT(_M_start_state != _S_invalid_state_id);
+ this->_M_current = __cur;
+ _M_covered.clear();
+ _M_covered[_M_start_state] =
+ _ResultsPtr(new _ResultsEntry(this->_M_results.size(),
+ _M_nfa._M_quant_count));
+ _M_e_closure();
+ }
+
template<bool __match_mode>
- void
+ bool
_M_main_loop();
void
_M_move();
bool
- _M_match_less_than(const _ResultsVec& __u, const _ResultsVec& __v) const;
-
- bool
- _M_includes_some() const;
+ _M_includes_some();
- std::map<_StateIdT, _ResultsPtr> _M_covered;
- const _RegexT& _M_nfa;
+ std::map<_StateIdT, _ResultsPtr> _M_covered;
+ // To record global optimal solution.
+ _ResultsPtr _M_cur_results;
+ const _RegexT& _M_nfa;
+ _StateIdT _M_start_state;
};
//@} regex-detail
// This is not that certain. Need deeper investigate.
return false;
auto& __current = this->_M_current;
+ auto& __begin = this->_M_begin;
auto& __end = this->_M_end;
- auto& __results = _M_results_ret;
+ auto& __results = _M_cur_results;
const auto& __state = _M_nfa[__i];
bool __ret = false;
switch (__state._M_opcode)
{
case _S_opcode_alternative:
- // Greedy mode by default. For non-greedy mode,
- // swap _M_alt and _M_next.
- // TODO: Add greedy mode option.
- __ret = _M_dfs<__match_mode>(__state._M_alt)
- || _M_dfs<__match_mode>(__state._M_next);
+ // Greedy or not, this is a question ;)
+ if (!__state._M_neg)
+ __ret = _M_dfs<__match_mode>(__state._M_alt)
+ || _M_dfs<__match_mode>(__state._M_next);
+ else
+ __ret = _M_dfs<__match_mode>(__state._M_next)
+ || _M_dfs<__match_mode>(__state._M_alt);
break;
case _S_opcode_subexpr_begin:
// Here's the critical part: if there's nothing changed since last
else
__ret = _M_dfs<__match_mode>(__state._M_next);
break;
+ case _S_opcode_line_begin_assertion:
+ if (__current == __begin)
+ __ret = _M_dfs<__match_mode>(__state._M_next);
+ break;
+ case _S_opcode_line_end_assertion:
+ if (__current == __end)
+ __ret = _M_dfs<__match_mode>(__state._M_next);
+ break;
+ // By definition.
+ case _S_opcode_word_boundry:
+ {
+ bool __ans = false;
+ if (__current == __begin && this->_M_is_word(*__current))
+ __ans = true;
+ else if (__current == __end && this->_M_is_word(*__current))
+ __ans = true;
+ else
+ {
+ auto __pre = __current;
+ --__pre;
+ if (this->_M_is_word(*__current)
+ != this->_M_is_word(*__pre))
+ __ans = true;
+ }
+ if (__ans == !__state._M_neg)
+ __ret = _M_dfs<__match_mode>(__state._M_next);
+ }
+ break;
+ // Here __state._M_alt offers a single start node for a sub-NFA.
+ // We recursively invoke our algorithm to match the sub-NFA.
+ case _S_opcode_subexpr_lookahead:
+ {
+ _ResultsT __m;
+ // FIXME Here's not necessarily a DFSExecutor. But we need to
+ // refactor the whole NFA to a recursive tree structure first.
+ _DFSExecutor __sub(this->_M_current,
+ this->_M_end,
+ __m,
+ this->_M_nfa,
+ this->_M_traits,
+ this->_M_flags);
+ __sub._M_start_state = __state._M_alt;
+ if (__sub._M_search_from_first() == !__state._M_neg)
+ __ret = _M_dfs<__match_mode>(__state._M_next);
+ }
+ break;
case _S_opcode_match:
if (__current != __end && __state._M_matches(*__current))
{
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
template<bool __match_mode>
- void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
+ bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_main_loop()
{
+ bool __ret = false;
while (this->_M_current != this->_M_end)
{
if (!__match_mode)
- if (_M_includes_some())
- return;
+ // To keep regex_search greedy, no "return true" here.
+ __ret = _M_includes_some() || __ret;
_M_move();
++this->_M_current;
_M_e_closure();
}
- _M_includes_some();
+ __ret = _M_includes_some() || __ret;
+ if (__ret)
+ this->_M_results = _M_cur_results->_M_get();
+ return __ret;
}
template<typename _BiIter, typename _Alloc,
auto& __current = this->_M_current;
std::queue<_StateIdT> __q;
std::vector<bool> __in_q(_M_nfa.size(), false);
+ auto& __begin = this->_M_begin;
+ auto& __end = this->_M_end;
+
for (auto& __it : _M_covered)
{
__in_q[__it.first] = true;
__in_q[__u] = false;
const auto& __state = _M_nfa[__u];
- // Can be implemented using method, but there're too much arguments.
- // I would use macro function before C++11, but lambda is a better
- // choice, since hopefully compiler can inline it.
+ // Can be implemented using method, but there will be too many
+ // arguments. I would use macro function before C++11, but lambda is
+ // a better choice, since hopefully compiler can inline it.
auto __add_visited_state = [&](_StateIdT __v)
{
if (__v == _S_invalid_state_id)
return;
if (_M_covered.count(__u) != 0
&& (_M_covered.count(__v) == 0
- || _M_match_less_than(*_M_covered[__u], *_M_covered[__v])))
+ || *_M_covered[__u] < *_M_covered[__v]))
{
- _M_covered[__v] = _ResultsPtr(new _ResultsVec(*_M_covered[__u]));
+ _M_covered[__v] =
+ _ResultsPtr(new _ResultsEntry(*_M_covered[__u]));
// if a state is updated, it's outgoing neighbors should be
// reconsidered too. Push them to the queue.
if (!__in_q[__v])
}
};
+ // Identical to DFS's switch part.
switch (__state._M_opcode)
{
+ // The quantifier count vector needs to be maintained here. A
+ // quantifier must be associated with an alt node.
case _S_opcode_alternative:
- __add_visited_state(__state._M_next);
- __add_visited_state(__state._M_alt);
+ {
+ __add_visited_state(__state._M_next);
+ auto __back =
+ _M_covered[__u]->_M_quant_keys[__state._M_quant_index];
+ _M_covered[__u]->_M_inc(__state._M_quant_index,
+ __state._M_neg);
+ __add_visited_state(__state._M_alt);
+ _M_covered[__u]->_M_quant_keys[__state._M_quant_index]
+ = __back;
+ }
break;
case _S_opcode_subexpr_begin:
{
- auto& __cu = *_M_covered[__u];
- auto __back = __cu[__state._M_subexpr].first;
- __cu[__state._M_subexpr].first = __current;
- __add_visited_state(__state._M_next);
- __cu[__state._M_subexpr].first = __back;
+ auto& __sub = (*_M_covered[__u])[__state._M_subexpr];
+ if (!__sub.matched || __sub.first != __current)
+ {
+ auto __back = __sub.first;
+ __sub.first = __current;
+ __add_visited_state(__state._M_next);
+ __sub.first = __back;
+ }
}
break;
case _S_opcode_subexpr_end:
__cu[__state._M_subexpr] = __back;
}
break;
+ case _S_opcode_line_begin_assertion:
+ if (__current == __begin)
+ __add_visited_state(__state._M_next);
+ break;
+ case _S_opcode_line_end_assertion:
+ if (__current == __end)
+ __add_visited_state(__state._M_next);
+ break;
+	  case _S_opcode_word_boundry:
+	    {
+	      // A word boundary lies at __current when exactly one of the
+	      // characters on either side of it is a word character.  A
+	      // position outside [__begin, __end) counts as a non-word
+	      // character, so __end is never dereferenced and __begin is
+	      // never decremented (both would be undefined behaviour).
+	      bool __pre_is_word = false;
+	      if (__current != __begin)
+		{
+		  auto __pre = __current;
+		  --__pre;
+		  __pre_is_word = this->_M_is_word(*__pre);
+		}
+	      bool __cur_is_word = false;
+	      if (__current != __end)
+		__cur_is_word = this->_M_is_word(*__current);
+	      // An empty input therefore has no word boundary at all.
+	      bool __ans = __pre_is_word != __cur_is_word;
+	      // _M_neg selects \B (no boundary) instead of \b.
+	      if (__ans == !__state._M_neg)
+		__add_visited_state(__state._M_next);
+	    }
+	    break;
+ case _S_opcode_subexpr_lookahead:
+ {
+ _ResultsT __m;
+ // Same comment as in DFS.
+ _BFSExecutor __sub(this->_M_current,
+ this->_M_end,
+ __m,
+ this->_M_nfa,
+ this->_M_traits,
+ this->_M_flags);
+ __sub._M_start_state = __state._M_alt;
+ if (__sub._M_search_from_first() == !__state._M_neg)
+ __add_visited_state(__state._M_next);
+ }
+ break;
case _S_opcode_match:
break;
case _S_opcode_accept:
- __add_visited_state(__state._M_next);
break;
default:
_GLIBCXX_DEBUG_ASSERT(false);
&& __state._M_matches(*this->_M_current))
if (__state._M_next != _S_invalid_state_id)
if (__next.count(__state._M_next) == 0
- || _M_match_less_than(*__it.second, *__next[__state._M_next]))
+ || *__it.second < *__next[__state._M_next])
__next[__state._M_next] = move(__it.second);
}
_M_covered = move(__next);
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
- _M_match_less_than(const _ResultsVec& __u, const _ResultsVec& __v) const
- {
- // TODO: Greedy and Non-greedy support
- _GLIBCXX_DEBUG_ASSERT(__u.size() == __v.size());
- auto __size = __u.size();
- for (auto __i = 0; __i < __size; __i++)
- {
- auto __uit = __u[__i], __vit = __v[__i];
- if (__uit.matched && !__vit.matched)
- return true;
- if (!__uit.matched && __vit.matched)
- return false;
- if (__uit.matched && __vit.matched)
- {
- // GREEDY
- if (__uit.first != __vit.first)
- return __uit.first < __vit.first;
- if (__uit.second != __vit.second)
- return __uit.second > __vit.second;
- }
- }
- return false;
- }
-
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT>
- bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
- _M_includes_some() const
+ _M_includes_some()
{
auto& __s = _M_nfa._M_final_states();
auto& __t = _M_covered;
+ bool __succ = false;
if (__s.size() > 0 && __t.size() > 0)
{
auto __first = __s.begin();
{
if (*__first < __second->first)
++__first;
- else if (__second->first < *__first)
+ else if (*__first > __second->first)
++__second;
else
{
- this->_M_results = *__second->second;
- return true;
+ if (_M_cur_results == nullptr
+ || *__second->second < *_M_cur_results)
+ _M_cur_results =
+ _ResultsPtr(new _ResultsEntry(*__second->second));
+ __succ = true;
+ ++__first;
+ ++__second;
}
}
}
- return false;
+ return __succ;
}
template<typename _BiIter, typename _Alloc,
if (__p->_M_has_backref)
return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p,
__re._M_traits, __flags));
- return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, *__p, __flags));
+ return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, *__p,
+ __re._M_traits, __flags));
}
_GLIBCXX_END_NAMESPACE_VERSION
_S_token_subexpr_begin,
_S_token_subexpr_no_group_begin,
_S_token_subexpr_lookahead_begin,
- _S_token_subexpr_neg_lookahead_begin,
_S_token_subexpr_end,
_S_token_bracket_begin,
_S_token_bracket_neg_begin,
_S_token_or,
_S_token_closure0,
_S_token_closure1,
+ _S_token_ungreedy,
_S_token_line_begin,
_S_token_line_end,
_S_token_word_bound,
- _S_token_neg_word_bound,
_S_token_comma,
_S_token_dup_count,
_S_token_eof,
{
++_M_current;
_M_token = _S_token_subexpr_lookahead_begin;
+ _M_value.assign(1, 'p');
}
else if (*_M_current == '!')
{
++_M_current;
- _M_token = _S_token_subexpr_neg_lookahead_begin;
+ _M_token = _S_token_subexpr_lookahead_begin;
+ _M_value.assign(1, 'n');
}
else
__throw_regex_error(regex_constants::error_paren);
_M_value.assign(1, _M_escape_map.at(__c));
}
else if (__c == 'b')
- _M_token = _S_token_word_bound;
+ {
+ _M_token = _S_token_word_bound;
+ _M_value.assign(1, 'p');
+ }
else if (__c == 'B')
- _M_token = _S_token_neg_word_bound;
+ {
+ _M_token = _S_token_word_bound;
+ _M_value.assign(1, 'n');
+ }
// N3376 28.13
else if (__c == 'd'
|| __c == 'D'
case _S_token_subexpr_lookahead_begin:
ostr << "lookahead subexpr begin\n";
break;
- case _S_token_subexpr_neg_lookahead_begin:
- ostr << "neg lookahead subexpr begin\n";
- break;
case _S_token_subexpr_end:
ostr << "subexpr end\n";
break;
--- /dev/null
+// { dg-options "-std=gnu++11" }
+// { dg-do run { xfail *-*-* } }
+
+//
+// 2013-09-14 Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.3 regex_search
+// Tests ECMAScript assertion.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+// Checks ECMAScript assertions through regex_search: the line anchors
+// ^ and $, positive and negative lookahead ((?=...) and (?!...)), and
+// the word-boundary assertions \b and \B.
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  // Line-begin anchor, alone and inside an alternation.
+  VERIFY(!regex_search("2123456", regex("^1234")));
+  VERIFY(regex_search("123456", regex("^1234")));
+  VERIFY(regex_search("123456", regex("(5|^)1234")));
+  VERIFY(regex_search("5123456", regex("(5|^)1234")));
+  // Line-end anchor.
+  VERIFY(!regex_search("1234562", regex("3456$")));
+  VERIFY(regex_search("123456", regex("3456$")));
+  // Positive and negative lookahead; lookahead consumes no input.
+  VERIFY(!regex_search("123456", regex("(?=1234)56")));
+  VERIFY(regex_search("123456", regex("(?=1234)123456")));
+  VERIFY(regex_search("123456", regex("(?!1234)56")));
+  VERIFY(!regex_search("123456", regex("(?!1234)123456")));
+
+  // Word boundary (\b) and its negation (\B).
+  VERIFY(regex_search("a-", regex("a\\b-")));
+  VERIFY(!regex_search("ab", regex("a\\bb")));
+  VERIFY(!regex_search("a-", regex("a\\B-")));
+  VERIFY(regex_search("ab", regex("a\\Bb")));
+
+  // Iterating \b\w*\b over a sentence must yield its words in order.
+  const string text("This is a regular expression");
+  const string sol[] =
+    {
+      "This",
+      "is",
+      "a",
+      "regular",
+      "expression",
+    };
+  // Derive the expected count from the table instead of repeating it.
+  const int nsol = sizeof(sol) / sizeof(sol[0]);
+
+  regex re("\\b\\w*\\b");
+  int i = 0;
+  for (auto it = sregex_iterator(text.begin(), text.end(), re);
+       it != sregex_iterator() && i < nsol;
+       ++it)
+    {
+      string word((*it)[0].first, (*it)[0].second);
+      VERIFY(word == sol[i++]);
+    }
+  VERIFY(i == nsol);
+}
+
+// Test driver: runs test01() and returns 0.
+int
+main()
+{
+ test01();
+ return 0;
+}
--- /dev/null
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-09-14 Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.3 regex_search
+// Tests ECMAScript greedy and ungreedy quantifiers.
+
+#include <regex>
+#include <testsuite_hooks.h>
+
+using namespace std;
+
+// Checks that the greedy quantifiers *, + and ? match as much as
+// possible and that their ungreedy forms *?, +? and ?? match as little
+// as possible, both for the whole match and for individual sub-matches.
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  cmatch m;
+  // TEST(i, s): sub-match i of the last search matched and equals s.
+#define TEST(i, s) VERIFY(m[i].matched && string(m[i].first, m[i].second) == s)
+  VERIFY(regex_search("aaaa", m, regex("a*")));
+  TEST(0, "aaaa");
+  VERIFY(regex_search("aaaa", m, regex("a*?")));
+  TEST(0, "");
+  VERIFY(regex_search("aaaa", m, regex("a+")));
+  TEST(0, "aaaa");
+  VERIFY(regex_search("aaaa", m, regex("a+?")));
+  TEST(0, "a");
+  VERIFY(regex_search("a", m, regex("a?")));
+  TEST(0, "a");
+  VERIFY(regex_search("a", m, regex("a??")));
+  TEST(0, "");
+  VERIFY(regex_search("", m, regex("a??")));
+  TEST(0, "");
+  // All four greedy/ungreedy combinations of two adjacent groups.
+  VERIFY(regex_search("aaaa", m, regex("(a+)(a+)")));
+  TEST(1, "aaa");
+  TEST(2, "a");
+  VERIFY(regex_search("aaaa", m, regex("(a+?)(a+)")));
+  TEST(1, "a");
+  TEST(2, "aaa");
+  VERIFY(regex_search("aaaa", m, regex("(a+)(a+?)")));
+  TEST(1, "aaa");
+  TEST(2, "a");
+  VERIFY(regex_search("aaaa", m, regex("(a+?)(a+?)")));
+  TEST(1, "a");
+  TEST(2, "a");
+#undef TEST
+}
+
+// Test driver: runs test01() and returns 0.
+int
+main()
+{
+ test01();
+ return 0;
+}
// <http://www.gnu.org/licenses/>.
// 28.11.3 regex_search
-// Tests BRE against a std::string target.
+// Tests ECMAScript against a std::string target.
#include <regex>
#include <testsuite_hooks.h>