From bf6319b96ade5c4e855b16250250ca0865856640 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Mon, 14 Feb 2011 23:35:22 +0000 Subject: [PATCH] regex.h (sub_match::sub_match): Add. 2011-02-14 Jonathan Wakely * include/bits/regex.h (sub_match::sub_match): Add. (match_results::ready): Add. (match_results::empty): Adjust. (match_results::length): Add missing dereference. (match_results::operator[],prefix,suffix): Add debug mode checks. (match_results::cend): Re-use end(). (match_results::format): Adjust signatures. (operator==(match_results,match_results)): Implement. * include/bits/regex_compiler.h (_Scanner_base): Use constexpr. * include/bits/regex_constants.h (syntax_option_type): Likewise. * include/bits/regex_grep_matcher.h: Fix comment typo. (_Specialized_results::_Specialized_results): Simplify. * include/bits/regex_cursor.h: Fix comment typo. * include/bits/regex_nfa.h: Likewise. * testsuite/28_regex/basic_regex/ctors/basic/string_range_01_02_03.cc: Fix error code, remove xfail. * testsuite/28_regex/basic_regex/ctors/extended/ string_range_01_02_03.cc: Likewise. From-SVN: r170158 --- libstdc++-v3/ChangeLog | 21 ++++ libstdc++-v3/include/bits/regex.h | 109 ++++++++++++++++----- libstdc++-v3/include/bits/regex_compiler.h | 11 +-- libstdc++-v3/include/bits/regex_constants.h | 48 ++++----- libstdc++-v3/include/bits/regex_cursor.h | 4 +- libstdc++-v3/include/bits/regex_grep_matcher.h | 21 ++-- libstdc++-v3/include/bits/regex_nfa.h | 2 +- .../ctors/basic/string_range_01_02_03.cc | 7 +- .../ctors/extended/string_range_01_02_03.cc | 7 +- 9 files changed, 152 insertions(+), 78 deletions(-) diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index cb03341..e43018d 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,5 +1,26 @@ 2011-02-14 Jonathan Wakely + * include/bits/regex.h (sub_match::sub_match): Add. + (match_results::ready): Add. + (match_results::empty): Adjust. + (match_results::length): Add missing dereference. 
+ (match_results::operator[],prefix,suffix): Add debug mode checks. + (match_results::cend): Re-use end(). + (match_results::format): Adjust signatures. + (operator==(match_results,match_results)): Implement. + * include/bits/regex_compiler.h (_Scanner_base): Use constexpr. + * include/bits/regex_constants.h (syntax_option_type): Likewise. + * include/bits/regex_grep_matcher.h: Fix comment typo. + (_Specialized_results::_Specialized_results): Simplify. + * include/bits/regex_cursor.h: Fix comment typo. + * include/bits/regex_nfa.h: Likewise. + * testsuite/28_regex/basic_regex/ctors/basic/string_range_01_02_03.cc: + Fix error code, remove xfail. + * testsuite/28_regex/basic_regex/ctors/extended/ + string_range_01_02_03.cc: Likewise. + +2011-02-14 Jonathan Wakely + * include/bits/regex_compiler.h: Remove unnecessary bind() calls. * include/bits/regex_nfa.h: Remove unnecessary base classes. diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h index f21530f..627bdc0 100644 --- a/libstdc++-v3/include/bits/regex.h +++ b/libstdc++-v3/include/bits/regex.h @@ -1,6 +1,6 @@ // class template regex -*- C++ -*- -// Copyright (C) 2010 Free Software Foundation, Inc. +// Copyright (C) 2010, 2011 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the @@ -765,6 +765,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION public: bool matched; + constexpr sub_match() : matched() { } + /** * Gets the length of the matching sequence. */ @@ -1521,6 +1523,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION //@} + // 28.10.2, state: + /** + * @brief Indicates if the %match_results is ready. + * @retval true The object has a fully-established result state. + * @retval false The object is not ready. 
+ */ + bool ready() const { return !_Base_type::empty(); } + /** * @name 10.2 Size */ @@ -1553,7 +1563,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ bool empty() const - { return _Base_type::empty(); } + { return size() == 0; } //@} @@ -1565,17 +1575,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION /** * @brief Gets the length of the indicated submatch. * @param sub indicates the submatch. + * @pre ready() == true * * This function returns the length of the indicated submatch, or the * length of the entire match if @p sub is zero (the default). */ difference_type length(size_type __sub = 0) const - { return this[__sub].length(); } + { return (*this)[__sub].length(); } /** * @brief Gets the offset of the beginning of the indicated submatch. * @param sub indicates the submatch. + * @pre ready() == true * * This function returns the offset from the beginning of the target * sequence to the beginning of the submatch, unless the value of @p sub @@ -1595,6 +1607,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION /** * @brief Gets the match or submatch converted to a string type. * @param sub indicates the submatch. + * @pre ready() == true * * This function gets the submatch (or match, if @p sub is zero) extracted * from the target range and converted to the associated string type. @@ -1606,6 +1619,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION /** * @brief Gets a %sub_match reference for the match or submatch. * @param sub indicates the submatch. + * @pre ready() == true * * This function gets a reference to the indicated submatch, or the entire * match if @p sub is zero. @@ -1616,6 +1630,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const_reference operator[](size_type __sub) const { + _GLIBCXX_DEBUG_ASSERT( ready() ); return __sub < size() ? _Base_type::operator[](__sub) : __unmatched_sub<_Bi_iter>(); @@ -1623,6 +1638,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION /** * @brief Gets a %sub_match representing the match prefix. 
+ * @pre ready() == true * * This function gets a reference to a %sub_match object representing the * part of the target range between the start of the target range and the @@ -1631,6 +1647,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const_reference prefix() const { + _GLIBCXX_DEBUG_ASSERT( ready() ); return !empty() ? _Base_type::operator[](_Base_type::size() - 2) : __unmatched_sub<_Bi_iter>(); @@ -1638,6 +1655,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION /** * @brief Gets a %sub_match representing the match suffix. + * @pre ready() == true * * This function gets a reference to a %sub_match object representing the * part of the target range between the end of the match and the end of @@ -1646,8 +1664,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const_reference suffix() const { - return !empty() - ? _Base_type::operator[](_Base_type::size() - 1) + _GLIBCXX_DEBUG_ASSERT( ready() ); + return !empty() + ? _Base_type::operator[](_Base_type::size() - 1) : __unmatched_sub<_Bi_iter>(); } @@ -1670,52 +1689,79 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ const_iterator end() const - { - return !empty() - ? _Base_type::end() - 2 - : _Base_type::end(); - } + { return !empty() ? _Base_type::end() - 2 : _Base_type::end(); } /** * @brief Gets an iterator to one-past-the-end of the collection. */ const_iterator cend() const - { - return !empty() - ? _Base_type::cend() - 2 - : _Base_type::cend(); - } + { return end(); } //@} /** * @name 10.4 Formatting * - * These functions perform formatted substitution of the matched character - * sequences into their target. The format specifiers and escape sequences - * accepted by these functions are determined by their @p flags parameter - * as documented above. + * These functions perform formatted substitution of the matched + * character sequences into their target. The format specifiers and + * escape sequences accepted by these functions are determined by + * their @p flags parameter as documented above. 
*/
       //@{
 
       /**
+       * @pre   ready() == true
        * @todo Implement this function.
        */
       template<typename _Out_iter>
         _Out_iter
-        format(_Out_iter __out, const string_type& __fmt,
+        format(_Out_iter __out, const char_type* __fmt_first,
+               const char_type* __fmt_last,
                regex_constants::match_flag_type __flags
                = regex_constants::format_default) const
         { return __out; }
 
       /**
-       * @todo Implement this function.
+       * @pre   ready() == true
+       */
+      template<typename _Out_iter, typename _St, typename _Sa>
+        _Out_iter
+        format(_Out_iter __out, const basic_string<char_type, _St, _Sa>& __fmt,
+               regex_constants::match_flag_type __flags
+               = regex_constants::format_default) const
+        {
+          return format(__out, __fmt.data(), __fmt.data() + __fmt.size(),
+                        __flags);
+        }
+
+      /**
+       * @pre   ready() == true
+       */
+      template<typename _St, typename _Sa>
+        basic_string<char_type, _St, _Sa>
+        format(const basic_string<char_type, _St, _Sa>& __fmt,
+               regex_constants::match_flag_type __flags
+               = regex_constants::format_default) const
+        {
+          basic_string<char_type, _St, _Sa> __result;
+          format(std::back_inserter(__result), __fmt, __flags);
+          return __result;
+        }
+
+      /**
+       * @pre   ready() == true
        */
       string_type
-      format(const string_type& __fmt,
+      format(const char_type* __fmt,
              regex_constants::match_flag_type __flags
-             = regex_constants::format_default) const;
+             = regex_constants::format_default) const
+      {
+        string_type __result;
+        format(std::back_inserter(__result), __fmt,
+               __fmt + char_traits<char_type>::length(__fmt), __flags);
+        return __result;
+      }
 
       //@}
 
@@ -1762,12 +1808,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    * @brief Compares two match_results for equality.
    * @returns true if the two objects refer to the same match,
    * false otherwise.
-   * @todo Implement this function.
*/ template inline bool operator==(const match_results<_Bi_iter, _Allocator>& __m1, - const match_results<_Bi_iter, _Allocator>& __m2); + const match_results<_Bi_iter, _Allocator>& __m2) + { + if (__m1.ready() != __m2.ready()) + return false; + if (!__m1.ready()) // both are not ready + return true; + if (__m1.empty() != __m2.empty()) + return false; + if (__m1.empty()) // both are empty + return true; + return __m1.prefix() == __m2.prefix() + && __m1.size() == __m2.size() + && std::equal(__m1.begin(), __m1.end(), __m2.begin()) + && __m1.suffix() == __m2.suffix(); + } /** * @brief Compares two match_results for inequality. diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index eda4e35..b979c8d 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -36,12 +36,11 @@ namespace __regex { struct _Scanner_base { - // FIXME: replace these constanst with constexpr typedef unsigned int _StateT; - static const _StateT _S_state_at_start = 1 << 0; - static const _StateT _S_state_in_brace = 1 << 2; - static const _StateT _S_state_in_bracket = 1 << 3; + static constexpr _StateT _S_state_at_start = 1 << 0; + static constexpr _StateT _S_state_in_brace = 1 << 2; + static constexpr _StateT _S_state_in_bracket = 1 << 3; }; // @@ -51,8 +50,8 @@ namespace __regex // range passed to its constructor as a sequence of parse tokens passed to // the regular expression compiler. The sequence of tokens provided depends // on the flag settings passed to the constructor: different regular - // expression gramars will interpret the same input pattern in syntactically - // different ways. + // expression grammars will interpret the same input pattern in + // syntactically different ways. 
// template class _Scanner: public _Scanner_base diff --git a/libstdc++-v3/include/bits/regex_constants.h b/libstdc++-v3/include/bits/regex_constants.h index 1cdd93c..bddef3a 100644 --- a/libstdc++-v3/include/bits/regex_constants.h +++ b/libstdc++-v3/include/bits/regex_constants.h @@ -76,14 +76,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * Specifies that the matching of regular expressions against a character * sequence shall be performed without regard to case. */ - static const syntax_option_type icase = 1 << _S_icase; + static constexpr syntax_option_type icase = 1 << _S_icase; /** * Specifies that when a regular expression is matched against a character * container sequence, no sub-expression matches are to be stored in the * supplied match_results structure. */ - static const syntax_option_type nosubs = 1 << _S_nosubs; + static constexpr syntax_option_type nosubs = 1 << _S_nosubs; /** * Specifies that the regular expression engine should pay more attention to @@ -91,13 +91,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * speed with which regular expression objects are constructed. Otherwise * it has no detectable effect on the program output. */ - static const syntax_option_type optimize = 1 << _S_optimize; + static constexpr syntax_option_type optimize = 1 << _S_optimize; /** * Specifies that character ranges of the form [a-b] should be locale * sensitive. */ - static const syntax_option_type collate = 1 << _S_collate; + static constexpr syntax_option_type collate = 1 << _S_collate; /** * Specifies that the grammar recognized by the regular expression engine is @@ -107,7 +107,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * in the PERL scripting language but extended with elements found in the * POSIX regular expression grammar. 
*/ - static const syntax_option_type ECMAScript = 1 << _S_ECMAScript; + static constexpr syntax_option_type ECMAScript = 1 << _S_ECMAScript; /** * Specifies that the grammar recognized by the regular expression engine is @@ -116,7 +116,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * Headers, Section 9, Regular Expressions [IEEE, Information Technology -- * Portable Operating System Interface (POSIX), IEEE Standard 1003.1-2001]. */ - static const syntax_option_type basic = 1 << _S_basic; + static constexpr syntax_option_type basic = 1 << _S_basic; /** * Specifies that the grammar recognized by the regular expression engine is @@ -124,7 +124,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * Portable Operating System Interface (POSIX), Base Definitions and Headers, * Section 9, Regular Expressions. */ - static const syntax_option_type extended = 1 << _S_extended; + static constexpr syntax_option_type extended = 1 << _S_extended; /** * Specifies that the grammar recognized by the regular expression engine is @@ -134,7 +134,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * \\\\, \\a, \\b, \\f, \\n, \\r, \\t , \\v, \\', ', * and \\ddd (where ddd is one, two, or three octal digits). */ - static const syntax_option_type awk = 1 << _S_awk; + static constexpr syntax_option_type awk = 1 << _S_awk; /** * Specifies that the grammar recognized by the regular expression engine is @@ -142,7 +142,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * identical to syntax_option_type basic, except that newlines are treated * as whitespace. */ - static const syntax_option_type grep = 1 << _S_grep; + static constexpr syntax_option_type grep = 1 << _S_grep; /** * Specifies that the grammar recognized by the regular expression engine is @@ -150,7 +150,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * IEEE Std 1003.1-2001. This option is identical to syntax_option_type * extended, except that newlines are treated as whitespace. 
*/ - static const syntax_option_type egrep = 1 << _S_egrep; + static constexpr syntax_option_type egrep = 1 << _S_egrep; //@} @@ -193,56 +193,56 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION /** * The default matching rules. */ - static const match_flag_type match_default = 0; + static constexpr match_flag_type match_default = 0; /** * The first character in the sequence [first, last) is treated as though it * is not at the beginning of a line, so the character (^) in the regular * expression shall not match [first, first). */ - static const match_flag_type match_not_bol = 1 << _S_not_bol; + static constexpr match_flag_type match_not_bol = 1 << _S_not_bol; /** * The last character in the sequence [first, last) is treated as though it * is not at the end of a line, so the character ($) in the regular * expression shall not match [last, last). */ - static const match_flag_type match_not_eol = 1 << _S_not_eol; + static constexpr match_flag_type match_not_eol = 1 << _S_not_eol; /** * The expression \\b is not matched against the sub-sequence * [first,first). */ - static const match_flag_type match_not_bow = 1 << _S_not_bow; + static constexpr match_flag_type match_not_bow = 1 << _S_not_bow; /** * The expression \\b should not be matched against the sub-sequence * [last,last). */ - static const match_flag_type match_not_eow = 1 << _S_not_eow; + static constexpr match_flag_type match_not_eow = 1 << _S_not_eow; /** * If more than one match is possible then any match is an acceptable * result. */ - static const match_flag_type match_any = 1 << _S_any; + static constexpr match_flag_type match_any = 1 << _S_any; /** * The expression does not match an empty sequence. */ - static const match_flag_type match_not_null = 1 << _S_not_null; + static constexpr match_flag_type match_not_null = 1 << _S_not_null; /** * The expression only matches a sub-sequence that begins at first . 
*/ - static const match_flag_type match_continuous = 1 << _S_continuous; + static constexpr match_flag_type match_continuous = 1 << _S_continuous; /** * --first is a valid iterator position. When this flag is set then the * flags match_not_bol and match_not_bow are ignored by the regular - * expression algorithms 7.11 and iterators 7.12. + * expression algorithms 28.11 and iterators 28.12. */ - static const match_flag_type match_prev_avail = 1 << _S_prev_avail; + static constexpr match_flag_type match_prev_avail = 1 << _S_prev_avail; /** * When a regular expression match is to be replaced by a new string, the @@ -270,7 +270,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * undefined, use the empty string instead. If * nn > match_results::size(), the result is implementation-defined. */ - static const match_flag_type format_default = 0; + static constexpr match_flag_type format_default = 0; /** * When a regular expression match is to be replaced by a new string, the @@ -278,20 +278,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * in IEEE Std 1003.1- 2001 [IEEE, Information Technology -- Portable * Operating System Interface (POSIX), IEEE Standard 1003.1-2001]. */ - static const match_flag_type format_sed = 1 << _S_sed; + static constexpr match_flag_type format_sed = 1 << _S_sed; /** * During a search and replace operation, sections of the character * container sequence being searched that do not match the regular * expression shall not be copied to the output string. */ - static const match_flag_type format_no_copy = 1 << _S_no_copy; + static constexpr match_flag_type format_no_copy = 1 << _S_no_copy; /** * When specified during a search and replace operation, only the first * occurrence of the regular expression shall be replaced. 
*/ - static const match_flag_type format_first_only = 1 << _S_first_only; + static constexpr match_flag_type format_first_only = 1 << _S_first_only; //@} diff --git a/libstdc++-v3/include/bits/regex_cursor.h b/libstdc++-v3/include/bits/regex_cursor.h index d9d5556..c53f759 100644 --- a/libstdc++-v3/include/bits/regex_cursor.h +++ b/libstdc++-v3/include/bits/regex_cursor.h @@ -1,6 +1,6 @@ // class template regex -*- C++ -*- -// Copyright (C) 2010 Free Software Foundation, Inc. +// Copyright (C) 2010, 2011 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the @@ -82,7 +82,7 @@ namespace __regex _FwdIterT _M_e; }; - // Helper funxtion to create a cursor specialized for an iterator class. + // Helper function to create a cursor specialized for an iterator class. template inline _SpecializedCursor<_FwdIterT> __cursor(const _FwdIterT& __b, const _FwdIterT __e) diff --git a/libstdc++-v3/include/bits/regex_grep_matcher.h b/libstdc++-v3/include/bits/regex_grep_matcher.h index 04d06e5..202a36e 100644 --- a/libstdc++-v3/include/bits/regex_grep_matcher.h +++ b/libstdc++-v3/include/bits/regex_grep_matcher.h @@ -1,6 +1,6 @@ // class template regex -*- C++ -*- -// Copyright (C) 2010 Free Software Foundation, Inc. +// Copyright (C) 2010, 2011 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the @@ -40,7 +40,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION namespace __regex { - // A _Results facade specialized for wrapping a templated sub_match. + // A _Results facade specialized for wrapping a templated match_results. 
template class _SpecializedResults : public _Results @@ -68,17 +68,14 @@ namespace __regex match_results<_FwdIterT, _Alloc>& __m) : _M_results(__m) { - typedef typename match_results<_FwdIterT, _Alloc>::size_type size_type; _M_results.clear(); - std::sub_match<_FwdIterT> __sm; - __sm.matched = false; - size_type __result_count = __size + 2; - for (size_type __i = 0; __i < __result_count; ++__i) - _M_results.push_back(__sm); - _M_results.at(__size+0).first = __cursor._M_begin(); - _M_results.at(__size+0).second = __cursor._M_begin(); - _M_results.at(__size+1).first = __cursor._M_end(); - _M_results.at(__size+1).second = __cursor._M_end(); + _M_results.reserve(__size + 2); + _M_results.resize(__size); + typename match_results<_FwdIterT, _Alloc>::value_type __sm; + __sm.first = __sm.second = __cursor._M_begin(); + _M_results.push_back(__sm); + __sm.first = __sm.second = __cursor._M_end(); + _M_results.push_back(__sm); } template diff --git a/libstdc++-v3/include/bits/regex_nfa.h b/libstdc++-v3/include/bits/regex_nfa.h index 8aed9b4..17982eb 100644 --- a/libstdc++-v3/include/bits/regex_nfa.h +++ b/libstdc++-v3/include/bits/regex_nfa.h @@ -55,7 +55,7 @@ namespace __regex #endif }; - // Generic shred pointer to an automaton. + // Generic shared pointer to an automaton. typedef std::shared_ptr<_Automaton> _AutomatonPtr; // Operation codes that define the type of transitions within the base NFA diff --git a/libstdc++-v3/testsuite/28_regex/basic_regex/ctors/basic/string_range_01_02_03.cc b/libstdc++-v3/testsuite/28_regex/basic_regex/ctors/basic/string_range_01_02_03.cc index 471c89c..70ae2fa 100644 --- a/libstdc++-v3/testsuite/28_regex/basic_regex/ctors/basic/string_range_01_02_03.cc +++ b/libstdc++-v3/testsuite/28_regex/basic_regex/ctors/basic/string_range_01_02_03.cc @@ -1,10 +1,9 @@ -// { dg-options "-std=c++0x" } -// { dg-do run { xfail *-*-* } } +// { dg-options "-std=gnu++0x" } // // 2010-06-16 Stephen M. Webb // -// Copyright (C) 2010 Free Software Foundation, Inc. 
+// Copyright (C) 2010, 2011 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the @@ -38,7 +37,7 @@ test01() } catch (std::regex_error& ex) { - VERIFY( ex.code() == std::regex_constants::error_badbrace ); + VERIFY( ex.code() == std::regex_constants::error_brace ); } } diff --git a/libstdc++-v3/testsuite/28_regex/basic_regex/ctors/extended/string_range_01_02_03.cc b/libstdc++-v3/testsuite/28_regex/basic_regex/ctors/extended/string_range_01_02_03.cc index 036321f..a6dddba 100644 --- a/libstdc++-v3/testsuite/28_regex/basic_regex/ctors/extended/string_range_01_02_03.cc +++ b/libstdc++-v3/testsuite/28_regex/basic_regex/ctors/extended/string_range_01_02_03.cc @@ -1,10 +1,9 @@ -// { dg-options "-std=c++0x" } -// { dg-do run { xfail *-*-* } } +// { dg-options "-std=gnu++0x" } // // 2010-06-16 Stephen M. Webb // -// Copyright (C) 2010 Free Software Foundation, Inc. +// Copyright (C) 2010, 2011 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the @@ -38,7 +37,7 @@ test01() } catch (std::regex_error& ex) { - VERIFY( ex.code() == std::regex_constants::error_badbrace ); + VERIFY( ex.code() == std::regex_constants::error_brace ); } } -- 2.7.4