# Add regexp engine sources. ICU is used by default.
if (${USE_RE2} STREQUAL "ON")
+ # Add a flag to select the right regexp factory implementation used by
+ # regexp_factory.h and regexp_adapter_test.cc.
+ add_definitions (-DUSE_RE2)
list (APPEND SOURCES "src/phonenumbers/regexp_adapter_re2.cc")
else ()
list (APPEND SOURCES "src/phonenumbers/regexp_adapter_icu.cc")
#include "phonenumbers/phonenumber.pb.h"
#include "phonenumbers/regexp_adapter.h"
#include "phonenumbers/regexp_cache.h"
+#include "phonenumbers/regexp_factory.h"
#include "phonenumbers/region_code.h"
#include "phonenumbers/stl_util.h"
#include "phonenumbers/stringutil.h"
namespace {
scoped_ptr<Logger> logger_;
+scoped_ptr<const AbstractRegExpFactory> regexp_factory;
scoped_ptr<RegExpCache> regexp_cache;
// These objects are created in the function InitializeStaticMapsAndSets.
int size = it->leading_digits_pattern_size();
if (size > 0) {
const scoped_ptr<RegExpInput> number_copy(
- RegExpInput::Create(number_for_leading_digits_match));
+ regexp_factory->CreateInput(number_for_leading_digits_match));
// We always use the last leading_digits_pattern, as it is the most
// detailed.
if (!regexp_cache->GetRegExp(it->leading_digits_pattern(size - 1))
void InitializeStaticMapsAndSets() {
// Create global objects.
- regexp_cache.reset(new RegExpCache(128));
+ regexp_factory.reset(new RegExpFactory());
+ regexp_cache.reset(new RegExpCache(*regexp_factory.get(), 128));
all_plus_number_grouping_symbols.reset(new map<char32, char>);
alpha_mappings.reset(new map<char32, char>);
alpha_phone_mappings.reset(new map<char32, char>);
number->assign(normalized_number);
}
-// Strips the IDD from the start of the number if present. Helper function used
-// by MaybeStripInternationalPrefixAndNormalize.
-bool ParsePrefixAsIdd(const RegExp& idd_pattern, string* number) {
- DCHECK(number);
- const scoped_ptr<RegExpInput> number_copy(RegExpInput::Create(*number));
- // First attempt to strip the idd_pattern at the start, if present. We make a
- // copy so that we can revert to the original string if necessary.
- if (idd_pattern.Consume(number_copy.get())) {
- // Only strip this if the first digit after the match is not a 0, since
- // country calling codes cannot begin with 0.
- string extracted_digit;
- if (capturing_digit_pattern->PartialMatch(number_copy->ToString(),
- &extracted_digit)) {
- PhoneNumberUtil::NormalizeDigitsOnly(&extracted_digit);
- if (extracted_digit == "0") {
- return false;
- }
- }
- number->assign(number_copy->ToString());
- return true;
- }
- return false;
-}
-
PhoneNumberUtil::ValidationResult TestNumberLengthAgainstPattern(
const RegExp& number_pattern, const string& number) {
string extracted_number;
#endif
void PhoneNumberUtil::CreateRegularExpressions() const {
- unique_international_prefix.reset(RegExp::Create(
+ unique_international_prefix.reset(regexp_factory->CreateRegExp(
/* "[\\d]+(?:[~⁓∼～][\\d]+)?" */
"[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E][\\d]+)?"));
// The first_group_capturing_pattern was originally set to $1 but there are
// some countries for which the first group is not used in the national
// pattern (e.g. Argentina) so the $1 group does not match correctly.
// Therefore, we use \d, so that the first group actually used in the pattern
// will be matched.
- first_group_capturing_pattern.reset(RegExp::Create("(\\$\\d)"));
- carrier_code_pattern.reset(RegExp::Create("\\$CC"));
- digits_pattern.reset(RegExp::Create(StrCat("[", kDigits, "]*")));
- capturing_digit_pattern.reset(RegExp::Create(StrCat("([", kDigits, "])")));
- capturing_ascii_digits_pattern.reset(RegExp::Create("(\\d+)"));
+ first_group_capturing_pattern.reset(regexp_factory->CreateRegExp("(\\$\\d)"));
+ carrier_code_pattern.reset(regexp_factory->CreateRegExp("\\$CC"));
+ digits_pattern.reset(
+ regexp_factory->CreateRegExp(StrCat("[", kDigits, "]*")));
+ capturing_digit_pattern.reset(
+ regexp_factory->CreateRegExp(StrCat("([", kDigits, "])")));
+ capturing_ascii_digits_pattern.reset(regexp_factory->CreateRegExp("(\\d+)"));
valid_start_char.reset(new string(StrCat("[", kPlusChars, kDigits, "]")));
- valid_start_char_pattern.reset(RegExp::Create(*valid_start_char));
- capture_up_to_second_number_start_pattern.reset(RegExp::Create(
+ valid_start_char_pattern.reset(
+ regexp_factory->CreateRegExp(*valid_start_char));
+ capture_up_to_second_number_start_pattern.reset(regexp_factory->CreateRegExp(
kCaptureUpToSecondNumberStart));
- unwanted_end_char_pattern.reset(RegExp::Create(kUnwantedEndChar));
- separator_pattern.reset(RegExp::Create(StrCat("[", kValidPunctuation, "]+")));
+ unwanted_end_char_pattern.reset(
+ regexp_factory->CreateRegExp(kUnwantedEndChar));
+ separator_pattern.reset(
+ regexp_factory->CreateRegExp(StrCat("[", kValidPunctuation, "]+")));
valid_phone_number.reset(new string(
StrCat("[", kPlusChars, "]*(?:[", kValidPunctuation, "]*[", kDigits,
"]){3,}[", kValidAlpha, kValidPunctuation, kDigits, "]*")));
"[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*", capturing_extn_digits,
"#?|[- ]+([", kDigits, "]{1,5})#")));
- extn_pattern.reset(RegExp::Create(
+ extn_pattern.reset(regexp_factory->CreateRegExp(
StrCat("(?i)(?:", *known_extn_patterns, ")$")));
- valid_phone_number_pattern.reset(RegExp::Create(
+ valid_phone_number_pattern.reset(regexp_factory->CreateRegExp(
StrCat("(?i)", *valid_phone_number, "(?:", *known_extn_patterns, ")?")));
- valid_alpha_phone_pattern.reset(RegExp::Create(
+ valid_alpha_phone_pattern.reset(regexp_factory->CreateRegExp(
StrCat("(?i)(?:.*?[", kValidAlpha, "]){3}")));
- plus_chars_pattern.reset(RegExp::Create(StrCat("[", kPlusChars, "]+")));
+ plus_chars_pattern.reset(
+ regexp_factory->CreateRegExp(StrCat("[", kPlusChars, "]+")));
}
const string& PhoneNumberUtil::GetExtnPatterns() const {
const PhoneMetadata* metadata = GetMetadataForRegion(*it);
if (metadata->has_leading_digits()) {
const scoped_ptr<RegExpInput> number(
- RegExpInput::Create(national_number));
+ regexp_factory->CreateInput(national_number));
if (regexp_cache->GetRegExp(metadata->leading_digits()).Consume(
number.get())) {
*region_code = *it;
const string& number_to_parse,
const string& default_region) const {
if (!IsValidRegionCode(default_region) && !number_to_parse.empty()) {
- const scoped_ptr<RegExpInput> number(RegExpInput::Create(number_to_parse));
+ const scoped_ptr<RegExpInput> number(
+ regexp_factory->CreateInput(number_to_parse));
if (!plus_chars_pattern->Consume(number.get())) {
return false;
}
string formatted_number;
Format(copied_proto, INTERNATIONAL, &formatted_number);
const scoped_ptr<RegExpInput> i18n_number(
- RegExpInput::Create(formatted_number));
+ regexp_factory->CreateInput(formatted_number));
string digit_group;
string ndc;
string third_group;
return ndc.size();
}
-// static
-void PhoneNumberUtil::NormalizeDigitsOnly(string* number) {
+void PhoneNumberUtil::NormalizeDigitsOnly(string* number) const {
DCHECK(number);
- static const scoped_ptr<const RegExp> non_digits_pattern(RegExp::Create(
- StrCat("[^", kDigits, "]")));
+ const RegExp& non_digits_pattern = regexp_cache->GetRegExp(
+ StrCat("[^", kDigits, "]"));
// Delete everything that isn't valid digits.
- non_digits_pattern->GlobalReplace(number, "");
+ non_digits_pattern.GlobalReplace(number, "");
// Normalize all decimal digits to ASCII digits.
number->assign(NormalizeUTF8::NormalizeDecimalDigits(*number));
}
return valid_phone_number_pattern->FullMatch(number);
}
+// Removes a leading international dialling prefix (IDD) from |number| when
+// |idd_pattern| can be consumed from it. Returns true and rewrites |number|
+// with the remainder on success; returns false and leaves |number| untouched
+// when the pattern is not consumed or when the first digit found after it
+// normalizes to "0". Helper function used by
+// MaybeStripInternationalPrefixAndNormalize.
+bool PhoneNumberUtil::ParsePrefixAsIdd(const RegExp& idd_pattern,
+                                       string* number) const {
+  DCHECK(number);
+  // The pattern is consumed from a scratch input so that the caller's string
+  // is only modified once the prefix has been accepted.
+  const scoped_ptr<RegExpInput> remainder(
+      regexp_factory->CreateInput(*number));
+  if (!idd_pattern.Consume(remainder.get())) {
+    return false;
+  }
+  // Country calling codes cannot begin with 0, so a prefix that is followed
+  // by a zero is not treated as an IDD.
+  string leading_digit;
+  const bool has_digit = capturing_digit_pattern->PartialMatch(
+      remainder->ToString(), &leading_digit);
+  if (has_digit) {
+    NormalizeDigitsOnly(&leading_digit);
+    if (leading_digit == "0") {
+      return false;
+    }
+  }
+  number->assign(remainder->ToString());
+  return true;
+}
+
// Strips any international prefix (such as +, 00, 011) present in the number
// provided, normalizes the resulting number, and indicates if an international
// prefix was present.
return PhoneNumber::FROM_DEFAULT_COUNTRY;
}
const scoped_ptr<RegExpInput> number_string_piece(
- RegExpInput::Create(*number));
+ regexp_factory->CreateInput(*number));
if (plus_chars_pattern->Consume(number_string_piece.get())) {
number->assign(number_string_piece->ToString());
// Can now normalize the rest of the number since we've consumed the "+"
}
// We use two copies here since Consume modifies the phone number, and if the
// first if-clause fails the number will already be changed.
- const scoped_ptr<RegExpInput> number_copy(RegExpInput::Create(*number));
+ const scoped_ptr<RegExpInput> number_copy(
+ regexp_factory->CreateInput(*number));
const scoped_ptr<RegExpInput> number_copy_without_transform(
- RegExpInput::Create(*number));
+ regexp_factory->CreateInput(*number));
string number_string_copy(*number);
string captured_part_of_prefix;
const RegExp& national_number_rule = regexp_cache->GetRegExp(
string possible_extension_three;
string number_copy(*number);
const scoped_ptr<RegExpInput> number_copy_as_regexp_input(
- RegExpInput::Create(number_copy));
+ regexp_factory->CreateInput(number_copy));
if (extn_pattern->Consume(number_copy_as_regexp_input.get(),
false,
&possible_extension_one,
class PhoneMetadata;
class PhoneMetadataCollection;
class PhoneNumber;
+class RegExp;
// NOTE: A lot of methods in this class require Region Code strings. These must
// be provided using ISO 3166-1 two-letter country-code format. The list of the
// Normalizes a string of characters representing a phone number. This
// converts wide-ascii and arabic-indic numerals to European numerals, and
// strips punctuation and alpha characters.
- static void NormalizeDigitsOnly(string* number);
+ void NormalizeDigitsOnly(string* number) const;
// Gets the national significant number of a phone number. Note a national
// significant number doesn't contain a national prefix or any formatting.
const list<string>& region_codes,
string* region_code) const;
+ // Strips the IDD from the start of the number if present. Helper function
+ // used by MaybeStripInternationalPrefixAndNormalize.
+ bool ParsePrefixAsIdd(const RegExp& idd_pattern, string* number) const;
+
void Normalize(string* number) const;
PhoneNumber::CountryCodeSource MaybeStripInternationalPrefixAndNormalize(
const string& possible_idd_prefix,
// Author: George Yakovlev
// Philippe Liard
//
-// Regexp adapter to allow a pluggable regexp engine. It has been introduced
+// RegExp adapter to allow a pluggable regexp engine. It has been introduced
// during the integration of the open-source version of this library into
// Chromium to be able to use the ICU Regex engine instead of RE2, which is not
// officially supported on Windows.
public:
virtual ~RegExpInput() {}
- // Creates a new instance of the default RegExpInput implementation. The
- // deletion of the returned instance is under the responsibility of the
- // caller.
- static RegExpInput* Create(const string& utf8_input);
-
// Converts to a C++ string.
virtual string ToString() const = 0;
};
public:
virtual ~RegExp() {}
- // Creates a new instance of the default RegExp implementation. The deletion
- // of the returned instance is under the responsibility of the caller.
- static RegExp* Create(const string& utf8_regexp);
-
// Matches string to regular expression, returns true if expression was
// matched, false otherwise, advances position in the match.
// input_string - string to be searched.
}
};
+// Abstract factory class that lets its subclasses instantiate the classes
+// implementing RegExp and RegExpInput. Both factory methods are pure virtual
+// and const, so a factory can be used through a const reference or pointer.
+class AbstractRegExpFactory {
+ public:
+ // Virtual so that a concrete factory can be deleted through a pointer to
+ // this interface.
+ virtual ~AbstractRegExpFactory() {}
+
+ // Creates a new instance of RegExpInput from |utf8_input|. The deletion of
+ // the returned instance is under the responsibility of the caller.
+ virtual RegExpInput* CreateInput(const string& utf8_input) const = 0;
+
+ // Creates a new instance of RegExp from the pattern |utf8_regexp|. The
+ // deletion of the returned instance is under the responsibility of the
+ // caller.
+ virtual RegExp* CreateRegExp(const string& utf8_regexp) const = 0;
+};
+
} // namespace phonenumbers
} // namespace i18n
// Author: George Yakovlev
// Philippe Liard
-#include "phonenumbers/regexp_adapter.h"
+#include "phonenumbers/regexp_adapter_icu.h"
#include <string>
DISALLOW_COPY_AND_ASSIGN(IcuRegExp);
};
-RegExpInput* RegExpInput::Create(const string& utf8_input) {
+RegExpInput* ICURegExpFactory::CreateInput(const string& utf8_input) const {
return new IcuRegExpInput(utf8_input);
}
-RegExp* RegExp::Create(const string& utf8_regexp) {
+RegExp* ICURegExpFactory::CreateRegExp(const string& utf8_regexp) const {
return new IcuRegExp(utf8_regexp);
}
--- /dev/null
+// Copyright (C) 2011 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: Philippe Liard
+
+#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_ICU_H_
+#define I18N_PHONENUMBERS_REGEXP_ADAPTER_ICU_H_
+
+#include "phonenumbers/regexp_adapter.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+// ICU regexp factory that lets the user instantiate the underlying
+// implementation of RegExp and RegExpInput classes based on the ICU regexp
+// engine. As stated on AbstractRegExpFactory, the caller is responsible for
+// deleting every object returned by the two factory methods.
+class ICURegExpFactory : public AbstractRegExpFactory {
+ public:
+ virtual ~ICURegExpFactory() {}
+
+ // Returns a newly allocated ICU-backed RegExpInput built from |utf8_input|.
+ virtual RegExpInput* CreateInput(const string& utf8_input) const;
+ // Returns a newly allocated ICU-backed RegExp built from |utf8_regexp|.
+ virtual RegExp* CreateRegExp(const string& utf8_regexp) const;
+};
+
+} // namespace phonenumbers
+} // namespace i18n
+
+#endif // I18N_PHONENUMBERS_REGEXP_ADAPTER_ICU_H_
// Author: George Yakovlev
// Philippe Liard
-#include "phonenumbers/regexp_adapter.h"
+#include "phonenumbers/regexp_adapter_re2.h"
#include <cstddef>
#include <string>
RE2 utf8_regexp_;
};
-// Implementation of the adapter static factory methods.
-// RE2 RegExp engine is the default implementation.
-RegExpInput* RegExpInput::Create(const string& utf8_input) {
+RegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const {
return new RE2RegExpInput(utf8_input);
}
-RegExp* RegExp::Create(const string& utf8_regexp) {
+RegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const {
return new RE2RegExp(utf8_regexp);
}
--- /dev/null
+// Copyright (C) 2011 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: Philippe Liard
+
+#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_RE2_H_
+#define I18N_PHONENUMBERS_REGEXP_ADAPTER_RE2_H_
+
+#include "phonenumbers/regexp_adapter.h"
+
+namespace i18n {
+namespace phonenumbers {
+
+// RE2 regexp factory that lets the user instantiate the underlying
+// implementation of RegExp and RegExpInput classes based on RE2. As stated on
+// AbstractRegExpFactory, the caller is responsible for deleting every object
+// returned by the two factory methods.
+class RE2RegExpFactory : public AbstractRegExpFactory {
+ public:
+ virtual ~RE2RegExpFactory() {}
+
+ // Returns a newly allocated RE2-backed RegExpInput built from |utf8_input|.
+ virtual RegExpInput* CreateInput(const string& utf8_input) const;
+ // Returns a newly allocated RE2-backed RegExp built from |utf8_regexp|.
+ virtual RegExp* CreateRegExp(const string& utf8_regexp) const;
+};
+
+} // namespace phonenumbers
+} // namespace i18n
+
+#endif // I18N_PHONENUMBERS_REGEXP_ADAPTER_RE2_H_
#include "phonenumbers/regexp_adapter.h"
#include <string>
+#include <vector>
#include <gtest/gtest.h>
+#include "base/basictypes.h"
#include "base/scoped_ptr.h"
+#include "stl_util.h"
+#include "stringutil.h"
+
+#ifdef USE_RE2
+#include "phonenumbers/regexp_adapter_re2.h"
+#else
+#include "phonenumbers/regexp_adapter_icu.h"
+#endif // USE_RE2
namespace i18n {
namespace phonenumbers {
using std::string;
+using std::vector;
+
+// Structure that contains the attributes used to test an implementation of the
+// regexp adapter: a display name, the factory under test and a few regexps
+// pre-built with that factory.
+struct RegExpTestContext {
+ // Takes ownership of |factory| (it is stored in a scoped_ptr) and uses it to
+ // create the regexps shared by the tests.
+ explicit RegExpTestContext(const string& name,
+ const AbstractRegExpFactory* factory)
+ : name(name),
+ factory(factory),
+ digits(factory->CreateRegExp("\\d+")),
+ parentheses_digits(factory->CreateRegExp("\\((\\d+)\\)")),
+ single_digit(factory->CreateRegExp("\\d")),
+ two_digit_groups(factory->CreateRegExp("(\\d+)-(\\d+)")) {}
+
+ // Name of the implementation, reported in the test failure messages.
+ const string name;
+ const scoped_ptr<const AbstractRegExpFactory> factory;
+ // One or more digits.
+ const scoped_ptr<const RegExp> digits;
+ // One or more digits enclosed in parentheses; the digits are captured.
+ const scoped_ptr<const RegExp> parentheses_digits;
+ // A single digit.
+ const scoped_ptr<const RegExp> single_digit;
+ // Two runs of digits separated by '-'; each run is captured.
+ const scoped_ptr<const RegExp> two_digit_groups;
+};
class RegExpAdapterTest : public testing::Test {
protected:
- RegExpAdapterTest()
- : digits_(RegExp::Create("\\d+")),
- parentheses_digits_(RegExp::Create("\\((\\d+)\\)")),
- single_digit_(RegExp::Create("\\d")),
- two_digit_groups_(RegExp::Create("(\\d+)-(\\d+)")) {}
-
- const scoped_ptr<const RegExp> digits_;
- const scoped_ptr<const RegExp> parentheses_digits_;
- const scoped_ptr<const RegExp> single_digit_;
- const scoped_ptr<const RegExp> two_digit_groups_;
+ RegExpAdapterTest() {
+#ifdef USE_RE2
+ contexts_.push_back(
+ new RegExpTestContext("RE2", new RE2RegExpFactory()));
+#else
+ contexts_.push_back(
+ new RegExpTestContext("ICU Regex", new ICURegExpFactory()));
+#endif // USE_RE2
+ }
+
+ ~RegExpAdapterTest() {
+ STLDeleteElements(&contexts_);
+ }
+
+ static string ErrorMessage(const RegExpTestContext& context) {
+ return StrCat("Test failed with ", context.name, " implementation.");
+ }
+
+ typedef vector<const RegExpTestContext*>::const_iterator TestContextIterator;
+ vector<const RegExpTestContext*> contexts_;
};
TEST_F(RegExpAdapterTest, TestConsumeNoMatch) {
- const scoped_ptr<RegExpInput> input(RegExpInput::Create("+1-123-456-789"));
-
- // When 'true' is passed to Consume(), the match occurs from the beginning of
- // the input.
- ASSERT_FALSE(digits_->Consume(input.get(), true, NULL, NULL, NULL));
- ASSERT_EQ("+1-123-456-789", input->ToString());
-
- string res1;
- ASSERT_FALSE(parentheses_digits_->Consume(
- input.get(), true, &res1, NULL, NULL));
- ASSERT_EQ("+1-123-456-789", input->ToString());
- ASSERT_EQ("", res1);
+ for (vector<const RegExpTestContext*>::const_iterator it = contexts_.begin();
+ it != contexts_.end();
+ ++it) {
+ const RegExpTestContext& context = **it;
+ const scoped_ptr<RegExpInput> input(
+ context.factory->CreateInput("+1-123-456-789"));
+
+ // When 'true' is passed to Consume(), the match occurs from the beginning
+ // of the input.
+ ASSERT_FALSE(context.digits->Consume(input.get(), true, NULL, NULL, NULL))
+ << ErrorMessage(context);
+ ASSERT_EQ("+1-123-456-789", input->ToString()) << ErrorMessage(context);
+
+ string res1;
+ ASSERT_FALSE(context.parentheses_digits->Consume(
+ input.get(), true, &res1, NULL, NULL)) << ErrorMessage(context);
+ ASSERT_EQ("+1-123-456-789", input->ToString()) << ErrorMessage(context);
+ ASSERT_EQ("", res1) << ErrorMessage(context);
+ }
}
TEST_F(RegExpAdapterTest, TestConsumeWithNull) {
- const scoped_ptr<RegExpInput> input(RegExpInput::Create("+123"));
- const scoped_ptr<const RegExp> plus_sign(RegExp::Create("(\\+)"));
-
- ASSERT_TRUE(plus_sign->Consume(input.get(), true, NULL, NULL, NULL));
- ASSERT_EQ("123", input->ToString());
+ for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
+ ++it) {
+ const RegExpTestContext& context = **it;
+ const AbstractRegExpFactory& factory = *context.factory;
+ const scoped_ptr<RegExpInput> input(factory.CreateInput("+123"));
+ const scoped_ptr<const RegExp> plus_sign(factory.CreateRegExp("(\\+)"));
+
+ ASSERT_TRUE(plus_sign->Consume(input.get(), true, NULL, NULL, NULL))
+ << ErrorMessage(context);
+ ASSERT_EQ("123", input->ToString()) << ErrorMessage(context);
+ }
}
TEST_F(RegExpAdapterTest, TestConsumeRetainsMatches) {
- const scoped_ptr<RegExpInput> input(RegExpInput::Create("1-123-456-789"));
-
- string res1, res2;
- ASSERT_TRUE(two_digit_groups_->Consume(
- input.get(), true, &res1, &res2, NULL));
- ASSERT_EQ("-456-789", input->ToString());
- ASSERT_EQ("1", res1);
- ASSERT_EQ("123", res2);
+ for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
+ ++it) {
+ const RegExpTestContext& context = **it;
+ const scoped_ptr<RegExpInput> input(
+ context.factory->CreateInput("1-123-456-789"));
+
+ string res1, res2;
+ ASSERT_TRUE(context.two_digit_groups->Consume(
+ input.get(), true, &res1, &res2, NULL)) << ErrorMessage(context);
+ ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);
+ ASSERT_EQ("1", res1) << ErrorMessage(context);
+ ASSERT_EQ("123", res2) << ErrorMessage(context);
+ }
}
TEST_F(RegExpAdapterTest, TestFindAndConsume) {
- const scoped_ptr<RegExpInput> input(RegExpInput::Create("+1-123-456-789"));
-
- // When 'false' is passed to Consume(), the match can occur from any place in
- // the input.
- ASSERT_TRUE(digits_->Consume(input.get(), false, NULL, NULL, NULL));
- ASSERT_EQ("-123-456-789", input->ToString());
-
- ASSERT_TRUE(digits_->Consume(input.get(), false, NULL, NULL, NULL));
- ASSERT_EQ("-456-789", input->ToString());
-
- ASSERT_FALSE(parentheses_digits_->Consume(
- input.get(), false, NULL, NULL, NULL));
- ASSERT_EQ("-456-789", input->ToString());
-
- string res1, res2;
- ASSERT_TRUE(two_digit_groups_->Consume(
- input.get(), false, &res1, &res2, NULL));
- ASSERT_EQ("", input->ToString());
- ASSERT_EQ("456", res1);
- ASSERT_EQ("789", res2);
+ for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
+ ++it) {
+ const RegExpTestContext& context = **it;
+ const scoped_ptr<RegExpInput> input(
+ context.factory->CreateInput("+1-123-456-789"));
+
+ // When 'false' is passed to Consume(), the match can occur from any place
+ // in the input.
+ ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL))
+ << ErrorMessage(context);
+ ASSERT_EQ("-123-456-789", input->ToString()) << ErrorMessage(context);
+
+ ASSERT_TRUE(context.digits->Consume(input.get(), false, NULL, NULL, NULL))
+ << ErrorMessage(context);
+ ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);
+
+ ASSERT_FALSE(context.parentheses_digits->Consume(
+ input.get(), false, NULL, NULL, NULL)) << ErrorMessage(context);
+ ASSERT_EQ("-456-789", input->ToString()) << ErrorMessage(context);
+
+ string res1, res2;
+ ASSERT_TRUE(context.two_digit_groups->Consume(
+ input.get(), false, &res1, &res2, NULL)) << ErrorMessage(context);
+ ASSERT_EQ("", input->ToString()) << ErrorMessage(context);
+ ASSERT_EQ("456", res1) << ErrorMessage(context);
+ ASSERT_EQ("789", res2) << ErrorMessage(context);
+ }
}
-TEST(RegExpAdapter, TestPartialMatch) {
- const scoped_ptr<const RegExp> reg_exp(RegExp::Create("([\\da-z]+)"));
- string matched;
-
- EXPECT_TRUE(reg_exp->PartialMatch("12345af", &matched));
- EXPECT_EQ("12345af", matched);
-
- EXPECT_TRUE(reg_exp->PartialMatch("12345af", NULL));
-
- EXPECT_TRUE(reg_exp->PartialMatch("[12]", &matched));
- EXPECT_EQ("12", matched);
-
- matched.clear();
- EXPECT_FALSE(reg_exp->PartialMatch("[]", &matched));
- EXPECT_EQ("", matched);
+TEST_F(RegExpAdapterTest, TestPartialMatch) {
+ for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
+ ++it) {
+ const RegExpTestContext& context = **it;
+ const AbstractRegExpFactory& factory = *context.factory;
+ const scoped_ptr<const RegExp> reg_exp(factory.CreateRegExp("([\\da-z]+)"));
+ string matched;
+
+ EXPECT_TRUE(reg_exp->PartialMatch("12345af", &matched))
+ << ErrorMessage(context);
+ EXPECT_EQ("12345af", matched) << ErrorMessage(context);
+
+ EXPECT_TRUE(reg_exp->PartialMatch("12345af", NULL))
+ << ErrorMessage(context);
+
+ EXPECT_TRUE(reg_exp->PartialMatch("[12]", &matched))
+ << ErrorMessage(context);
+ EXPECT_EQ("12", matched) << ErrorMessage(context);
+
+ matched.clear();
+ EXPECT_FALSE(reg_exp->PartialMatch("[]", &matched))
+ << ErrorMessage(context);
+ EXPECT_EQ("", matched) << ErrorMessage(context);
+ }
}
-TEST(RegExpAdapter, TestFullMatch) {
- const scoped_ptr<const RegExp> reg_exp(RegExp::Create("([\\da-z]+)"));
- string matched;
+TEST_F(RegExpAdapterTest, TestFullMatch) {
+ for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
+ ++it) {
+ const RegExpTestContext& context = **it;
+ const AbstractRegExpFactory& factory = *context.factory;
+ const scoped_ptr<const RegExp> reg_exp(factory.CreateRegExp("([\\da-z]+)"));
+ string matched;
- EXPECT_TRUE(reg_exp->FullMatch("12345af", &matched));
- EXPECT_EQ("12345af", matched);
+ EXPECT_TRUE(reg_exp->FullMatch("12345af", &matched))
+ << ErrorMessage(context);
+ EXPECT_EQ("12345af", matched) << ErrorMessage(context);
- EXPECT_TRUE(reg_exp->FullMatch("12345af", NULL));
+ EXPECT_TRUE(reg_exp->FullMatch("12345af", NULL)) << ErrorMessage(context);
- matched.clear();
- EXPECT_FALSE(reg_exp->FullMatch("[12]", &matched));
- EXPECT_EQ("", matched);
+ matched.clear();
+ EXPECT_FALSE(reg_exp->FullMatch("[12]", &matched)) << ErrorMessage(context);
+ EXPECT_EQ("", matched) << ErrorMessage(context);
- matched.clear();
- EXPECT_FALSE(reg_exp->FullMatch("[]", &matched));
- EXPECT_EQ("", matched);
+ matched.clear();
+ EXPECT_FALSE(reg_exp->FullMatch("[]", &matched)) << ErrorMessage(context);
+ EXPECT_EQ("", matched) << ErrorMessage(context);
+ }
}
TEST_F(RegExpAdapterTest, TestReplace) {
- string input("123-4567 ");
-
- ASSERT_TRUE(single_digit_->Replace(&input, "+"));
- ASSERT_EQ("+23-4567 ", input);
-
- ASSERT_TRUE(single_digit_->Replace(&input, "+"));
- ASSERT_EQ("++3-4567 ", input);
-
- const scoped_ptr<const RegExp> single_letter(RegExp::Create("[a-z]"));
- ASSERT_FALSE(single_letter->Replace(&input, "+"));
- ASSERT_EQ("++3-4567 ", input);
+ for (vector<const RegExpTestContext*>::const_iterator it = contexts_.begin();
+ it != contexts_.end();
+ ++it) {
+ const RegExpTestContext& context = **it;
+ string input("123-4567 ");
+
+ ASSERT_TRUE(context.single_digit->Replace(&input, "+"))
+ << ErrorMessage(context);
+ ASSERT_EQ("+23-4567 ", input) << ErrorMessage(context);
+
+ ASSERT_TRUE(context.single_digit->Replace(&input, "+"))
+ << ErrorMessage(context);
+ ASSERT_EQ("++3-4567 ", input) << ErrorMessage(context);
+
+ const scoped_ptr<const RegExp> single_letter(
+ context.factory->CreateRegExp("[a-z]"));
+ ASSERT_FALSE(single_letter->Replace(&input, "+")) << ErrorMessage(context);
+ ASSERT_EQ("++3-4567 ", input) << ErrorMessage(context);
+ }
}
TEST_F(RegExpAdapterTest, TestReplaceWithGroup) {
- // Make sure referencing groups in the regexp in the replacement string works.
- // $[0-9] notation is used.
- string input = "123-4567 abc";
- ASSERT_TRUE(two_digit_groups_->Replace(&input, "$2"));
- ASSERT_EQ("4567 abc", input);
-
- input = "123-4567";
- ASSERT_TRUE(two_digit_groups_->Replace(&input, "$1"));
- ASSERT_EQ("123", input);
-
- input = "123-4567";
- ASSERT_TRUE(two_digit_groups_->Replace(&input, "$2"));
- ASSERT_EQ("4567", input);
-
- input = "123-4567";
- ASSERT_TRUE(two_digit_groups_->Replace(&input, "$1 $2"));
- ASSERT_EQ("123 4567", input);
+ for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
+ ++it) {
+ const RegExpTestContext& context = **it;
+
+ // Make sure referencing groups in the regexp in the replacement string
+ // works. $[0-9] notation is used.
+ string input = "123-4567 abc";
+ ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$2"))
+ << ErrorMessage(context);
+ ASSERT_EQ("4567 abc", input) << ErrorMessage(context);
+
+ input = "123-4567";
+ ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$1"))
+ << ErrorMessage(context);
+ ASSERT_EQ("123", input) << ErrorMessage(context);
+
+ input = "123-4567";
+ ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$2"))
+ << ErrorMessage(context);
+ ASSERT_EQ("4567", input) << ErrorMessage(context);
+
+ input = "123-4567";
+ ASSERT_TRUE(context.two_digit_groups->Replace(&input, "$1 $2"))
+ << ErrorMessage(context);
+ ASSERT_EQ("123 4567", input) << ErrorMessage(context);
+ }
}
TEST_F(RegExpAdapterTest, TestReplaceWithDollarSign) {
- // Make sure '$' can be used in the replacement string when escaped.
- string input = "123-4567";
- ASSERT_TRUE(two_digit_groups_->Replace(&input, "\\$1 \\$2"));
- ASSERT_EQ("$1 $2", input);
+ for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
+ ++it) {
+ const RegExpTestContext& context = **it;
+
+ // Make sure '$' can be used in the replacement string when escaped.
+ string input = "123-4567";
+ ASSERT_TRUE(context.two_digit_groups->Replace(&input, "\\$1 \\$2"))
+ << ErrorMessage(context);
+
+ ASSERT_EQ("$1 $2", input) << ErrorMessage(context);
+ }
}
TEST_F(RegExpAdapterTest, TestGlobalReplace) {
- string input("123-4567 ");
+ for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
+ ++it) {
+ const RegExpTestContext& context = **it;
+
+ string input("123-4567 ");
- ASSERT_TRUE(single_digit_->GlobalReplace(&input, "*"));
- ASSERT_EQ("***-**** ", input);
+ ASSERT_TRUE(context.single_digit->GlobalReplace(&input, "*"))
+ << ErrorMessage(context);
+ ASSERT_EQ("***-**** ", input) << ErrorMessage(context);
- ASSERT_FALSE(single_digit_->GlobalReplace(&input, "*"));
- ASSERT_EQ("***-**** ", input);
+ ASSERT_FALSE(context.single_digit->GlobalReplace(&input, "*"))
+ << ErrorMessage(context);
+ ASSERT_EQ("***-**** ", input) << ErrorMessage(context);
+ }
}
-TEST(RegExpAdapter, TestUtf8) {
- const scoped_ptr<const RegExp> reg_exp(RegExp::Create(
- "\xE2\x84\xA1\xE2\x8A\x8F([\xCE\xB1-\xCF\x89]*)\xE2\x8A\x90"
- /* "℡⊏([α-ω]*)⊐" */));
- string matched;
-
- EXPECT_FALSE(reg_exp->Match(
- "\xE2\x84\xA1\xE2\x8A\x8F" "123\xE2\x8A\x90" /* "℡⊏123⊐" */, true,
- &matched));
- EXPECT_TRUE(reg_exp->Match(
- "\xE2\x84\xA1\xE2\x8A\x8F\xCE\xB1\xCE\xB2\xE2\x8A\x90"
- /* "℡⊏αβ⊐" */, true, &matched));
- EXPECT_EQ("\xCE\xB1\xCE\xB2" /* "αβ" */, matched);
+TEST_F(RegExpAdapterTest, TestUtf8) {
+ for (TestContextIterator it = contexts_.begin(); it != contexts_.end();
+ ++it) {
+ const RegExpTestContext& context = **it;
+ const AbstractRegExpFactory& factory = *context.factory;
+
+ const scoped_ptr<const RegExp> reg_exp(factory.CreateRegExp(
+ "\xE2\x84\xA1\xE2\x8A\x8F([\xCE\xB1-\xCF\x89]*)\xE2\x8A\x90"
+ /* "℡⊏([α-ω]*)⊐" */));
+ string matched;
+
+ EXPECT_FALSE(reg_exp->Match(
+ "\xE2\x84\xA1\xE2\x8A\x8F" "123\xE2\x8A\x90" /* "℡⊏123⊐" */, true,
+ &matched)) << ErrorMessage(context);
+ EXPECT_TRUE(reg_exp->Match(
+ "\xE2\x84\xA1\xE2\x8A\x8F\xCE\xB1\xCE\xB2\xE2\x8A\x90"
+ /* "℡⊏αβ⊐" */, true, &matched)) << ErrorMessage(context);
+
+ EXPECT_EQ("\xCE\xB1\xCE\xB2" /* "αβ" */, matched) << ErrorMessage(context);
+ }
}
} // namespace phonenumbers
using base::AutoLock;
-RegExpCache::RegExpCache(size_t min_items)
+RegExpCache::RegExpCache(const AbstractRegExpFactory& regexp_factory,
+ size_t min_items)
+ : regexp_factory_(regexp_factory),
#ifdef USE_TR1_UNORDERED_MAP
- : cache_impl_(new CacheImpl(min_items))
+ cache_impl_(new CacheImpl(min_items))
#else
- : cache_impl_(new CacheImpl())
+ cache_impl_(new CacheImpl())
#endif
{}
CacheImpl::const_iterator it = cache_impl_->find(pattern);
if (it != cache_impl_->end()) return *it->second;
- const RegExp* regexp = RegExp::Create(pattern);
+ const RegExp* regexp = regexp_factory_.CreateRegExp(pattern);
cache_impl_->insert(make_pair(pattern, regexp));
return *regexp;
}
using std::string;
+class AbstractRegExpFactory;
class RegExp;
class RegExpCache {
#endif
public:
- explicit RegExpCache(size_t min_items);
+ explicit RegExpCache(const AbstractRegExpFactory& regexp_factory,
+ size_t min_items);
~RegExpCache();
const RegExp& GetRegExp(const string& pattern);
private:
+ const AbstractRegExpFactory& regexp_factory_;
base::Lock lock_; // protects cache_impl_
scoped_ptr<CacheImpl> cache_impl_; // protected by lock_
friend class RegExpCacheTest_CacheConstructor_Test;
#include <gtest/gtest.h>
-#include "phonenumbers/regexp_adapter.h"
#include "phonenumbers/regexp_cache.h"
+#include "phonenumbers/regexp_factory.h"
namespace i18n {
namespace phonenumbers {
protected:
static const size_t min_items_ = 2;
- RegExpCacheTest() : cache_(min_items_) {}
+ RegExpCacheTest() : cache_(regexp_factory_, min_items_) {}
virtual ~RegExpCacheTest() {}
+ RegExpFactory regexp_factory_;
RegExpCache cache_;
};
--- /dev/null
+// Copyright (C) 2011 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: Philippe Liard
+
+#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_FACTORY_H_
+#define I18N_PHONENUMBERS_REGEXP_ADAPTER_FACTORY_H_
+
+// This file selects the right implementation of the abstract regexp factory at
+// compile time depending on the compilation flags (USE_RE2). The default
+// abstract regexp factory implementation can be obtained using the type
+// RegExpFactory. This will be set to RE2RegExpFactory if RE2 is used or
+// ICURegExpFactory otherwise.
+
+#ifdef USE_RE2
+#include "phonenumbers/regexp_adapter_re2.h"
+#else
+#include "phonenumbers/regexp_adapter_icu.h"
+#endif // USE_RE2
+
+namespace i18n {
+namespace phonenumbers {
+
+#ifdef USE_RE2
+typedef RE2RegExpFactory RegExpFactory;
+#else
+typedef ICURegExpFactory RegExpFactory;
+#endif // USE_RE2
+
+} // namespace phonenumbers
+} // namespace i18n
+
+#endif // I18N_PHONENUMBERS_REGEXP_ADAPTER_FACTORY_H_
}
}
+// Deletes the pointers contained in the provided container and then empties
+// it, so that no dangling pointers are left behind for a later access or a
+// second call. A NULL container is ignored.
+template <typename T>
+void STLDeleteElements(T* container) {
+  if (!container) {
+    return;
+  }
+  for (typename T::iterator it = container->begin(); it != container->end();
+       ++it) {
+    delete *it;
+  }
+  // Drop the now-invalid pointers so that they can never be deleted twice.
+  container->clear();
+}
+
} // namespace phonenumbers
} // namespace i18n