From 699157fe9bc719fa3df151a6ef324e4a295e5783 Mon Sep 17 00:00:00 2001 From: "dbeaumont@google.com" Date: Tue, 24 Jul 2012 14:09:14 +0000 Subject: [PATCH] CPP: Ported alternate phone number format support. git-svn-id: http://libphonenumber.googlecode.com/svn/trunk@509 ee073f10-1060-11df-b6a4-87a95322a99c --- cpp/src/phonenumbers/phonenumbermatcher.cc | 83 ++++++++++++++++++++++++ cpp/src/phonenumbers/phonenumbermatcher.h | 5 ++ cpp/test/phonenumbers/phonenumbermatcher_test.cc | 21 +++++- 3 files changed, 106 insertions(+), 3 deletions(-) diff --git a/cpp/src/phonenumbers/phonenumbermatcher.cc b/cpp/src/phonenumbers/phonenumbermatcher.cc index 3e1adbe..d0c3ad0 100644 --- a/cpp/src/phonenumbers/phonenumbermatcher.cc +++ b/cpp/src/phonenumbers/phonenumbermatcher.cc @@ -25,6 +25,8 @@ #endif // USE_ICU_REGEXP #include +#include +#include #include #include #include @@ -35,6 +37,7 @@ #include "base/logging.h" #include "base/memory/scoped_ptr.h" #include "base/memory/singleton.h" +#include "phonenumbers/alternate_format.h" #include "phonenumbers/callback.h" #include "phonenumbers/default_logger.h" #include "phonenumbers/encoding_utils.h" @@ -51,6 +54,10 @@ #include "phonenumbers/regexp_adapter_re2.h" #endif // USE_RE2_AND_ICU +using std::cerr; +using std::endl; +using std::make_pair; +using std::map; using std::numeric_limits; using std::string; using std::vector; @@ -149,6 +156,15 @@ bool AllNumberGroupsRemainGrouped( return normalized_candidate.substr(from_index) .find(phone_number.extension()) != string::npos; } + +bool LoadAlternateFormats(PhoneMetadataCollection* alternate_formats) { + if (!alternate_formats->ParseFromArray(alternate_format_get(), + alternate_format_size())) { + cerr << "Could not parse binary data." << endl; + return false; + } + return true; +} } // namespace #ifdef USE_GOOGLE_BASE @@ -298,12 +314,61 @@ class PhoneNumberMatcherRegExps : public Singleton { DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcherRegExps); }; +#ifdef USE_GOOGLE_BASE +class AlternateFormats { + friend struct DefaultSingletonTraits; +#else +class AlternateFormats : public Singleton { + friend class Singleton; +#endif // USE_GOOGLE_BASE + public: + PhoneMetadataCollection format_data_; + + map calling_code_to_alternate_formats_map_; + +#ifdef USE_GOOGLE_BASE + static AlternateFormats* GetInstance() { + return Singleton::get(); + } +#endif // USE_GOOGLE_BASE + + AlternateFormats() + : format_data_(), + calling_code_to_alternate_formats_map_() { + if (!LoadAlternateFormats(&format_data_)) { + LOG(DFATAL) << "Could not parse compiled-in metadata."; + return; + } + for (RepeatedPtrField::const_iterator it = + format_data_.metadata().begin(); + it != format_data_.metadata().end(); + ++it) { + calling_code_to_alternate_formats_map_.insert( + make_pair(it->country_code(), &*it)); + } + } + + const PhoneMetadata* GetAlternateFormatsForCountry(int country_calling_code) + const { + map::const_iterator it = + calling_code_to_alternate_formats_map_.find(country_calling_code); + if (it != calling_code_to_alternate_formats_map_.end()) { + return it->second; + } + return NULL; + } + + private: + DISALLOW_COPY_AND_ASSIGN(AlternateFormats); +}; + PhoneNumberMatcher::PhoneNumberMatcher(const PhoneNumberUtil& util, const string& text, const string& region_code, PhoneNumberMatcher::Leniency leniency, int max_tries) : reg_exps_(PhoneNumberMatcherRegExps::GetInstance()), + alternate_formats_(AlternateFormats::GetInstance()), phone_util_(util), text_(text), preferred_region_(region_code), @@ -317,6 +382,7 @@ PhoneNumberMatcher::PhoneNumberMatcher(const PhoneNumberUtil& util, PhoneNumberMatcher::PhoneNumberMatcher(const string& text, const string& region_code) : reg_exps_(PhoneNumberMatcherRegExps::GetInstance()), + alternate_formats_(NULL), // Not used. phone_util_(*PhoneNumberUtil::GetInstance()), text_(text), preferred_region_(region_code), @@ -613,6 +679,23 @@ bool PhoneNumberMatcher::CheckNumberGroupingIsValid( formatted_number_groups)) { return true; } + // If this didn't pass, see if there are any alternate formats, and try them + // instead. + const PhoneMetadata* alternate_formats = + alternate_formats_->GetAlternateFormatsForCountry( + phone_number.country_code()); + if (alternate_formats) { + for (RepeatedPtrField::const_iterator it = + alternate_formats->number_format().begin(); + it != alternate_formats->number_format().end(); ++it) { + formatted_number_groups.clear(); + GetNationalNumberGroups(phone_number, &*it, &formatted_number_groups); + if (checker->Run(phone_util_, phone_number, normalized_candidate, + formatted_number_groups)) { + return true; + } + } + } return false; } diff --git a/cpp/src/phonenumbers/phonenumbermatcher.h b/cpp/src/phonenumbers/phonenumbermatcher.h index efa5673..ad061ae 100644 --- a/cpp/src/phonenumbers/phonenumbermatcher.h +++ b/cpp/src/phonenumbers/phonenumbermatcher.h @@ -39,6 +39,7 @@ template using std::string; using std::vector; +class AlternateFormats; class NumberFormat; class PhoneNumber; class PhoneNumberMatch; @@ -156,6 +157,10 @@ class PhoneNumberMatcher { // Helper class holding useful regular expressions. const PhoneNumberMatcherRegExps* reg_exps_; + // Helper class holding loaded data containing alternate ways phone numbers + // might be formatted for certain regions. + const AlternateFormats* alternate_formats_; + // The phone number utility; const PhoneNumberUtil& phone_util_; diff --git a/cpp/test/phonenumbers/phonenumbermatcher_test.cc b/cpp/test/phonenumbers/phonenumbermatcher_test.cc index 962b614..073cea7 100644 --- a/cpp/test/phonenumbers/phonenumbermatcher_test.cc +++ b/cpp/test/phonenumbers/phonenumbermatcher_test.cc @@ -737,9 +737,12 @@ static const NumberTest kValidCases[] = { "\x2D\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97\xEF\xBC\x97", RegionCode::US()), NumberTest("2012-0102 08", RegionCode::US()), // Very strange formatting. NumberTest("2012-01-02 08", RegionCode::US()), - // Breakdown assistance number. - NumberTest("1800-10-10 22", RegionCode::AU()), -}; + // Breakdown assistance number with unexpected formatting. + NumberTest("1800-1-0-10 22", RegionCode::AU()), + NumberTest("030-3-2 23 12 34", RegionCode::DE()), + NumberTest("03 0 -3 2 23 12 34", RegionCode::DE()), + NumberTest("(0)3 0 -3 2 23 12 34", RegionCode::DE()), + NumberTest("0 3 0 -3 2 23 12 34", RegionCode::DE()),}; // Strings with number-like things that should only be found up to and including // the "strict_grouping" leniency level. @@ -749,6 +752,11 @@ static const NumberTest kStrictGroupingCases[] = { // Should be found by strict grouping but not exact grouping, as the last two // groups are formatted together as a block. NumberTest("0800-2491234", RegionCode::DE()), + // Doesn't match any formatting in the test file, but almost matches an + // alternate format (the last two groups have been squashed together here). + NumberTest("0900-1 123123", RegionCode::DE()), + NumberTest("(0)900-1 123123", RegionCode::DE()), + NumberTest("0 900-1 123123", RegionCode::DE()), }; // Strings with number-like things that should be found at all levels. @@ -780,6 +788,13 @@ static const NumberTest kExactGroupingCases[] = { NumberTest("0494949 ext. 49", RegionCode::DE()), NumberTest("01 (33) 3461 2234", RegionCode::MX()), // Optional NP present NumberTest("(33) 3461 2234", RegionCode::MX()), // Optional NP omitted + // Breakdown assistance number with normal formatting. + NumberTest("1800-10-10 22", RegionCode::AU()), + // Doesn't match any formatting in the test file, but matches an alternate + // format exactly. + NumberTest("0900-1 123 123", RegionCode::DE()), + NumberTest("(0)900-1 123 123", RegionCode::DE()), + NumberTest("0 900-1 123 123", RegionCode::DE()), }; TEST_F(PhoneNumberMatcherTest, MatchesWithPossibleLeniency) { -- 2.7.4