using google::protobuf::RepeatedPtrField;
// static
-const char PhoneNumberUtil::kPlusChars[] = "++";
+const char PhoneNumberUtil::kPlusChars[] = "+\xEF\xBC\x8B"; /* "++" */
// To find out the unicode code-point of the characters below in vim, highlight
// the character and type 'ga'. Note that the - is used to express ranges of
// full-width punctuation below, as well as being present in the expression
// itself. In emacs, you can use M-x unicode-what to query information about the
// unicode character.
// static
const char PhoneNumberUtil::kValidPunctuation[] =
- "-x‐-―−ー--/ ()()[].\\[\\]/~⁓∼";
+ /* "-x‐-―−ー--/ <U+200B><U+2060> ()()[].\\[\\]/~⁓∼" */
+ "-x\xE2\x80\x90-\xE2\x80\x95\xE2\x88\x92\xE3\x83\xBC\xEF\xBC\x8D-\xEF\xBC"
+ "\x8F \xC2\xA0\xE2\x80\x8B\xE2\x81\xA0\xE3\x80\x80()\xEF\xBC\x88\xEF\xBC"
+ "\x89\xEF\xBC\xBB\xEF\xBC\xBD.\\[\\]/~\xE2\x81\x93\xE2\x88\xBC";
namespace {
all_plus_number_grouping_symbols->insert(
make_pair(ToUnicodeCodepoint("-"), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("-"), '-'));
+ make_pair(ToUnicodeCodepoint("\xEF\xBC\x8D" /* "-" */), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("‐"), '-'));
+ make_pair(ToUnicodeCodepoint("\xE2\x80\x90" /* "‐" */), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("‑"), '-'));
+ make_pair(ToUnicodeCodepoint("\xE2\x80\x91" /* "‑" */), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("‒"), '-'));
+ make_pair(ToUnicodeCodepoint("\xE2\x80\x92" /* "‒" */), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("–"), '-'));
+ make_pair(ToUnicodeCodepoint("\xE2\x80\x93" /* "–" */), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("—"), '-'));
+ make_pair(ToUnicodeCodepoint("\xE2\x80\x94" /* "—" */), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("―"), '-'));
+ make_pair(ToUnicodeCodepoint("\xE2\x80\x95" /* "―" */), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("−"), '-'));
+ make_pair(ToUnicodeCodepoint("\xE2\x88\x92" /* "−" */), '-'));
all_plus_number_grouping_symbols->insert(
make_pair(ToUnicodeCodepoint("/"), '/'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("/"), '/'));
+ make_pair(ToUnicodeCodepoint("\xEF\xBC\x8F" /* "/" */), '/'));
all_plus_number_grouping_symbols->insert(
make_pair(ToUnicodeCodepoint(" "), ' '));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint(" "), ' '));
+ make_pair(ToUnicodeCodepoint("\xE3\x80\x80" /* " " */), ' '));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint(""), ' '));
+ make_pair(ToUnicodeCodepoint("\xE2\x81\xA0"), ' '));
all_plus_number_grouping_symbols->insert(
make_pair(ToUnicodeCodepoint("."), '.'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("."), '.'));
+ make_pair(ToUnicodeCodepoint("\xEF\xBC\x8E" /* "." */), '.'));
// Only the upper-case letters are added here - the lower-case versions are
// added programmatically.
alpha_mappings->insert(make_pair(ToUnicodeCodepoint("A"), '2'));
void PhoneNumberUtil::CreateRegularExpressions() const {
unique_international_prefix.reset(RegExp::Create(
- "[\\d]+(?:[~⁓∼~][\\d]+)?"));
+ /* "[\\d]+(?:[~⁓∼~][\\d]+)?" */
+ "[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E][\\d]+)?"));
// The first_group_capturing_pattern was originally set to $1 but there are
// some countries for which the first group is not used in the national
// pattern (e.g. Argentina) so the $1 group does not match correctly.
const string capturing_extn_digits = StrCat("([", kDigits, "]{1,7})");
known_extn_patterns.reset(new string(
StrCat(kRfc3966ExtnPrefix, capturing_extn_digits, "|"
- "[ \\t,]*(?:ext(?:ensi(?:ó?|ó))?n?|extn?|[,xx##~~]|"
+ /* "[ \\t,]*(?:ext(?:ensi(?:ó?|ó))?n?|extn?|[,xx##~~]|"
"int|int|anexo)"
- "[:\\..]?[ \\t,-]*", capturing_extn_digits, "#?|"
- "[- ]+([", kDigits, "]{1,5})#")));
+ "[:\\..]?[ \\t,-]*", capturing_extn_digits, "#?|" */
+ "[ \xC2\xA0\\t,]*(?:ext(?:ensi(?:o\xCC\x81?|\xC3\xB3))?n?|\xEF\xBD"
+ "\x85\xEF\xBD\x98\xEF\xBD\x94\xEF\xBD\x8E?|[,x\xEF\xBD\x98#\xEF"
+ "\xBC\x83~\xEF\xBD\x9E]|"
+ "int|\xEF\xBD\x89\xEF\xBD\x8E\xEF\xBD\x94|anexo)"
+ "[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*", capturing_extn_digits,
+ "#?|[- ]+([", kDigits, "]{1,5})#")));
+
extn_pattern.reset(RegExp::Create(
StrCat("(?i)(?:", *known_extn_patterns, ")$")));
valid_phone_number_pattern.reset(RegExp::Create(
ExtractPossibleNumber("Tel:+800-345-600", &extracted_number);
EXPECT_EQ("+800-345-600", extracted_number);
// Should recognise wide digits as possible start values.
- ExtractPossibleNumber("023", &extracted_number);
- EXPECT_EQ("023", extracted_number);
+ ExtractPossibleNumber("\xEF\xBC\x90\xEF\xBC\x92\xEF\xBC\x93" /* "023" */,
+ &extracted_number);
+ EXPECT_EQ("\xEF\xBC\x90\xEF\xBC\x92\xEF\xBC\x93" /* "023" */,
+ extracted_number);
// Dashes are not possible start values and should be removed.
- ExtractPossibleNumber("Num-123", &extracted_number);
- EXPECT_EQ("123", extracted_number);
+ ExtractPossibleNumber("Num-\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93"
+ /* "Num-123" */, &extracted_number);
+ EXPECT_EQ("\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93" /* "123" */,
+ extracted_number);
// If no possible number is present, return an empty string.
ExtractPossibleNumber("Num-....", &extracted_number);
EXPECT_EQ("", extracted_number);
ExtractPossibleNumber("(650) 253-0000.", &extracted_number);
EXPECT_EQ("650) 253-0000", extracted_number);
// This case has a trailing RTL char.
- ExtractPossibleNumber("(650) 253-0000", &extracted_number);
+ ExtractPossibleNumber("(650) 253-0000\xE2\x80\x8F"
+ /* "(650) 253-0000" */, &extracted_number);
EXPECT_EQ("650) 253-0000", extracted_number);
}
EXPECT_TRUE(IsViablePhoneNumber("0800-4-PIZZA"));
// Only one or two digits before possible punctuation followed by more digits.
// The punctuation used here is the Unicode character U+3000 (ideographic space).
- EXPECT_TRUE(IsViablePhoneNumber("1 34"));
- EXPECT_FALSE(IsViablePhoneNumber("1 3+4"));
+ EXPECT_TRUE(IsViablePhoneNumber("1\xE3\x80\x80" "34" /* "1 34" */));
+ EXPECT_FALSE(IsViablePhoneNumber("1\xE3\x80\x80" "3+4" /* "1 3+4" */));
// Unicode variants of possible starting character and other allowed
// punctuation/digits.
- EXPECT_TRUE(IsViablePhoneNumber("(1) 3456789"));
+ EXPECT_TRUE(IsViablePhoneNumber("\xEF\xBC\x88" "1\xEF\xBC\x89\xE3\x80\x80"
+ "3456789" /* "(1) 3456789" */ ));
// Testing a leading + is okay.
- EXPECT_TRUE(IsViablePhoneNumber("+1) 3456789"));
+ EXPECT_TRUE(IsViablePhoneNumber("+1\xEF\xBC\x89\xE3\x80\x80"
+ "3456789" /* "+1) 3456789" */));
}
TEST_F(PhoneNumberUtilTest, ConvertAlphaCharactersInNumber) {
EXPECT_EQ(kExpectedOutput, input);
// Try with some non-ASCII characters.
- input.assign("1 (800) ABC-DEF");
- static const string kExpectedFullwidthOutput = "1 (800) 222-333";
+ input.assign("1\xE3\x80\x80\xEF\xBC\x88" "800) ABC-DEF"
+ /* "1 (800) ABC-DEF" */);
+ static const string kExpectedFullwidthOutput =
+ "1\xE3\x80\x80\xEF\xBC\x88" "800) 222-333" /* "1 (800) 222-333" */;
phone_util_.ConvertAlphaCharactersInNumber(&input);
EXPECT_EQ(kExpectedFullwidthOutput, input);
}
TEST_F(PhoneNumberUtilTest, NormaliseOtherDigits) {
// The first digit is a full-width 2, the last digit is an Arabic-indic digit
// 5.
- string input_number("25٥");
+ string input_number("\xEF\xBC\x92" "5\xD9\xA5" /* "25٥" */);
Normalize(&input_number);
static const string kExpectedOutput("255");
EXPECT_EQ(kExpectedOutput, input_number)
<< "Conversion did not correctly replace non-latin digits";
// The first digit is an Eastern-Arabic 5, the last an Eastern-Arabic 0.
- string eastern_arabic_input_number("۵2۰");
+ string eastern_arabic_input_number("\xDB\xB5" "2\xDB\xB0" /* "۵2۰" */);
Normalize(&eastern_arabic_input_number);
static const string kExpectedOutput2("520");
EXPECT_EQ(kExpectedOutput2, eastern_arabic_input_number)
// Using a full-width plus sign.
test_number.Clear();
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
- phone_util_.Parse("+1 (650) 333-6000",
+ phone_util_.Parse("\xEF\xBC\x8B" "1 (650) 333-6000",
+ /* "+1 (650) 333-6000" */
RegionCode::SG(), &test_number));
EXPECT_EQ(us_number, test_number);
// The whole number, including punctuation, is here represented in full-width
// form.
test_number.Clear();
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
- phone_util_.Parse("+1 (650) 333-6000",
+ phone_util_.Parse("\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88"
+ "\xEF\xBC\x96\xEF\xBC\x95\xEF\xBC\x90\xEF\xBC\x89"
+ "\xE3\x80\x80\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x93"
+ "\xEF\xBC\x8D\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90"
+ "\xEF\xBC\x90",
+ /* "+1 (650) 333-6000" */
RegionCode::SG(), &test_number));
EXPECT_EQ(us_number, test_number);
// Using the U+30FC dash.
test_number.Clear();
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
- phone_util_.Parse("+1 (650) 333ー6000",
+ phone_util_.Parse("\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88"
+ "\xEF\xBC\x96\xEF\xBC\x95\xEF\xBC\x90\xEF\xBC\x89"
+ "\xE3\x80\x80\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x93"
+ "\xE3\x83\xBC\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90"
+ "\xEF\xBC\x90",
+ /* "+1 (650) 333ー6000" */
RegionCode::SG(), &test_number));
EXPECT_EQ(us_number, test_number);
}
// Test with full-width plus.
result_proto.Clear();
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
- phone_util_.Parse("+64 3 331 6005", RegionCode::GetUnknown(),
- &result_proto));
+ phone_util_.Parse("\xEF\xBC\x8B" "64 3 331 6005",
+ /* "+64 3 331 6005" */
+ RegionCode::GetUnknown(), &result_proto));
EXPECT_EQ(nz_number, result_proto);
// Test with normal plus but leading characters that need to be stripped.
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
EXPECT_EQ(us_with_extension, test_number);
test_number.Clear();
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
- phone_util_.Parse("(800) 901-3355 ,extensión 7246433",
+ phone_util_.Parse("(800) 901-3355 ,extensi\xC3\xB3n 7246433",
+ /* "(800) 901-3355 ,extensión 7246433" */
RegionCode::US(),
&test_number));
EXPECT_EQ(us_with_extension, test_number);
// Repeat with the small letter o with acute accent created by combining
// characters.
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
- phone_util_.Parse("(800) 901-3355 ,extensión 7246433",
+ phone_util_.Parse("(800) 901-3355 ,extensio\xCC\x81n 7246433",
+ /* "(800) 901-3355 ,extensión 7246433" */
RegionCode::US(),
&test_number));
EXPECT_EQ(us_with_extension, test_number);