#include "phonenumbers/asyoutypeformatter.h"
+#include <math.h>
#include <cctype>
#include <list>
#include <string>
#include <google/protobuf/message_lite.h>
-#include "base/logging.h"
-#include "base/memory/scoped_ptr.h"
+#include "phonenumbers/base/logging.h"
+#include "phonenumbers/base/memory/scoped_ptr.h"
#include "phonenumbers/phonemetadata.pb.h"
#include "phonenumbers/phonenumberutil.h"
#include "phonenumbers/regexp_cache.h"
// the punctuation space.
const char kDigitPlaceholder[] = "\xE2\x80\x88"; /* " " */
+// Character used when appropriate to separate a prefix, such as a long NDD or a
+// country calling code, from the national number.
+const char kSeparatorBeforeNationalNumber = ' ';
+
+// A set of characters that, if found in a national prefix formatting rules, are
+// an indicator to us that we should separate the national prefix from the
+// number when formatting.
+const char kNationalPrefixSeparatorsPattern[] = "[- ]";
+
// Replaces any standalone digit in the pattern (not any inside a {} grouping)
// with \d. This function replaces the standalone digit regex used in the Java
// version which is currently not supported by RE2 because it uses a special
void ReplacePatternDigits(string* pattern) {
DCHECK(pattern);
string new_pattern;
+ // This is needed since sometimes there is more than one digit in between the
+ // curly braces.
+ bool is_in_braces = false;
for (string::const_iterator it = pattern->begin(); it != pattern->end();
++it) {
const char current_char = *it;
if (isdigit(current_char)) {
- if (it + 1 != pattern->end()) {
- const char next_char = it[1];
-
- if (next_char != ',' && next_char != '}') {
- new_pattern += "\\d";
- } else {
- new_pattern += current_char;
- }
+ if (is_in_braces) {
+ new_pattern += current_char;
} else {
new_pattern += "\\d";
}
} else {
new_pattern += current_char;
+ if (current_char == '{') {
+ is_in_braces = true;
+ } else if (current_char == '}') {
+ is_in_braces = false;
+ }
}
}
pattern->assign(new_pattern);
accrued_input_without_formatting_(),
able_to_format_(true),
input_has_formatting_(false),
- is_international_formatting_(false),
+ is_complete_number_(false),
is_expecting_country_code_(false),
phone_util_(*PhoneNumberUtil::GetInstance()),
default_country_(region_code),
original_position_(0),
position_to_remember_(0),
prefix_before_national_number_(),
- national_prefix_extracted_(),
+ should_add_space_after_national_prefix_(false),
+ extracted_national_prefix_(),
national_number_(),
possible_formats_() {
}
}
if (CreateFormattingTemplate(number_format)) {
current_formatting_pattern_ = pattern;
+ SetShouldAddSpaceAfterNationalPrefix(number_format);
// With a new formatting template, the matched position using the old
// template needs to be reset.
last_match_position_ = 0;
return false;
}
-void AsYouTypeFormatter::GetAvailableFormats(
- const string& leading_three_digits) {
+void AsYouTypeFormatter::GetAvailableFormats(const string& leading_digits) {
const RepeatedPtrField<NumberFormat>& format_list =
- (is_international_formatting_ &&
+ (is_complete_number_ &&
current_metadata_->intl_number_format().size() > 0)
? current_metadata_->intl_number_format()
: current_metadata_->number_format();
-
+ bool national_prefix_used_by_country =
+ current_metadata_->has_national_prefix();
for (RepeatedPtrField<NumberFormat>::const_iterator it = format_list.begin();
it != format_list.end(); ++it) {
- if (phone_util_.IsFormatEligibleForAsYouTypeFormatter(it->format())) {
- possible_formats_.push_back(&*it);
+ if (!national_prefix_used_by_country || is_complete_number_ ||
+ it->national_prefix_optional_when_formatting() ||
+ phone_util_.FormattingRuleHasFirstGroupOnly(
+ it->national_prefix_formatting_rule())) {
+ if (phone_util_.IsFormatEligibleForAsYouTypeFormatter(it->format())) {
+ possible_formats_.push_back(&*it);
+ }
}
}
- NarrowDownPossibleFormats(leading_three_digits);
+ NarrowDownPossibleFormats(leading_digits);
}
void AsYouTypeFormatter::NarrowDownPossibleFormats(
it != possible_formats_.end(); ) {
DCHECK(*it);
const NumberFormat& format = **it;
-
- if (format.leading_digits_pattern_size() >
- index_of_leading_digits_pattern) {
- const scoped_ptr<RegExpInput> input(
- regexp_factory_->CreateInput(leading_digits));
- if (!regexp_cache_.GetRegExp(format.leading_digits_pattern().Get(
- index_of_leading_digits_pattern)).Consume(input.get())) {
- it = possible_formats_.erase(it);
- continue;
- }
- } // else the particular format has no more specific leadingDigitsPattern,
- // and it should be retained.
+ if (format.leading_digits_pattern_size() == 0) {
+ // Keep everything that isn't restricted by leading digits.
+ ++it;
+ continue;
+ }
+ int last_leading_digits_pattern =
+ std::min(index_of_leading_digits_pattern,
+ format.leading_digits_pattern_size() - 1);
+ const scoped_ptr<RegExpInput> input(
+ regexp_factory_->CreateInput(leading_digits));
+ if (!regexp_cache_.GetRegExp(format.leading_digits_pattern().Get(
+ last_leading_digits_pattern)).Consume(input.get())) {
+ it = possible_formats_.erase(it);
+ continue;
+ }
++it;
}
}
+void AsYouTypeFormatter::SetShouldAddSpaceAfterNationalPrefix(
+ const NumberFormat& format) {
+ static const scoped_ptr<const RegExp> national_prefix_separators_pattern(
+ regexp_factory_->CreateRegExp(kNationalPrefixSeparatorsPattern));
+ should_add_space_after_national_prefix_ =
+ national_prefix_separators_pattern->PartialMatch(
+ format.national_prefix_formatting_rule());
+}
+
bool AsYouTypeFormatter::CreateFormattingTemplate(const NumberFormat& format) {
string number_pattern = format.pattern();
last_match_position_ = 0;
current_formatting_pattern_.clear();
prefix_before_national_number_.clear();
- national_prefix_extracted_.clear();
+ extracted_national_prefix_.clear();
national_number_.clear();
able_to_format_ = true;
input_has_formatting_ = false;
position_to_remember_ = 0;
original_position_ = 0;
- is_international_formatting_ = false;
+ is_complete_number_ = false;
is_expecting_country_code_ = false;
possible_formats_.clear();
+ should_add_space_after_national_prefix_ = false;
if (current_metadata_ != default_metadata_) {
current_metadata_ = GetMetadataForRegion(default_country_);
}
} else if (AbleToExtractLongerNdd()) {
// Add an additional space to separate long NDD and national significant
- // number for readability.
- prefix_before_national_number_.append(" ");
+ // number for readability. We don't set
+ // should_add_space_after_national_prefix_ to true, since we don't want
+ // this to change later when we choose formatting templates.
+ prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
AttemptToChoosePatternWithPrefixExtracted(phone_number);
return;
}
case 3:
if (AttemptToExtractIdd()) {
is_expecting_country_code_ = true;
+ // FALLTHROUGH_INTENDED
} else {
// No IDD or plus sign is found, might be entering in national format.
- RemoveNationalPrefixFromNationalNumber(&national_prefix_extracted_);
+ RemoveNationalPrefixFromNationalNumber(&extracted_national_prefix_);
AttemptToChooseFormattingPattern(phone_number);
return;
}
return;
}
if (possible_formats_.size() > 0) {
- // The formatting pattern is already chosen.
+ // The formatting patterns are already chosen.
string temp_national_number;
InputDigitHelper(normalized_next_char, &temp_national_number);
// See if accrued digits can be formatted properly already. If not, use
InputAccruedNationalNumber(phone_number);
return;
}
- phone_number->assign(able_to_format_
- ? prefix_before_national_number_ + temp_national_number
- : temp_national_number);
+ if (able_to_format_) {
+ AppendNationalNumber(temp_national_number, phone_number);
+ } else {
+ phone_number->clear();
+ accrued_input_.toUTF8String(*phone_number);
+ }
+ return;
} else {
AttemptToChooseFormattingPattern(phone_number);
}
able_to_format_ = true;
is_expecting_country_code_ = false;
possible_formats_.clear();
+ last_match_position_ = 0;
+ formatting_template_.remove();
+ current_formatting_pattern_.clear();
AttemptToChooseFormattingPattern(formatted_number);
}
+const string& AsYouTypeFormatter::GetExtractedNationalPrefix() const {
+ return extracted_national_prefix_;
+}
+
bool AsYouTypeFormatter::AbleToExtractLongerNdd() {
- if (national_prefix_extracted_.length() > 0) {
+ if (extracted_national_prefix_.length() > 0) {
// Put the extracted NDD back to the national number before attempting to
// extract a new NDD.
- national_number_.insert(0, national_prefix_extracted_);
+ national_number_.insert(0, extracted_national_prefix_);
// Remove the previously extracted NDD from prefixBeforeNationalNumber. We
- // cannot simply set it to empty string because people sometimes enter
- // national prefix after country code, e.g +44 (0)20-1234-5678.
+ // cannot simply set it to empty string because people sometimes incorrectly
+ // enter national prefix after the country code, e.g. +44 (0)20-1234-5678.
int index_of_previous_ndd =
- prefix_before_national_number_.find_last_of(national_prefix_extracted_);
+ prefix_before_national_number_.find_last_of(extracted_national_prefix_);
prefix_before_national_number_.resize(index_of_previous_ndd);
}
string new_national_prefix;
RemoveNationalPrefixFromNationalNumber(&new_national_prefix);
- return national_prefix_extracted_ != new_national_prefix;
+ return extracted_national_prefix_ != new_national_prefix;
}
void AsYouTypeFormatter::AttemptToFormatAccruedDigits(
- string* formatted_number) {
- DCHECK(formatted_number);
+ string* formatted_result) {
+ DCHECK(formatted_result);
for (list<const NumberFormat*>::const_iterator it = possible_formats_.begin();
it != possible_formats_.end(); ++it) {
DCHECK(*it);
- const NumberFormat& num_format = **it;
- string pattern = num_format.pattern();
+ const NumberFormat& number_format = **it;
+ const string& pattern = number_format.pattern();
if (regexp_cache_.GetRegExp(pattern).FullMatch(national_number_)) {
- formatted_number->assign(national_number_);
- string new_formatted_number(*formatted_number);
- string format = num_format.format();
+ SetShouldAddSpaceAfterNationalPrefix(number_format);
+ string formatted_number(national_number_);
bool status = regexp_cache_.GetRegExp(pattern).GlobalReplace(
- &new_formatted_number, format);
+ &formatted_number, number_format.format());
DCHECK(status);
- formatted_number->assign(prefix_before_national_number_);
- formatted_number->append(new_formatted_number);
+ AppendNationalNumber(formatted_number, formatted_result);
return;
}
}
- formatted_number->clear();
}
int AsYouTypeFormatter::GetRememberedPosition() const {
return ConvertUnicodeStringPosition(current_output, current_output_index);
}
+void AsYouTypeFormatter::AppendNationalNumber(const string& national_number,
+ string* phone_number) const {
+ int prefix_before_national_number_length =
+ prefix_before_national_number_.size();
+ if (should_add_space_after_national_prefix_ &&
+ prefix_before_national_number_length > 0 &&
+ prefix_before_national_number_.at(
+ prefix_before_national_number_length - 1) !=
+ kSeparatorBeforeNationalNumber) {
+ // We want to add a space after the national prefix if the national prefix
+ // formatting rule indicates that this would normally be done, with the
+ // exception of the case where we already appended a space because the NDD
+ // was surprisingly long.
+ phone_number->assign(prefix_before_national_number_);
+ phone_number->push_back(kSeparatorBeforeNationalNumber);
+ StrAppend(phone_number, national_number);
+ } else {
+ phone_number->assign(
+ StrCat(prefix_before_national_number_, national_number));
+ }
+}
+
void AsYouTypeFormatter::AttemptToChooseFormattingPattern(
string* formatted_number) {
DCHECK(formatted_number);
-
+ // We start to attempt to format only when at least MIN_LEADING_DIGITS_LENGTH
+ // digits of national number (excluding national prefix) have been entered.
if (national_number_.length() >= kMinLeadingDigitsLength) {
- const string leading_digits =
- national_number_.substr(0, kMinLeadingDigitsLength);
-
- GetAvailableFormats(leading_digits);
- MaybeCreateNewTemplate();
- InputAccruedNationalNumber(formatted_number);
+ GetAvailableFormats(national_number_);
+ formatted_number->clear();
+ AttemptToFormatAccruedDigits(formatted_number);
+ // See if the accrued digits can be formatted properly already.
+ if (formatted_number->length() > 0) {
+ return;
+ }
+ if (MaybeCreateNewTemplate()) {
+ InputAccruedNationalNumber(formatted_number);
+ } else {
+ formatted_number->clear();
+ accrued_input_.toUTF8String(*formatted_number);
+ }
+ return;
} else {
- formatted_number->assign(prefix_before_national_number_ + national_number_);
+ AppendNationalNumber(national_number_, formatted_number);
}
}
temp_national_number.clear();
InputDigitHelper(national_number_[i], &temp_national_number);
}
- number->assign(able_to_format_
- ? prefix_before_national_number_ + temp_national_number
- : temp_national_number);
+ if (able_to_format_) {
+ AppendNationalNumber(temp_national_number, number);
+ } else {
+ number->clear();
+ accrued_input_.toUTF8String(*number);
+ }
+ return;
} else {
number->assign(prefix_before_national_number_);
}
}
+bool AsYouTypeFormatter::IsNanpaNumberWithNationalPrefix() const {
+ // For NANPA numbers beginning with 1[2-9], treat the 1 as the national
+ // prefix. The reason is that national significant numbers in NANPA always
+ // start with [2-9] after the national prefix. Numbers beginning with 1[01]
+ // can only be short/emergency numbers, which don't need the national
+ // prefix.
+ return (current_metadata_->country_code() == 1) &&
+ (national_number_[0] == '1') && (national_number_[1] != '0') &&
+ (national_number_[1] != '1');
+}
+
void AsYouTypeFormatter::RemoveNationalPrefixFromNationalNumber(
string* national_prefix) {
int start_of_national_number = 0;
- if (current_metadata_->country_code() == 1 && national_number_[0] == '1') {
+ if (IsNanpaNumberWithNationalPrefix()) {
start_of_national_number = 1;
- prefix_before_national_number_.append("1 ");
- is_international_formatting_ = true;
+ prefix_before_national_number_.append("1");
+ prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
+ is_complete_number_ = true;
} else if (current_metadata_->has_national_prefix_for_parsing()) {
const scoped_ptr<RegExpInput> consumed_input(
regexp_factory_->CreateInput(national_number_));
const RegExp& pattern = regexp_cache_.GetRegExp(
current_metadata_->national_prefix_for_parsing());
+ // Since some national prefix patterns are entirely optional, check that a
+ // national prefix could actually be extracted.
if (pattern.Consume(consumed_input.get())) {
- // When the national prefix is detected, we use international formatting
- // rules instead of national ones, because national formatting rules could
- // countain local formatting rules for numbers entered without area code.
- is_international_formatting_ = true;
start_of_national_number =
national_number_.length() - consumed_input->ToString().length();
- prefix_before_national_number_.append(
- national_number_.substr(0, start_of_national_number));
+ if (start_of_national_number > 0) {
+ // When the national prefix is detected, we use international formatting
+ // rules instead of national ones, because national formatting rules
+ // could countain local formatting rules for numbers entered without
+ // area code.
+ is_complete_number_ = true;
+ prefix_before_national_number_.append(
+ national_number_.substr(0, start_of_national_number));
+ }
}
}
national_prefix->assign(national_number_, 0, start_of_national_number);
current_metadata_->international_prefix()));
if (international_prefix.Consume(consumed_input.get())) {
- is_international_formatting_ = true;
+ is_complete_number_ = true;
const int start_of_country_code =
accrued_input_without_formatting_.length() -
consumed_input->ToString().length();
prefix_before_national_number_.append(before_country_code);
if (accrued_input_without_formatting_[0] != kPlusSign) {
- prefix_before_national_number_.append(" ");
+ prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
}
return true;
}
national_number_.assign(number_without_country_code);
string new_region_code;
phone_util_.GetRegionCodeForCountryCode(country_code, &new_region_code);
-
- if (new_region_code != default_country_) {
+ if (PhoneNumberUtil::kRegionCodeForNonGeoEntity == new_region_code) {
+ current_metadata_ =
+ phone_util_.GetMetadataForNonGeographicalRegion(country_code);
+ } else if (new_region_code != default_country_) {
current_metadata_ = GetMetadataForRegion(new_region_code);
}
- StrAppend(&prefix_before_national_number_, country_code, " ");
+ StrAppend(&prefix_before_national_number_, country_code);
+ prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
+ // When we have successfully extracted the IDD, the previously extracted NDD
+ // should be cleared because it is no longer valid.
+ extracted_national_prefix_.clear();
return true;
}
void AsYouTypeFormatter::InputDigitHelper(char next_char, string* number) {
DCHECK(number);
number->clear();
+ // Note that formattingTemplate is not guaranteed to have a value, it could be
+ // empty, e.g. when the next digit is entered after extracting an IDD or NDD.
const char32 placeholder_codepoint = UnicodeString(kDigitPlaceholder)[0];
int placeholder_pos = formatting_template_
.tempSubString(last_match_position_).indexOf(placeholder_codepoint);