Add comment
[platform/upstream/libphonenumber.git] / cpp / src / phonenumbers / asyoutypeformatter.cc
index d75924a..0a5d958 100644 (file)
 
 #include "phonenumbers/asyoutypeformatter.h"
 
+#include <math.h>
 #include <cctype>
 #include <list>
 #include <string>
 
 #include <google/protobuf/message_lite.h>
 
-#include "base/logging.h"
-#include "base/memory/scoped_ptr.h"
+#include "phonenumbers/base/logging.h"
+#include "phonenumbers/base/memory/scoped_ptr.h"
 #include "phonenumbers/phonemetadata.pb.h"
 #include "phonenumbers/phonenumberutil.h"
 #include "phonenumbers/regexp_cache.h"
@@ -52,6 +53,15 @@ const size_t kMinLeadingDigitsLength = 3;
 // the punctuation space.
 const char kDigitPlaceholder[] = "\xE2\x80\x88"; /* " " */
 
+// Character used when appropriate to separate a prefix, such as a long NDD or a
+// country calling code, from the national number.
+const char kSeparatorBeforeNationalNumber = ' ';
+
+// A set of characters that, if found in a national prefix formatting rules, are
+// an indicator to us that we should separate the national prefix from the
+// number when formatting.
+const char kNationalPrefixSeparatorsPattern[] = "[- ]";
+
 // Replaces any standalone digit in the pattern (not any inside a {} grouping)
 // with \d. This function replaces the standalone digit regex used in the Java
 // version which is currently not supported by RE2 because it uses a special
@@ -59,25 +69,27 @@ const char kDigitPlaceholder[] = "\xE2\x80\x88"; /* " " */
 void ReplacePatternDigits(string* pattern) {
   DCHECK(pattern);
   string new_pattern;
+  // This is needed since sometimes there is more than one digit in between the
+  // curly braces.
+  bool is_in_braces = false;
 
   for (string::const_iterator it = pattern->begin(); it != pattern->end();
        ++it) {
     const char current_char = *it;
 
     if (isdigit(current_char)) {
-      if (it + 1 != pattern->end()) {
-        const char next_char = it[1];
-
-        if (next_char != ',' && next_char != '}') {
-          new_pattern += "\\d";
-        } else {
-          new_pattern += current_char;
-        }
+      if (is_in_braces) {
+        new_pattern += current_char;
       } else {
         new_pattern += "\\d";
       }
     } else {
       new_pattern += current_char;
+      if (current_char == '{') {
+        is_in_braces = true;
+      } else if (current_char == '}') {
+        is_in_braces = false;
+      }
     }
   }
   pattern->assign(new_pattern);
@@ -122,7 +134,7 @@ AsYouTypeFormatter::AsYouTypeFormatter(const string& region_code)
       accrued_input_without_formatting_(),
       able_to_format_(true),
       input_has_formatting_(false),
-      is_international_formatting_(false),
+      is_complete_number_(false),
       is_expecting_country_code_(false),
       phone_util_(*PhoneNumberUtil::GetInstance()),
       default_country_(region_code),
@@ -133,7 +145,8 @@ AsYouTypeFormatter::AsYouTypeFormatter(const string& region_code)
       original_position_(0),
       position_to_remember_(0),
       prefix_before_national_number_(),
-      national_prefix_extracted_(),
+      should_add_space_after_national_prefix_(false),
+      extracted_national_prefix_(),
       national_number_(),
       possible_formats_() {
 }
@@ -170,6 +183,7 @@ bool AsYouTypeFormatter::MaybeCreateNewTemplate() {
     }
     if (CreateFormattingTemplate(number_format)) {
       current_formatting_pattern_ = pattern;
+      SetShouldAddSpaceAfterNationalPrefix(number_format);
       // With a new formatting template, the matched position using the old
       // template needs to be reset.
       last_match_position_ = 0;
@@ -180,21 +194,26 @@ bool AsYouTypeFormatter::MaybeCreateNewTemplate() {
   return false;
 }
 
-void AsYouTypeFormatter::GetAvailableFormats(
-    const string& leading_three_digits) {
+void AsYouTypeFormatter::GetAvailableFormats(const string& leading_digits) {
   const RepeatedPtrField<NumberFormat>& format_list =
-      (is_international_formatting_ &&
+      (is_complete_number_ &&
        current_metadata_->intl_number_format().size() > 0)
           ? current_metadata_->intl_number_format()
           : current_metadata_->number_format();
-
+  bool national_prefix_used_by_country =
+      current_metadata_->has_national_prefix();
   for (RepeatedPtrField<NumberFormat>::const_iterator it = format_list.begin();
        it != format_list.end(); ++it) {
-    if (phone_util_.IsFormatEligibleForAsYouTypeFormatter(it->format())) {
-      possible_formats_.push_back(&*it);
+    if (!national_prefix_used_by_country || is_complete_number_ ||
+        it->national_prefix_optional_when_formatting() ||
+        phone_util_.FormattingRuleHasFirstGroupOnly(
+            it->national_prefix_formatting_rule())) {
+      if (phone_util_.IsFormatEligibleForAsYouTypeFormatter(it->format())) {
+        possible_formats_.push_back(&*it);
+      }
     }
   }
-  NarrowDownPossibleFormats(leading_three_digits);
+  NarrowDownPossibleFormats(leading_digits);
 }
 
 void AsYouTypeFormatter::NarrowDownPossibleFormats(
@@ -206,22 +225,34 @@ void AsYouTypeFormatter::NarrowDownPossibleFormats(
        it != possible_formats_.end(); ) {
     DCHECK(*it);
     const NumberFormat& format = **it;
-
-    if (format.leading_digits_pattern_size() >
-        index_of_leading_digits_pattern) {
-      const scoped_ptr<RegExpInput> input(
-          regexp_factory_->CreateInput(leading_digits));
-      if (!regexp_cache_.GetRegExp(format.leading_digits_pattern().Get(
-              index_of_leading_digits_pattern)).Consume(input.get())) {
-        it = possible_formats_.erase(it);
-        continue;
-      }
-    }  // else the particular format has no more specific leadingDigitsPattern,
-       // and it should be retained.
+    if (format.leading_digits_pattern_size() == 0) {
+      // Keep everything that isn't restricted by leading digits.
+      ++it;
+      continue;
+    }
+    int last_leading_digits_pattern =
+        std::min(index_of_leading_digits_pattern,
+                 format.leading_digits_pattern_size() - 1);
+    const scoped_ptr<RegExpInput> input(
+        regexp_factory_->CreateInput(leading_digits));
+    if (!regexp_cache_.GetRegExp(format.leading_digits_pattern().Get(
+            last_leading_digits_pattern)).Consume(input.get())) {
+      it = possible_formats_.erase(it);
+      continue;
+    }
     ++it;
   }
 }
 
+void AsYouTypeFormatter::SetShouldAddSpaceAfterNationalPrefix(
+    const NumberFormat& format) {
+  static const scoped_ptr<const RegExp> national_prefix_separators_pattern(
+      regexp_factory_->CreateRegExp(kNationalPrefixSeparatorsPattern));
+  should_add_space_after_national_prefix_ =
+      national_prefix_separators_pattern->PartialMatch(
+          format.national_prefix_formatting_rule());
+}
+
 bool AsYouTypeFormatter::CreateFormattingTemplate(const NumberFormat& format) {
   string number_pattern = format.pattern();
 
@@ -285,15 +316,16 @@ void AsYouTypeFormatter::Clear() {
   last_match_position_ = 0;
   current_formatting_pattern_.clear();
   prefix_before_national_number_.clear();
-  national_prefix_extracted_.clear();
+  extracted_national_prefix_.clear();
   national_number_.clear();
   able_to_format_ = true;
   input_has_formatting_ = false;
   position_to_remember_ = 0;
   original_position_ = 0;
-  is_international_formatting_ = false;
+  is_complete_number_ = false;
   is_expecting_country_code_ = false;
   possible_formats_.clear();
+  should_add_space_after_national_prefix_ = false;
 
   if (current_metadata_ != default_metadata_) {
     current_metadata_ = GetMetadataForRegion(default_country_);
@@ -357,8 +389,10 @@ void AsYouTypeFormatter::InputDigitWithOptionToRememberPosition(
       }
     } else if (AbleToExtractLongerNdd()) {
       // Add an additional space to separate long NDD and national significant
-      // number for readability.
-      prefix_before_national_number_.append(" ");
+      // number for readability. We don't set
+      // should_add_space_after_national_prefix_ to true, since we don't want
+      // this to change later when we choose formatting templates.
+      prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
       AttemptToChoosePatternWithPrefixExtracted(phone_number);
       return;
     }
@@ -380,9 +414,10 @@ void AsYouTypeFormatter::InputDigitWithOptionToRememberPosition(
     case 3:
       if (AttemptToExtractIdd()) {
         is_expecting_country_code_ = true;
+        // FALLTHROUGH_INTENDED
       } else {
         // No IDD or plus sign is found, might be entering in national format.
-        RemoveNationalPrefixFromNationalNumber(&national_prefix_extracted_);
+        RemoveNationalPrefixFromNationalNumber(&extracted_national_prefix_);
         AttemptToChooseFormattingPattern(phone_number);
         return;
       }
@@ -396,7 +431,7 @@ void AsYouTypeFormatter::InputDigitWithOptionToRememberPosition(
         return;
       }
       if (possible_formats_.size() > 0) {
-        // The formatting pattern is already chosen.
+        // The formatting patterns are already chosen.
         string temp_national_number;
         InputDigitHelper(normalized_next_char, &temp_national_number);
         // See if accrued digits can be formatted properly already. If not, use
@@ -413,9 +448,13 @@ void AsYouTypeFormatter::InputDigitWithOptionToRememberPosition(
           InputAccruedNationalNumber(phone_number);
           return;
         }
-        phone_number->assign(able_to_format_
-            ? prefix_before_national_number_ + temp_national_number
-            : temp_national_number);
+        if (able_to_format_) {
+          AppendNationalNumber(temp_national_number, phone_number);
+        } else {
+          phone_number->clear();
+          accrued_input_.toUTF8String(*phone_number);
+        }
+        return;
       } else {
         AttemptToChooseFormattingPattern(phone_number);
       }
@@ -427,51 +466,55 @@ void AsYouTypeFormatter::AttemptToChoosePatternWithPrefixExtracted(
   able_to_format_ = true;
   is_expecting_country_code_ = false;
   possible_formats_.clear();
+  last_match_position_ = 0;
+  formatting_template_.remove();
+  current_formatting_pattern_.clear();
   AttemptToChooseFormattingPattern(formatted_number);
 }
 
+const string& AsYouTypeFormatter::GetExtractedNationalPrefix() const {
+  return extracted_national_prefix_;
+}
+
 bool AsYouTypeFormatter::AbleToExtractLongerNdd() {
-  if (national_prefix_extracted_.length() > 0) {
+  if (extracted_national_prefix_.length() > 0) {
     // Put the extracted NDD back to the national number before attempting to
     // extract a new NDD.
-    national_number_.insert(0, national_prefix_extracted_);
+    national_number_.insert(0, extracted_national_prefix_);
     // Remove the previously extracted NDD from prefixBeforeNationalNumber. We
-    // cannot simply set it to empty string because people sometimes enter
-    // national prefix after country code, e.g +44 (0)20-1234-5678.
+    // cannot simply set it to empty string because people sometimes incorrectly
+    // enter national prefix after the country code, e.g. +44 (0)20-1234-5678.
     int index_of_previous_ndd =
-        prefix_before_national_number_.find_last_of(national_prefix_extracted_);
+        prefix_before_national_number_.find_last_of(extracted_national_prefix_);
     prefix_before_national_number_.resize(index_of_previous_ndd);
   }
   string new_national_prefix;
   RemoveNationalPrefixFromNationalNumber(&new_national_prefix);
-  return national_prefix_extracted_ != new_national_prefix;
+  return extracted_national_prefix_ != new_national_prefix;
 }
 
 void AsYouTypeFormatter::AttemptToFormatAccruedDigits(
-    string* formatted_number) {
-  DCHECK(formatted_number);
+    string* formatted_result) {
+  DCHECK(formatted_result);
 
   for (list<const NumberFormat*>::const_iterator it = possible_formats_.begin();
        it != possible_formats_.end(); ++it) {
     DCHECK(*it);
-    const NumberFormat& num_format = **it;
-    string pattern = num_format.pattern();
+    const NumberFormat& number_format = **it;
+    const string& pattern = number_format.pattern();
 
     if (regexp_cache_.GetRegExp(pattern).FullMatch(national_number_)) {
-      formatted_number->assign(national_number_);
-      string new_formatted_number(*formatted_number);
-      string format = num_format.format();
+      SetShouldAddSpaceAfterNationalPrefix(number_format);
 
+      string formatted_number(national_number_);
       bool status = regexp_cache_.GetRegExp(pattern).GlobalReplace(
-          &new_formatted_number, format);
+          &formatted_number, number_format.format());
       DCHECK(status);
 
-      formatted_number->assign(prefix_before_national_number_);
-      formatted_number->append(new_formatted_number);
+      AppendNationalNumber(formatted_number, formatted_result);
       return;
     }
   }
-  formatted_number->clear();
 }
 
 int AsYouTypeFormatter::GetRememberedPosition() const {
@@ -493,19 +536,50 @@ int AsYouTypeFormatter::GetRememberedPosition() const {
   return ConvertUnicodeStringPosition(current_output, current_output_index);
 }
 
+void AsYouTypeFormatter::AppendNationalNumber(const string& national_number,
+                                              string* phone_number) const {
+  int prefix_before_national_number_length =
+      prefix_before_national_number_.size();
+  if (should_add_space_after_national_prefix_ &&
+      prefix_before_national_number_length > 0 &&
+      prefix_before_national_number_.at(
+          prefix_before_national_number_length - 1) !=
+      kSeparatorBeforeNationalNumber) {
+    // We want to add a space after the national prefix if the national prefix
+    // formatting rule indicates that this would normally be done, with the
+    // exception of the case where we already appended a space because the NDD
+    // was surprisingly long.
+    phone_number->assign(prefix_before_national_number_);
+    phone_number->push_back(kSeparatorBeforeNationalNumber);
+    StrAppend(phone_number, national_number);
+  } else {
+    phone_number->assign(
+        StrCat(prefix_before_national_number_, national_number));
+  }
+}
+
 void AsYouTypeFormatter::AttemptToChooseFormattingPattern(
     string* formatted_number) {
   DCHECK(formatted_number);
-
+  // We start to attempt to format only when at least MIN_LEADING_DIGITS_LENGTH
+  // digits of national number (excluding national prefix) have been entered.
   if (national_number_.length() >= kMinLeadingDigitsLength) {
-    const string leading_digits =
-        national_number_.substr(0, kMinLeadingDigitsLength);
-
-    GetAvailableFormats(leading_digits);
-    MaybeCreateNewTemplate();
-    InputAccruedNationalNumber(formatted_number);
+    GetAvailableFormats(national_number_);
+    formatted_number->clear();
+    AttemptToFormatAccruedDigits(formatted_number);
+    // See if the accrued digits can be formatted properly already.
+    if (formatted_number->length() > 0) {
+      return;
+    }
+    if (MaybeCreateNewTemplate()) {
+      InputAccruedNationalNumber(formatted_number);
+    } else {
+      formatted_number->clear();
+      accrued_input_.toUTF8String(*formatted_number);
+    }
+    return;
   } else {
-    formatted_number->assign(prefix_before_national_number_ + national_number_);
+    AppendNationalNumber(national_number_, formatted_number);
   }
 }
 
@@ -520,37 +594,58 @@ void AsYouTypeFormatter::InputAccruedNationalNumber(string* number) {
       temp_national_number.clear();
       InputDigitHelper(national_number_[i], &temp_national_number);
     }
-    number->assign(able_to_format_
-        ? prefix_before_national_number_ + temp_national_number
-        : temp_national_number);
+    if (able_to_format_) {
+      AppendNationalNumber(temp_national_number, number);
+    } else {
+      number->clear();
+      accrued_input_.toUTF8String(*number);
+    }
+    return;
   } else {
     number->assign(prefix_before_national_number_);
   }
 }
 
+bool AsYouTypeFormatter::IsNanpaNumberWithNationalPrefix() const {
+  // For NANPA numbers beginning with 1[2-9], treat the 1 as the national
+  // prefix. The reason is that national significant numbers in NANPA always
+  // start with [2-9] after the national prefix.  Numbers beginning with 1[01]
+  // can only be short/emergency numbers, which don't need the national
+  // prefix.
+  return (current_metadata_->country_code() == 1) &&
+         (national_number_[0] == '1') && (national_number_[1] != '0') &&
+         (national_number_[1] != '1');
+}
+
 void AsYouTypeFormatter::RemoveNationalPrefixFromNationalNumber(
     string* national_prefix) {
   int start_of_national_number = 0;
 
-  if (current_metadata_->country_code() == 1 && national_number_[0] == '1') {
+  if (IsNanpaNumberWithNationalPrefix()) {
     start_of_national_number = 1;
-    prefix_before_national_number_.append("1 ");
-    is_international_formatting_ = true;
+    prefix_before_national_number_.append("1");
+    prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
+    is_complete_number_ = true;
   } else if (current_metadata_->has_national_prefix_for_parsing()) {
     const scoped_ptr<RegExpInput> consumed_input(
         regexp_factory_->CreateInput(national_number_));
     const RegExp& pattern = regexp_cache_.GetRegExp(
         current_metadata_->national_prefix_for_parsing());
 
+    // Since some national prefix patterns are entirely optional, check that a
+    // national prefix could actually be extracted.
     if (pattern.Consume(consumed_input.get())) {
-      // When the national prefix is detected, we use international formatting
-      // rules instead of national ones, because national formatting rules could
-      // countain local formatting rules for numbers entered without area code.
-      is_international_formatting_ = true;
       start_of_national_number =
           national_number_.length() - consumed_input->ToString().length();
-      prefix_before_national_number_.append(
-          national_number_.substr(0, start_of_national_number));
+      if (start_of_national_number > 0) {
+        // When the national prefix is detected, we use international formatting
+        // rules instead of national ones, because national formatting rules
+        // could countain local formatting rules for numbers entered without
+        // area code.
+        is_complete_number_ = true;
+        prefix_before_national_number_.append(
+            national_number_.substr(0, start_of_national_number));
+      }
     }
   }
   national_prefix->assign(national_number_, 0, start_of_national_number);
@@ -568,7 +663,7 @@ bool AsYouTypeFormatter::AttemptToExtractIdd() {
              current_metadata_->international_prefix()));
 
   if (international_prefix.Consume(consumed_input.get())) {
-    is_international_formatting_ = true;
+    is_complete_number_ = true;
     const int start_of_country_code =
         accrued_input_without_formatting_.length() -
         consumed_input->ToString().length();
@@ -584,7 +679,7 @@ bool AsYouTypeFormatter::AttemptToExtractIdd() {
     prefix_before_national_number_.append(before_country_code);
 
     if (accrued_input_without_formatting_[0] != kPlusSign) {
-      prefix_before_national_number_.append(" ");
+      prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
     }
     return true;
   }
@@ -604,11 +699,17 @@ bool AsYouTypeFormatter::AttemptToExtractCountryCode() {
   national_number_.assign(number_without_country_code);
   string new_region_code;
   phone_util_.GetRegionCodeForCountryCode(country_code, &new_region_code);
-
-  if (new_region_code != default_country_) {
+  if (PhoneNumberUtil::kRegionCodeForNonGeoEntity == new_region_code) {
+    current_metadata_ =
+        phone_util_.GetMetadataForNonGeographicalRegion(country_code);
+  } else if (new_region_code != default_country_) {
     current_metadata_ = GetMetadataForRegion(new_region_code);
   }
-  StrAppend(&prefix_before_national_number_, country_code, " ");
+  StrAppend(&prefix_before_national_number_, country_code);
+  prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
+  // When we have successfully extracted the IDD, the previously extracted NDD
+  // should be cleared because it is no longer valid.
+  extracted_national_prefix_.clear();
 
   return true;
 }
@@ -637,6 +738,8 @@ char AsYouTypeFormatter::NormalizeAndAccrueDigitsAndPlusSign(
 void AsYouTypeFormatter::InputDigitHelper(char next_char, string* number) {
   DCHECK(number);
   number->clear();
+  // Note that formattingTemplate is not guaranteed to have a value, it could be
+  // empty, e.g. when the next digit is entered after extracting an IDD or NDD.
   const char32 placeholder_codepoint = UnicodeString(kDigitPlaceholder)[0];
   int placeholder_pos = formatting_template_
       .tempSubString(last_match_position_).indexOf(placeholder_codepoint);