CPP: libphonenumber v5.7 changes - small improvement to phone number extraction,...
[platform/upstream/libphonenumber.git] / cpp / src / phonenumbers / asyoutypeformatter.cc
1 // Copyright (C) 2011 The Libphonenumber Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "phonenumbers/asyoutypeformatter.h"
16
17 #include <cctype>
18 #include <list>
19 #include <string>
20
21 #include <google/protobuf/message_lite.h>
22
23 #include "phonenumbers/base/logging.h"
24 #include "phonenumbers/base/memory/scoped_ptr.h"
25 #include "phonenumbers/phonemetadata.pb.h"
26 #include "phonenumbers/phonenumberutil.h"
27 #include "phonenumbers/regexp_cache.h"
28 #include "phonenumbers/regexp_factory.h"
29 #include "phonenumbers/stringutil.h"
30 #include "phonenumbers/unicodestring.h"
31
32 namespace i18n {
33 namespace phonenumbers {
34
35 using google::protobuf::RepeatedPtrField;
36
37 namespace {
38
// The plus sign, which may start a number typed in international format.
const char kPlusSign = '+';

// Regular expression that matches a character class, such as [1-4], when it
// appears inside another regular expression.
const char kCharacterClassPattern[] = "\\[([^\\[\\]])*\\]";

// Minimum number of national-number digits that must have been accrued before
// the formatter is triggered. The first element of the leading_digits_pattern
// of each number_format holds a regular expression that matches up to this
// many digits.
const size_t kMinLeadingDigitsLength = 3;

// Stand-in for a digit that has not been entered yet: U+2008 (punctuation
// space), spelled out here as its UTF-8 byte sequence.
const char kDigitPlaceholder[] = "\xE2\x80\x88";

// Character inserted, when appropriate, between a prefix (such as a long NDD
// or a country calling code) and the national number.
const char kSeparatorBeforeNationalNumber = ' ';

// Characters which, when present in a national prefix formatting rule, signal
// that the national prefix should be separated from the number when
// formatting.
const char kNationalPrefixSeparatorsPattern[] = "[- ]";
63
64 // Replaces any standalone digit in the pattern (not any inside a {} grouping)
65 // with \d. This function replaces the standalone digit regex used in the Java
66 // version which is currently not supported by RE2 because it uses a special
67 // construct (?=).
68 void ReplacePatternDigits(string* pattern) {
69   DCHECK(pattern);
70   string new_pattern;
71   // This is needed since sometimes there is more than one digit in between the
72   // curly braces.
73   bool is_in_braces = false;
74
75   for (string::const_iterator it = pattern->begin(); it != pattern->end();
76        ++it) {
77     const char current_char = *it;
78
79     if (isdigit(current_char)) {
80       if (is_in_braces) {
81         new_pattern += current_char;
82       } else {
83         new_pattern += "\\d";
84       }
85     } else {
86       new_pattern += current_char;
87       if (current_char == '{') {
88         is_in_braces = true;
89       } else if (current_char == '}') {
90         is_in_braces = false;
91       }
92     }
93   }
94   pattern->assign(new_pattern);
95 }
96
97 // Matches all the groups contained in 'input' against 'pattern'.
98 void MatchAllGroups(const string& pattern,
99                     const string& input,
100                     const AbstractRegExpFactory& regexp_factory,
101                     RegExpCache* cache,
102                     string* group) {
103   DCHECK(cache);
104   DCHECK(group);
105   string new_pattern(pattern);
106
107   // Transforms pattern "(...)(...)(...)" to "(.........)".
108   strrmm(&new_pattern, "()");
109   new_pattern = StrCat("(", new_pattern, ")");
110
111   const scoped_ptr<RegExpInput> consume_input(
112       regexp_factory.CreateInput(input));
113   bool status =
114       cache->GetRegExp(new_pattern).Consume(consume_input.get(), group);
115   DCHECK(status);
116 }
117
118 PhoneMetadata CreateEmptyMetadata() {
119   PhoneMetadata metadata;
120   metadata.set_international_prefix("NA");
121   return metadata;
122 }
123
124 }  // namespace
125
126 AsYouTypeFormatter::AsYouTypeFormatter(const string& region_code)
127     : regexp_factory_(new RegExpFactory()),
128       regexp_cache_(*regexp_factory_.get(), 64),
129       current_output_(),
130       formatting_template_(),
131       current_formatting_pattern_(),
132       accrued_input_(),
133       accrued_input_without_formatting_(),
134       able_to_format_(true),
135       input_has_formatting_(false),
136       is_complete_number_(false),
137       is_expecting_country_code_(false),
138       phone_util_(*PhoneNumberUtil::GetInstance()),
139       default_country_(region_code),
140       empty_metadata_(CreateEmptyMetadata()),
141       default_metadata_(GetMetadataForRegion(region_code)),
142       current_metadata_(default_metadata_),
143       last_match_position_(0),
144       original_position_(0),
145       position_to_remember_(0),
146       prefix_before_national_number_(),
147       should_add_space_after_national_prefix_(false),
148       national_prefix_extracted_(),
149       national_number_(),
150       possible_formats_() {
151 }
152
153 // The metadata needed by this class is the same for all regions sharing the
154 // same country calling code. Therefore, we return the metadata for "main"
155 // region for this country calling code.
156 const PhoneMetadata* AsYouTypeFormatter::GetMetadataForRegion(
157     const string& region_code) const {
158   int country_calling_code = phone_util_.GetCountryCodeForRegion(region_code);
159   string main_country;
160   phone_util_.GetRegionCodeForCountryCode(country_calling_code, &main_country);
161   const PhoneMetadata* const metadata =
162       phone_util_.GetMetadataForRegion(main_country);
163   if (metadata) {
164     return metadata;
165   }
166   // Set to a default instance of the metadata. This allows us to function with
167   // an incorrect region code, even if formatting only works for numbers
168   // specified with "+".
169   return &empty_metadata_;
170 }
171
172 bool AsYouTypeFormatter::MaybeCreateNewTemplate() {
173   // When there are multiple available formats, the formatter uses the first
174   // format where a formatting template could be created.
175   for (list<const NumberFormat*>::const_iterator it = possible_formats_.begin();
176        it != possible_formats_.end(); ++it) {
177     DCHECK(*it);
178     const NumberFormat& number_format = **it;
179     const string& pattern = number_format.pattern();
180     if (current_formatting_pattern_ == pattern) {
181       return false;
182     }
183     if (CreateFormattingTemplate(number_format)) {
184       current_formatting_pattern_ = pattern;
185       SetShouldAddSpaceAfterNationalPrefix(number_format);
186       // With a new formatting template, the matched position using the old
187       // template needs to be reset.
188       last_match_position_ = 0;
189       return true;
190     }
191   }
192   able_to_format_ = false;
193   return false;
194 }
195
196 void AsYouTypeFormatter::GetAvailableFormats(
197     const string& leading_three_digits) {
198   const RepeatedPtrField<NumberFormat>& format_list =
199       (is_complete_number_ &&
200        current_metadata_->intl_number_format().size() > 0)
201           ? current_metadata_->intl_number_format()
202           : current_metadata_->number_format();
203   bool national_prefix_used_by_country =
204       current_metadata_->has_national_prefix();
205   for (RepeatedPtrField<NumberFormat>::const_iterator it = format_list.begin();
206        it != format_list.end(); ++it) {
207     if (!national_prefix_used_by_country || is_complete_number_ ||
208         it->national_prefix_optional_when_formatting() ||
209         phone_util_.FormattingRuleHasFirstGroupOnly(
210             it->national_prefix_formatting_rule())) {
211       if (phone_util_.IsFormatEligibleForAsYouTypeFormatter(it->format())) {
212         possible_formats_.push_back(&*it);
213       }
214     }
215   }
216   NarrowDownPossibleFormats(leading_three_digits);
217 }
218
219 void AsYouTypeFormatter::NarrowDownPossibleFormats(
220     const string& leading_digits) {
221   const int index_of_leading_digits_pattern =
222       leading_digits.length() - kMinLeadingDigitsLength;
223
224   for (list<const NumberFormat*>::iterator it = possible_formats_.begin();
225        it != possible_formats_.end(); ) {
226     DCHECK(*it);
227     const NumberFormat& format = **it;
228
229     if (format.leading_digits_pattern_size() >
230         index_of_leading_digits_pattern) {
231       const scoped_ptr<RegExpInput> input(
232           regexp_factory_->CreateInput(leading_digits));
233       if (!regexp_cache_.GetRegExp(format.leading_digits_pattern().Get(
234               index_of_leading_digits_pattern)).Consume(input.get())) {
235         it = possible_formats_.erase(it);
236         continue;
237       }
238     }  // else the particular format has no more specific leadingDigitsPattern,
239        // and it should be retained.
240     ++it;
241   }
242 }
243
244 void AsYouTypeFormatter::SetShouldAddSpaceAfterNationalPrefix(
245     const NumberFormat& format) {
246   static const scoped_ptr<const RegExp> national_prefix_separators_pattern(
247       regexp_factory_->CreateRegExp(kNationalPrefixSeparatorsPattern));
248   should_add_space_after_national_prefix_ =
249       national_prefix_separators_pattern->PartialMatch(
250           format.national_prefix_formatting_rule());
251 }
252
253 bool AsYouTypeFormatter::CreateFormattingTemplate(const NumberFormat& format) {
254   string number_pattern = format.pattern();
255
256   // The formatter doesn't format numbers when numberPattern contains "|", e.g.
257   // (20|3)\d{4}. In those cases we quickly return.
258   if (number_pattern.find('|') != string::npos) {
259     return false;
260   }
261   // Replace anything in the form of [..] with \d.
262   static const scoped_ptr<const RegExp> character_class_pattern(
263       regexp_factory_->CreateRegExp(kCharacterClassPattern));
264   character_class_pattern->GlobalReplace(&number_pattern, "\\\\d");
265
266   // Replace any standalone digit (not the one in d{}) with \d.
267   ReplacePatternDigits(&number_pattern);
268
269   string number_format = format.format();
270   formatting_template_.remove();
271   UnicodeString temp_template;
272   GetFormattingTemplate(number_pattern, number_format, &temp_template);
273
274   if (temp_template.length() > 0) {
275     formatting_template_.append(temp_template);
276     return true;
277   }
278   return false;
279 }
280
281 void AsYouTypeFormatter::GetFormattingTemplate(
282     const string& number_pattern,
283     const string& number_format,
284     UnicodeString* formatting_template) {
285   DCHECK(formatting_template);
286
287   // Creates a phone number consisting only of the digit 9 that matches the
288   // number_pattern by applying the pattern to the longest_phone_number string.
289   static const char longest_phone_number[] = "999999999999999";
290   string a_phone_number;
291
292   MatchAllGroups(number_pattern, longest_phone_number, *regexp_factory_,
293                  &regexp_cache_, &a_phone_number);
294   // No formatting template can be created if the number of digits entered so
295   // far is longer than the maximum the current formatting rule can accommodate.
296   if (a_phone_number.length() < national_number_.length()) {
297     formatting_template->remove();
298     return;
299   }
300   // Formats the number according to number_format.
301   regexp_cache_.GetRegExp(number_pattern).GlobalReplace(
302       &a_phone_number, number_format);
303   // Replaces each digit with character kDigitPlaceholder.
304   GlobalReplaceSubstring("9", kDigitPlaceholder, &a_phone_number);
305   formatting_template->setTo(a_phone_number.c_str(), a_phone_number.size());
306 }
307
308 void AsYouTypeFormatter::Clear() {
309   current_output_.clear();
310   accrued_input_.remove();
311   accrued_input_without_formatting_.remove();
312   formatting_template_.remove();
313   last_match_position_ = 0;
314   current_formatting_pattern_.clear();
315   prefix_before_national_number_.clear();
316   national_prefix_extracted_.clear();
317   national_number_.clear();
318   able_to_format_ = true;
319   input_has_formatting_ = false;
320   position_to_remember_ = 0;
321   original_position_ = 0;
322   is_complete_number_ = false;
323   is_expecting_country_code_ = false;
324   possible_formats_.clear();
325   should_add_space_after_national_prefix_ = false;
326
327   if (current_metadata_ != default_metadata_) {
328     current_metadata_ = GetMetadataForRegion(default_country_);
329   }
330 }
331
332 const string& AsYouTypeFormatter::InputDigit(char32 next_char, string* result) {
333   DCHECK(result);
334
335   InputDigitWithOptionToRememberPosition(next_char, false, &current_output_);
336   result->assign(current_output_);
337   return *result;
338 }
339
340 const string& AsYouTypeFormatter::InputDigitAndRememberPosition(
341     char32 next_char,
342     string* result) {
343   DCHECK(result);
344
345   InputDigitWithOptionToRememberPosition(next_char, true, &current_output_);
346   result->assign(current_output_);
347   return *result;
348 }
349
350 void AsYouTypeFormatter::InputDigitWithOptionToRememberPosition(
351     char32 next_char,
352     bool remember_position,
353     string* phone_number) {
354   DCHECK(phone_number);
355
356   accrued_input_.append(next_char);
357   if (remember_position) {
358     original_position_ = accrued_input_.length();
359   }
360   // We do formatting on-the-fly only when each character entered is either a
361   // plus sign (accepted at the start of the number only).
362   string next_char_string;
363   UnicodeString(next_char).toUTF8String(next_char_string);
364
365   char normalized_next_char = '\0';
366   if (!(phone_util_.ContainsOnlyValidDigits(next_char_string) ||
367       (accrued_input_.length() == 1 && next_char == kPlusSign))) {
368     able_to_format_ = false;
369     input_has_formatting_ = true;
370   } else {
371     normalized_next_char =
372         NormalizeAndAccrueDigitsAndPlusSign(next_char, remember_position);
373   }
374   if (!able_to_format_) {
375     // When we are unable to format because of reasons other than that
376     // formatting chars have been entered, it can be due to really long IDDs or
377     // NDDs. If that is the case, we might be able to do formatting again after
378     // extracting them.
379     if (input_has_formatting_) {
380       phone_number->clear();
381       accrued_input_.toUTF8String(*phone_number);
382     } else if (AttemptToExtractIdd()) {
383       if (AttemptToExtractCountryCode()) {
384         AttemptToChoosePatternWithPrefixExtracted(phone_number);
385         return;
386       }
387     } else if (AbleToExtractLongerNdd()) {
388       // Add an additional space to separate long NDD and national significant
389       // number for readability. We don't set
390       // should_add_space_after_national_prefix_ to true, since we don't want
391       // this to change later when we choose formatting templates.
392       prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
393       AttemptToChoosePatternWithPrefixExtracted(phone_number);
394       return;
395     }
396     phone_number->clear();
397     accrued_input_.toUTF8String(*phone_number);
398     return;
399   }
400
401   // We start to attempt to format only when at least kMinLeadingDigitsLength
402   // digits (the plus sign is counted as a digit as well for this purpose) have
403   // been entered.
404   switch (accrued_input_without_formatting_.length()) {
405     case 0:
406     case 1:
407     case 2:
408       phone_number->clear();
409       accrued_input_.toUTF8String(*phone_number);
410       return;
411     case 3:
412       if (AttemptToExtractIdd()) {
413         is_expecting_country_code_ = true;
414       } else {
415         // No IDD or plus sign is found, might be entering in national format.
416         RemoveNationalPrefixFromNationalNumber(&national_prefix_extracted_);
417         AttemptToChooseFormattingPattern(phone_number);
418         return;
419       }
420     default:
421       if (is_expecting_country_code_) {
422         if (AttemptToExtractCountryCode()) {
423           is_expecting_country_code_ = false;
424         }
425         phone_number->assign(prefix_before_national_number_);
426         phone_number->append(national_number_);
427         return;
428       }
429       if (possible_formats_.size() > 0) {
430         // The formatting pattern is already chosen.
431         string temp_national_number;
432         InputDigitHelper(normalized_next_char, &temp_national_number);
433         // See if accrued digits can be formatted properly already. If not, use
434         // the results from InputDigitHelper, which does formatting based on the
435         // formatting pattern chosen.
436         string formatted_number;
437         AttemptToFormatAccruedDigits(&formatted_number);
438         if (formatted_number.length() > 0) {
439           phone_number->assign(formatted_number);
440           return;
441         }
442         NarrowDownPossibleFormats(national_number_);
443         if (MaybeCreateNewTemplate()) {
444           InputAccruedNationalNumber(phone_number);
445           return;
446         }
447         if (able_to_format_) {
448           AppendNationalNumber(temp_national_number, phone_number);
449         } else {
450           phone_number->clear();
451           accrued_input_.toUTF8String(*phone_number);
452         }
453         return;
454       } else {
455         AttemptToChooseFormattingPattern(phone_number);
456       }
457   }
458 }
459
460 void AsYouTypeFormatter::AttemptToChoosePatternWithPrefixExtracted(
461     string* formatted_number) {
462   able_to_format_ = true;
463   is_expecting_country_code_ = false;
464   possible_formats_.clear();
465   AttemptToChooseFormattingPattern(formatted_number);
466 }
467
468 bool AsYouTypeFormatter::AbleToExtractLongerNdd() {
469   if (national_prefix_extracted_.length() > 0) {
470     // Put the extracted NDD back to the national number before attempting to
471     // extract a new NDD.
472     national_number_.insert(0, national_prefix_extracted_);
473     // Remove the previously extracted NDD from prefixBeforeNationalNumber. We
474     // cannot simply set it to empty string because people sometimes incorrectly
475     // enter national prefix after the country code, e.g. +44 (0)20-1234-5678.
476     int index_of_previous_ndd =
477         prefix_before_national_number_.find_last_of(national_prefix_extracted_);
478     prefix_before_national_number_.resize(index_of_previous_ndd);
479   }
480   string new_national_prefix;
481   RemoveNationalPrefixFromNationalNumber(&new_national_prefix);
482   return national_prefix_extracted_ != new_national_prefix;
483 }
484
485 void AsYouTypeFormatter::AttemptToFormatAccruedDigits(
486     string* formatted_result) {
487   DCHECK(formatted_result);
488
489   for (list<const NumberFormat*>::const_iterator it = possible_formats_.begin();
490        it != possible_formats_.end(); ++it) {
491     DCHECK(*it);
492     const NumberFormat& number_format = **it;
493     const string& pattern = number_format.pattern();
494
495     if (regexp_cache_.GetRegExp(pattern).FullMatch(national_number_)) {
496       SetShouldAddSpaceAfterNationalPrefix(number_format);
497
498       string formatted_number(national_number_);
499       bool status = regexp_cache_.GetRegExp(pattern).GlobalReplace(
500           &formatted_number, number_format.format());
501       DCHECK(status);
502
503       AppendNationalNumber(formatted_number, formatted_result);
504       return;
505     }
506   }
507 }
508
509 int AsYouTypeFormatter::GetRememberedPosition() const {
510   UnicodeString current_output(current_output_.c_str());
511   if (!able_to_format_) {
512     return ConvertUnicodeStringPosition(current_output, original_position_);
513   }
514   int accrued_input_index = 0;
515   int current_output_index = 0;
516
517   while (accrued_input_index < position_to_remember_ &&
518          current_output_index < current_output.length()) {
519     if (accrued_input_without_formatting_[accrued_input_index] ==
520         current_output[current_output_index]) {
521       ++accrued_input_index;
522     }
523     ++current_output_index;
524   }
525   return ConvertUnicodeStringPosition(current_output, current_output_index);
526 }
527
528 void AsYouTypeFormatter::AppendNationalNumber(const string& national_number,
529                                               string* phone_number) const {
530   int prefix_before_national_number_length =
531       prefix_before_national_number_.size();
532   if (should_add_space_after_national_prefix_ &&
533       prefix_before_national_number_length > 0 &&
534       prefix_before_national_number_.at(
535           prefix_before_national_number_length - 1) !=
536       kSeparatorBeforeNationalNumber) {
537     // We want to add a space after the national prefix if the national prefix
538     // formatting rule indicates that this would normally be done, with the
539     // exception of the case where we already appended a space because the NDD
540     // was surprisingly long.
541     phone_number->assign(prefix_before_national_number_);
542     phone_number->push_back(kSeparatorBeforeNationalNumber);
543     StrAppend(phone_number, national_number);
544   } else {
545     phone_number->assign(
546         StrCat(prefix_before_national_number_, national_number));
547   }
548 }
549
550 void AsYouTypeFormatter::AttemptToChooseFormattingPattern(
551     string* formatted_number) {
552   DCHECK(formatted_number);
553
554   if (national_number_.length() >= kMinLeadingDigitsLength) {
555     const string leading_digits =
556         national_number_.substr(0, kMinLeadingDigitsLength);
557
558     GetAvailableFormats(leading_digits);
559     if (MaybeCreateNewTemplate()) {
560       InputAccruedNationalNumber(formatted_number);
561     } else {
562       formatted_number->clear();
563       accrued_input_.toUTF8String(*formatted_number);
564     }
565     return;
566   } else {
567     AppendNationalNumber(national_number_, formatted_number);
568   }
569 }
570
571 void AsYouTypeFormatter::InputAccruedNationalNumber(string* number) {
572   DCHECK(number);
573   int length_of_national_number = national_number_.length();
574
575   if (length_of_national_number > 0) {
576     string temp_national_number;
577
578     for (int i = 0; i < length_of_national_number; ++i) {
579       temp_national_number.clear();
580       InputDigitHelper(national_number_[i], &temp_national_number);
581     }
582     if (able_to_format_) {
583       AppendNationalNumber(temp_national_number, number);
584     } else {
585       number->clear();
586       accrued_input_.toUTF8String(*number);
587     }
588     return;
589   } else {
590     number->assign(prefix_before_national_number_);
591   }
592 }
593
594 bool AsYouTypeFormatter::IsNanpaNumberWithNationalPrefix() const {
595   // For NANPA numbers beginning with 1[2-9], treat the 1 as the national
596   // prefix. The reason is that national significant numbers in NANPA always
597   // start with [2-9] after the national prefix.  Numbers beginning with 1[01]
598   // can only be short/emergency numbers, which don't need the national
599   // prefix.
600   return (current_metadata_->country_code() == 1) &&
601          (national_number_[0] == '1') && (national_number_[1] != '0') &&
602          (national_number_[1] != '1');
603 }
604
605 void AsYouTypeFormatter::RemoveNationalPrefixFromNationalNumber(
606     string* national_prefix) {
607   int start_of_national_number = 0;
608
609   if (IsNanpaNumberWithNationalPrefix()) {
610     start_of_national_number = 1;
611     prefix_before_national_number_.append("1");
612     prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
613     is_complete_number_ = true;
614   } else if (current_metadata_->has_national_prefix_for_parsing()) {
615     const scoped_ptr<RegExpInput> consumed_input(
616         regexp_factory_->CreateInput(national_number_));
617     const RegExp& pattern = regexp_cache_.GetRegExp(
618         current_metadata_->national_prefix_for_parsing());
619
620     if (pattern.Consume(consumed_input.get())) {
621       // When the national prefix is detected, we use international formatting
622       // rules instead of national ones, because national formatting rules could
623       // countain local formatting rules for numbers entered without area code.
624       is_complete_number_ = true;
625       start_of_national_number =
626           national_number_.length() - consumed_input->ToString().length();
627       prefix_before_national_number_.append(
628           national_number_.substr(0, start_of_national_number));
629     }
630   }
631   national_prefix->assign(national_number_, 0, start_of_national_number);
632   national_number_.erase(0, start_of_national_number);
633 }
634
635 bool AsYouTypeFormatter::AttemptToExtractIdd() {
636   string accrued_input_without_formatting_stdstring;
637   accrued_input_without_formatting_
638       .toUTF8String(accrued_input_without_formatting_stdstring);
639   const scoped_ptr<RegExpInput> consumed_input(
640       regexp_factory_->CreateInput(accrued_input_without_formatting_stdstring));
641   const RegExp& international_prefix = regexp_cache_.GetRegExp(
642       StrCat("\\", string(&kPlusSign, 1), "|",
643              current_metadata_->international_prefix()));
644
645   if (international_prefix.Consume(consumed_input.get())) {
646     is_complete_number_ = true;
647     const int start_of_country_code =
648         accrued_input_without_formatting_.length() -
649         consumed_input->ToString().length();
650
651     national_number_.clear();
652     accrued_input_without_formatting_.tempSubString(start_of_country_code)
653         .toUTF8String(national_number_);
654
655     string before_country_code;
656     accrued_input_without_formatting_.tempSubString(0, start_of_country_code)
657         .toUTF8String(before_country_code);
658     prefix_before_national_number_.clear();
659     prefix_before_national_number_.append(before_country_code);
660
661     if (accrued_input_without_formatting_[0] != kPlusSign) {
662       prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
663     }
664     return true;
665   }
666   return false;
667 }
668
669 bool AsYouTypeFormatter::AttemptToExtractCountryCode() {
670   if (national_number_.length() == 0) {
671     return false;
672   }
673   string number_without_country_code(national_number_);
674   int country_code =
675     phone_util_.ExtractCountryCode(&number_without_country_code);
676   if (country_code == 0) {
677     return false;
678   }
679   national_number_.assign(number_without_country_code);
680   string new_region_code;
681   phone_util_.GetRegionCodeForCountryCode(country_code, &new_region_code);
682   if (PhoneNumberUtil::kRegionCodeForNonGeoEntity == new_region_code) {
683     current_metadata_ =
684         phone_util_.GetMetadataForNonGeographicalRegion(country_code);
685   } else if (new_region_code != default_country_) {
686     current_metadata_ = GetMetadataForRegion(new_region_code);
687   }
688   StrAppend(&prefix_before_national_number_, country_code);
689   prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
690
691   return true;
692 }
693
694 char AsYouTypeFormatter::NormalizeAndAccrueDigitsAndPlusSign(
695     char32 next_char,
696     bool remember_position) {
697   char normalized_char = next_char;
698
699   if (next_char == kPlusSign) {
700     accrued_input_without_formatting_.append(next_char);
701   } else {
702     string number;
703     UnicodeString(next_char).toUTF8String(number);
704     phone_util_.NormalizeDigitsOnly(&number);
705     accrued_input_without_formatting_.append(next_char);
706     national_number_.append(number);
707     normalized_char = number[0];
708   }
709   if (remember_position) {
710     position_to_remember_ = accrued_input_without_formatting_.length();
711   }
712   return normalized_char;
713 }
714
715 void AsYouTypeFormatter::InputDigitHelper(char next_char, string* number) {
716   DCHECK(number);
717   number->clear();
718   const char32 placeholder_codepoint = UnicodeString(kDigitPlaceholder)[0];
719   int placeholder_pos = formatting_template_
720       .tempSubString(last_match_position_).indexOf(placeholder_codepoint);
721   if (placeholder_pos != -1) {
722     UnicodeString temp_template = formatting_template_;
723     placeholder_pos = temp_template.indexOf(placeholder_codepoint);
724     temp_template.setCharAt(placeholder_pos, UnicodeString(next_char)[0]);
725     last_match_position_ = placeholder_pos;
726     formatting_template_.replace(0, temp_template.length(), temp_template);
727     formatting_template_.tempSubString(0, last_match_position_ + 1)
728         .toUTF8String(*number);
729   } else {
730     if (possible_formats_.size() == 1) {
731       // More digits are entered than we could handle, and there are no other
732       // valid patterns to try.
733       able_to_format_ = false;
734     }  // else, we just reset the formatting pattern.
735     current_formatting_pattern_.clear();
736     accrued_input_.toUTF8String(*number);
737   }
738 }
739
740 // Returns the number of bytes contained in the given UnicodeString up to the
741 // specified position.
742 // static
743 int AsYouTypeFormatter::ConvertUnicodeStringPosition(const UnicodeString& s,
744                                                      int pos) {
745   if (pos > s.length()) {
746     return -1;
747   }
748   string substring;
749   s.tempSubString(0, pos).toUTF8String(substring);
750   return substring.length();
751 }
752
753 }  // namespace phonenumbers
754 }  // namespace i18n