Clean compile with CMAKE_BUILD_TYPE=RELEASE
[platform/upstream/libphonenumber.git] / cpp / src / phonenumbers / asyoutypeformatter.cc
1 // Copyright (C) 2011 The Libphonenumber Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "phonenumbers/asyoutypeformatter.h"
16
17 #include <math.h>
18 #include <cctype>
19 #include <list>
20 #include <string>
21
22 #include <google/protobuf/message_lite.h>
23
24 #include "phonenumbers/base/logging.h"
25 #include "phonenumbers/base/memory/scoped_ptr.h"
26 #include "phonenumbers/phonemetadata.pb.h"
27 #include "phonenumbers/phonenumberutil.h"
28 #include "phonenumbers/regexp_cache.h"
29 #include "phonenumbers/regexp_factory.h"
30 #include "phonenumbers/stringutil.h"
31 #include "phonenumbers/unicodestring.h"
32
33 namespace i18n {
34 namespace phonenumbers {
35
36 using google::protobuf::RepeatedPtrField;
37
38 namespace {
39
// The plus sign, as it may be typed in front of an international number.
const char kPlusSign = '+';

// Regular expression matching a character class, i.e. a bracketed set such as
// [1-4], inside another regular expression.
const char kCharacterClassPattern[] = "\\[([^\\[\\]])*\\]";

// Number of national-number digits that must have been accrued before the
// formatter is triggered. The first element of the leading_digits_pattern of
// each number_format contains a regular expression that matches up to this
// number of digits.
const size_t kMinLeadingDigitsLength = 3;

// Placeholder standing in for every digit that has not been entered yet: the
// UTF-8 encoding of \u2008, the punctuation space.
const char kDigitPlaceholder[] = "\xE2\x80\x88"; /* U+2008 */

// Character inserted, when appropriate, between a prefix (such as a long NDD
// or a country calling code) and the national number.
const char kSeparatorBeforeNationalNumber = ' ';

// Characters which, when present in a national prefix formatting rule, signal
// that the national prefix should be separated from the number when
// formatting.
const char kNationalPrefixSeparatorsPattern[] = "[- ]";
64
// Rewrites 'pattern' in place so that every standalone digit becomes \d, while
// digits located between '{' and '}' (e.g. the counts in {2,3}) are copied
// unchanged. This function replaces the standalone digit regex used in the
// Java version which is currently not supported by RE2 because it uses a
// special construct (?=).
//
// 'pattern' must not be NULL. All non-digit characters, including bytes
// outside the ASCII range, are copied through untouched.
void ReplacePatternDigits(string* pattern) {
  DCHECK(pattern);
  string rewritten;
  // A flag rather than a one-character look-behind is needed since sometimes
  // there is more than one digit in between the curly braces.
  bool inside_braces = false;

  for (string::const_iterator it = pattern->begin(); it != pattern->end();
       ++it) {
    const char current_char = *it;
    // isdigit() is only defined for values representable as unsigned char (or
    // EOF); a plain char holding a non-ASCII byte may be negative, so convert
    // before classifying.
    const bool is_digit =
        isdigit(static_cast<unsigned char>(current_char)) != 0;

    if (is_digit && !inside_braces) {
      rewritten += "\\d";
      continue;
    }
    rewritten += current_char;
    if (current_char == '{') {
      inside_braces = true;
    } else if (current_char == '}') {
      inside_braces = false;
    }
  }
  pattern->swap(rewritten);
}
97
98 // Matches all the groups contained in 'input' against 'pattern'.
99 void MatchAllGroups(const string& pattern,
100                     const string& input,
101                     const AbstractRegExpFactory& regexp_factory,
102                     RegExpCache* cache,
103                     string* group) {
104   DCHECK(cache);
105   DCHECK(group);
106   string new_pattern(pattern);
107
108   // Transforms pattern "(...)(...)(...)" to "(.........)".
109   strrmm(&new_pattern, "()");
110   new_pattern = StrCat("(", new_pattern, ")");
111
112   const scoped_ptr<RegExpInput> consume_input(
113       regexp_factory.CreateInput(input));
114   bool status =
115       cache->GetRegExp(new_pattern).Consume(consume_input.get(), group);
116   DCHECK(status);
117   IGNORE_UNUSED(status);
118 }
119
120 PhoneMetadata CreateEmptyMetadata() {
121   PhoneMetadata metadata;
122   metadata.set_international_prefix("NA");
123   return metadata;
124 }
125
126 }  // namespace
127
128 AsYouTypeFormatter::AsYouTypeFormatter(const string& region_code)
129     : regexp_factory_(new RegExpFactory()),
130       regexp_cache_(*regexp_factory_.get(), 64),
131       current_output_(),
132       formatting_template_(),
133       current_formatting_pattern_(),
134       accrued_input_(),
135       accrued_input_without_formatting_(),
136       able_to_format_(true),
137       input_has_formatting_(false),
138       is_complete_number_(false),
139       is_expecting_country_code_(false),
140       phone_util_(*PhoneNumberUtil::GetInstance()),
141       default_country_(region_code),
142       empty_metadata_(CreateEmptyMetadata()),
143       default_metadata_(GetMetadataForRegion(region_code)),
144       current_metadata_(default_metadata_),
145       last_match_position_(0),
146       original_position_(0),
147       position_to_remember_(0),
148       prefix_before_national_number_(),
149       should_add_space_after_national_prefix_(false),
150       extracted_national_prefix_(),
151       national_number_(),
152       possible_formats_() {
153 }
154
155 // The metadata needed by this class is the same for all regions sharing the
156 // same country calling code. Therefore, we return the metadata for "main"
157 // region for this country calling code.
158 const PhoneMetadata* AsYouTypeFormatter::GetMetadataForRegion(
159     const string& region_code) const {
160   int country_calling_code = phone_util_.GetCountryCodeForRegion(region_code);
161   string main_country;
162   phone_util_.GetRegionCodeForCountryCode(country_calling_code, &main_country);
163   const PhoneMetadata* const metadata =
164       phone_util_.GetMetadataForRegion(main_country);
165   if (metadata) {
166     return metadata;
167   }
168   // Set to a default instance of the metadata. This allows us to function with
169   // an incorrect region code, even if formatting only works for numbers
170   // specified with "+".
171   return &empty_metadata_;
172 }
173
174 bool AsYouTypeFormatter::MaybeCreateNewTemplate() {
175   // When there are multiple available formats, the formatter uses the first
176   // format where a formatting template could be created.
177   for (list<const NumberFormat*>::const_iterator it = possible_formats_.begin();
178        it != possible_formats_.end(); ++it) {
179     DCHECK(*it);
180     const NumberFormat& number_format = **it;
181     const string& pattern = number_format.pattern();
182     if (current_formatting_pattern_ == pattern) {
183       return false;
184     }
185     if (CreateFormattingTemplate(number_format)) {
186       current_formatting_pattern_ = pattern;
187       SetShouldAddSpaceAfterNationalPrefix(number_format);
188       // With a new formatting template, the matched position using the old
189       // template needs to be reset.
190       last_match_position_ = 0;
191       return true;
192     }
193   }
194   able_to_format_ = false;
195   return false;
196 }
197
198 void AsYouTypeFormatter::GetAvailableFormats(const string& leading_digits) {
199   const RepeatedPtrField<NumberFormat>& format_list =
200       (is_complete_number_ &&
201        current_metadata_->intl_number_format().size() > 0)
202           ? current_metadata_->intl_number_format()
203           : current_metadata_->number_format();
204   bool national_prefix_used_by_country =
205       current_metadata_->has_national_prefix();
206   for (RepeatedPtrField<NumberFormat>::const_iterator it = format_list.begin();
207        it != format_list.end(); ++it) {
208     if (!national_prefix_used_by_country || is_complete_number_ ||
209         it->national_prefix_optional_when_formatting() ||
210         phone_util_.FormattingRuleHasFirstGroupOnly(
211             it->national_prefix_formatting_rule())) {
212       if (phone_util_.IsFormatEligibleForAsYouTypeFormatter(it->format())) {
213         possible_formats_.push_back(&*it);
214       }
215     }
216   }
217   NarrowDownPossibleFormats(leading_digits);
218 }
219
220 void AsYouTypeFormatter::NarrowDownPossibleFormats(
221     const string& leading_digits) {
222   const int index_of_leading_digits_pattern =
223       leading_digits.length() - kMinLeadingDigitsLength;
224
225   for (list<const NumberFormat*>::iterator it = possible_formats_.begin();
226        it != possible_formats_.end(); ) {
227     DCHECK(*it);
228     const NumberFormat& format = **it;
229     if (format.leading_digits_pattern_size() == 0) {
230       // Keep everything that isn't restricted by leading digits.
231       ++it;
232       continue;
233     }
234     int last_leading_digits_pattern =
235         std::min(index_of_leading_digits_pattern,
236                  format.leading_digits_pattern_size() - 1);
237     const scoped_ptr<RegExpInput> input(
238         regexp_factory_->CreateInput(leading_digits));
239     if (!regexp_cache_.GetRegExp(format.leading_digits_pattern().Get(
240             last_leading_digits_pattern)).Consume(input.get())) {
241       it = possible_formats_.erase(it);
242       continue;
243     }
244     ++it;
245   }
246 }
247
248 void AsYouTypeFormatter::SetShouldAddSpaceAfterNationalPrefix(
249     const NumberFormat& format) {
250   static const scoped_ptr<const RegExp> national_prefix_separators_pattern(
251       regexp_factory_->CreateRegExp(kNationalPrefixSeparatorsPattern));
252   should_add_space_after_national_prefix_ =
253       national_prefix_separators_pattern->PartialMatch(
254           format.national_prefix_formatting_rule());
255 }
256
257 bool AsYouTypeFormatter::CreateFormattingTemplate(const NumberFormat& format) {
258   string number_pattern = format.pattern();
259
260   // The formatter doesn't format numbers when numberPattern contains "|", e.g.
261   // (20|3)\d{4}. In those cases we quickly return.
262   if (number_pattern.find('|') != string::npos) {
263     return false;
264   }
265   // Replace anything in the form of [..] with \d.
266   static const scoped_ptr<const RegExp> character_class_pattern(
267       regexp_factory_->CreateRegExp(kCharacterClassPattern));
268   character_class_pattern->GlobalReplace(&number_pattern, "\\\\d");
269
270   // Replace any standalone digit (not the one in d{}) with \d.
271   ReplacePatternDigits(&number_pattern);
272
273   string number_format = format.format();
274   formatting_template_.remove();
275   UnicodeString temp_template;
276   GetFormattingTemplate(number_pattern, number_format, &temp_template);
277
278   if (temp_template.length() > 0) {
279     formatting_template_.append(temp_template);
280     return true;
281   }
282   return false;
283 }
284
285 void AsYouTypeFormatter::GetFormattingTemplate(
286     const string& number_pattern,
287     const string& number_format,
288     UnicodeString* formatting_template) {
289   DCHECK(formatting_template);
290
291   // Creates a phone number consisting only of the digit 9 that matches the
292   // number_pattern by applying the pattern to the longest_phone_number string.
293   static const char longest_phone_number[] = "999999999999999";
294   string a_phone_number;
295
296   MatchAllGroups(number_pattern, longest_phone_number, *regexp_factory_,
297                  &regexp_cache_, &a_phone_number);
298   // No formatting template can be created if the number of digits entered so
299   // far is longer than the maximum the current formatting rule can accommodate.
300   if (a_phone_number.length() < national_number_.length()) {
301     formatting_template->remove();
302     return;
303   }
304   // Formats the number according to number_format.
305   regexp_cache_.GetRegExp(number_pattern).GlobalReplace(
306       &a_phone_number, number_format);
307   // Replaces each digit with character kDigitPlaceholder.
308   GlobalReplaceSubstring("9", kDigitPlaceholder, &a_phone_number);
309   formatting_template->setTo(a_phone_number.c_str(), a_phone_number.size());
310 }
311
312 void AsYouTypeFormatter::Clear() {
313   current_output_.clear();
314   accrued_input_.remove();
315   accrued_input_without_formatting_.remove();
316   formatting_template_.remove();
317   last_match_position_ = 0;
318   current_formatting_pattern_.clear();
319   prefix_before_national_number_.clear();
320   extracted_national_prefix_.clear();
321   national_number_.clear();
322   able_to_format_ = true;
323   input_has_formatting_ = false;
324   position_to_remember_ = 0;
325   original_position_ = 0;
326   is_complete_number_ = false;
327   is_expecting_country_code_ = false;
328   possible_formats_.clear();
329   should_add_space_after_national_prefix_ = false;
330
331   if (current_metadata_ != default_metadata_) {
332     current_metadata_ = GetMetadataForRegion(default_country_);
333   }
334 }
335
336 const string& AsYouTypeFormatter::InputDigit(char32 next_char, string* result) {
337   DCHECK(result);
338
339   InputDigitWithOptionToRememberPosition(next_char, false, &current_output_);
340   result->assign(current_output_);
341   return *result;
342 }
343
344 const string& AsYouTypeFormatter::InputDigitAndRememberPosition(
345     char32 next_char,
346     string* result) {
347   DCHECK(result);
348
349   InputDigitWithOptionToRememberPosition(next_char, true, &current_output_);
350   result->assign(current_output_);
351   return *result;
352 }
353
354 void AsYouTypeFormatter::InputDigitWithOptionToRememberPosition(
355     char32 next_char,
356     bool remember_position,
357     string* phone_number) {
358   DCHECK(phone_number);
359
360   accrued_input_.append(next_char);
361   if (remember_position) {
362     original_position_ = accrued_input_.length();
363   }
364   // We do formatting on-the-fly only when each character entered is either a
365   // plus sign (accepted at the start of the number only).
366   string next_char_string;
367   UnicodeString(next_char).toUTF8String(next_char_string);
368
369   char normalized_next_char = '\0';
370   if (!(phone_util_.ContainsOnlyValidDigits(next_char_string) ||
371       (accrued_input_.length() == 1 && next_char == kPlusSign))) {
372     able_to_format_ = false;
373     input_has_formatting_ = true;
374   } else {
375     normalized_next_char =
376         NormalizeAndAccrueDigitsAndPlusSign(next_char, remember_position);
377   }
378   if (!able_to_format_) {
379     // When we are unable to format because of reasons other than that
380     // formatting chars have been entered, it can be due to really long IDDs or
381     // NDDs. If that is the case, we might be able to do formatting again after
382     // extracting them.
383     if (input_has_formatting_) {
384       phone_number->clear();
385       accrued_input_.toUTF8String(*phone_number);
386     } else if (AttemptToExtractIdd()) {
387       if (AttemptToExtractCountryCode()) {
388         AttemptToChoosePatternWithPrefixExtracted(phone_number);
389         return;
390       }
391     } else if (AbleToExtractLongerNdd()) {
392       // Add an additional space to separate long NDD and national significant
393       // number for readability. We don't set
394       // should_add_space_after_national_prefix_ to true, since we don't want
395       // this to change later when we choose formatting templates.
396       prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
397       AttemptToChoosePatternWithPrefixExtracted(phone_number);
398       return;
399     }
400     phone_number->clear();
401     accrued_input_.toUTF8String(*phone_number);
402     return;
403   }
404
405   // We start to attempt to format only when at least kMinLeadingDigitsLength
406   // digits (the plus sign is counted as a digit as well for this purpose) have
407   // been entered.
408   switch (accrued_input_without_formatting_.length()) {
409     case 0:
410     case 1:
411     case 2:
412       phone_number->clear();
413       accrued_input_.toUTF8String(*phone_number);
414       return;
415     case 3:
416       if (AttemptToExtractIdd()) {
417         is_expecting_country_code_ = true;
418         // FALLTHROUGH_INTENDED
419       } else {
420         // No IDD or plus sign is found, might be entering in national format.
421         RemoveNationalPrefixFromNationalNumber(&extracted_national_prefix_);
422         AttemptToChooseFormattingPattern(phone_number);
423         return;
424       }
425     default:
426       if (is_expecting_country_code_) {
427         if (AttemptToExtractCountryCode()) {
428           is_expecting_country_code_ = false;
429         }
430         phone_number->assign(prefix_before_national_number_);
431         phone_number->append(national_number_);
432         return;
433       }
434       if (possible_formats_.size() > 0) {
435         // The formatting patterns are already chosen.
436         string temp_national_number;
437         InputDigitHelper(normalized_next_char, &temp_national_number);
438         // See if accrued digits can be formatted properly already. If not, use
439         // the results from InputDigitHelper, which does formatting based on the
440         // formatting pattern chosen.
441         string formatted_number;
442         AttemptToFormatAccruedDigits(&formatted_number);
443         if (formatted_number.length() > 0) {
444           phone_number->assign(formatted_number);
445           return;
446         }
447         NarrowDownPossibleFormats(national_number_);
448         if (MaybeCreateNewTemplate()) {
449           InputAccruedNationalNumber(phone_number);
450           return;
451         }
452         if (able_to_format_) {
453           AppendNationalNumber(temp_national_number, phone_number);
454         } else {
455           phone_number->clear();
456           accrued_input_.toUTF8String(*phone_number);
457         }
458         return;
459       } else {
460         AttemptToChooseFormattingPattern(phone_number);
461       }
462   }
463 }
464
465 void AsYouTypeFormatter::AttemptToChoosePatternWithPrefixExtracted(
466     string* formatted_number) {
467   able_to_format_ = true;
468   is_expecting_country_code_ = false;
469   possible_formats_.clear();
470   last_match_position_ = 0;
471   formatting_template_.remove();
472   current_formatting_pattern_.clear();
473   AttemptToChooseFormattingPattern(formatted_number);
474 }
475
476 const string& AsYouTypeFormatter::GetExtractedNationalPrefix() const {
477   return extracted_national_prefix_;
478 }
479
480 bool AsYouTypeFormatter::AbleToExtractLongerNdd() {
481   if (extracted_national_prefix_.length() > 0) {
482     // Put the extracted NDD back to the national number before attempting to
483     // extract a new NDD.
484     national_number_.insert(0, extracted_national_prefix_);
485     // Remove the previously extracted NDD from prefixBeforeNationalNumber. We
486     // cannot simply set it to empty string because people sometimes incorrectly
487     // enter national prefix after the country code, e.g. +44 (0)20-1234-5678.
488     int index_of_previous_ndd =
489         prefix_before_national_number_.find_last_of(extracted_national_prefix_);
490     prefix_before_national_number_.resize(index_of_previous_ndd);
491   }
492   string new_national_prefix;
493   RemoveNationalPrefixFromNationalNumber(&new_national_prefix);
494   return extracted_national_prefix_ != new_national_prefix;
495 }
496
497 void AsYouTypeFormatter::AttemptToFormatAccruedDigits(
498     string* formatted_result) {
499   DCHECK(formatted_result);
500
501   for (list<const NumberFormat*>::const_iterator it = possible_formats_.begin();
502        it != possible_formats_.end(); ++it) {
503     DCHECK(*it);
504     const NumberFormat& number_format = **it;
505     const string& pattern = number_format.pattern();
506
507     if (regexp_cache_.GetRegExp(pattern).FullMatch(national_number_)) {
508       SetShouldAddSpaceAfterNationalPrefix(number_format);
509
510       string formatted_number(national_number_);
511       bool status = regexp_cache_.GetRegExp(pattern).GlobalReplace(
512           &formatted_number, number_format.format());
513       DCHECK(status);
514       IGNORE_UNUSED(status);
515
516       AppendNationalNumber(formatted_number, formatted_result);
517       return;
518     }
519   }
520 }
521
522 int AsYouTypeFormatter::GetRememberedPosition() const {
523   UnicodeString current_output(current_output_.c_str());
524   if (!able_to_format_) {
525     return ConvertUnicodeStringPosition(current_output, original_position_);
526   }
527   int accrued_input_index = 0;
528   int current_output_index = 0;
529
530   while (accrued_input_index < position_to_remember_ &&
531          current_output_index < current_output.length()) {
532     if (accrued_input_without_formatting_[accrued_input_index] ==
533         current_output[current_output_index]) {
534       ++accrued_input_index;
535     }
536     ++current_output_index;
537   }
538   return ConvertUnicodeStringPosition(current_output, current_output_index);
539 }
540
541 void AsYouTypeFormatter::AppendNationalNumber(const string& national_number,
542                                               string* phone_number) const {
543   int prefix_before_national_number_length =
544       prefix_before_national_number_.size();
545   if (should_add_space_after_national_prefix_ &&
546       prefix_before_national_number_length > 0 &&
547       prefix_before_national_number_.at(
548           prefix_before_national_number_length - 1) !=
549       kSeparatorBeforeNationalNumber) {
550     // We want to add a space after the national prefix if the national prefix
551     // formatting rule indicates that this would normally be done, with the
552     // exception of the case where we already appended a space because the NDD
553     // was surprisingly long.
554     phone_number->assign(prefix_before_national_number_);
555     phone_number->push_back(kSeparatorBeforeNationalNumber);
556     StrAppend(phone_number, national_number);
557   } else {
558     phone_number->assign(
559         StrCat(prefix_before_national_number_, national_number));
560   }
561 }
562
563 void AsYouTypeFormatter::AttemptToChooseFormattingPattern(
564     string* formatted_number) {
565   DCHECK(formatted_number);
566   // We start to attempt to format only when at least MIN_LEADING_DIGITS_LENGTH
567   // digits of national number (excluding national prefix) have been entered.
568   if (national_number_.length() >= kMinLeadingDigitsLength) {
569     GetAvailableFormats(national_number_);
570     formatted_number->clear();
571     AttemptToFormatAccruedDigits(formatted_number);
572     // See if the accrued digits can be formatted properly already.
573     if (formatted_number->length() > 0) {
574       return;
575     }
576     if (MaybeCreateNewTemplate()) {
577       InputAccruedNationalNumber(formatted_number);
578     } else {
579       formatted_number->clear();
580       accrued_input_.toUTF8String(*formatted_number);
581     }
582     return;
583   } else {
584     AppendNationalNumber(national_number_, formatted_number);
585   }
586 }
587
588 void AsYouTypeFormatter::InputAccruedNationalNumber(string* number) {
589   DCHECK(number);
590   int length_of_national_number = national_number_.length();
591
592   if (length_of_national_number > 0) {
593     string temp_national_number;
594
595     for (int i = 0; i < length_of_national_number; ++i) {
596       temp_national_number.clear();
597       InputDigitHelper(national_number_[i], &temp_national_number);
598     }
599     if (able_to_format_) {
600       AppendNationalNumber(temp_national_number, number);
601     } else {
602       number->clear();
603       accrued_input_.toUTF8String(*number);
604     }
605     return;
606   } else {
607     number->assign(prefix_before_national_number_);
608   }
609 }
610
611 bool AsYouTypeFormatter::IsNanpaNumberWithNationalPrefix() const {
612   // For NANPA numbers beginning with 1[2-9], treat the 1 as the national
613   // prefix. The reason is that national significant numbers in NANPA always
614   // start with [2-9] after the national prefix.  Numbers beginning with 1[01]
615   // can only be short/emergency numbers, which don't need the national
616   // prefix.
617   return (current_metadata_->country_code() == 1) &&
618          (national_number_[0] == '1') && (national_number_[1] != '0') &&
619          (national_number_[1] != '1');
620 }
621
622 void AsYouTypeFormatter::RemoveNationalPrefixFromNationalNumber(
623     string* national_prefix) {
624   int start_of_national_number = 0;
625
626   if (IsNanpaNumberWithNationalPrefix()) {
627     start_of_national_number = 1;
628     prefix_before_national_number_.append("1");
629     prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
630     is_complete_number_ = true;
631   } else if (current_metadata_->has_national_prefix_for_parsing()) {
632     const scoped_ptr<RegExpInput> consumed_input(
633         regexp_factory_->CreateInput(national_number_));
634     const RegExp& pattern = regexp_cache_.GetRegExp(
635         current_metadata_->national_prefix_for_parsing());
636
637     // Since some national prefix patterns are entirely optional, check that a
638     // national prefix could actually be extracted.
639     if (pattern.Consume(consumed_input.get())) {
640       start_of_national_number =
641           national_number_.length() - consumed_input->ToString().length();
642       if (start_of_national_number > 0) {
643         // When the national prefix is detected, we use international formatting
644         // rules instead of national ones, because national formatting rules
645         // could countain local formatting rules for numbers entered without
646         // area code.
647         is_complete_number_ = true;
648         prefix_before_national_number_.append(
649             national_number_.substr(0, start_of_national_number));
650       }
651     }
652   }
653   national_prefix->assign(national_number_, 0, start_of_national_number);
654   national_number_.erase(0, start_of_national_number);
655 }
656
657 bool AsYouTypeFormatter::AttemptToExtractIdd() {
658   string accrued_input_without_formatting_stdstring;
659   accrued_input_without_formatting_
660       .toUTF8String(accrued_input_without_formatting_stdstring);
661   const scoped_ptr<RegExpInput> consumed_input(
662       regexp_factory_->CreateInput(accrued_input_without_formatting_stdstring));
663   const RegExp& international_prefix = regexp_cache_.GetRegExp(
664       StrCat("\\", string(&kPlusSign, 1), "|",
665              current_metadata_->international_prefix()));
666
667   if (international_prefix.Consume(consumed_input.get())) {
668     is_complete_number_ = true;
669     const int start_of_country_code =
670         accrued_input_without_formatting_.length() -
671         consumed_input->ToString().length();
672
673     national_number_.clear();
674     accrued_input_without_formatting_.tempSubString(start_of_country_code)
675         .toUTF8String(national_number_);
676
677     string before_country_code;
678     accrued_input_without_formatting_.tempSubString(0, start_of_country_code)
679         .toUTF8String(before_country_code);
680     prefix_before_national_number_.clear();
681     prefix_before_national_number_.append(before_country_code);
682
683     if (accrued_input_without_formatting_[0] != kPlusSign) {
684       prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
685     }
686     return true;
687   }
688   return false;
689 }
690
691 bool AsYouTypeFormatter::AttemptToExtractCountryCode() {
692   if (national_number_.length() == 0) {
693     return false;
694   }
695   string number_without_country_code(national_number_);
696   int country_code =
697     phone_util_.ExtractCountryCode(&number_without_country_code);
698   if (country_code == 0) {
699     return false;
700   }
701   national_number_.assign(number_without_country_code);
702   string new_region_code;
703   phone_util_.GetRegionCodeForCountryCode(country_code, &new_region_code);
704   if (PhoneNumberUtil::kRegionCodeForNonGeoEntity == new_region_code) {
705     current_metadata_ =
706         phone_util_.GetMetadataForNonGeographicalRegion(country_code);
707   } else if (new_region_code != default_country_) {
708     current_metadata_ = GetMetadataForRegion(new_region_code);
709   }
710   StrAppend(&prefix_before_national_number_, country_code);
711   prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
712   // When we have successfully extracted the IDD, the previously extracted NDD
713   // should be cleared because it is no longer valid.
714   extracted_national_prefix_.clear();
715
716   return true;
717 }
718
719 char AsYouTypeFormatter::NormalizeAndAccrueDigitsAndPlusSign(
720     char32 next_char,
721     bool remember_position) {
722   char normalized_char = next_char;
723
724   if (next_char == kPlusSign) {
725     accrued_input_without_formatting_.append(next_char);
726   } else {
727     string number;
728     UnicodeString(next_char).toUTF8String(number);
729     phone_util_.NormalizeDigitsOnly(&number);
730     accrued_input_without_formatting_.append(next_char);
731     national_number_.append(number);
732     normalized_char = number[0];
733   }
734   if (remember_position) {
735     position_to_remember_ = accrued_input_without_formatting_.length();
736   }
737   return normalized_char;
738 }
739
740 void AsYouTypeFormatter::InputDigitHelper(char next_char, string* number) {
741   DCHECK(number);
742   number->clear();
743   // Note that formattingTemplate is not guaranteed to have a value, it could be
744   // empty, e.g. when the next digit is entered after extracting an IDD or NDD.
745   const char32 placeholder_codepoint = UnicodeString(kDigitPlaceholder)[0];
746   int placeholder_pos = formatting_template_
747       .tempSubString(last_match_position_).indexOf(placeholder_codepoint);
748   if (placeholder_pos != -1) {
749     UnicodeString temp_template = formatting_template_;
750     placeholder_pos = temp_template.indexOf(placeholder_codepoint);
751     temp_template.setCharAt(placeholder_pos, UnicodeString(next_char)[0]);
752     last_match_position_ = placeholder_pos;
753     formatting_template_.replace(0, temp_template.length(), temp_template);
754     formatting_template_.tempSubString(0, last_match_position_ + 1)
755         .toUTF8String(*number);
756   } else {
757     if (possible_formats_.size() == 1) {
758       // More digits are entered than we could handle, and there are no other
759       // valid patterns to try.
760       able_to_format_ = false;
761     }  // else, we just reset the formatting pattern.
762     current_formatting_pattern_.clear();
763     accrued_input_.toUTF8String(*number);
764   }
765 }
766
767 // Returns the number of bytes contained in the given UnicodeString up to the
768 // specified position.
769 // static
770 int AsYouTypeFormatter::ConvertUnicodeStringPosition(const UnicodeString& s,
771                                                      int pos) {
772   if (pos > s.length()) {
773     return -1;
774   }
775   string substring;
776   s.tempSubString(0, pos).toUTF8String(substring);
777   return substring.length();
778 }
779
780 }  // namespace phonenumbers
781 }  // namespace i18n