Fix bug in AsYouTypeFormatter where we hit a IndexOutOfBoundsException in its Java...
[platform/upstream/libphonenumber.git] / cpp / src / phonenumbers / asyoutypeformatter.cc
1 // Copyright (C) 2011 The Libphonenumber Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "phonenumbers/asyoutypeformatter.h"
16
17 #include <math.h>
18 #include <cctype>
19 #include <list>
20 #include <string>
21
22 #include <google/protobuf/message_lite.h>
23
24 #include "phonenumbers/base/logging.h"
25 #include "phonenumbers/base/memory/scoped_ptr.h"
26 #include "phonenumbers/phonemetadata.pb.h"
27 #include "phonenumbers/phonenumberutil.h"
28 #include "phonenumbers/regexp_cache.h"
29 #include "phonenumbers/regexp_factory.h"
30 #include "phonenumbers/stringutil.h"
31 #include "phonenumbers/unicodestring.h"
32
33 namespace i18n {
34 namespace phonenumbers {
35
36 using google::protobuf::RepeatedPtrField;
37
38 namespace {
39
// The only non-digit character accepted by the formatter, and only as the
// very first character typed.
const char kPlusSign = '+';

// Regular expression that recognises a character class, e.g. [1-4], inside
// another regular expression.
const char kCharacterClassPattern[] = "\\[([^\\[\\]])*\\]";

// Number of national-number digits that must have been accrued before the
// formatter is triggered. The first element of each number_format's
// leading_digits_pattern is a regular expression matching up to this many
// digits.
const size_t kMinLeadingDigitsLength = 3;

// Stand-in for a digit that has not been typed yet: U+2008 PUNCTUATION SPACE,
// spelled out as its three UTF-8 bytes.
const char kDigitPlaceholder[] = "\xE2\x80\x88";

// Separator inserted, when appropriate, between a prefix (such as a long NDD
// or a country calling code) and the national number.
const char kSeparatorBeforeNationalNumber = ' ';

// Characters which, when present in a national prefix formatting rule, signal
// that the national prefix should be separated from the number when
// formatting.
const char kNationalPrefixSeparatorsPattern[] = "[- ]";
64
65 // Replaces any standalone digit in the pattern (not any inside a {} grouping)
66 // with \d. This function replaces the standalone digit regex used in the Java
67 // version which is currently not supported by RE2 because it uses a special
68 // construct (?=).
69 void ReplacePatternDigits(string* pattern) {
70   DCHECK(pattern);
71   string new_pattern;
72   // This is needed since sometimes there is more than one digit in between the
73   // curly braces.
74   bool is_in_braces = false;
75
76   for (string::const_iterator it = pattern->begin(); it != pattern->end();
77        ++it) {
78     const char current_char = *it;
79
80     if (isdigit(current_char)) {
81       if (is_in_braces) {
82         new_pattern += current_char;
83       } else {
84         new_pattern += "\\d";
85       }
86     } else {
87       new_pattern += current_char;
88       if (current_char == '{') {
89         is_in_braces = true;
90       } else if (current_char == '}') {
91         is_in_braces = false;
92       }
93     }
94   }
95   pattern->assign(new_pattern);
96 }
97
98 // Matches all the groups contained in 'input' against 'pattern'.
99 void MatchAllGroups(const string& pattern,
100                     const string& input,
101                     const AbstractRegExpFactory& regexp_factory,
102                     RegExpCache* cache,
103                     string* group) {
104   DCHECK(cache);
105   DCHECK(group);
106   string new_pattern(pattern);
107
108   // Transforms pattern "(...)(...)(...)" to "(.........)".
109   strrmm(&new_pattern, "()");
110   new_pattern = StrCat("(", new_pattern, ")");
111
112   const scoped_ptr<RegExpInput> consume_input(
113       regexp_factory.CreateInput(input));
114   bool status =
115       cache->GetRegExp(new_pattern).Consume(consume_input.get(), group);
116   DCHECK(status);
117 }
118
119 PhoneMetadata CreateEmptyMetadata() {
120   PhoneMetadata metadata;
121   metadata.set_international_prefix("NA");
122   return metadata;
123 }
124
125 }  // namespace
126
127 AsYouTypeFormatter::AsYouTypeFormatter(const string& region_code)
128     : regexp_factory_(new RegExpFactory()),
129       regexp_cache_(*regexp_factory_.get(), 64),
130       current_output_(),
131       formatting_template_(),
132       current_formatting_pattern_(),
133       accrued_input_(),
134       accrued_input_without_formatting_(),
135       able_to_format_(true),
136       input_has_formatting_(false),
137       is_complete_number_(false),
138       is_expecting_country_code_(false),
139       phone_util_(*PhoneNumberUtil::GetInstance()),
140       default_country_(region_code),
141       empty_metadata_(CreateEmptyMetadata()),
142       default_metadata_(GetMetadataForRegion(region_code)),
143       current_metadata_(default_metadata_),
144       last_match_position_(0),
145       original_position_(0),
146       position_to_remember_(0),
147       prefix_before_national_number_(),
148       should_add_space_after_national_prefix_(false),
149       extracted_national_prefix_(),
150       national_number_(),
151       possible_formats_() {
152 }
153
154 // The metadata needed by this class is the same for all regions sharing the
155 // same country calling code. Therefore, we return the metadata for "main"
156 // region for this country calling code.
157 const PhoneMetadata* AsYouTypeFormatter::GetMetadataForRegion(
158     const string& region_code) const {
159   int country_calling_code = phone_util_.GetCountryCodeForRegion(region_code);
160   string main_country;
161   phone_util_.GetRegionCodeForCountryCode(country_calling_code, &main_country);
162   const PhoneMetadata* const metadata =
163       phone_util_.GetMetadataForRegion(main_country);
164   if (metadata) {
165     return metadata;
166   }
167   // Set to a default instance of the metadata. This allows us to function with
168   // an incorrect region code, even if formatting only works for numbers
169   // specified with "+".
170   return &empty_metadata_;
171 }
172
173 bool AsYouTypeFormatter::MaybeCreateNewTemplate() {
174   // When there are multiple available formats, the formatter uses the first
175   // format where a formatting template could be created.
176   for (list<const NumberFormat*>::const_iterator it = possible_formats_.begin();
177        it != possible_formats_.end(); ++it) {
178     DCHECK(*it);
179     const NumberFormat& number_format = **it;
180     const string& pattern = number_format.pattern();
181     if (current_formatting_pattern_ == pattern) {
182       return false;
183     }
184     if (CreateFormattingTemplate(number_format)) {
185       current_formatting_pattern_ = pattern;
186       SetShouldAddSpaceAfterNationalPrefix(number_format);
187       // With a new formatting template, the matched position using the old
188       // template needs to be reset.
189       last_match_position_ = 0;
190       return true;
191     }
192   }
193   able_to_format_ = false;
194   return false;
195 }
196
197 void AsYouTypeFormatter::GetAvailableFormats(const string& leading_digits) {
198   const RepeatedPtrField<NumberFormat>& format_list =
199       (is_complete_number_ &&
200        current_metadata_->intl_number_format().size() > 0)
201           ? current_metadata_->intl_number_format()
202           : current_metadata_->number_format();
203   bool national_prefix_used_by_country =
204       current_metadata_->has_national_prefix();
205   for (RepeatedPtrField<NumberFormat>::const_iterator it = format_list.begin();
206        it != format_list.end(); ++it) {
207     if (!national_prefix_used_by_country || is_complete_number_ ||
208         it->national_prefix_optional_when_formatting() ||
209         phone_util_.FormattingRuleHasFirstGroupOnly(
210             it->national_prefix_formatting_rule())) {
211       if (phone_util_.IsFormatEligibleForAsYouTypeFormatter(it->format())) {
212         possible_formats_.push_back(&*it);
213       }
214     }
215   }
216   NarrowDownPossibleFormats(leading_digits);
217 }
218
219 void AsYouTypeFormatter::NarrowDownPossibleFormats(
220     const string& leading_digits) {
221   const int index_of_leading_digits_pattern =
222       leading_digits.length() - kMinLeadingDigitsLength;
223
224   for (list<const NumberFormat*>::iterator it = possible_formats_.begin();
225        it != possible_formats_.end(); ) {
226     DCHECK(*it);
227     const NumberFormat& format = **it;
228     if (format.leading_digits_pattern_size() == 0) {
229       // Keep everything that isn't restricted by leading digits.
230       ++it;
231       continue;
232     }
233     int last_leading_digits_pattern =
234         std::min(index_of_leading_digits_pattern,
235                  format.leading_digits_pattern_size() - 1);
236     const scoped_ptr<RegExpInput> input(
237         regexp_factory_->CreateInput(leading_digits));
238     if (!regexp_cache_.GetRegExp(format.leading_digits_pattern().Get(
239             last_leading_digits_pattern)).Consume(input.get())) {
240       it = possible_formats_.erase(it);
241       continue;
242     }
243     ++it;
244   }
245 }
246
247 void AsYouTypeFormatter::SetShouldAddSpaceAfterNationalPrefix(
248     const NumberFormat& format) {
249   static const scoped_ptr<const RegExp> national_prefix_separators_pattern(
250       regexp_factory_->CreateRegExp(kNationalPrefixSeparatorsPattern));
251   should_add_space_after_national_prefix_ =
252       national_prefix_separators_pattern->PartialMatch(
253           format.national_prefix_formatting_rule());
254 }
255
256 bool AsYouTypeFormatter::CreateFormattingTemplate(const NumberFormat& format) {
257   string number_pattern = format.pattern();
258
259   // The formatter doesn't format numbers when numberPattern contains "|", e.g.
260   // (20|3)\d{4}. In those cases we quickly return.
261   if (number_pattern.find('|') != string::npos) {
262     return false;
263   }
264   // Replace anything in the form of [..] with \d.
265   static const scoped_ptr<const RegExp> character_class_pattern(
266       regexp_factory_->CreateRegExp(kCharacterClassPattern));
267   character_class_pattern->GlobalReplace(&number_pattern, "\\\\d");
268
269   // Replace any standalone digit (not the one in d{}) with \d.
270   ReplacePatternDigits(&number_pattern);
271
272   string number_format = format.format();
273   formatting_template_.remove();
274   UnicodeString temp_template;
275   GetFormattingTemplate(number_pattern, number_format, &temp_template);
276
277   if (temp_template.length() > 0) {
278     formatting_template_.append(temp_template);
279     return true;
280   }
281   return false;
282 }
283
284 void AsYouTypeFormatter::GetFormattingTemplate(
285     const string& number_pattern,
286     const string& number_format,
287     UnicodeString* formatting_template) {
288   DCHECK(formatting_template);
289
290   // Creates a phone number consisting only of the digit 9 that matches the
291   // number_pattern by applying the pattern to the longest_phone_number string.
292   static const char longest_phone_number[] = "999999999999999";
293   string a_phone_number;
294
295   MatchAllGroups(number_pattern, longest_phone_number, *regexp_factory_,
296                  &regexp_cache_, &a_phone_number);
297   // No formatting template can be created if the number of digits entered so
298   // far is longer than the maximum the current formatting rule can accommodate.
299   if (a_phone_number.length() < national_number_.length()) {
300     formatting_template->remove();
301     return;
302   }
303   // Formats the number according to number_format.
304   regexp_cache_.GetRegExp(number_pattern).GlobalReplace(
305       &a_phone_number, number_format);
306   // Replaces each digit with character kDigitPlaceholder.
307   GlobalReplaceSubstring("9", kDigitPlaceholder, &a_phone_number);
308   formatting_template->setTo(a_phone_number.c_str(), a_phone_number.size());
309 }
310
311 void AsYouTypeFormatter::Clear() {
312   current_output_.clear();
313   accrued_input_.remove();
314   accrued_input_without_formatting_.remove();
315   formatting_template_.remove();
316   last_match_position_ = 0;
317   current_formatting_pattern_.clear();
318   prefix_before_national_number_.clear();
319   extracted_national_prefix_.clear();
320   national_number_.clear();
321   able_to_format_ = true;
322   input_has_formatting_ = false;
323   position_to_remember_ = 0;
324   original_position_ = 0;
325   is_complete_number_ = false;
326   is_expecting_country_code_ = false;
327   possible_formats_.clear();
328   should_add_space_after_national_prefix_ = false;
329
330   if (current_metadata_ != default_metadata_) {
331     current_metadata_ = GetMetadataForRegion(default_country_);
332   }
333 }
334
335 const string& AsYouTypeFormatter::InputDigit(char32 next_char, string* result) {
336   DCHECK(result);
337
338   InputDigitWithOptionToRememberPosition(next_char, false, &current_output_);
339   result->assign(current_output_);
340   return *result;
341 }
342
343 const string& AsYouTypeFormatter::InputDigitAndRememberPosition(
344     char32 next_char,
345     string* result) {
346   DCHECK(result);
347
348   InputDigitWithOptionToRememberPosition(next_char, true, &current_output_);
349   result->assign(current_output_);
350   return *result;
351 }
352
353 void AsYouTypeFormatter::InputDigitWithOptionToRememberPosition(
354     char32 next_char,
355     bool remember_position,
356     string* phone_number) {
357   DCHECK(phone_number);
358
359   accrued_input_.append(next_char);
360   if (remember_position) {
361     original_position_ = accrued_input_.length();
362   }
363   // We do formatting on-the-fly only when each character entered is either a
364   // plus sign (accepted at the start of the number only).
365   string next_char_string;
366   UnicodeString(next_char).toUTF8String(next_char_string);
367
368   char normalized_next_char = '\0';
369   if (!(phone_util_.ContainsOnlyValidDigits(next_char_string) ||
370       (accrued_input_.length() == 1 && next_char == kPlusSign))) {
371     able_to_format_ = false;
372     input_has_formatting_ = true;
373   } else {
374     normalized_next_char =
375         NormalizeAndAccrueDigitsAndPlusSign(next_char, remember_position);
376   }
377   if (!able_to_format_) {
378     // When we are unable to format because of reasons other than that
379     // formatting chars have been entered, it can be due to really long IDDs or
380     // NDDs. If that is the case, we might be able to do formatting again after
381     // extracting them.
382     if (input_has_formatting_) {
383       phone_number->clear();
384       accrued_input_.toUTF8String(*phone_number);
385     } else if (AttemptToExtractIdd()) {
386       if (AttemptToExtractCountryCode()) {
387         AttemptToChoosePatternWithPrefixExtracted(phone_number);
388         return;
389       }
390     } else if (AbleToExtractLongerNdd()) {
391       // Add an additional space to separate long NDD and national significant
392       // number for readability. We don't set
393       // should_add_space_after_national_prefix_ to true, since we don't want
394       // this to change later when we choose formatting templates.
395       prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
396       AttemptToChoosePatternWithPrefixExtracted(phone_number);
397       return;
398     }
399     phone_number->clear();
400     accrued_input_.toUTF8String(*phone_number);
401     return;
402   }
403
404   // We start to attempt to format only when at least kMinLeadingDigitsLength
405   // digits (the plus sign is counted as a digit as well for this purpose) have
406   // been entered.
407   switch (accrued_input_without_formatting_.length()) {
408     case 0:
409     case 1:
410     case 2:
411       phone_number->clear();
412       accrued_input_.toUTF8String(*phone_number);
413       return;
414     case 3:
415       if (AttemptToExtractIdd()) {
416         is_expecting_country_code_ = true;
417         // FALLTHROUGH_INTENDED
418       } else {
419         // No IDD or plus sign is found, might be entering in national format.
420         RemoveNationalPrefixFromNationalNumber(&extracted_national_prefix_);
421         AttemptToChooseFormattingPattern(phone_number);
422         return;
423       }
424     default:
425       if (is_expecting_country_code_) {
426         if (AttemptToExtractCountryCode()) {
427           is_expecting_country_code_ = false;
428         }
429         phone_number->assign(prefix_before_national_number_);
430         phone_number->append(national_number_);
431         return;
432       }
433       if (possible_formats_.size() > 0) {
434         // The formatting patterns are already chosen.
435         string temp_national_number;
436         InputDigitHelper(normalized_next_char, &temp_national_number);
437         // See if accrued digits can be formatted properly already. If not, use
438         // the results from InputDigitHelper, which does formatting based on the
439         // formatting pattern chosen.
440         string formatted_number;
441         AttemptToFormatAccruedDigits(&formatted_number);
442         if (formatted_number.length() > 0) {
443           phone_number->assign(formatted_number);
444           return;
445         }
446         NarrowDownPossibleFormats(national_number_);
447         if (MaybeCreateNewTemplate()) {
448           InputAccruedNationalNumber(phone_number);
449           return;
450         }
451         if (able_to_format_) {
452           AppendNationalNumber(temp_national_number, phone_number);
453         } else {
454           phone_number->clear();
455           accrued_input_.toUTF8String(*phone_number);
456         }
457         return;
458       } else {
459         AttemptToChooseFormattingPattern(phone_number);
460       }
461   }
462 }
463
464 void AsYouTypeFormatter::AttemptToChoosePatternWithPrefixExtracted(
465     string* formatted_number) {
466   able_to_format_ = true;
467   is_expecting_country_code_ = false;
468   possible_formats_.clear();
469   last_match_position_ = 0;
470   formatting_template_.remove();
471   current_formatting_pattern_.clear();
472   AttemptToChooseFormattingPattern(formatted_number);
473 }
474
475 const string& AsYouTypeFormatter::GetExtractedNationalPrefix() const {
476   return extracted_national_prefix_;
477 }
478
479 bool AsYouTypeFormatter::AbleToExtractLongerNdd() {
480   if (extracted_national_prefix_.length() > 0) {
481     // Put the extracted NDD back to the national number before attempting to
482     // extract a new NDD.
483     national_number_.insert(0, extracted_national_prefix_);
484     // Remove the previously extracted NDD from prefixBeforeNationalNumber. We
485     // cannot simply set it to empty string because people sometimes incorrectly
486     // enter national prefix after the country code, e.g. +44 (0)20-1234-5678.
487     int index_of_previous_ndd =
488         prefix_before_national_number_.find_last_of(extracted_national_prefix_);
489     prefix_before_national_number_.resize(index_of_previous_ndd);
490   }
491   string new_national_prefix;
492   RemoveNationalPrefixFromNationalNumber(&new_national_prefix);
493   return extracted_national_prefix_ != new_national_prefix;
494 }
495
496 void AsYouTypeFormatter::AttemptToFormatAccruedDigits(
497     string* formatted_result) {
498   DCHECK(formatted_result);
499
500   for (list<const NumberFormat*>::const_iterator it = possible_formats_.begin();
501        it != possible_formats_.end(); ++it) {
502     DCHECK(*it);
503     const NumberFormat& number_format = **it;
504     const string& pattern = number_format.pattern();
505
506     if (regexp_cache_.GetRegExp(pattern).FullMatch(national_number_)) {
507       SetShouldAddSpaceAfterNationalPrefix(number_format);
508
509       string formatted_number(national_number_);
510       bool status = regexp_cache_.GetRegExp(pattern).GlobalReplace(
511           &formatted_number, number_format.format());
512       DCHECK(status);
513
514       AppendNationalNumber(formatted_number, formatted_result);
515       return;
516     }
517   }
518 }
519
520 int AsYouTypeFormatter::GetRememberedPosition() const {
521   UnicodeString current_output(current_output_.c_str());
522   if (!able_to_format_) {
523     return ConvertUnicodeStringPosition(current_output, original_position_);
524   }
525   int accrued_input_index = 0;
526   int current_output_index = 0;
527
528   while (accrued_input_index < position_to_remember_ &&
529          current_output_index < current_output.length()) {
530     if (accrued_input_without_formatting_[accrued_input_index] ==
531         current_output[current_output_index]) {
532       ++accrued_input_index;
533     }
534     ++current_output_index;
535   }
536   return ConvertUnicodeStringPosition(current_output, current_output_index);
537 }
538
539 void AsYouTypeFormatter::AppendNationalNumber(const string& national_number,
540                                               string* phone_number) const {
541   int prefix_before_national_number_length =
542       prefix_before_national_number_.size();
543   if (should_add_space_after_national_prefix_ &&
544       prefix_before_national_number_length > 0 &&
545       prefix_before_national_number_.at(
546           prefix_before_national_number_length - 1) !=
547       kSeparatorBeforeNationalNumber) {
548     // We want to add a space after the national prefix if the national prefix
549     // formatting rule indicates that this would normally be done, with the
550     // exception of the case where we already appended a space because the NDD
551     // was surprisingly long.
552     phone_number->assign(prefix_before_national_number_);
553     phone_number->push_back(kSeparatorBeforeNationalNumber);
554     StrAppend(phone_number, national_number);
555   } else {
556     phone_number->assign(
557         StrCat(prefix_before_national_number_, national_number));
558   }
559 }
560
561 void AsYouTypeFormatter::AttemptToChooseFormattingPattern(
562     string* formatted_number) {
563   DCHECK(formatted_number);
564   // We start to attempt to format only when at least MIN_LEADING_DIGITS_LENGTH
565   // digits of national number (excluding national prefix) have been entered.
566   if (national_number_.length() >= kMinLeadingDigitsLength) {
567     GetAvailableFormats(national_number_);
568     formatted_number->clear();
569     AttemptToFormatAccruedDigits(formatted_number);
570     // See if the accrued digits can be formatted properly already.
571     if (formatted_number->length() > 0) {
572       return;
573     }
574     if (MaybeCreateNewTemplate()) {
575       InputAccruedNationalNumber(formatted_number);
576     } else {
577       formatted_number->clear();
578       accrued_input_.toUTF8String(*formatted_number);
579     }
580     return;
581   } else {
582     AppendNationalNumber(national_number_, formatted_number);
583   }
584 }
585
586 void AsYouTypeFormatter::InputAccruedNationalNumber(string* number) {
587   DCHECK(number);
588   int length_of_national_number = national_number_.length();
589
590   if (length_of_national_number > 0) {
591     string temp_national_number;
592
593     for (int i = 0; i < length_of_national_number; ++i) {
594       temp_national_number.clear();
595       InputDigitHelper(national_number_[i], &temp_national_number);
596     }
597     if (able_to_format_) {
598       AppendNationalNumber(temp_national_number, number);
599     } else {
600       number->clear();
601       accrued_input_.toUTF8String(*number);
602     }
603     return;
604   } else {
605     number->assign(prefix_before_national_number_);
606   }
607 }
608
609 bool AsYouTypeFormatter::IsNanpaNumberWithNationalPrefix() const {
610   // For NANPA numbers beginning with 1[2-9], treat the 1 as the national
611   // prefix. The reason is that national significant numbers in NANPA always
612   // start with [2-9] after the national prefix.  Numbers beginning with 1[01]
613   // can only be short/emergency numbers, which don't need the national
614   // prefix.
615   return (current_metadata_->country_code() == 1) &&
616          (national_number_[0] == '1') && (national_number_[1] != '0') &&
617          (national_number_[1] != '1');
618 }
619
620 void AsYouTypeFormatter::RemoveNationalPrefixFromNationalNumber(
621     string* national_prefix) {
622   int start_of_national_number = 0;
623
624   if (IsNanpaNumberWithNationalPrefix()) {
625     start_of_national_number = 1;
626     prefix_before_national_number_.append("1");
627     prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
628     is_complete_number_ = true;
629   } else if (current_metadata_->has_national_prefix_for_parsing()) {
630     const scoped_ptr<RegExpInput> consumed_input(
631         regexp_factory_->CreateInput(national_number_));
632     const RegExp& pattern = regexp_cache_.GetRegExp(
633         current_metadata_->national_prefix_for_parsing());
634
635     // Since some national prefix patterns are entirely optional, check that a
636     // national prefix could actually be extracted.
637     if (pattern.Consume(consumed_input.get())) {
638       start_of_national_number =
639           national_number_.length() - consumed_input->ToString().length();
640       if (start_of_national_number > 0) {
641         // When the national prefix is detected, we use international formatting
642         // rules instead of national ones, because national formatting rules
643         // could countain local formatting rules for numbers entered without
644         // area code.
645         is_complete_number_ = true;
646         prefix_before_national_number_.append(
647             national_number_.substr(0, start_of_national_number));
648       }
649     }
650   }
651   national_prefix->assign(national_number_, 0, start_of_national_number);
652   national_number_.erase(0, start_of_national_number);
653 }
654
655 bool AsYouTypeFormatter::AttemptToExtractIdd() {
656   string accrued_input_without_formatting_stdstring;
657   accrued_input_without_formatting_
658       .toUTF8String(accrued_input_without_formatting_stdstring);
659   const scoped_ptr<RegExpInput> consumed_input(
660       regexp_factory_->CreateInput(accrued_input_without_formatting_stdstring));
661   const RegExp& international_prefix = regexp_cache_.GetRegExp(
662       StrCat("\\", string(&kPlusSign, 1), "|",
663              current_metadata_->international_prefix()));
664
665   if (international_prefix.Consume(consumed_input.get())) {
666     is_complete_number_ = true;
667     const int start_of_country_code =
668         accrued_input_without_formatting_.length() -
669         consumed_input->ToString().length();
670
671     national_number_.clear();
672     accrued_input_without_formatting_.tempSubString(start_of_country_code)
673         .toUTF8String(national_number_);
674
675     string before_country_code;
676     accrued_input_without_formatting_.tempSubString(0, start_of_country_code)
677         .toUTF8String(before_country_code);
678     prefix_before_national_number_.clear();
679     prefix_before_national_number_.append(before_country_code);
680
681     if (accrued_input_without_formatting_[0] != kPlusSign) {
682       prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
683     }
684     return true;
685   }
686   return false;
687 }
688
689 bool AsYouTypeFormatter::AttemptToExtractCountryCode() {
690   if (national_number_.length() == 0) {
691     return false;
692   }
693   string number_without_country_code(national_number_);
694   int country_code =
695     phone_util_.ExtractCountryCode(&number_without_country_code);
696   if (country_code == 0) {
697     return false;
698   }
699   national_number_.assign(number_without_country_code);
700   string new_region_code;
701   phone_util_.GetRegionCodeForCountryCode(country_code, &new_region_code);
702   if (PhoneNumberUtil::kRegionCodeForNonGeoEntity == new_region_code) {
703     current_metadata_ =
704         phone_util_.GetMetadataForNonGeographicalRegion(country_code);
705   } else if (new_region_code != default_country_) {
706     current_metadata_ = GetMetadataForRegion(new_region_code);
707   }
708   StrAppend(&prefix_before_national_number_, country_code);
709   prefix_before_national_number_.push_back(kSeparatorBeforeNationalNumber);
710   // When we have successfully extracted the IDD, the previously extracted NDD
711   // should be cleared because it is no longer valid.
712   extracted_national_prefix_.clear();
713
714   return true;
715 }
716
717 char AsYouTypeFormatter::NormalizeAndAccrueDigitsAndPlusSign(
718     char32 next_char,
719     bool remember_position) {
720   char normalized_char = next_char;
721
722   if (next_char == kPlusSign) {
723     accrued_input_without_formatting_.append(next_char);
724   } else {
725     string number;
726     UnicodeString(next_char).toUTF8String(number);
727     phone_util_.NormalizeDigitsOnly(&number);
728     accrued_input_without_formatting_.append(next_char);
729     national_number_.append(number);
730     normalized_char = number[0];
731   }
732   if (remember_position) {
733     position_to_remember_ = accrued_input_without_formatting_.length();
734   }
735   return normalized_char;
736 }
737
738 void AsYouTypeFormatter::InputDigitHelper(char next_char, string* number) {
739   DCHECK(number);
740   number->clear();
741   const char32 placeholder_codepoint = UnicodeString(kDigitPlaceholder)[0];
742   int placeholder_pos = formatting_template_
743       .tempSubString(last_match_position_).indexOf(placeholder_codepoint);
744   if (placeholder_pos != -1) {
745     UnicodeString temp_template = formatting_template_;
746     placeholder_pos = temp_template.indexOf(placeholder_codepoint);
747     temp_template.setCharAt(placeholder_pos, UnicodeString(next_char)[0]);
748     last_match_position_ = placeholder_pos;
749     formatting_template_.replace(0, temp_template.length(), temp_template);
750     formatting_template_.tempSubString(0, last_match_position_ + 1)
751         .toUTF8String(*number);
752   } else {
753     if (possible_formats_.size() == 1) {
754       // More digits are entered than we could handle, and there are no other
755       // valid patterns to try.
756       able_to_format_ = false;
757     }  // else, we just reset the formatting pattern.
758     current_formatting_pattern_.clear();
759     accrued_input_.toUTF8String(*number);
760   }
761 }
762
763 // Returns the number of bytes contained in the given UnicodeString up to the
764 // specified position.
765 // static
766 int AsYouTypeFormatter::ConvertUnicodeStringPosition(const UnicodeString& s,
767                                                      int pos) {
768   if (pos > s.length()) {
769     return -1;
770   }
771   string substring;
772   s.tempSubString(0, pos).toUTF8String(substring);
773   return substring.length();
774 }
775
776 }  // namespace phonenumbers
777 }  // namespace i18n