Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / third_party / libaddressinput / chromium / input_suggester.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/libaddressinput/chromium/input_suggester.h"
6
7 #include <cstddef>
8 #include <set>
9 #include <utility>
10
11 #include "base/logging.h"
12 #include "third_party/libaddressinput/chromium/trie.h"
13 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h"
14 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/callback.h"
15 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h"
16 #include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h"
17
18 namespace autofill {
19
20 using ::i18n::addressinput::AddressData;
21 using ::i18n::addressinput::AddressField;
22 using ::i18n::addressinput::BuildCallback;
23 using ::i18n::addressinput::FieldProblemMap;
24 using ::i18n::addressinput::PreloadSupplier;
25 using ::i18n::addressinput::RegionData;
26 using ::i18n::addressinput::RegionDataBuilder;
27
28 using ::i18n::addressinput::ADMIN_AREA;
29 using ::i18n::addressinput::COUNTRY;
30 using ::i18n::addressinput::DEPENDENT_LOCALITY;
31 using ::i18n::addressinput::LOCALITY;
32 using ::i18n::addressinput::POSTAL_CODE;
33
34 using ::i18n::addressinput::INVALID_FORMAT;
35 using ::i18n::addressinput::MISMATCHING_VALUE;
36
37 namespace {
38
39 // Initial size for the buffer used in the canonicalizer.
40 static const size_t kInitialBufferSize = 32;
41
42 // A region and its metadata useful for constructing a suggestion.
43 struct Suggestion {
44  public:
45   // Builds a suggestion of |region_to_suggest|. Does not take ownership of
46   // |region_to_suggest|, which should not be NULL.
47   Suggestion(const RegionData* region_to_suggest,
48              AddressField matching_address_field,
49              bool region_key_matches)
50       : region_to_suggest(region_to_suggest),
51         matching_address_field(matching_address_field),
52         region_key_matches(region_key_matches) {
53     DCHECK(region_to_suggest);
54   }
55
56   ~Suggestion() {}
57
58   // The region that should be suggested. For example, if the region is ("CA",
59   // "California"), then either "CA" or "California" should be suggested.
60   const RegionData* region_to_suggest;
61
62   // The field in the address for which the suggestion should be made. For
63   // example, ADMIN_AREA in US means the suggestion should be made for the field
64   // labeled "State".
65   AddressField matching_address_field;
66
67   // True if the key of the region matches user input (the name may or may not
68   // match). "CA" should be suggested for a ("CA", "California") region.
69   //
70   // False if only the name of the region matches user input (the key does not
71   // match). "California" should be suggested for a ("CA", "California") region.
72   bool region_key_matches;
73 };
74
75 // Suggestions for an address. Contains lists of suggestions for administrative
76 // area, locality, and dependent locality fields of an address.
77 class AddressSuggestions {
78  public:
79   AddressSuggestions() {}
80   ~AddressSuggestions() {}
81
82   // Marks all regions at |address_field| level as matching user input.
83   void AllRegionsMatchForField(AddressField address_field) {
84     all_regions_match_input_.insert(address_field);
85   }
86
87   // Marks given regions at |address_field| level as matching user input. The
88   // |regions_match_key| parameter contains the regions that match user input by
89   // their keys. The |regions_match_name| parameter contains the regions that
90   // match user input by their names.
91   //
92   // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or
93   // DEPENDENT_LOCALITY.
94   bool AddRegions(AddressField address_field,
95                   const std::set<const RegionData*>& regions_match_key,
96                   const std::set<const RegionData*>& regions_match_name) {
97     DCHECK(address_field >= ADMIN_AREA);
98     DCHECK(address_field <= DEPENDENT_LOCALITY);
99
100     AddressField parent_address_field =
101         static_cast<AddressField>(address_field - 1);
102
103     bool all_parents_match =
104         parent_address_field == COUNTRY ||
105         all_regions_match_input_.find(parent_address_field) !=
106             all_regions_match_input_.end();
107
108     // Cannot build |address_field| level suggestions if there are no matches in
109     // |parent_address_field| level regions.
110     const RegionsMatchInput* parents = NULL;
111     if (address_field > ADMIN_AREA && !all_parents_match) {
112       parents = &regions_match_input_[parent_address_field];
113       if (parents->keys.empty() && parents->names.empty())
114         return false;
115     }
116
117     RegionsMatchInput* regions = NULL;
118     if (address_field < DEPENDENT_LOCALITY)
119       regions = &regions_match_input_[address_field];
120
121     std::vector<Suggestion>* suggestions = &suggestions_[address_field];
122     bool added_suggestions = false;
123
124     // Iterate over both |regions_match_key| and |regions_match_name| and build
125     // Suggestion objects based on the given RegionData objects. Advance either
126     // one iterator at a time (if they point to different data) or both
127     // iterators at once (if they point to the same data).
128     for (std::set<const RegionData*>::const_iterator
129              key_it = regions_match_key.begin(),
130              name_it = regions_match_name.begin();
131          key_it != regions_match_key.end() ||
132              name_it != regions_match_name.end();) {
133       const RegionData* key_region =
134           key_it != regions_match_key.end() ? *key_it : NULL;
135       const RegionData* name_region =
136           name_it != regions_match_name.end() ? *name_it : NULL;
137
138       // Regions that do not have a parent that also matches input will not
139       // become suggestions.
140       bool key_region_has_parent =
141           all_parents_match ||
142           (parents && !parents->keys.empty() && key_region &&
143            parents->keys.find(&key_region->parent()) != parents->keys.end());
144       bool name_region_has_parent =
145           all_parents_match ||
146           (parents && !parents->names.empty() && name_region &&
147            parents->names.find(&name_region->parent()) != parents->names.end());
148
149       if (name_region && (!key_region || name_region < key_region)) {
150         if (name_region_has_parent) {
151           suggestions->push_back(Suggestion(name_region, address_field, false));
152           added_suggestions = true;
153           if (regions)
154             regions->names.insert(name_region);
155         }
156
157         ++name_it;
158       } else if (key_region && (!name_region || key_region < name_region)) {
159         if (key_region_has_parent) {
160           suggestions->push_back(Suggestion(key_region, address_field, true));
161           added_suggestions = true;
162           if (regions)
163             regions->keys.insert(key_region);
164         }
165
166         ++key_it;
167       } else {
168         if (key_region_has_parent) {
169           suggestions->push_back(Suggestion(key_region, address_field, true));
170           added_suggestions = true;
171           if (regions) {
172             regions->keys.insert(key_region);
173             regions->names.insert(name_region);
174           }
175         }
176
177         ++key_it;
178         ++name_it;
179       }
180     }
181
182     return added_suggestions;
183   }
184
185   // Swaps the suggestions for the smallest sub-region into |suggestions|.
186   // |this| is not usable after this call due to using the swap() operation.
187   //
188   // The |suggestions| parameter should not be NULL.
189   void SwapSmallestSubRegionSuggestions(std::vector<Suggestion>* suggestions) {
190     DCHECK(suggestions);
191     for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) {
192       std::vector<Suggestion>* result =
193           &suggestions_[static_cast<AddressField>(i)];
194       if (!result->empty()) {
195         suggestions->swap(*result);
196         return;
197       }
198     }
199   }
200
201  private:
202   // The sets of non-owned regions used for looking up regions that match user
203   // input by keys and names.
204   struct RegionsMatchInput {
205     std::set<const RegionData*> keys;
206     std::set<const RegionData*> names;
207   };
208
209   // The regions that match user input at ADMIN_AREA and LOCALITY levels.
210   std::map<AddressField, RegionsMatchInput> regions_match_input_;
211
212   // The set of fields for which all regions match user input. Used to avoid
213   // storing a long list in |regions_match_input_| and later looking it up
214   // there.
215   std::set<AddressField> all_regions_match_input_;
216
217   // Suggestions at ADMIN_AREA, LOCALITY, and DEPENDENT_LOCALITY levels.
218   std::map<AddressField, std::vector<Suggestion> > suggestions_;
219
220   DISALLOW_COPY_AND_ASSIGN(AddressSuggestions);
221 };
222
223 }  // namespace
224
225 InputSuggester::StringCanonicalizer::StringCanonicalizer()
226     : buffer_(kInitialBufferSize, 0) {
227   UErrorCode error_code = U_ZERO_ERROR;
228   collator_.reset(
229       icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
230   DCHECK(U_SUCCESS(error_code));
231   collator_->setStrength(icu::Collator::PRIMARY);
232 }
233
234 InputSuggester::StringCanonicalizer::~StringCanonicalizer() {}
235
236 const std::vector<uint8_t>& InputSuggester::StringCanonicalizer::Canonicalize(
237     const std::string& original) const {
238   DCHECK(!original.empty());
239
240   icu::UnicodeString icu_str(original.c_str(),
241                              static_cast<int32_t>(original.length()));
242   int32_t sort_key_size =
243       collator_->getSortKey(icu_str, &buffer_[0], buffer_size());
244   DCHECK_LT(0, sort_key_size);
245
246   if (sort_key_size > buffer_size()) {
247     buffer_.resize(sort_key_size * 2, 0);
248     sort_key_size = collator_->getSortKey(icu_str, &buffer_[0], buffer_size());
249     DCHECK_LT(0, sort_key_size);
250     DCHECK_GT(buffer_size(), sort_key_size);
251   }
252
253   return buffer_;
254 }
255
256 int32_t InputSuggester::StringCanonicalizer::buffer_size() const {
257   return static_cast<int32_t>(buffer_.size());
258 }
259
260 // All sub-regions of a COUNTRY level region, organized into tries for lookup by
261 // region name or key.
262 class InputSuggester::SubRegionData {
263  public:
264   SubRegionData()
265       : initialized_(false),
266         smallest_region_size_(COUNTRY),
267         canonicalizer_(NULL) {}
268
269   ~SubRegionData() {}
270
271   bool is_initialized() const { return initialized_; }
272
273   // Adds the sub-regions of |country_region| into tries. Uses
274   // |shared_canonicalizer| for case and diacritic insensitive lookup of the
275   // sub-regions. Should be called at most once.
276   void Initialize(const RegionData& country_region,
277                   const StringCanonicalizer& shared_canonicalizer) {
278     DCHECK(!initialized_);
279     DCHECK(!country_region.has_parent());
280
281     initialized_ = true;
282     canonicalizer_ = &shared_canonicalizer;
283
284     if (!country_region.sub_regions().empty())
285       AddSubRegionsOf(country_region, COUNTRY);
286   }
287
288   // Adds the suggestions for |user_input| into |suggestions| when user is
289   // typing in |focused_field|.
290   void BuildSuggestions(const AddressData& user_input,
291                         AddressField focused_field,
292                         std::vector<Suggestion>* suggestions) {
293     DCHECK(initialized_);
294
295     // Do not suggest anything if there's no suggestion data for the focused
296     // field.
297     if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field)
298       return;
299
300     // Non-owned regions that match a field value by region key.
301     std::set<const RegionData*> regions_match_key;
302
303     // Non-owned regions that match a field value by region name.
304     std::set<const RegionData*> regions_match_name;
305
306     AddressSuggestions address_suggestions;
307     for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY;
308          ++i) {
309       AddressField address_field = static_cast<AddressField>(i);
310       AddressField parent_address_field = static_cast<AddressField>(i - 1);
311
312       const std::string& field_value = user_input.GetFieldValue(address_field);
313       const std::string& parent_field_value =
314           user_input.GetFieldValue(parent_address_field);
315
316       if (field_value.empty() &&
317           (address_field == ADMIN_AREA || parent_field_value.empty())) {
318         address_suggestions.AllRegionsMatchForField(address_field);
319         continue;
320       }
321
322       if (field_value.empty()) {
323         DCHECK_NE(address_field, focused_field);
324         continue;
325       }
326
327       regions_match_key.clear();
328       regions_match_name.clear();
329
330       const FieldTries& field_tries = field_tries_[address_field];
331
332       const std::vector<uint8_t>& canonicalized_value =
333           canonicalizer_->Canonicalize(field_value);
334
335       field_tries.keys.FindDataForKeyPrefix(canonicalized_value,
336                                             &regions_match_key);
337       field_tries.names.FindDataForKeyPrefix(canonicalized_value,
338                                              &regions_match_name);
339
340       bool added_suggestions = address_suggestions.AddRegions(
341           address_field, regions_match_key, regions_match_name);
342
343       // Do not suggest anything if the focused field does not have suggestions.
344       if (address_field == focused_field && !added_suggestions)
345         return;
346     }
347
348     address_suggestions.SwapSmallestSubRegionSuggestions(suggestions);
349   }
350
351  private:
352   // The tries to lookup regions for a specific field by keys and names. For
353   // example, the FieldTries for ADMIN_AREA in US will have keys for "AL", "AK",
354   // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The
355   // struct is uncopyable due to Trie objects being uncopyable.
356   struct FieldTries {
357     Trie<const RegionData*> keys;
358     Trie<const RegionData*> names;
359   };
360
361   // Adds the sub-regions of |parent_region| into tries.
362   void AddSubRegionsOf(const RegionData& parent_region,
363                        AddressField parent_field) {
364     DCHECK(!parent_region.sub_regions().empty());
365
366     AddressField address_field = static_cast<AddressField>(parent_field + 1);
367     DCHECK(address_field >= ADMIN_AREA);
368     DCHECK(address_field <= DEPENDENT_LOCALITY);
369
370     FieldTries* field_tries = &field_tries_[address_field];
371     for (std::vector<const RegionData*>::const_iterator it =
372              parent_region.sub_regions().begin();
373          it != parent_region.sub_regions().end();
374          ++it) {
375       const RegionData* region = *it;
376       DCHECK(region);
377       DCHECK(!region->key().empty());
378       DCHECK(!region->name().empty());
379
380       field_tries->keys.AddDataForKey(
381           canonicalizer_->Canonicalize(region->key()), region);
382
383       field_tries->names.AddDataForKey(
384           canonicalizer_->Canonicalize(region->name()), region);
385
386       if (smallest_region_size_ < address_field)
387         smallest_region_size_ = address_field;
388
389       if (!region->sub_regions().empty())
390         AddSubRegionsOf(*region, address_field);
391     }
392   }
393
394   // True after Initialize() has been called.
395   bool initialized_;
396
397   // The tries to lookup regions for ADMIN_AREA, LOCALITY, and
398   // DEPENDENT_LOCALITY.
399   std::map<AddressField, FieldTries> field_tries_;
400
401   // The smallest size of a sub-region that has data. For example, this is
402   // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN.
403   AddressField smallest_region_size_;
404
405   // A shared instance of string canonicalizer for case and diacritic comparison
406   // of region keys and names.
407   const StringCanonicalizer* canonicalizer_;
408 };
409
410 InputSuggester::InputSuggester(PreloadSupplier* supplier)
411     : region_data_builder_(supplier),
412       input_helper_(supplier),
413       validator_(supplier),
414       validated_(BuildCallback(this, &InputSuggester::Validated)) {}
415
416 InputSuggester::~InputSuggester() {}
417
418 void InputSuggester::GetSuggestions(const AddressData& user_input,
419                                     AddressField focused_field,
420                                     size_t suggestions_limit,
421                                     std::vector<AddressData>* suggestions) {
422   DCHECK(suggestions);
423   DCHECK(focused_field == POSTAL_CODE ||
424          (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
425
426   AddressData address_copy = user_input;
427
428   // Do not suggest anything if the user input is empty.
429   if (address_copy.IsFieldEmpty(focused_field))
430     return;
431
432   if (focused_field == POSTAL_CODE) {
433     // Do not suggest anything if the user is typing an invalid postal code.
434     FieldProblemMap problems;
435     FieldProblemMap filter;
436     filter.insert(std::make_pair(POSTAL_CODE, INVALID_FORMAT));
437     validator_.Validate(address_copy,
438                         true,   // Allow postal office boxes.
439                         false,  // Do not require recipient name.
440                         &filter,
441                         &problems,
442                         *validated_);
443     if (!problems.empty())
444       return;
445
446     // Fill in the sub-regions based on the postal code.
447     input_helper_.FillAddress(&address_copy);
448   }
449
450   // Lazily initialize the mapping from COUNTRY level regions to all of their
451   // sub-regions with metadata for generating suggestions.
452   std::string unused_best_language;
453   const RegionData& region_data =
454       region_data_builder_.Build(address_copy.region_code,
455                                  address_copy.language_code,
456                                  &unused_best_language);
457   SubRegionData* sub_region_data = &sub_regions_[&region_data];
458   if (!sub_region_data->is_initialized())
459     sub_region_data->Initialize(region_data, canonicalizer_);
460
461   // Build the list of regions that match |address_copy| when the user is typing
462   // in the |focused_field|.
463   std::vector<Suggestion> suggested_regions;
464   sub_region_data->BuildSuggestions(
465       address_copy, focused_field, &suggested_regions);
466
467   FieldProblemMap problems;
468   FieldProblemMap filter;
469   filter.insert(std::make_pair(POSTAL_CODE, MISMATCHING_VALUE));
470
471   // Generate suggestions based on the regions.
472   for (std::vector<Suggestion>::const_iterator suggested_region_it =
473            suggested_regions.begin();
474        suggested_region_it != suggested_regions.end();
475        ++suggested_region_it) {
476     AddressData address;
477     address.region_code = address_copy.region_code;
478     address.postal_code = address_copy.postal_code;
479
480     // Traverse the tree of regions from the smallest |region_to_suggest| to the
481     // country-wide "root" of the tree. Use the region names or keys found at
482     // each of the levels of the tree to build the |address| to suggest.
483     AddressField address_field = suggested_region_it->matching_address_field;
484     for (const RegionData* region = suggested_region_it->region_to_suggest;
485          region->has_parent();
486          region = &region->parent()) {
487       address.SetFieldValue(address_field,
488                             suggested_region_it->region_key_matches
489                                 ? region->key()
490                                 : region->name());
491       address_field = static_cast<AddressField>(address_field - 1);
492     }
493
494     // Do not suggest an address with a mismatching postal code.
495     problems.clear();
496     validator_.Validate(address,
497                         true,   // Allow postal office boxes.
498                         false,  // Do not require recipient name.
499                         &filter,
500                         &problems,
501                         *validated_);
502     if (!problems.empty())
503       continue;
504
505     // Do not add more suggestions than |suggestions_limit|.
506     if (suggestions->size() >= suggestions_limit) {
507       suggestions->clear();
508       return;
509     }
510
511     suggestions->push_back(address);
512   }
513 }
514
515 void InputSuggester::Validated(bool success,
516                                const AddressData&,
517                                const FieldProblemMap&) {
518   DCHECK(success);
519 }
520
521 }  // namespace autofill