Upstream version 7.36.149.0
[platform/framework/web/crosswalk.git] / src / third_party / libaddressinput / chromium / cpp / src / address_validator.cc
1 // Copyright (C) 2013 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <libaddressinput/address_validator.h>
16
17 #include <libaddressinput/address_data.h>
18 #include <libaddressinput/downloader.h>
19 #include <libaddressinput/load_rules_delegate.h>
20 #include <libaddressinput/storage.h>
21 #include <libaddressinput/util/basictypes.h>
22 #include <libaddressinput/util/scoped_ptr.h>
23
24 #include <algorithm>
25 #include <bitset>
26 #include <cassert>
27 #include <cstddef>
28 #include <map>
29 #include <set>
30 #include <string>
31 #include <utility>
32 #include <vector>
33
34 #include <re2/re2.h>
35
36 #include "country_rules_aggregator.h"
37 #include "grit/libaddressinput_strings.h"
38 #include "region_data_constants.h"
39 #include "retriever.h"
40 #include "rule.h"
41 #include "ruleset.h"
42 #include "util/stl_util.h"
43 #include "util/string_util.h"
44
45 namespace i18n {
46 namespace addressinput {
47
48 namespace {
49
50 // A type to store a list of pointers to Ruleset objects.
51 typedef std::set<const Ruleset*> Rulesets;
52
53 // A type to map the field in a rule to rulesets.
54 typedef std::map<Rule::IdentityField, Rulesets> IdentityFieldRulesets;
55
56 // A type to map the field in an address to rulesets.
57 typedef std::map<AddressField, IdentityFieldRulesets> AddressFieldRulesets;
58
59 // A set of Rule::IdentityField values that match user input.
60 typedef std::bitset<Rule::IDENTITY_FIELDS_SIZE> MatchingRuleFields;
61
62 // Returns true if |prefix_regex| matches a prefix of |value|. For example,
63 // "(90|81)" matches a prefix of "90291".
64 bool ValueMatchesPrefixRegex(const std::string& value,
65                              const std::string& prefix_regex) {
66   return RE2::FullMatch(value, "^(" + prefix_regex + ").*");
67 }
68
69 // Returns true if the filter is empty (all problems allowed) or contains the
70 // |field|->|problem| mapping (explicitly allowed).
71 bool FilterAllows(const AddressProblemFilter& filter,
72                   AddressField field,
73                   AddressProblem::Type problem) {
74   if (filter.empty()) {
75     return true;
76   }
77
78   for (AddressProblemFilter::const_iterator it = filter.begin();
79        it != filter.end(); ++it) {
80     if (it->first == field && it->second == problem) {
81       return true;
82     }
83   }
84
85   return false;
86 }
87
// Returns |true| when no line of |street_address| holds any text. A
// |street_address| without any lines also counts as empty.
bool IsEmptyStreetAddress(const std::vector<std::string>& street_address) {
  for (size_t i = 0; i < street_address.size(); ++i) {
    if (!street_address[i].empty()) {
      return false;
    }
  }
  return true;
}
99
100 // Collects rulesets based on whether they have a parent in the given list.
101 class ParentedRulesetCollector {
102  public:
103   // Retains a reference to both of the parameters. Does not make a copy of
104   // |parent_rulesets|. Does not take ownership of |rulesets_with_parents|. The
105   // |rulesets_with_parents| parameter should not be NULL.
106   ParentedRulesetCollector(const Rulesets& parent_rulesets,
107                            Rulesets* rulesets_with_parents)
108       : parent_rulesets_(parent_rulesets),
109         rulesets_with_parents_(rulesets_with_parents) {
110     assert(rulesets_with_parents_ != NULL);
111   }
112
113   ~ParentedRulesetCollector() {}
114
115   // Adds |ruleset_to_test| to the |rulesets_with_parents_| collection, if the
116   // given ruleset has a parent in |parent_rulesets_|. The |ruleset_to_test|
117   // parameter should not be NULL.
118   void operator()(const Ruleset* ruleset_to_test) {
119     assert(ruleset_to_test != NULL);
120     if (parent_rulesets_.find(ruleset_to_test->parent()) !=
121             parent_rulesets_.end()) {
122       rulesets_with_parents_->insert(ruleset_to_test);
123     }
124   }
125
126  private:
127   const Rulesets& parent_rulesets_;
128   Rulesets* rulesets_with_parents_;
129 };
130
131 // Validates AddressData structure.
132 class AddressValidatorImpl : public AddressValidator {
133  public:
134   // Takes ownership of |downloader| and |storage|. Does not take ownership of
135   // |load_rules_delegate|.
136   AddressValidatorImpl(const std::string& validation_data_url,
137                        scoped_ptr<Downloader> downloader,
138                        scoped_ptr<Storage> storage,
139                        LoadRulesDelegate* load_rules_delegate)
140     : aggregator_(scoped_ptr<Retriever>(new Retriever(
141           validation_data_url,
142           downloader.Pass(),
143           storage.Pass()))),
144       load_rules_delegate_(load_rules_delegate),
145       loading_rules_(),
146       rules_() {}
147
148   virtual ~AddressValidatorImpl() {
149     STLDeleteValues(&rules_);
150   }
151
152   // AddressValidator implementation.
153   virtual void LoadRules(const std::string& country_code) {
154     if (rules_.find(country_code) == rules_.end() &&
155         loading_rules_.find(country_code) == loading_rules_.end()) {
156       loading_rules_.insert(country_code);
157       aggregator_.AggregateRules(
158           country_code,
159           BuildScopedPtrCallback(this, &AddressValidatorImpl::OnRulesLoaded));
160     }
161   }
162
163   // AddressValidator implementation.
164   virtual Status ValidateAddress(
165       const AddressData& address,
166       const AddressProblemFilter& filter,
167       AddressProblems* problems) const {
168     std::map<std::string, Ruleset*>::const_iterator ruleset_it =
169         rules_.find(address.country_code);
170
171     // We can still validate the required fields even if the full ruleset isn't
172     // ready.
173     if (ruleset_it == rules_.end()) {
174       if (problems != NULL) {
175         Rule rule;
176         rule.CopyFrom(Rule::GetDefault());
177         if (rule.ParseSerializedRule(
178                  RegionDataConstants::GetRegionData(address.country_code))) {
179           EnforceRequiredFields(rule, address, filter, problems);
180         }
181       }
182
183       return loading_rules_.find(address.country_code) != loading_rules_.end()
184           ? RULES_NOT_READY
185           : RULES_UNAVAILABLE;
186     }
187
188     if (problems == NULL) {
189       return SUCCESS;
190     }
191
192     const Ruleset* ruleset = ruleset_it->second;
193     assert(ruleset != NULL);
194     const Rule& country_rule =
195         ruleset->GetLanguageCodeRule(address.language_code);
196     EnforceRequiredFields(country_rule, address, filter, problems);
197
198     // Validate general postal code format. A country-level rule specifies the
199     // regular expression for the whole postal code.
200     if (!address.postal_code.empty() &&
201         !country_rule.GetPostalCodeFormat().empty() &&
202         FilterAllows(filter,
203                      POSTAL_CODE,
204                      AddressProblem::UNRECOGNIZED_FORMAT) &&
205         !RE2::FullMatch(
206             address.postal_code, country_rule.GetPostalCodeFormat())) {
207       problems->push_back(AddressProblem(
208           POSTAL_CODE,
209           AddressProblem::UNRECOGNIZED_FORMAT,
210           country_rule.GetInvalidPostalCodeMessageId()));
211     }
212
213     while (ruleset != NULL) {
214       const Rule& rule = ruleset->GetLanguageCodeRule(address.language_code);
215
216       // Validate the field values, e.g. state names in US.
217       AddressField sub_field_type =
218           static_cast<AddressField>(ruleset->field() + 1);
219       std::string sub_key;
220       const std::string& user_input = address.GetFieldValue(sub_field_type);
221       if (!user_input.empty() &&
222           FilterAllows(filter, sub_field_type, AddressProblem::UNKNOWN_VALUE) &&
223           !rule.CanonicalizeSubKey(user_input, false, &sub_key)) {
224         problems->push_back(AddressProblem(
225             sub_field_type,
226             AddressProblem::UNKNOWN_VALUE,
227             country_rule.GetInvalidFieldMessageId(sub_field_type)));
228       }
229
230       // Validate sub-region specific postal code format. A sub-region specifies
231       // the regular expression for a prefix of the postal code.
232       if (ruleset->field() > COUNTRY &&
233           !address.postal_code.empty() &&
234           !rule.GetPostalCodeFormat().empty() &&
235           FilterAllows(filter,
236                        POSTAL_CODE,
237                        AddressProblem::MISMATCHING_VALUE) &&
238           !ValueMatchesPrefixRegex(
239               address.postal_code, rule.GetPostalCodeFormat())) {
240         problems->push_back(AddressProblem(
241             POSTAL_CODE,
242             AddressProblem::MISMATCHING_VALUE,
243             country_rule.GetInvalidPostalCodeMessageId()));
244       }
245
246       ruleset = ruleset->GetSubRegionRuleset(sub_key);
247     }
248
249     return SUCCESS;
250   }
251
252   // AddressValidator implementation.
253   virtual Status GetSuggestions(const AddressData& user_input,
254                                 AddressField focused_field,
255                                 size_t suggestions_limit,
256                                 std::vector<AddressData>* suggestions) const {
257     std::map<std::string, Ruleset*>::const_iterator ruleset_it =
258         rules_.find(user_input.country_code);
259
260     if (ruleset_it == rules_.end()) {
261       return
262           loading_rules_.find(user_input.country_code) != loading_rules_.end()
263               ? RULES_NOT_READY
264               : RULES_UNAVAILABLE;
265     }
266
267     if (suggestions == NULL) {
268       return SUCCESS;
269     }
270     suggestions->clear();
271
272     assert(ruleset_it->second != NULL);
273
274     // Do not suggest anything if the user is typing in the field for which
275     // there's no validation data.
276     if (focused_field != POSTAL_CODE &&
277         (focused_field < ADMIN_AREA || focused_field > DEPENDENT_LOCALITY)) {
278       return SUCCESS;
279     }
280
281     // Do not suggest anything if the user input is empty.
282     if (user_input.GetFieldValue(focused_field).empty()) {
283       return SUCCESS;
284     }
285
286     const Ruleset& country_ruleset = *ruleset_it->second;
287     const Rule& country_rule =
288         country_ruleset.GetLanguageCodeRule(user_input.language_code);
289
290     // Do not suggest anything if the user is typing the postal code that is not
291     // valid for the country.
292     if (!user_input.postal_code.empty() &&
293         focused_field == POSTAL_CODE &&
294         !country_rule.GetPostalCodeFormat().empty() &&
295         !ValueMatchesPrefixRegex(
296             user_input.postal_code, country_rule.GetPostalCodeFormat())) {
297       return SUCCESS;
298     }
299
300     // Initialize the prefix search index lazily.
301     if (!ruleset_it->second->prefix_search_index_ready()) {
302       ruleset_it->second->BuildPrefixSearchIndex();
303     }
304
305     if (focused_field != POSTAL_CODE &&
306         focused_field > country_ruleset.deepest_ruleset_level()) {
307       return SUCCESS;
308     }
309
310     // Determine the most specific address field that can be suggested.
311     AddressField suggestion_field = focused_field != POSTAL_CODE
312         ? focused_field : DEPENDENT_LOCALITY;
313     if (suggestion_field > country_ruleset.deepest_ruleset_level()) {
314       suggestion_field = country_ruleset.deepest_ruleset_level();
315     }
316     if (focused_field != POSTAL_CODE) {
317       while (user_input.GetFieldValue(suggestion_field).empty() &&
318              suggestion_field > ADMIN_AREA) {
319         suggestion_field = static_cast<AddressField>(suggestion_field - 1);
320       }
321     }
322
323     // Find all rulesets that match user input.
324     AddressFieldRulesets rulesets;
325     for (int i = ADMIN_AREA; i <= suggestion_field; ++i) {
326       for (int j = Rule::KEY; j <= Rule::LATIN_NAME; ++j) {
327         AddressField address_field = static_cast<AddressField>(i);
328         Rule::IdentityField rule_field = static_cast<Rule::IdentityField>(j);
329
330         // Find all rulesets at |address_field| level whose |rule_field| starts
331         // with user input value.
332         country_ruleset.FindRulesetsByPrefix(
333             user_input.language_code, address_field, rule_field,
334             user_input.GetFieldValue(address_field),
335             &rulesets[address_field][rule_field]);
336
337         // Filter out the rulesets whose parents do not match the user input.
338         if (address_field > ADMIN_AREA) {
339           AddressField parent_field =
340               static_cast<AddressField>(address_field - 1);
341           Rulesets rulesets_with_parents;
342           std::for_each(
343               rulesets[address_field][rule_field].begin(),
344               rulesets[address_field][rule_field].end(),
345               ParentedRulesetCollector(rulesets[parent_field][rule_field],
346                                        &rulesets_with_parents));
347           rulesets[address_field][rule_field].swap(rulesets_with_parents);
348         }
349       }
350     }
351
352     // Determine the fields in the rules that match the user input. This
353     // operation converts a map of Rule::IdentityField value -> Ruleset into a
354     // map of Ruleset -> Rule::IdentityField bitset.
355     std::map<const Ruleset*, MatchingRuleFields> suggestion_rulesets;
356     for (IdentityFieldRulesets::const_iterator rule_field_it =
357              rulesets[suggestion_field].begin();
358          rule_field_it != rulesets[suggestion_field].end();
359          ++rule_field_it) {
360       const Rule::IdentityField rule_identity_field = rule_field_it->first;
361       for (Rulesets::const_iterator ruleset_it = rule_field_it->second.begin();
362            ruleset_it != rule_field_it->second.end();
363            ++ruleset_it) {
364         suggestion_rulesets[*ruleset_it].set(rule_identity_field);
365       }
366     }
367
368     // Generate suggestions based on the rulesets. Use a Rule::IdentityField
369     // from the bitset to generate address field values.
370     for (std::map<const Ruleset*, MatchingRuleFields>::const_iterator
371              suggestion_it = suggestion_rulesets.begin();
372          suggestion_it != suggestion_rulesets.end();
373          ++suggestion_it) {
374       const Ruleset& ruleset = *suggestion_it->first;
375       const Rule& rule = ruleset.GetLanguageCodeRule(user_input.language_code);
376       const MatchingRuleFields& matching_rule_fields = suggestion_it->second;
377
378       // Do not suggest this region if the postal code in user input does not
379       // match it.
380       if (!user_input.postal_code.empty() &&
381           !rule.GetPostalCodeFormat().empty() &&
382           !ValueMatchesPrefixRegex(
383               user_input.postal_code, rule.GetPostalCodeFormat())) {
384         continue;
385       }
386
387       // Do not add more suggestions than |suggestions_limit|.
388       if (suggestions->size() >= suggestions_limit) {
389         suggestions->clear();
390         return SUCCESS;
391       }
392
393       // If the user's language is not one of the supported languages of a
394       // country that has latinized names for its regions, then prefer to
395       // suggest the latinized region names. If the user types in local script
396       // instead, then the local script names will be suggested.
397       Rule::IdentityField rule_field = Rule::KEY;
398       if (!country_rule.GetLanguage().empty() &&
399           country_rule.GetLanguage() != user_input.language_code &&
400           !rule.GetLatinName().empty() &&
401           matching_rule_fields.test(Rule::LATIN_NAME)) {
402         rule_field = Rule::LATIN_NAME;
403       } else if (matching_rule_fields.test(Rule::KEY)) {
404         rule_field = Rule::KEY;
405       } else if (matching_rule_fields.test(Rule::NAME)) {
406         rule_field = Rule::NAME;
407       } else if (matching_rule_fields.test(Rule::LATIN_NAME)) {
408         rule_field = Rule::LATIN_NAME;
409       } else {
410         assert(false);
411       }
412
413       AddressData suggestion;
414       suggestion.country_code = user_input.country_code;
415       suggestion.postal_code = user_input.postal_code;
416
417       // Traverse the tree of rulesets from the most specific |ruleset| to the
418       // country-wide "root" of the tree. Use the region names found at each of
419       // the levels of the ruleset tree to build the |suggestion|.
420       for (const Ruleset* suggestion_ruleset = &ruleset;
421            suggestion_ruleset->parent() != NULL;
422            suggestion_ruleset = suggestion_ruleset->parent()) {
423         const Rule& suggestion_rule =
424             suggestion_ruleset->GetLanguageCodeRule(user_input.language_code);
425         suggestion.SetFieldValue(suggestion_ruleset->field(),
426                                  suggestion_rule.GetIdentityField(rule_field));
427       }
428
429       suggestions->push_back(suggestion);
430     }
431
432     return SUCCESS;
433   }
434
435   // AddressValidator implementation.
436   virtual bool CanonicalizeAdministrativeArea(AddressData* address_data) const {
437     std::map<std::string, Ruleset*>::const_iterator ruleset_it =
438         rules_.find(address_data->country_code);
439     if (ruleset_it == rules_.end()) {
440       return false;
441     }
442     const Rule& rule =
443         ruleset_it->second->GetLanguageCodeRule(address_data->language_code);
444
445     return rule.CanonicalizeSubKey(address_data->administrative_area,
446                                    true,  // Keep input latin.
447                                    &address_data->administrative_area);
448   }
449
450  private:
451   // Called when CountryRulesAggregator::AggregateRules loads the |ruleset| for
452   // the |country_code|.
453   void OnRulesLoaded(bool success,
454                      const std::string& country_code,
455                      scoped_ptr<Ruleset> ruleset) {
456     assert(rules_.find(country_code) == rules_.end());
457     loading_rules_.erase(country_code);
458     if (success) {
459       assert(ruleset != NULL);
460       assert(ruleset->field() == COUNTRY);
461       rules_[country_code] = ruleset.release();
462     }
463     if (load_rules_delegate_ != NULL) {
464       load_rules_delegate_->OnAddressValidationRulesLoaded(
465           country_code, success);
466     }
467   }
468
469   // Adds problems for just the required fields portion of |country_rule|.
470   void EnforceRequiredFields(const Rule& country_rule,
471                              const AddressData& address,
472                              const AddressProblemFilter& filter,
473                              AddressProblems* problems) const {
474     assert(problems != NULL);
475     for (std::vector<AddressField>::const_iterator
476              field_it = country_rule.GetRequired().begin();
477          field_it != country_rule.GetRequired().end();
478          ++field_it) {
479       bool field_empty = *field_it != STREET_ADDRESS
480           ? address.GetFieldValue(*field_it).empty()
481           : IsEmptyStreetAddress(address.address_lines);
482       if (field_empty &&
483           FilterAllows(
484               filter, *field_it, AddressProblem::MISSING_REQUIRED_FIELD)) {
485         problems->push_back(AddressProblem(
486             *field_it,
487             AddressProblem::MISSING_REQUIRED_FIELD,
488             IDS_LIBADDRESSINPUT_I18N_MISSING_REQUIRED_FIELD));
489       }
490     }
491   }
492
493   // Loads the ruleset for a country code.
494   CountryRulesAggregator aggregator_;
495
496   // An optional delegate to be invoked when a ruleset finishes loading.
497   LoadRulesDelegate* load_rules_delegate_;
498
499   // A set of country codes for which a ruleset is being loaded.
500   std::set<std::string> loading_rules_;
501
502   // A mapping of a country code to the owned ruleset for that country code.
503   std::map<std::string, Ruleset*> rules_;
504
505   DISALLOW_COPY_AND_ASSIGN(AddressValidatorImpl);
506 };
507
508 }  // namespace
509
510 AddressValidator::~AddressValidator() {}
511
512 // static
513 scoped_ptr<AddressValidator> AddressValidator::Build(
514     scoped_ptr<Downloader> downloader,
515     scoped_ptr<Storage> storage,
516     LoadRulesDelegate* load_rules_delegate) {
517   return scoped_ptr<AddressValidator>(new AddressValidatorImpl(
518       VALIDATION_DATA_URL, downloader.Pass(), storage.Pass(),
519       load_rules_delegate));
520 }
521
522 scoped_ptr<AddressValidator> BuildAddressValidatorForTesting(
523     const std::string& validation_data_url,
524     scoped_ptr<Downloader> downloader,
525     scoped_ptr<Storage> storage,
526     LoadRulesDelegate* load_rules_delegate) {
527   return scoped_ptr<AddressValidator>(new AddressValidatorImpl(
528       validation_data_url, downloader.Pass(), storage.Pass(),
529       load_rules_delegate));
530 }
531
532 }  // namespace addressinput
533 }  // namespace i18n