1 // Copyright (C) 2013 Google Inc.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include <libaddressinput/address_validator.h>
17 #include <libaddressinput/address_data.h>
18 #include <libaddressinput/downloader.h>
19 #include <libaddressinput/load_rules_delegate.h>
20 #include <libaddressinput/storage.h>
21 #include <libaddressinput/util/basictypes.h>
22 #include <libaddressinput/util/scoped_ptr.h>
36 #include "country_rules_aggregator.h"
37 #include "grit/libaddressinput_strings.h"
38 #include "region_data_constants.h"
39 #include "retriever.h"
42 #include "util/stl_util.h"
43 #include "util/string_util.h"
46 namespace addressinput {
50 // A type to store a set of pointers to Ruleset objects.
// Being a std::set, it holds each Ruleset pointer at most once.
51 typedef std::set<const Ruleset*> Rulesets;
53 // A type to map the field in a rule to rulesets.
54 typedef std::map<Rule::IdentityField, Rulesets> IdentityFieldRulesets;
56 // A type to map the field in an address to rulesets.
// Together with IdentityFieldRulesets this forms a two-level index:
// address field -> rule identity field -> rulesets.
57 typedef std::map<AddressField, IdentityFieldRulesets> AddressFieldRulesets;
59 // A set of Rule::IdentityField values that match user input.
// Stored as one bit per identity field, sized by Rule::IDENTITY_FIELDS_SIZE.
60 typedef std::bitset<Rule::IDENTITY_FIELDS_SIZE> MatchingRuleFields;
62 // Returns true if |prefix_regex| matches a prefix of |value|. For example,
63 // "(90|81)" matches a prefix of "90291".
64 bool ValueMatchesPrefixRegex(const std::string& value,
65 const std::string& prefix_regex) {
66 return RE2::FullMatch(value, "^(" + prefix_regex + ").*");
69 // Returns true if the filter is empty (all problems allowed) or contains the
70 // |field|->|problem| mapping (explicitly allowed).
71 bool FilterAllows(const AddressProblemFilter& filter,
73 AddressProblem::Type problem) {
78 for (AddressProblemFilter::const_iterator it = filter.begin();
79 it != filter.end(); ++it) {
80 if (it->first == field && it->second == problem) {
// Returns |true| if the |street_address| is empty or contains only empty
// strings. Returns |false| as soon as one non-empty line is found.
bool IsEmptyStreetAddress(const std::vector<std::string>& street_address) {
  for (std::vector<std::string>::const_iterator it = street_address.begin();
       it != street_address.end(); ++it) {
    // A single non-empty line is enough to make the street address non-empty.
    if (!it->empty()) {
      return false;
    }
  }
  return true;
}
100 // Collects rulesets based on whether they have a parent in the given list.
101 class ParentedRulesetCollector {
103 // Retains a reference to both of the parameters. Does not make a copy of
104 // |parent_rulesets|. Does not take ownership of |rulesets_with_parents|. The
105 // |rulesets_with_parents| parameter should not be NULL.
106 ParentedRulesetCollector(const Rulesets& parent_rulesets,
107 Rulesets* rulesets_with_parents)
108 : parent_rulesets_(parent_rulesets),
109 rulesets_with_parents_(rulesets_with_parents) {
110 assert(rulesets_with_parents_ != NULL);
113 ~ParentedRulesetCollector() {}
115 // Adds |ruleset_to_test| to the |rulesets_with_parents_| collection, if the
116 // given ruleset has a parent in |parent_rulesets_|. The |ruleset_to_test|
117 // parameter should not be NULL.
118 void operator()(const Ruleset* ruleset_to_test) {
119 assert(ruleset_to_test != NULL);
120 if (parent_rulesets_.find(ruleset_to_test->parent()) !=
121 parent_rulesets_.end()) {
122 rulesets_with_parents_->insert(ruleset_to_test);
127 const Rulesets& parent_rulesets_;
128 Rulesets* rulesets_with_parents_;
131 // Validates AddressData structure.
// Implements the AddressValidator interface. Loaded rulesets are kept per
// country code in |rules_|; country codes with a load in progress are tracked
// in |loading_rules_|.
132 class AddressValidatorImpl : public AddressValidator {
134 // Takes ownership of |downloader| and |storage|. Does not take ownership of
135 // |load_rules_delegate|.
// |load_rules_delegate| may be NULL: it is NULL-checked before it is invoked
// in OnRulesLoaded().
136 AddressValidatorImpl(const std::string& validation_data_url,
137 scoped_ptr<Downloader> downloader,
138 scoped_ptr<Storage> storage,
139 LoadRulesDelegate* load_rules_delegate)
// |aggregator_| is constructed around a newly allocated Retriever.
// NOTE(review): the arguments passed to Retriever(...) and the end of the
// initializer list are not present in this listing - confirm against the full
// file.
140 : aggregator_(scoped_ptr<Retriever>(new Retriever(
144 load_rules_delegate_(load_rules_delegate),
// |rules_| stores raw Ruleset pointers that this object owns (see the comment
// on its declaration); they are handed to STLDeleteValues() here, which is
// expected to delete every mapped value.
148 virtual ~AddressValidatorImpl() {
149 STLDeleteValues(&rules_);
152 // AddressValidator implementation.
//
// Starts loading the rules for |country_code|, unless they are already present
// in |rules_| or a load for that country is already recorded in
// |loading_rules_|. Completion is delivered to OnRulesLoaded().
153 virtual void LoadRules(const std::string& country_code) {
154 if (rules_.find(country_code) == rules_.end() &&
155 loading_rules_.find(country_code) == loading_rules_.end()) {
// Mark the country as loading. The check above then skips further requests
// until OnRulesLoaded() erases the entry again.
156 loading_rules_.insert(country_code);
157 aggregator_.AggregateRules(
// NOTE(review): whatever argument precedes the callback is not present in this
// listing - confirm against the full file.
159 BuildScopedPtrCallback(this, &AddressValidatorImpl::OnRulesLoaded));
163 // AddressValidator implementation.
//
// Validates |address| against the rules for |address.country_code| and appends
// every problem that |filter| allows (see FilterAllows()) to |problems|.
// Three kinds of checks are visible below: required fields, the country-wide
// postal code format, and per-level checks while walking down the ruleset
// hierarchy (unknown sub-region values and sub-region postal code prefixes).
164 virtual Status ValidateAddress(
165 const AddressData& address,
166 const AddressProblemFilter& filter,
167 AddressProblems* problems) const {
168 std::map<std::string, Ruleset*>::const_iterator ruleset_it =
169 rules_.find(address.country_code);
171 // We can still validate the required fields even if the full ruleset isn't
// present in |rules_|: the fallback starts from Rule::GetDefault() and parses
// the serialized rule returned by RegionDataConstants::GetRegionData().
173 if (ruleset_it == rules_.end()) {
174 if (problems != NULL) {
// NOTE(review): |rule| is used below but its declaration is not present in
// this listing - confirm against the full file.
176 rule.CopyFrom(Rule::GetDefault());
// Required fields are only enforced when the region data parses successfully.
177 if (rule.ParseSerializedRule(
178 RegionDataConstants::GetRegionData(address.country_code))) {
179 EnforceRequiredFields(rule, address, filter, problems);
// The status returned here depends on whether a load for this country is in
// progress. NOTE(review): the two Status values selected by this condition are
// not present in this listing.
183 return loading_rules_.find(address.country_code) != loading_rules_.end()
// A NULL |problems| means the caller does not want the list of problems.
// NOTE(review): the body of this branch is not present in this listing.
188 if (problems == NULL) {
192 const Ruleset* ruleset = ruleset_it->second;
193 assert(ruleset != NULL);
// The country-level rule is selected by the address's language code. It also
// supplies the message ids used for the problems reported further down.
194 const Rule& country_rule =
195 ruleset->GetLanguageCodeRule(address.language_code);
196 EnforceRequiredFields(country_rule, address, filter, problems);
198 // Validate general postal code format. A country-level rule specifies the
199 // regular expression for the whole postal code.
// The check is skipped when the postal code is empty or when the rule has no
// postal code format.
200 if (!address.postal_code.empty() &&
201 !country_rule.GetPostalCodeFormat().empty() &&
204 AddressProblem::UNRECOGNIZED_FORMAT) &&
206 address.postal_code, country_rule.GetPostalCodeFormat())) {
207 problems->push_back(AddressProblem(
209 AddressProblem::UNRECOGNIZED_FORMAT,
210 country_rule.GetInvalidPostalCodeMessageId()));
// Walk down the ruleset hierarchy, starting at the country ruleset. Each
// iteration looks at the address field one level below |ruleset->field()| and
// then descends into the sub-region selected by |sub_key|. The loop ends when
// GetSubRegionRuleset() returns NULL.
213 while (ruleset != NULL) {
214 const Rule& rule = ruleset->GetLanguageCodeRule(address.language_code);
216 // Validate the field values, e.g. state names in US.
217 AddressField sub_field_type =
218 static_cast<AddressField>(ruleset->field() + 1);
// NOTE(review): |sub_key| is used below but its declaration is not present in
// this listing - confirm against the full file.
220 const std::string& user_input = address.GetFieldValue(sub_field_type);
// An empty value is not reported as unknown here; missing values are handled
// by EnforceRequiredFields().
221 if (!user_input.empty() &&
222 FilterAllows(filter, sub_field_type, AddressProblem::UNKNOWN_VALUE) &&
223 !rule.CanonicalizeSubKey(user_input, false, &sub_key)) {
224 problems->push_back(AddressProblem(
226 AddressProblem::UNKNOWN_VALUE,
227 country_rule.GetInvalidFieldMessageId(sub_field_type)));
230 // Validate sub-region specific postal code format. A sub-region specifies
231 // the regular expression for a prefix of the postal code.
// The country level itself is excluded (field() > COUNTRY); its full postal
// code format was already checked before the loop.
232 if (ruleset->field() > COUNTRY &&
233 !address.postal_code.empty() &&
234 !rule.GetPostalCodeFormat().empty() &&
237 AddressProblem::MISMATCHING_VALUE) &&
238 !ValueMatchesPrefixRegex(
239 address.postal_code, rule.GetPostalCodeFormat())) {
240 problems->push_back(AddressProblem(
242 AddressProblem::MISMATCHING_VALUE,
243 country_rule.GetInvalidPostalCodeMessageId()));
// Descend one level, using the sub-key obtained from CanonicalizeSubKey().
246 ruleset = ruleset->GetSubRegionRuleset(sub_key);
252 // AddressValidator implementation.
//
// Fills |suggestions| with addresses whose region fields complete what the
// user has typed into |focused_field| of |user_input|. Visible steps:
//   1. Bail out when rules are missing, the field cannot be suggested, the
//      input is empty, or the postal code cannot match the country.
//   2. Collect, per address field level and per rule identity field, the
//      rulesets that match the input by prefix, keeping only those whose
//      parent also matched.
//   3. Turn the matches at the chosen level into AddressData suggestions,
//      filtering by sub-region postal code format.
// NOTE(review): the bodies of most early-exit branches (and so the Status
// values they return) are not present in this listing - confirm against the
// full file.
253 virtual Status GetSuggestions(const AddressData& user_input,
254 AddressField focused_field,
255 size_t suggestions_limit,
256 std::vector<AddressData>* suggestions) const {
257 std::map<std::string, Ruleset*>::const_iterator ruleset_it =
258 rules_.find(user_input.country_code);
// No ruleset is loaded for this country. The condition on the next visible
// line distinguishes "load in progress" from "not loading at all".
260 if (ruleset_it == rules_.end()) {
262 loading_rules_.find(user_input.country_code) != loading_rules_.end()
// A NULL |suggestions| is tolerated; nothing can be written to it.
267 if (suggestions == NULL) {
// Start from an empty result so stale entries from the caller never leak out.
270 suggestions->clear();
272 assert(ruleset_it->second != NULL);
274 // Do not suggest anything if the user is typing in the field for which
275 // there's no validation data.
// Only POSTAL_CODE and the fields in [ADMIN_AREA, DEPENDENT_LOCALITY] pass.
276 if (focused_field != POSTAL_CODE &&
277 (focused_field < ADMIN_AREA || focused_field > DEPENDENT_LOCALITY)) {
281 // Do not suggest anything if the user input is empty.
282 if (user_input.GetFieldValue(focused_field).empty()) {
286 const Ruleset& country_ruleset = *ruleset_it->second;
287 const Rule& country_rule =
288 country_ruleset.GetLanguageCodeRule(user_input.language_code);
290 // Do not suggest anything if the user is typing the postal code that is not
291 // valid for the country.
// The country format is applied as a prefix match here, since the postal code
// may still be incomplete while the user is typing.
292 if (!user_input.postal_code.empty() &&
293 focused_field == POSTAL_CODE &&
294 !country_rule.GetPostalCodeFormat().empty() &&
295 !ValueMatchesPrefixRegex(
296 user_input.postal_code, country_rule.GetPostalCodeFormat())) {
300 // Initialize the prefix search index lazily.
// Note that this modifies the Ruleset from within a const method; that is
// possible because |rules_| stores pointers to non-const Ruleset objects.
301 if (!ruleset_it->second->prefix_search_index_ready()) {
302 ruleset_it->second->BuildPrefixSearchIndex();
// The focused field lies deeper than any ruleset level known for this country.
305 if (focused_field != POSTAL_CODE &&
306 focused_field > country_ruleset.deepest_ruleset_level()) {
310 // Determine the most specific address field that can be suggested.
// For a postal code the search starts at DEPENDENT_LOCALITY; the result is
// then capped at the deepest level that the country's rulesets provide.
311 AddressField suggestion_field = focused_field != POSTAL_CODE
312 ? focused_field : DEPENDENT_LOCALITY;
313 if (suggestion_field > country_ruleset.deepest_ruleset_level()) {
314 suggestion_field = country_ruleset.deepest_ruleset_level();
// For region fields, move up towards ADMIN_AREA until a field that the user
// has actually filled in is reached.
316 if (focused_field != POSTAL_CODE) {
317 while (user_input.GetFieldValue(suggestion_field).empty() &&
318 suggestion_field > ADMIN_AREA) {
319 suggestion_field = static_cast<AddressField>(suggestion_field - 1);
323 // Find all rulesets that match user input.
// Outer loop: address field levels from ADMIN_AREA down to |suggestion_field|.
// Inner loop: rule identity fields from Rule::KEY to Rule::LATIN_NAME.
324 AddressFieldRulesets rulesets;
325 for (int i = ADMIN_AREA; i <= suggestion_field; ++i) {
326 for (int j = Rule::KEY; j <= Rule::LATIN_NAME; ++j) {
327 AddressField address_field = static_cast<AddressField>(i);
328 Rule::IdentityField rule_field = static_cast<Rule::IdentityField>(j);
330 // Find all rulesets at |address_field| level whose |rule_field| starts
331 // with user input value.
// operator[] creates the (initially empty) result set that the call fills in.
332 country_ruleset.FindRulesetsByPrefix(
333 user_input.language_code, address_field, rule_field,
334 user_input.GetFieldValue(address_field),
335 &rulesets[address_field][rule_field]);
337 // Filter out the rulesets whose parents do not match the user input.
// Parents are taken from the previous level for the same |rule_field|; that
// level was already filtered in an earlier iteration of the outer loop.
338 if (address_field > ADMIN_AREA) {
339 AddressField parent_field =
340 static_cast<AddressField>(address_field - 1);
341 Rulesets rulesets_with_parents;
// NOTE(review): the call that applies the collector to this iterator range is
// not present in this listing - confirm against the full file.
343 rulesets[address_field][rule_field].begin(),
344 rulesets[address_field][rule_field].end(),
345 ParentedRulesetCollector(rulesets[parent_field][rule_field],
346 &rulesets_with_parents));
// Replace the unfiltered matches with the ones that have a matching parent.
347 rulesets[address_field][rule_field].swap(rulesets_with_parents);
352 // Determine the fields in the rules that match the user input. This
353 // operation converts a map of Rule::IdentityField value -> Ruleset into a
354 // map of Ruleset -> Rule::IdentityField bitset.
355 std::map<const Ruleset*, MatchingRuleFields> suggestion_rulesets;
356 for (IdentityFieldRulesets::const_iterator rule_field_it =
357 rulesets[suggestion_field].begin();
358 rule_field_it != rulesets[suggestion_field].end();
360 const Rule::IdentityField rule_identity_field = rule_field_it->first;
// NOTE: this |ruleset_it| shadows the function-level map iterator of the same
// name for the duration of the inner loop.
361 for (Rulesets::const_iterator ruleset_it = rule_field_it->second.begin();
362 ruleset_it != rule_field_it->second.end();
364 suggestion_rulesets[*ruleset_it].set(rule_identity_field);
368 // Generate suggestions based on the rulesets. Use a Rule::IdentityField
369 // from the bitset to generate address field values.
370 for (std::map<const Ruleset*, MatchingRuleFields>::const_iterator
371 suggestion_it = suggestion_rulesets.begin();
372 suggestion_it != suggestion_rulesets.end();
374 const Ruleset& ruleset = *suggestion_it->first;
375 const Rule& rule = ruleset.GetLanguageCodeRule(user_input.language_code);
376 const MatchingRuleFields& matching_rule_fields = suggestion_it->second;
378 // Do not suggest this region if the postal code in user input does not
// match, as a prefix, the postal code format of this region's rule.
380 if (!user_input.postal_code.empty() &&
381 !rule.GetPostalCodeFormat().empty() &&
382 !ValueMatchesPrefixRegex(
383 user_input.postal_code, rule.GetPostalCodeFormat())) {
387 // Do not add more suggestions than |suggestions_limit|.
// Reaching the limit discards everything collected so far (clear()) instead
// of truncating the list. NOTE(review): the statement that follows clear() is
// not present in this listing.
388 if (suggestions->size() >= suggestions_limit) {
389 suggestions->clear();
393 // If the user's language is not one of the supported languages of a
394 // country that has latinized names for its regions, then prefer to
395 // suggest the latinized region names. If the user types in local script
396 // instead, then the local script names will be suggested.
// Otherwise the first identity field that matched is used, in the order
// KEY, NAME, LATIN_NAME. Rule::KEY remains the default when none is set.
397 Rule::IdentityField rule_field = Rule::KEY;
398 if (!country_rule.GetLanguage().empty() &&
399 country_rule.GetLanguage() != user_input.language_code &&
400 !rule.GetLatinName().empty() &&
401 matching_rule_fields.test(Rule::LATIN_NAME)) {
402 rule_field = Rule::LATIN_NAME;
403 } else if (matching_rule_fields.test(Rule::KEY)) {
404 rule_field = Rule::KEY;
405 } else if (matching_rule_fields.test(Rule::NAME)) {
406 rule_field = Rule::NAME;
407 } else if (matching_rule_fields.test(Rule::LATIN_NAME)) {
408 rule_field = Rule::LATIN_NAME;
// The suggestion keeps the country and postal code exactly as the user typed
// them; only the region fields are filled in from the rulesets.
413 AddressData suggestion;
414 suggestion.country_code = user_input.country_code;
415 suggestion.postal_code = user_input.postal_code;
417 // Traverse the tree of rulesets from the most specific |ruleset| to the
418 // country-wide "root" of the tree. Use the region names found at each of
419 // the levels of the ruleset tree to build the |suggestion|.
// The loop stops before the root (the ruleset whose parent() is NULL), so no
// field value is written for the root level.
420 for (const Ruleset* suggestion_ruleset = &ruleset;
421 suggestion_ruleset->parent() != NULL;
422 suggestion_ruleset = suggestion_ruleset->parent()) {
423 const Rule& suggestion_rule =
424 suggestion_ruleset->GetLanguageCodeRule(user_input.language_code);
425 suggestion.SetFieldValue(suggestion_ruleset->field(),
426 suggestion_rule.GetIdentityField(rule_field));
429 suggestions->push_back(suggestion);
435 // AddressValidator implementation.
//
// Rewrites |address_data->administrative_area| in place, using the rule that
// the address's country ruleset provides for |address_data->language_code|,
// and returns the result of Rule::CanonicalizeSubKey().
// |address_data| is dereferenced without a NULL check, so it must not be NULL.
436 virtual bool CanonicalizeAdministrativeArea(AddressData* address_data) const {
437 std::map<std::string, Ruleset*>::const_iterator ruleset_it =
438 rules_.find(address_data->country_code);
// No ruleset has been loaded for this country.
// NOTE(review): the body of this branch and the declaration of |rule| are not
// present in this listing - confirm against the full file.
439 if (ruleset_it == rules_.end()) {
443 ruleset_it->second->GetLanguageCodeRule(address_data->language_code);
// The same string serves as both the input and the output of the call.
445 return rule.CanonicalizeSubKey(address_data->administrative_area,
446 true, // Keep input latin.
447 &address_data->administrative_area);
451 // Called when CountryRulesAggregator::AggregateRules loads the |ruleset| for
452 // the |country_code|.
// Removes |country_code| from |loading_rules_|, stores the released |ruleset|
// in |rules_|, and notifies |load_rules_delegate_| when one was provided.
453 void OnRulesLoaded(bool success,
454 const std::string& country_code,
455 scoped_ptr<Ruleset> ruleset) {
// LoadRules() only starts a load for countries that are absent from |rules_|,
// so an existing entry here would indicate a logic error.
456 assert(rules_.find(country_code) == rules_.end());
457 loading_rules_.erase(country_code);
// NOTE(review): in this listing |success| is only visibly used in the delegate
// call below; whether the store into |rules_| is guarded by it cannot be
// determined from here - confirm against the full file.
459 assert(ruleset != NULL);
460 assert(ruleset->field() == COUNTRY);
// release() hands the raw pointer over to |rules_|, which owns it from now on.
461 rules_[country_code] = ruleset.release();
// The delegate is optional.
463 if (load_rules_delegate_ != NULL) {
464 load_rules_delegate_->OnAddressValidationRulesLoaded(
465 country_code, success);
469 // Adds problems for just the required fields portion of |country_rule|.
// Iterates over the fields returned by |country_rule.GetRequired()| and
// appends AddressProblem::MISSING_REQUIRED_FIELD entries to |problems|, which
// must not be NULL.
470 void EnforceRequiredFields(const Rule& country_rule,
471 const AddressData& address,
472 const AddressProblemFilter& filter,
473 AddressProblems* problems) const {
474 assert(problems != NULL);
475 for (std::vector<AddressField>::const_iterator
476 field_it = country_rule.GetRequired().begin();
477 field_it != country_rule.GetRequired().end();
// The street address is read from |address.address_lines| and therefore gets
// a dedicated emptiness check; every other field is read via GetFieldValue().
479 bool field_empty = *field_it != STREET_ADDRESS
480 ? address.GetFieldValue(*field_it).empty()
481 : IsEmptyStreetAddress(address.address_lines);
// NOTE(review): the beginning of the condition that the next line completes is
// not present in this listing - confirm against the full file.
484 filter, *field_it, AddressProblem::MISSING_REQUIRED_FIELD)) {
485 problems->push_back(AddressProblem(
487 AddressProblem::MISSING_REQUIRED_FIELD,
488 IDS_LIBADDRESSINPUT_I18N_MISSING_REQUIRED_FIELD));
493 // Loads the ruleset for a country code.
494 CountryRulesAggregator aggregator_;
496 // An optional delegate to be invoked when a ruleset finishes loading.
// Not owned (see the constructor comment). It is NULL-checked before use.
497 LoadRulesDelegate* load_rules_delegate_;
499 // A set of country codes for which a ruleset is being loaded.
// Entries are added in LoadRules() and erased in OnRulesLoaded().
500 std::set<std::string> loading_rules_;
502 // A mapping of a country code to the owned ruleset for that country code.
// The raw pointers are passed to STLDeleteValues() in the destructor.
503 std::map<std::string, Ruleset*> rules_;
505 DISALLOW_COPY_AND_ASSIGN(AddressValidatorImpl);
510 AddressValidator::~AddressValidator() {}
513 scoped_ptr<AddressValidator> AddressValidator::Build(
514 scoped_ptr<Downloader> downloader,
515 scoped_ptr<Storage> storage,
516 LoadRulesDelegate* load_rules_delegate) {
517 return scoped_ptr<AddressValidator>(new AddressValidatorImpl(
518 VALIDATION_DATA_URL, downloader.Pass(), storage.Pass(),
519 load_rules_delegate));
522 scoped_ptr<AddressValidator> BuildAddressValidatorForTesting(
523 const std::string& validation_data_url,
524 scoped_ptr<Downloader> downloader,
525 scoped_ptr<Storage> storage,
526 LoadRulesDelegate* load_rules_delegate) {
527 return scoped_ptr<AddressValidator>(new AddressValidatorImpl(
528 validation_data_url, downloader.Pass(), storage.Pass(),
529 load_rules_delegate));
532 } // namespace addressinput