#include <libaddressinput/util/scoped_ptr.h>
#include <algorithm>
+#include <bitset>
#include <cassert>
#include <cstddef>
#include <map>
#include <set>
#include <string>
+#include <utility>
+#include <vector>
#include <re2/re2.h>
#include "country_rules_aggregator.h"
#include "grit/libaddressinput_strings.h"
+#include "region_data_constants.h"
#include "retriever.h"
#include "rule.h"
#include "ruleset.h"
#include "util/stl_util.h"
+#include "util/string_util.h"
namespace i18n {
namespace addressinput {
namespace {
+// A type to store a list of pointers to Ruleset objects.
+typedef std::set<const Ruleset*> Rulesets;
+
+// A type to map the field in a rule to rulesets.
+typedef std::map<Rule::IdentityField, Rulesets> IdentityFieldRulesets;
+
+// A type to map the field in an address to rulesets.
+typedef std::map<AddressField, IdentityFieldRulesets> AddressFieldRulesets;
+
+// A set of Rule::IdentityField values that match user input.
+typedef std::bitset<Rule::IDENTITY_FIELDS_SIZE> MatchingRuleFields;
+
+// Returns true if |prefix_regex| matches a prefix of |value|. For example,
+// "(90|81)" matches a prefix of "90291".
+bool ValueMatchesPrefixRegex(const std::string& value,
+ const std::string& prefix_regex) {
+ return RE2::FullMatch(value, "^(" + prefix_regex + ").*");
+}
+
// Returns true if the filter is empty (all problems allowed) or contains the
// |field|->|problem| mapping (explicitly allowed).
bool FilterAllows(const AddressProblemFilter& filter,
return true;
}
+// Collects rulesets based on whether they have a parent in the given list.
+class ParentedRulesetCollector {
+ public:
+ // Retains a reference to both of the parameters. Does not make a copy of
+ // |parent_rulesets|. Does not take ownership of |rulesets_with_parents|. The
+ // |rulesets_with_parents| parameter should not be NULL.
+ ParentedRulesetCollector(const Rulesets& parent_rulesets,
+ Rulesets* rulesets_with_parents)
+ : parent_rulesets_(parent_rulesets),
+ rulesets_with_parents_(rulesets_with_parents) {
+ assert(rulesets_with_parents_ != NULL);
+ }
+
+ ~ParentedRulesetCollector() {}
+
+ // Adds |ruleset_to_test| to the |rulesets_with_parents_| collection, if the
+ // given ruleset has a parent in |parent_rulesets_|. The |ruleset_to_test|
+ // parameter should not be NULL.
+ void operator()(const Ruleset* ruleset_to_test) {
+ assert(ruleset_to_test != NULL);
+ if (parent_rulesets_.find(ruleset_to_test->parent()) !=
+ parent_rulesets_.end()) {
+ rulesets_with_parents_->insert(ruleset_to_test);
+ }
+ }
+
+ private:
+ const Rulesets& parent_rulesets_;
+ Rulesets* rulesets_with_parents_;
+};
+
// Validates AddressData structure.
class AddressValidatorImpl : public AddressValidator {
public:
// Takes ownership of |downloader| and |storage|. Does not take ownership of
// |load_rules_delegate|.
- AddressValidatorImpl(scoped_ptr<Downloader> downloader,
+ AddressValidatorImpl(const std::string& validation_data_url,
+ scoped_ptr<Downloader> downloader,
scoped_ptr<Storage> storage,
LoadRulesDelegate* load_rules_delegate)
: aggregator_(scoped_ptr<Retriever>(new Retriever(
- VALIDATION_DATA_URL,
+ validation_data_url,
downloader.Pass(),
storage.Pass()))),
load_rules_delegate_(load_rules_delegate),
const AddressData& address,
const AddressProblemFilter& filter,
AddressProblems* problems) const {
- std::map<std::string, const Ruleset*>::const_iterator ruleset_it =
+ std::map<std::string, Ruleset*>::const_iterator ruleset_it =
rules_.find(address.country_code);
+
+ // We can still validate the required fields even if the full ruleset isn't
+ // ready.
if (ruleset_it == rules_.end()) {
+ if (problems != NULL) {
+ Rule rule;
+ rule.CopyFrom(Rule::GetDefault());
+ if (rule.ParseSerializedRule(
+ RegionDataConstants::GetRegionData(address.country_code))) {
+ EnforceRequiredFields(rule, address, filter, problems);
+ }
+ }
+
return loading_rules_.find(address.country_code) != loading_rules_.end()
? RULES_NOT_READY
: RULES_UNAVAILABLE;
}
+ if (problems == NULL) {
+ return SUCCESS;
+ }
+
const Ruleset* ruleset = ruleset_it->second;
assert(ruleset != NULL);
const Rule& country_rule =
ruleset->GetLanguageCodeRule(address.language_code);
-
- // Validate required fields.
- for (std::vector<AddressField>::const_iterator
- field_it = country_rule.GetRequired().begin();
- field_it != country_rule.GetRequired().end();
- ++field_it) {
- bool field_empty = *field_it != STREET_ADDRESS
- ? address.GetFieldValue(*field_it).empty()
- : IsEmptyStreetAddress(address.address_lines);
- if (field_empty &&
- FilterAllows(
- filter, *field_it, AddressProblem::MISSING_REQUIRED_FIELD)) {
- problems->push_back(AddressProblem(
- *field_it,
- AddressProblem::MISSING_REQUIRED_FIELD,
- IDS_LIBADDRESSINPUT_I18N_MISSING_REQUIRED_FIELD));
- }
- }
+ EnforceRequiredFields(country_rule, address, filter, problems);
// Validate general postal code format. A country-level rule specifies the
// regular expression for the whole postal code.
// Validate the field values, e.g. state names in US.
AddressField sub_field_type =
static_cast<AddressField>(ruleset->field() + 1);
- const std::string& sub_field = address.GetFieldValue(sub_field_type);
- const std::vector<std::string>& sub_keys = rule.GetSubKeys();
- if (!sub_field.empty() &&
- !sub_keys.empty() &&
+ std::string sub_key;
+ const std::string& user_input = address.GetFieldValue(sub_field_type);
+ if (!user_input.empty() &&
FilterAllows(filter, sub_field_type, AddressProblem::UNKNOWN_VALUE) &&
- std::find(sub_keys.begin(), sub_keys.end(), sub_field) ==
- sub_keys.end()) {
+ !rule.CanonicalizeSubKey(user_input, false, &sub_key)) {
problems->push_back(AddressProblem(
sub_field_type,
AddressProblem::UNKNOWN_VALUE,
// Validate sub-region specific postal code format. A sub-region specifies
// the regular expression for a prefix of the postal code.
- int match_position = -1;
if (ruleset->field() > COUNTRY &&
!address.postal_code.empty() &&
!rule.GetPostalCodeFormat().empty() &&
FilterAllows(filter,
POSTAL_CODE,
AddressProblem::MISMATCHING_VALUE) &&
- (!RE2::PartialMatch(address.postal_code,
- rule.GetPostalCodeFormat(),
- &match_position) ||
- match_position != 0)) {
+ !ValueMatchesPrefixRegex(
+ address.postal_code, rule.GetPostalCodeFormat())) {
problems->push_back(AddressProblem(
POSTAL_CODE,
AddressProblem::MISMATCHING_VALUE,
country_rule.GetInvalidPostalCodeMessageId()));
}
- ruleset = ruleset->GetSubRegionRuleset(sub_field);
+ ruleset = ruleset->GetSubRegionRuleset(sub_key);
+ }
+
+ return SUCCESS;
+ }
+
+ // AddressValidator implementation.
+ virtual Status GetSuggestions(const AddressData& user_input,
+ AddressField focused_field,
+ size_t suggestions_limit,
+ std::vector<AddressData>* suggestions) const {
+ std::map<std::string, Ruleset*>::const_iterator ruleset_it =
+ rules_.find(user_input.country_code);
+
+ if (ruleset_it == rules_.end()) {
+ return
+ loading_rules_.find(user_input.country_code) != loading_rules_.end()
+ ? RULES_NOT_READY
+ : RULES_UNAVAILABLE;
+ }
+
+ if (suggestions == NULL) {
+ return SUCCESS;
+ }
+ suggestions->clear();
+
+ assert(ruleset_it->second != NULL);
+
+ // Do not suggest anything if the user is typing in the field for which
+ // there's no validation data.
+ if (focused_field != POSTAL_CODE &&
+ (focused_field < ADMIN_AREA || focused_field > DEPENDENT_LOCALITY)) {
+ return SUCCESS;
+ }
+
+ // Do not suggest anything if the user input is empty.
+ if (user_input.GetFieldValue(focused_field).empty()) {
+ return SUCCESS;
+ }
+
+ const Ruleset& country_ruleset = *ruleset_it->second;
+ const Rule& country_rule =
+ country_ruleset.GetLanguageCodeRule(user_input.language_code);
+
+ // Do not suggest anything if the user is typing the postal code that is not
+ // valid for the country.
+ if (!user_input.postal_code.empty() &&
+ focused_field == POSTAL_CODE &&
+ !country_rule.GetPostalCodeFormat().empty() &&
+ !ValueMatchesPrefixRegex(
+ user_input.postal_code, country_rule.GetPostalCodeFormat())) {
+ return SUCCESS;
+ }
+
+ // Initialize the prefix search index lazily.
+ if (!ruleset_it->second->prefix_search_index_ready()) {
+ ruleset_it->second->BuildPrefixSearchIndex();
+ }
+
+ if (focused_field != POSTAL_CODE &&
+ focused_field > country_ruleset.deepest_ruleset_level()) {
+ return SUCCESS;
+ }
+
+ // Determine the most specific address field that can be suggested.
+ AddressField suggestion_field = focused_field != POSTAL_CODE
+ ? focused_field : DEPENDENT_LOCALITY;
+ if (suggestion_field > country_ruleset.deepest_ruleset_level()) {
+ suggestion_field = country_ruleset.deepest_ruleset_level();
+ }
+ if (focused_field != POSTAL_CODE) {
+ while (user_input.GetFieldValue(suggestion_field).empty() &&
+ suggestion_field > ADMIN_AREA) {
+ suggestion_field = static_cast<AddressField>(suggestion_field - 1);
+ }
+ }
+
+ // Find all rulesets that match user input.
+ AddressFieldRulesets rulesets;
+ for (int i = ADMIN_AREA; i <= suggestion_field; ++i) {
+ for (int j = Rule::KEY; j <= Rule::LATIN_NAME; ++j) {
+ AddressField address_field = static_cast<AddressField>(i);
+ Rule::IdentityField rule_field = static_cast<Rule::IdentityField>(j);
+
+ // Find all rulesets at |address_field| level whose |rule_field| starts
+ // with user input value.
+ country_ruleset.FindRulesetsByPrefix(
+ user_input.language_code, address_field, rule_field,
+ user_input.GetFieldValue(address_field),
+ &rulesets[address_field][rule_field]);
+
+ // Filter out the rulesets whose parents do not match the user input.
+ if (address_field > ADMIN_AREA) {
+ AddressField parent_field =
+ static_cast<AddressField>(address_field - 1);
+ Rulesets rulesets_with_parents;
+ std::for_each(
+ rulesets[address_field][rule_field].begin(),
+ rulesets[address_field][rule_field].end(),
+ ParentedRulesetCollector(rulesets[parent_field][rule_field],
+ &rulesets_with_parents));
+ rulesets[address_field][rule_field].swap(rulesets_with_parents);
+ }
+ }
+ }
+
+ // Determine the fields in the rules that match the user input. This
+ // operation converts a map of Rule::IdentityField value -> Ruleset into a
+ // map of Ruleset -> Rule::IdentityField bitset.
+ std::map<const Ruleset*, MatchingRuleFields> suggestion_rulesets;
+ for (IdentityFieldRulesets::const_iterator rule_field_it =
+ rulesets[suggestion_field].begin();
+ rule_field_it != rulesets[suggestion_field].end();
+ ++rule_field_it) {
+ const Rule::IdentityField rule_identity_field = rule_field_it->first;
+ for (Rulesets::const_iterator ruleset_it = rule_field_it->second.begin();
+ ruleset_it != rule_field_it->second.end();
+ ++ruleset_it) {
+ suggestion_rulesets[*ruleset_it].set(rule_identity_field);
+ }
+ }
+
+ // Generate suggestions based on the rulesets. Use a Rule::IdentityField
+ // from the bitset to generate address field values.
+ for (std::map<const Ruleset*, MatchingRuleFields>::const_iterator
+ suggestion_it = suggestion_rulesets.begin();
+ suggestion_it != suggestion_rulesets.end();
+ ++suggestion_it) {
+ const Ruleset& ruleset = *suggestion_it->first;
+ const Rule& rule = ruleset.GetLanguageCodeRule(user_input.language_code);
+ const MatchingRuleFields& matching_rule_fields = suggestion_it->second;
+
+ // Do not suggest this region if the postal code in user input does not
+ // match it.
+ if (!user_input.postal_code.empty() &&
+ !rule.GetPostalCodeFormat().empty() &&
+ !ValueMatchesPrefixRegex(
+ user_input.postal_code, rule.GetPostalCodeFormat())) {
+ continue;
+ }
+
+ // Do not add more suggestions than |suggestions_limit|.
+ if (suggestions->size() >= suggestions_limit) {
+ suggestions->clear();
+ return SUCCESS;
+ }
+
+ // If the user's language is not one of the supported languages of a
+ // country that has latinized names for its regions, then prefer to
+ // suggest the latinized region names. If the user types in local script
+ // instead, then the local script names will be suggested.
+ Rule::IdentityField rule_field = Rule::KEY;
+ if (!country_rule.GetLanguage().empty() &&
+ country_rule.GetLanguage() != user_input.language_code &&
+ !rule.GetLatinName().empty() &&
+ matching_rule_fields.test(Rule::LATIN_NAME)) {
+ rule_field = Rule::LATIN_NAME;
+ } else if (matching_rule_fields.test(Rule::KEY)) {
+ rule_field = Rule::KEY;
+ } else if (matching_rule_fields.test(Rule::NAME)) {
+ rule_field = Rule::NAME;
+ } else if (matching_rule_fields.test(Rule::LATIN_NAME)) {
+ rule_field = Rule::LATIN_NAME;
+ } else {
+ assert(false);
+ }
+
+ AddressData suggestion;
+ suggestion.country_code = user_input.country_code;
+ suggestion.postal_code = user_input.postal_code;
+
+ // Traverse the tree of rulesets from the most specific |ruleset| to the
+ // country-wide "root" of the tree. Use the region names found at each of
+ // the levels of the ruleset tree to build the |suggestion|.
+ for (const Ruleset* suggestion_ruleset = &ruleset;
+ suggestion_ruleset->parent() != NULL;
+ suggestion_ruleset = suggestion_ruleset->parent()) {
+ const Rule& suggestion_rule =
+ suggestion_ruleset->GetLanguageCodeRule(user_input.language_code);
+ suggestion.SetFieldValue(suggestion_ruleset->field(),
+ suggestion_rule.GetIdentityField(rule_field));
+ }
+
+ suggestions->push_back(suggestion);
}
return SUCCESS;
}
+ // AddressValidator implementation.
+ virtual bool CanonicalizeAdministrativeArea(AddressData* address_data) const {
+ std::map<std::string, Ruleset*>::const_iterator ruleset_it =
+ rules_.find(address_data->country_code);
+ if (ruleset_it == rules_.end()) {
+ return false;
+ }
+ const Rule& rule =
+ ruleset_it->second->GetLanguageCodeRule(address_data->language_code);
+
+ return rule.CanonicalizeSubKey(address_data->administrative_area,
+ true, // Keep input latin.
+ &address_data->administrative_area);
+ }
+
private:
// Called when CountryRulesAggregator::AggregateRules loads the |ruleset| for
// the |country_code|.
}
}
+ // Adds problems for just the required fields portion of |country_rule|.
+ void EnforceRequiredFields(const Rule& country_rule,
+ const AddressData& address,
+ const AddressProblemFilter& filter,
+ AddressProblems* problems) const {
+ assert(problems != NULL);
+ for (std::vector<AddressField>::const_iterator
+ field_it = country_rule.GetRequired().begin();
+ field_it != country_rule.GetRequired().end();
+ ++field_it) {
+ bool field_empty = *field_it != STREET_ADDRESS
+ ? address.GetFieldValue(*field_it).empty()
+ : IsEmptyStreetAddress(address.address_lines);
+ if (field_empty &&
+ FilterAllows(
+ filter, *field_it, AddressProblem::MISSING_REQUIRED_FIELD)) {
+ problems->push_back(AddressProblem(
+ *field_it,
+ AddressProblem::MISSING_REQUIRED_FIELD,
+ IDS_LIBADDRESSINPUT_I18N_MISSING_REQUIRED_FIELD));
+ }
+ }
+ }
+
// Loads the ruleset for a country code.
CountryRulesAggregator aggregator_;
std::set<std::string> loading_rules_;
// A mapping of a country code to the owned ruleset for that country code.
- std::map<std::string, const Ruleset*> rules_;
+ std::map<std::string, Ruleset*> rules_;
DISALLOW_COPY_AND_ASSIGN(AddressValidatorImpl);
};
scoped_ptr<Storage> storage,
LoadRulesDelegate* load_rules_delegate) {
return scoped_ptr<AddressValidator>(new AddressValidatorImpl(
- downloader.Pass(), storage.Pass(), load_rules_delegate));
+ VALIDATION_DATA_URL, downloader.Pass(), storage.Pass(),
+ load_rules_delegate));
+}
+
+scoped_ptr<AddressValidator> BuildAddressValidatorForTesting(
+ const std::string& validation_data_url,
+ scoped_ptr<Downloader> downloader,
+ scoped_ptr<Storage> storage,
+ LoadRulesDelegate* load_rules_delegate) {
+ return scoped_ptr<AddressValidator>(new AddressValidatorImpl(
+ validation_data_url, downloader.Pass(), storage.Pass(),
+ load_rules_delegate));
}
} // namespace addressinput