1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/core/browser/address_field.h"
9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h"
11 #include "base/strings/string16.h"
12 #include "base/strings/string_util.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "components/autofill/core/browser/autofill_field.h"
15 #include "components/autofill/core/browser/autofill_regex_constants.h"
16 #include "components/autofill/core/browser/autofill_scanner.h"
17 #include "components/autofill/core/browser/field_types.h"
18 #include "ui/base/l10n/l10n_util.h"
20 using base::UTF8ToUTF16;
// Scans forward from the scanner's current position and collects the address
// fields it recognizes into a newly allocated AddressField.
//
// When at least one address member was matched, the object is released from
// its scoped_ptr and returned; otherwise the scanner is rewound to the cursor
// saved on entry.
//
// NOTE(review): the embedded line numbers skip (25-27, 47, 53, 66-74, 91-92,
// 94+), so some statements of this function are not visible in this listing.
// The comments below describe only what is shown.
24 FormField* AddressField::Parse(AutofillScanner* scanner) {
28 scoped_ptr<AddressField> address_field(new AddressField);
// Remember where parsing started: |initial_field| gates the empty-label
// branch below, |saved_cursor| is the rewind target when nothing matches.
29 const AutofillField* const initial_field = scanner->Cursor();
30 size_t saved_cursor = scanner->SaveCursor();
// Patterns for fields that are recognized only so they can be skipped.
32 base::string16 attention_ignored = UTF8ToUTF16(autofill::kAttentionIgnoredRe);
33 base::string16 region_ignored = UTF8ToUTF16(autofill::kRegionIgnoredRe);
35 // Allow address fields to appear in any order.
// Cursor position of the first field in the current run of unlabeled fields;
// only meaningful while |has_trailing_non_labeled_fields| is true.
36 size_t begin_trailing_non_labeled_fields = 0;
37 bool has_trailing_non_labeled_fields = false;
38 while (!scanner->IsEnd()) {
// Position before this iteration consumes anything.
39 const size_t cursor = scanner->SaveCursor();
// The parsers are tried in this fixed order and || stops at the first one
// that succeeds.
40 if (address_field->ParseAddressLines(scanner) ||
41 address_field->ParseCity(scanner) ||
42 address_field->ParseState(scanner) ||
43 address_field->ParseZipCode(scanner) ||
44 address_field->ParseCountry(scanner) ||
45 address_field->ParseCompany(scanner)) {
// A real address field ends any run of unlabeled fields seen so far.
46 has_trailing_non_labeled_fields = false;
// These matches pass NULL as the output argument; presumably the field is
// consumed without being recorded (confirm against ParseField).
48 } else if (ParseField(scanner, attention_ignored, NULL) ||
49 ParseField(scanner, region_ignored, NULL)) {
50 // We ignore the following:
52 // * Province/Region/Other.
54 } else if (scanner->Cursor() != initial_field &&
55 ParseEmptyLabel(scanner, NULL)) {
56 // Ignore non-labeled fields within an address; the page
57 // MapQuest Driving Directions North America.html contains such a field.
58 // We only ignore such fields after we've parsed at least one other field;
59 // otherwise we'd effectively parse address fields before other field
60 // types after any non-labeled fields, and we want email address fields to
61 // have precedence since some pages contain fields labeled
// Record only the start of the run, so the scanner can later be rewound to
// the first unlabeled field.
63 if (!has_trailing_non_labeled_fields) {
64 has_trailing_non_labeled_fields = true;
65 begin_trailing_non_labeled_fields = cursor;
75 // If we have identified any address fields in this field then it should be
76 // added to the list of fields.
77 if (address_field->company_ ||
78 address_field->address1_ ||
79 address_field->address2_ ||
80 address_field->street_address_ ||
81 address_field->city_ ||
82 address_field->state_ ||
83 address_field->zip_ ||
84 address_field->zip4_ ||
85 address_field->country_) {
86 // Don't slurp non-labeled fields at the end into the address.
87 if (has_trailing_non_labeled_fields)
88 scanner->RewindTo(begin_trailing_non_labeled_fields);
// release() hands the raw pointer to the caller; the scoped_ptr no longer
// deletes it.
90 return address_field.release();
// Nothing matched: restore the scanner to its position on entry.
93 scanner->RewindTo(saved_cursor);
// Constructor. The only initializer visible in this listing sets
// |street_address_| to NULL.
// NOTE(review): the other lines of the initializer list (98-100, 102+) are
// not shown here; presumably the remaining field pointers are also
// NULL-initialized — confirm against the full file.
97 AddressField::AddressField()
101 street_address_(NULL),
// Registers each stored field in |map| under its server field type by
// calling AddClassification once per member.
//
// Returns the logical AND of those calls. Because they are chained with &&,
// evaluation stops at the first call that returns false and the later members
// are not passed to AddClassification.
//
// |zip4_| does not appear in the chain below, so this method does not
// classify it.
109 bool AddressField::ClassifyField(ServerFieldTypeMap* map) const {
110 // The page can request the address lines as a single textarea input or as
111 // multiple text fields (or not at all), but it shouldn't be possible to
// Debug-time invariant: |street_address_| is never set together with
// |address1_| or |address2_|.
113 DCHECK(!(address1_ && street_address_));
114 DCHECK(!(address2_ && street_address_));
116 return AddClassification(company_, COMPANY_NAME, map) &&
117 AddClassification(address1_, ADDRESS_HOME_LINE1, map) &&
118 AddClassification(address2_, ADDRESS_HOME_LINE2, map) &&
119 AddClassification(street_address_, ADDRESS_HOME_STREET_ADDRESS, map) &&
120 AddClassification(city_, ADDRESS_HOME_CITY, map) &&
121 AddClassification(state_, ADDRESS_HOME_STATE, map) &&
122 AddClassification(zip_, ADDRESS_HOME_ZIP, map) &&
123 AddClassification(country_, ADDRESS_HOME_COUNTRY, map);
// Tries to match the field at the scanner's cursor against
// autofill::kCompanyRe, storing a match in |company_|.
// Returns the result of ParseField.
126 bool AddressField::ParseCompany(AutofillScanner* scanner) {
// Guard for an already recorded, non-empty company field.
// NOTE(review): the guarded statement (line 128) is not visible in this
// listing — presumably an early `return false`; confirm.
127 if (company_ && !company_->IsEmpty())
130 return ParseField(scanner, UTF8ToUTF16(autofill::kCompanyRe), &company_);
// Tries to match address line fields starting at the scanner's cursor: a
// first line (or a single street-address field), an optional second line, and
// any surplus lines, which are matched with a NULL output.
//
// NOTE(review): several lines are elided from this listing (143-144, 147-148,
// 153, 155, 158-161, 169-171, 179+), including some output arguments, the
// early returns and the final return. The comments below describe only what
// is visible.
133 bool AddressField::ParseAddressLines(AutofillScanner* scanner) {
134 // We only match the string "address" in page text, not in element names,
135 // because sometimes every element in a group of address fields will have
136 // a name containing the string "address"; for example, on the page
137 // Kohl's - Register Billing Address.html the text element labeled "city"
138 // has the name "BILL_TO_ADDRESS<>city". We do match address labels
139 // such as "address1", which appear as element names on various pages (eg
140 // AmericanGirl-Registration.html, BloomingdalesBilling.html,
141 // EBay Registration Enter Information.html).
// Guard: a first line or a street-address field was already recorded.
142 if (address1_ || street_address_)
145 // Ignore "Address Lookup" field. http://crbug.com/427622
146 if (ParseField(scanner, base::UTF8ToUTF16(autofill::kAddressLookupRe), NULL))
// First address line. Up to four attempts are chained with &&, so later ones
// run only if the earlier ones failed: the general pattern with
// MATCH_DEFAULT, the label pattern with MATCH_LABEL | MATCH_TEXT, and then
// both again with MATCH_TEXT_AREA. Only the first attempt's output
// (|address1_|) is visible here.
149 base::string16 pattern = UTF8ToUTF16(autofill::kAddressLine1Re);
150 base::string16 label_pattern = UTF8ToUTF16(autofill::kAddressLine1LabelRe);
151 if (!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, &address1_) &&
152 !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
154 !ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_TEXT_AREA,
156 !ParseFieldSpecifics(scanner, label_pattern,
157 MATCH_LABEL | MATCH_TEXT_AREA,
162 // Optionally parse more address lines, which may have empty labels.
// Both pattern variables are reused for the second address line.
163 pattern = UTF8ToUTF16(autofill::kAddressLine2Re);
164 label_pattern = UTF8ToUTF16(autofill::kAddressLine2LabelRe);
// Skipped when |street_address_| is set. Otherwise an empty-labeled field is
// tried first, then the line-2 pattern, and the label-only match last.
165 if (!street_address_ &&
166 !ParseEmptyLabel(scanner, &address2_) &&
167 !ParseField(scanner, pattern, &address2_)) {
168 ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
172 // Try for surplus lines, which we will promptly discard.
173 // Some pages have 3 address lines (eg SharperImageModifyAccount.html)
174 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)!
176 pattern = UTF8ToUTF16(autofill::kAddressLinesExtraRe);
// NULL output: matches are not stored in any member.
177 while (ParseField(scanner, pattern, NULL)) {
178 // Consumed a surplus line, try for another.
// Tries to match the field at the scanner's cursor against
// autofill::kCountryRe using MATCH_DEFAULT | MATCH_SELECT, and returns the
// result of ParseFieldSpecifics.
// NOTE(review): the output argument of the call (line 194) is not visible in
// this listing — presumably |country_|; confirm.
185 bool AddressField::ParseCountry(AutofillScanner* scanner) {
186 // Parse a country. The occasional page (e.g.
187 // Travelocity_New Member Information1.html) calls this a "location".
// Guard for an already recorded, non-empty country field; the guarded
// statement (line 189) is not shown — presumably an early return.
188 if (country_ && !country_->IsEmpty())
191 return ParseFieldSpecifics(scanner,
192 UTF8ToUTF16(autofill::kCountryRe),
193 MATCH_DEFAULT | MATCH_SELECT,
// Tries to match a zip/postal code field with autofill::kZipCodeRe and then
// makes a follow-up attempt at a zip+4 field with autofill::kZip4Re. Both
// attempts use MATCH_DEFAULT | MATCH_TELEPHONE.
// NOTE(review): lines 200-202, 208-211 and 217-220 are elided from this
// listing, so the initial guard, the output arguments (presumably |zip_| and
// |zip4_|) and the return statements are not visible; confirm.
197 bool AddressField::ParseZipCode(AutofillScanner* scanner) {
198 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this
199 // is called a "post code".
203 // Some sites use type="tel" for zip fields (to get a numerical input).
204 // http://crbug.com/426958
// The primary zip match; the branch taken on failure is not shown.
205 if (!ParseFieldSpecifics(scanner,
206 UTF8ToUTF16(autofill::kZipCodeRe),
207 MATCH_DEFAULT | MATCH_TELEPHONE,
212 // Look for a zip+4, whose field name will also often contain
213 // the substring "zip".
// This call is a bare statement: its result is not tested, so a missing
// zip+4 field does not change the outcome.
214 ParseFieldSpecifics(scanner,
215 UTF8ToUTF16(autofill::kZip4Re),
216 MATCH_DEFAULT | MATCH_TELEPHONE,
// Tries to match the field at the scanner's cursor against autofill::kCityRe
// using MATCH_DEFAULT | MATCH_SELECT, and returns the result of
// ParseFieldSpecifics.
// NOTE(review): lines 223-226 and 231+ are elided from this listing, so the
// rest of the comment below, any guard, and the call's output argument
// (presumably |city_|) are not visible; confirm.
221 bool AddressField::ParseCity(AutofillScanner* scanner) {
222 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use
227 // Select fields are allowed here. This occurs on top-100 site rediff.com.
228 return ParseFieldSpecifics(scanner,
229 UTF8ToUTF16(autofill::kCityRe),
230 MATCH_DEFAULT | MATCH_SELECT,
// Tries to match the field at the scanner's cursor against autofill::kStateRe
// using MATCH_DEFAULT | MATCH_SELECT, and returns the result of
// ParseFieldSpecifics.
// NOTE(review): lines 235-237 and 241+ are elided from this listing, so any
// guard and the call's output argument (presumably |state_|) are not visible;
// confirm.
234 bool AddressField::ParseState(AutofillScanner* scanner) {
238 return ParseFieldSpecifics(scanner,
239 UTF8ToUTF16(autofill::kStateRe),
240 MATCH_DEFAULT | MATCH_SELECT,
244 } // namespace autofill