Update To 11.40.268.0
[platform/framework/web/crosswalk.git] / src / components / autofill / core / browser / address_field.cc
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/autofill/core/browser/address_field.h"
6
7 #include <stddef.h>
8
9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h"
11 #include "base/strings/string16.h"
12 #include "base/strings/string_util.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "components/autofill/core/browser/autofill_field.h"
15 #include "components/autofill/core/browser/autofill_regex_constants.h"
16 #include "components/autofill/core/browser/autofill_scanner.h"
17 #include "components/autofill/core/browser/field_types.h"
18 #include "ui/base/l10n/l10n_util.h"
19
20 using base::UTF8ToUTF16;
21
22 namespace autofill {
23
24 FormField* AddressField::Parse(AutofillScanner* scanner) {
25   if (scanner->IsEnd())
26     return NULL;
27
28   scoped_ptr<AddressField> address_field(new AddressField);
29   const AutofillField* const initial_field = scanner->Cursor();
30   size_t saved_cursor = scanner->SaveCursor();
31
32   base::string16 attention_ignored = UTF8ToUTF16(autofill::kAttentionIgnoredRe);
33   base::string16 region_ignored = UTF8ToUTF16(autofill::kRegionIgnoredRe);
34
35   // Allow address fields to appear in any order.
36   size_t begin_trailing_non_labeled_fields = 0;
37   bool has_trailing_non_labeled_fields = false;
38   while (!scanner->IsEnd()) {
39     const size_t cursor = scanner->SaveCursor();
40     if (address_field->ParseAddressLines(scanner) ||
41         address_field->ParseCity(scanner) ||
42         address_field->ParseState(scanner) ||
43         address_field->ParseZipCode(scanner) ||
44         address_field->ParseCountry(scanner) ||
45         address_field->ParseCompany(scanner)) {
46       has_trailing_non_labeled_fields = false;
47       continue;
48     } else if (ParseField(scanner, attention_ignored, NULL) ||
49                ParseField(scanner, region_ignored, NULL)) {
50       // We ignore the following:
51       // * Attention.
52       // * Province/Region/Other.
53       continue;
54     } else if (scanner->Cursor() != initial_field &&
55                ParseEmptyLabel(scanner, NULL)) {
56       // Ignore non-labeled fields within an address; the page
57       // MapQuest Driving Directions North America.html contains such a field.
58       // We only ignore such fields after we've parsed at least one other field;
59       // otherwise we'd effectively parse address fields before other field
60       // types after any non-labeled fields, and we want email address fields to
61       // have precedence since some pages contain fields labeled
62       // "Email address".
63       if (!has_trailing_non_labeled_fields) {
64         has_trailing_non_labeled_fields = true;
65         begin_trailing_non_labeled_fields = cursor;
66       }
67
68       continue;
69     } else {
70       // No field found.
71       break;
72     }
73   }
74
75   // If we have identified any address fields in this field then it should be
76   // added to the list of fields.
77   if (address_field->company_ ||
78       address_field->address1_ ||
79       address_field->address2_ ||
80       address_field->street_address_ ||
81       address_field->city_ ||
82       address_field->state_ ||
83       address_field->zip_ ||
84       address_field->zip4_ ||
85       address_field->country_) {
86     // Don't slurp non-labeled fields at the end into the address.
87     if (has_trailing_non_labeled_fields)
88       scanner->RewindTo(begin_trailing_non_labeled_fields);
89
90     return address_field.release();
91   }
92
93   scanner->RewindTo(saved_cursor);
94   return NULL;
95 }
96
97 AddressField::AddressField()
98     : company_(NULL),
99       address1_(NULL),
100       address2_(NULL),
101       street_address_(NULL),
102       city_(NULL),
103       state_(NULL),
104       zip_(NULL),
105       zip4_(NULL),
106       country_(NULL) {
107 }
108
109 bool AddressField::ClassifyField(ServerFieldTypeMap* map) const {
110   // The page can request the address lines as a single textarea input or as
111   // multiple text fields (or not at all), but it shouldn't be possible to
112   // request both.
113   DCHECK(!(address1_ && street_address_));
114   DCHECK(!(address2_ && street_address_));
115
116   return AddClassification(company_, COMPANY_NAME, map) &&
117          AddClassification(address1_, ADDRESS_HOME_LINE1, map) &&
118          AddClassification(address2_, ADDRESS_HOME_LINE2, map) &&
119          AddClassification(street_address_, ADDRESS_HOME_STREET_ADDRESS, map) &&
120          AddClassification(city_, ADDRESS_HOME_CITY, map) &&
121          AddClassification(state_, ADDRESS_HOME_STATE, map) &&
122          AddClassification(zip_, ADDRESS_HOME_ZIP, map) &&
123          AddClassification(country_, ADDRESS_HOME_COUNTRY, map);
124 }
125
126 bool AddressField::ParseCompany(AutofillScanner* scanner) {
127   if (company_ && !company_->IsEmpty())
128     return false;
129
130   return ParseField(scanner, UTF8ToUTF16(autofill::kCompanyRe), &company_);
131 }
132
133 bool AddressField::ParseAddressLines(AutofillScanner* scanner) {
134   // We only match the string "address" in page text, not in element names,
135   // because sometimes every element in a group of address fields will have
136   // a name containing the string "address"; for example, on the page
137   // Kohl's - Register Billing Address.html the text element labeled "city"
138   // has the name "BILL_TO_ADDRESS<>city".  We do match address labels
139   // such as "address1", which appear as element names on various pages (eg
140   // AmericanGirl-Registration.html, BloomingdalesBilling.html,
141   // EBay Registration Enter Information.html).
142   if (address1_ || street_address_)
143     return false;
144
145   // Ignore "Address Lookup" field. http://crbug.com/427622
146   if (ParseField(scanner, base::UTF8ToUTF16(autofill::kAddressLookupRe), NULL))
147     return false;
148
149   base::string16 pattern = UTF8ToUTF16(autofill::kAddressLine1Re);
150   base::string16 label_pattern = UTF8ToUTF16(autofill::kAddressLine1LabelRe);
151   if (!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, &address1_) &&
152       !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
153                            &address1_) &&
154       !ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_TEXT_AREA,
155                            &street_address_) &&
156       !ParseFieldSpecifics(scanner, label_pattern,
157                            MATCH_LABEL | MATCH_TEXT_AREA,
158                            &street_address_)) {
159     return false;
160   }
161
162   // Optionally parse more address lines, which may have empty labels.
163   pattern = UTF8ToUTF16(autofill::kAddressLine2Re);
164   label_pattern = UTF8ToUTF16(autofill::kAddressLine2LabelRe);
165   if (!street_address_ &&
166       !ParseEmptyLabel(scanner, &address2_) &&
167       !ParseField(scanner, pattern, &address2_)) {
168     ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
169                         &address2_);
170   }
171
172   // Try for surplus lines, which we will promptly discard.
173   // Some pages have 3 address lines (eg SharperImageModifyAccount.html)
174   // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)!
175   if (address2_) {
176     pattern = UTF8ToUTF16(autofill::kAddressLinesExtraRe);
177     while (ParseField(scanner, pattern, NULL)) {
178       // Consumed a surplus line, try for another.
179     }
180   }
181
182   return true;
183 }
184
185 bool AddressField::ParseCountry(AutofillScanner* scanner) {
186   // Parse a country.  The occasional page (e.g.
187   // Travelocity_New Member Information1.html) calls this a "location".
188   if (country_ && !country_->IsEmpty())
189     return false;
190
191   return ParseFieldSpecifics(scanner,
192                              UTF8ToUTF16(autofill::kCountryRe),
193                              MATCH_DEFAULT | MATCH_SELECT,
194                              &country_);
195 }
196
197 bool AddressField::ParseZipCode(AutofillScanner* scanner) {
198   // Parse a zip code.  On some UK pages (e.g. The China Shop2.html) this
199   // is called a "post code".
200   if (zip_)
201     return false;
202
203   // Some sites use type="tel" for zip fields (to get a numerical input).
204   // http://crbug.com/426958
205   if (!ParseFieldSpecifics(scanner,
206                            UTF8ToUTF16(autofill::kZipCodeRe),
207                            MATCH_DEFAULT | MATCH_TELEPHONE,
208                            &zip_)) {
209     return false;
210   }
211
212   // Look for a zip+4, whose field name will also often contain
213   // the substring "zip".
214   ParseFieldSpecifics(scanner,
215                       UTF8ToUTF16(autofill::kZip4Re),
216                       MATCH_DEFAULT | MATCH_TELEPHONE,
217                       &zip4_);
218   return true;
219 }
220
221 bool AddressField::ParseCity(AutofillScanner* scanner) {
222   // Parse a city name.  Some UK pages (e.g. The China Shop2.html) use
223   // the term "town".
224   if (city_)
225     return false;
226
227   // Select fields are allowed here.  This occurs on top-100 site rediff.com.
228   return ParseFieldSpecifics(scanner,
229                              UTF8ToUTF16(autofill::kCityRe),
230                              MATCH_DEFAULT | MATCH_SELECT,
231                              &city_);
232 }
233
234 bool AddressField::ParseState(AutofillScanner* scanner) {
235   if (state_)
236     return false;
237
238   return ParseFieldSpecifics(scanner,
239                              UTF8ToUTF16(autofill::kStateRe),
240                              MATCH_DEFAULT | MATCH_SELECT,
241                              &state_);
242 }
243
244 }  // namespace autofill