1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/core/browser/form_structure.h"
9 #include "base/basictypes.h"
10 #include "base/command_line.h"
11 #include "base/logging.h"
12 #include "base/memory/scoped_ptr.h"
13 #include "base/sha1.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/strings/string_util.h"
16 #include "base/strings/stringprintf.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "base/time/time.h"
19 #include "components/autofill/core/browser/autofill_metrics.h"
20 #include "components/autofill/core/browser/autofill_type.h"
21 #include "components/autofill/core/browser/autofill_xml_parser.h"
22 #include "components/autofill/core/browser/field_types.h"
23 #include "components/autofill/core/browser/form_field.h"
24 #include "components/autofill/core/common/autofill_constants.h"
25 #include "components/autofill/core/common/form_data.h"
26 #include "components/autofill/core/common/form_data_predictions.h"
27 #include "components/autofill/core/common/form_field_data.h"
28 #include "components/autofill/core/common/form_field_data_predictions.h"
29 #include "third_party/icu/source/i18n/unicode/regex.h"
30 #include "third_party/libjingle/source/talk/xmllite/xmlelement.h"
// Lower-case form method value; the FormStructure constructor compares the
// lower-cased method of the incoming form against it.
35 const char kFormMethodPost[] = "post";
37 // XML elements and attributes.
38 const char kAttributeAcceptedFeatures[] = "accepts";
39 const char kAttributeAutofillUsed[] = "autofillused";
40 const char kAttributeAutofillType[] = "autofilltype";
41 const char kAttributeClientVersion[] = "clientversion";
42 const char kAttributeDataPresent[] = "datapresent";
43 const char kAttributeFieldID[] = "fieldid";
44 const char kAttributeFieldType[] = "fieldtype";
45 const char kAttributeFormSignature[] = "formsignature";
46 const char kAttributeName[] = "name";
47 const char kAttributeSignature[] = "signature";
48 const char kAcceptedFeaturesExperiment[] = "e"; // e=experiments
49 const char kClientVersion[] = "6.1.1715.1442/en (GGLL)";
// Prefix written in front of every serialized request document.
50 const char kXMLDeclaration[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
51 const char kXMLElementAutofillQuery[] = "autofillquery";
52 const char kXMLElementAutofillUpload[] = "autofillupload";
53 const char kXMLElementFieldAssignments[] = "fieldassignments";
54 const char kXMLElementField[] = "field";
55 const char kXMLElementFields[] = "fields";
56 const char kXMLElementForm[] = "form";
57 const char kBillingMode[] = "billing";
58 const char kShippingMode[] = "shipping";
60 // Strip away >= 5 consecutive digits.
// Regular expression handed to icu::RegexMatcher by StripDigitsIfRequired();
// every match is replaced with the empty string.
61 const char kIgnorePatternInFieldName[] = "\\d{5,}+";
63 // Helper for |EncodeUploadRequest()| that creates a bit field corresponding to
64 // |available_field_types| and returns the hex representation as a string.
// A type with value v sets bit (v % 8), counted from the most significant bit,
// in byte (v / 8). Trailing zero bytes are dropped and the remaining bytes are
// printed as two lower-case hex digits each.
// NOTE(review): the embedded numbering skips lines in this listing (74 -> 76,
// 94 -> 100), so the loop increment and the final return are not visible here;
// confirm against the full file.
65 std::string EncodeFieldTypes(const ServerFieldTypeSet& available_field_types) {
66 // There are |MAX_VALID_FIELD_TYPE| different field types and 8 bits per byte,
67 // so we need ceil(MAX_VALID_FIELD_TYPE / 8) bytes to encode the bit field.
68 const size_t kNumBytes = (MAX_VALID_FIELD_TYPE + 0x7) / 8;
70 // Pack the types in |available_field_types| into |bit_field|.
71 std::vector<uint8> bit_field(kNumBytes, 0);
72 for (ServerFieldTypeSet::const_iterator field_type =
73 available_field_types.begin();
74 field_type != available_field_types.end();
76 // Set the appropriate bit in the field. The bit we set is the one
77 // |field_type| % 8 from the left of the byte.
78 const size_t byte = *field_type / 8;
79 const size_t bit = 0x80 >> (*field_type % 8);
80 DCHECK(byte < bit_field.size());
81 bit_field[byte] |= bit;
84 // Discard any trailing zeroes.
85 // If there are no available types, we return the empty string.
86 size_t data_end = bit_field.size();
// The loop header does all the work: it walks |data_end| back past zero bytes.
87 for (; data_end > 0 && !bit_field[data_end - 1]; --data_end) {
90 // Print all meaningful bytes into a string.
91 std::string data_presence;
92 data_presence.reserve(data_end * 2 + 1);
93 for (size_t i = 0; i < data_end; ++i) {
94 base::StringAppendF(&data_presence, "%02x", bit_field[i]);
100 // Helper for |EncodeFormRequest()| that creates XmlElements for the given field
101 // in upload xml, and also add them to the parent XmlElement.
// One <field> element is appended to |parent| for each entry in the field's
// possible types; it carries the field signature and the numeric type value.
// NOTE(review): the statement guarded by the |is_checkable| test is missing
// from this listing (numbering jumps 105 -> 108) — presumably an early return.
102 void EncodeFieldForUpload(const AutofillField& field,
103 buzz::XmlElement* parent) {
104 // Don't upload checkable fields.
105 if (field.is_checkable)
108 ServerFieldTypeSet types = field.possible_types();
109 // |types| could be empty in unit-tests only.
110 for (ServerFieldTypeSet::iterator field_type = types.begin();
111 field_type != types.end(); ++field_type) {
// NOTE(review): presumably |parent| takes ownership of the heap-allocated
// element through AddElement() — confirm against the XmlElement API.
112 buzz::XmlElement *field_element = new buzz::XmlElement(
113 buzz::QName(kXMLElementField));
115 field_element->SetAttr(buzz::QName(kAttributeSignature),
116 field.FieldSignature());
117 field_element->SetAttr(buzz::QName(kAttributeAutofillType),
118 base::IntToString(*field_type));
119 parent->AddElement(field_element);
123 // Helper for |EncodeFormRequest()| that creates XmlElement for the given field
124 // in query xml, and also add it to the parent XmlElement.
// Exactly one <field> element is produced; its only attribute is the field
// signature.
125 void EncodeFieldForQuery(const AutofillField& field,
126 buzz::XmlElement* parent) {
// NOTE(review): presumably |parent| takes ownership of the new element through
// AddElement() — confirm against the XmlElement API.
127 buzz::XmlElement *field_element = new buzz::XmlElement(
128 buzz::QName(kXMLElementField));
129 field_element->SetAttr(buzz::QName(kAttributeSignature),
130 field.FieldSignature());
131 parent->AddElement(field_element);
134 // Helper for |EncodeFormRequest()| that creates XmlElements for the given field
135 // in field assignments xml, and also add them to the parent XmlElement.
// One element is appended per possible type. Unlike the upload and query
// helpers, the element is named |kXMLElementFields| ("fields") and carries the
// field signature as "fieldid", the numeric type as "fieldtype" and the UTF-8
// field name as "name".
136 void EncodeFieldForFieldAssignments(const AutofillField& field,
137 buzz::XmlElement* parent) {
138 ServerFieldTypeSet types = field.possible_types();
139 for (ServerFieldTypeSet::iterator field_type = types.begin();
140 field_type != types.end(); ++field_type) {
141 buzz::XmlElement *field_element = new buzz::XmlElement(
142 buzz::QName(kXMLElementFields));
144 field_element->SetAttr(buzz::QName(kAttributeFieldID),
145 field.FieldSignature());
146 field_element->SetAttr(buzz::QName(kAttributeFieldType),
147 base::IntToString(*field_type));
148 field_element->SetAttr(buzz::QName(kAttributeName),
149 UTF16ToUTF8(field.name));
150 parent->AddElement(field_element);
154 // Returns |true| iff the |token| is a type hint for a contact field, as
155 // specified in the implementation section of http://is.gd/whatwg_autocomplete
156 // Note that "fax" and "pager" are intentionally ignored, as Chrome does not
157 // support filling either type of information.
// The comparison is an exact, case-sensitive string match against "home",
// "work" and "mobile".
158 bool IsContactTypeHint(const std::string& token) {
159 return token == "home" || token == "work" || token == "mobile";
162 // Returns |true| iff the |token| is a type hint appropriate for a field of the
163 // given |field_type|, as specified in the implementation section of
164 // http://is.gd/whatwg_autocomplete
// Phone types are recognised with a range test, which assumes the enum values
// from HTML_TYPE_TEL through HTML_TYPE_TEL_LOCAL_SUFFIX are contiguous and
// contain only phone types — TODO confirm against the HtmlFieldType enum.
// NOTE(review): the result for any other |token| is not visible in this
// listing (numbering jumps 180 -> 186).
165 bool ContactTypeHintMatchesFieldType(const std::string& token,
166 HtmlFieldType field_type) {
167 // The "home" and "work" type hints are only appropriate for email and phone
168 // number field types.
169 if (token == "home" || token == "work") {
170 return field_type == HTML_TYPE_EMAIL ||
171 (field_type >= HTML_TYPE_TEL &&
172 field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX);
175 // The "mobile" type hint is only appropriate for phone number field types.
176 // Note that "fax" and "pager" are intentionally ignored, as Chrome does not
177 // support filling either type of information.
178 if (token == "mobile") {
179 return field_type >= HTML_TYPE_TEL &&
180 field_type <= HTML_TYPE_TEL_LOCAL_SUFFIX;
186 // Returns the Chrome Autofill-supported field type corresponding to the given
187 // |autocomplete_attribute_value|, if there is one, in the context of the given
188 // |field|. Chrome Autofill supports a subset of the field types listed at
189 // http://is.gd/whatwg_autocomplete
// Matching is an exact string comparison, so the caller has to supply a single
// already-normalized token. |field| is consulted only for |max_length|, which
// picks a more specific variant for "additional-name", "cc-exp" and
// "cc-exp-year". Unrecognized values map to HTML_TYPE_UNKNOWN.
190 HtmlFieldType FieldTypeFromAutocompleteAttributeValue(
191 const std::string& autocomplete_attribute_value,
192 const AutofillField& field) {
193 if (autocomplete_attribute_value == "name")
194 return HTML_TYPE_NAME;
196 if (autocomplete_attribute_value == "given-name")
197 return HTML_TYPE_GIVEN_NAME;
199 if (autocomplete_attribute_value == "additional-name") {
// A one-character field can only hold a middle initial.
200 if (field.max_length == 1)
201 return HTML_TYPE_ADDITIONAL_NAME_INITIAL;
203 return HTML_TYPE_ADDITIONAL_NAME;
206 if (autocomplete_attribute_value == "family-name")
207 return HTML_TYPE_FAMILY_NAME;
209 if (autocomplete_attribute_value == "organization")
210 return HTML_TYPE_ORGANIZATION;
212 if (autocomplete_attribute_value == "street-address")
213 return HTML_TYPE_STREET_ADDRESS;
215 if (autocomplete_attribute_value == "address-line1")
216 return HTML_TYPE_ADDRESS_LINE1;
218 if (autocomplete_attribute_value == "address-line2")
219 return HTML_TYPE_ADDRESS_LINE2;
221 if (autocomplete_attribute_value == "locality")
222 return HTML_TYPE_LOCALITY;
224 if (autocomplete_attribute_value == "region")
225 return HTML_TYPE_REGION;
227 if (autocomplete_attribute_value == "country")
228 return HTML_TYPE_COUNTRY_CODE;
230 if (autocomplete_attribute_value == "country-name")
231 return HTML_TYPE_COUNTRY_NAME;
233 if (autocomplete_attribute_value == "postal-code")
234 return HTML_TYPE_POSTAL_CODE;
236 if (autocomplete_attribute_value == "cc-name")
237 return HTML_TYPE_CREDIT_CARD_NAME;
239 if (autocomplete_attribute_value == "cc-number")
240 return HTML_TYPE_CREDIT_CARD_NUMBER;
242 if (autocomplete_attribute_value == "cc-exp") {
// A maximum length of 5 or 7 selects the 2-digit-year or 4-digit-year date
// variant respectively.
243 if (field.max_length == 5)
244 return HTML_TYPE_CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR;
245 else if (field.max_length == 7)
246 return HTML_TYPE_CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR;
248 return HTML_TYPE_CREDIT_CARD_EXP;
251 if (autocomplete_attribute_value == "cc-exp-month")
252 return HTML_TYPE_CREDIT_CARD_EXP_MONTH;
254 if (autocomplete_attribute_value == "cc-exp-year") {
// A maximum length of 2 or 4 selects the matching fixed-width year type.
255 if (field.max_length == 2)
256 return HTML_TYPE_CREDIT_CARD_EXP_2_DIGIT_YEAR;
257 else if (field.max_length == 4)
258 return HTML_TYPE_CREDIT_CARD_EXP_4_DIGIT_YEAR;
260 return HTML_TYPE_CREDIT_CARD_EXP_YEAR;
263 if (autocomplete_attribute_value == "cc-csc")
264 return HTML_TYPE_CREDIT_CARD_VERIFICATION_CODE;
266 if (autocomplete_attribute_value == "cc-type")
267 return HTML_TYPE_CREDIT_CARD_TYPE;
269 if (autocomplete_attribute_value == "tel")
270 return HTML_TYPE_TEL;
272 if (autocomplete_attribute_value == "tel-country-code")
273 return HTML_TYPE_TEL_COUNTRY_CODE;
275 if (autocomplete_attribute_value == "tel-national")
276 return HTML_TYPE_TEL_NATIONAL;
278 if (autocomplete_attribute_value == "tel-area-code")
279 return HTML_TYPE_TEL_AREA_CODE;
281 if (autocomplete_attribute_value == "tel-local")
282 return HTML_TYPE_TEL_LOCAL;
284 if (autocomplete_attribute_value == "tel-local-prefix")
285 return HTML_TYPE_TEL_LOCAL_PREFIX;
287 if (autocomplete_attribute_value == "tel-local-suffix")
288 return HTML_TYPE_TEL_LOCAL_SUFFIX;
290 if (autocomplete_attribute_value == "email")
291 return HTML_TYPE_EMAIL;
293 return HTML_TYPE_UNKNOWN;
296 std::string StripDigitsIfRequired(const base::string16& input) {
297 UErrorCode status = U_ZERO_ERROR;
298 CR_DEFINE_STATIC_LOCAL(icu::UnicodeString, icu_pattern,
299 (kIgnorePatternInFieldName));
300 CR_DEFINE_STATIC_LOCAL(icu::RegexMatcher, matcher,
301 (icu_pattern, UREGEX_CASE_INSENSITIVE, status));
302 DCHECK_EQ(status, U_ZERO_ERROR);
304 icu::UnicodeString icu_input(input.data(), input.length());
305 matcher.reset(icu_input);
307 icu::UnicodeString replaced_string = matcher.replaceAll("", status);
309 std::string return_string;
310 status = U_ZERO_ERROR;
311 UTF16ToUTF8(replaced_string.getBuffer(),
312 static_cast<size_t>(replaced_string.length()),
314 if (status != U_ZERO_ERROR) {
315 DVLOG(1) << "Couldn't strip digits in " << UTF16ToUTF8(input);
316 return UTF16ToUTF8(input);
319 return return_string;
// Builds a FormStructure from |form|. The name and the origin/action URLs are
// copied, and each entry of |form.fields| is wrapped in a heap-allocated
// AutofillField whose unique name is "<name>_<n>", where n counts how often
// that field name has been seen so far. For fields that ShouldSkipField() does
// not reject, "&" plus the digit-stripped field name is appended to
// |form_signature_field_names_| and |active_field_count_| is incremented.
// NOTE(review): the embedded numbering skips lines in this listing (327 -> 329,
// 335 -> 337, 360 -> 363), so one member initializer, the loop's start
// expression and the statements that choose the method are not visible.
324 FormStructure::FormStructure(const FormData& form)
325 : form_name_(form.name),
326 source_url_(form.origin),
327 target_url_(form.action),
329 active_field_count_(0),
330 upload_required_(USE_UPLOAD_RATES),
331 server_experiment_id_("no server response"),
332 has_author_specified_types_(false) {
333 // Copy the form fields.
334 std::map<base::string16, size_t> unique_names;
335 for (std::vector<FormFieldData>::const_iterator field =
337 field != form.fields.end(); ++field) {
338 if (!ShouldSkipField(*field)) {
339 // Add all supported form fields (including with empty names) to the
340 // signature. This is a requirement for Autofill servers.
341 form_signature_field_names_.append("&");
342 form_signature_field_names_.append(StripDigitsIfRequired(field->name));
344 ++active_field_count_;
347 // Generate a unique name for this field by appending a counter to the name.
348 // Make sure to prepend the counter with a non-numeric digit so that we are
349 // guaranteed to avoid collisions.
350 if (!unique_names.count(field->name))
351 unique_names[field->name] = 1;
353 ++unique_names[field->name];
354 base::string16 unique_name = field->name + ASCIIToUTF16("_") +
355 base::IntToString16(unique_names[field->name]);
356 fields_.push_back(new AutofillField(*field, unique_name));
// The method comparison is case-insensitive for ASCII letters.
359 std::string method = UTF16ToUTF8(form.method);
360 if (StringToLowerASCII(method) == kFormMethodPost) {
363 // Either the method is 'get', or we don't know. In this case we default
// Empty destructor body; the members clean up after themselves.
369 FormStructure::~FormStructure() {}
// Assigns heuristic types to this form's fields. Author-supplied autocomplete
// hints are parsed first; only when no field carries one does
// FormField::ParseFormFields() run, and its results are copied onto the fields
// by unique name. Afterwards the autofill count and sections are refreshed and,
// if the form is autofillable, developer-engagement metrics are logged through
// |metric_logger|.
371 void FormStructure::DetermineHeuristicTypes(
372 const AutofillMetrics& metric_logger) {
373 // First, try to detect field types based on each field's |autocomplete|
374 // attribute value. If there is at least one form field that specifies an
375 // autocomplete type hint, don't try to apply other heuristics to match fields
// |has_author_specified_sections| is passed as an out-parameter below.
377 bool has_author_specified_sections;
378 ParseFieldTypesFromAutocompleteAttributes(&has_author_specified_types_,
379 &has_author_specified_sections);
381 if (!has_author_specified_types_) {
382 ServerFieldTypeMap field_type_map;
383 FormField::ParseFormFields(fields_.get(), &field_type_map);
384 for (size_t i = 0; i < field_count(); ++i) {
385 AutofillField* field = fields_[i];
386 ServerFieldTypeMap::iterator iter =
387 field_type_map.find(field->unique_name());
388 if (iter != field_type_map.end())
389 field->set_heuristic_type(iter->second);
393 UpdateAutofillCount();
394 IdentifySections(has_author_specified_sections);
396 if (IsAutofillable(true)) {
397 metric_logger.LogDeveloperEngagementMetric(
398 AutofillMetrics::FILLABLE_FORM_PARSED);
399 if (has_author_specified_types_) {
400 metric_logger.LogDeveloperEngagementMetric(
401 AutofillMetrics::FILLABLE_FORM_CONTAINS_TYPE_HINTS);
// Serializes this form as an <autofillupload> XML document into |encoded_xml|,
// prefixed with the XML declaration. |available_field_types| is encoded into
// the "datapresent" attribute and |form_was_autofilled| into "autofillused".
// Returns false when EncodeFormRequest() rejects the form.
// NOTE(review): several argument lines and the final return are missing from
// this listing (numbering jumps 430 -> 432 -> 434, 447 -> 452).
406 bool FormStructure::EncodeUploadRequest(
407 const ServerFieldTypeSet& available_field_types,
408 bool form_was_autofilled,
409 std::string* encoded_xml) const {
410 DCHECK(ShouldBeCrowdsourced());
412 // Verify that |available_field_types| agrees with the possible field types we
414 for (std::vector<AutofillField*>::const_iterator field = begin();
417 for (ServerFieldTypeSet::const_iterator type =
418 (*field)->possible_types().begin();
419 type != (*field)->possible_types().end();
// Debug-only consistency check: every possible type must be UNKNOWN_TYPE,
// EMPTY_TYPE or a member of |available_field_types|.
421 DCHECK(*type == UNKNOWN_TYPE ||
422 *type == EMPTY_TYPE ||
423 available_field_types.count(*type));
427 // Set up the <autofillupload> element and its attributes.
428 buzz::XmlElement autofill_request_xml(
429 (buzz::QName(kXMLElementAutofillUpload)));
430 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
432 autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
434 autofill_request_xml.SetAttr(buzz::QName(kAttributeAutofillUsed),
435 form_was_autofilled ? "true" : "false");
436 autofill_request_xml.SetAttr(buzz::QName(kAttributeDataPresent),
437 EncodeFieldTypes(available_field_types).c_str());
439 if (!EncodeFormRequest(FormStructure::UPLOAD, &autofill_request_xml))
440 return false; // Malformed form, skip it.
442 // Obtain the XML structure as a string.
443 *encoded_xml = kXMLDeclaration;
444 *encoded_xml += autofill_request_xml.Str().c_str();
446 // To enable this logging, run with the flag --vmodule="form_structure=2".
447 VLOG(2) << "\n" << *encoded_xml;
// Serializes this form as a <fieldassignments> XML document into
// |encoded_xml|, prefixed with the XML declaration. Returns false when
// EncodeFormRequest() rejects the form. |available_field_types| is not
// referenced in the lines visible here.
// NOTE(review): the value passed for the "formsignature" attribute and the
// final return are missing from this listing (numbering jumps 460 -> 463,
// 469 -> 475).
452 bool FormStructure::EncodeFieldAssignments(
453 const ServerFieldTypeSet& available_field_types,
454 std::string* encoded_xml) const {
455 DCHECK(ShouldBeCrowdsourced());
457 // Set up the <fieldassignments> element and its attributes.
458 buzz::XmlElement autofill_request_xml(
459 (buzz::QName(kXMLElementFieldAssignments)));
460 autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
463 if (!EncodeFormRequest(FormStructure::FIELD_ASSIGNMENTS,
464 &autofill_request_xml))
465 return false; // Malformed form, skip it.
467 // Obtain the XML structure as a string.
468 *encoded_xml = kXMLDeclaration;
469 *encoded_xml += autofill_request_xml.Str().c_str();
// Builds a single <autofillquery> XML document covering |forms| and stores it
// in |encoded_xml|. Both output parameters are cleared first. Each distinct
// form signature is looked up in |processed_forms| so that repeated forms are
// handled once, and the signature of every successfully encoded form is
// appended to |encoded_signatures| in encoding order.
// NOTE(review): the loop's end condition, the statements taken for duplicate
// or empty results and the final return are missing from this listing
// (numbering jumps 494 -> 497, 498 -> 500, 514 -> 517, 522 -> 528).
475 bool FormStructure::EncodeQueryRequest(
476 const std::vector<FormStructure*>& forms,
477 std::vector<std::string>* encoded_signatures,
478 std::string* encoded_xml) {
479 DCHECK(encoded_signatures);
481 encoded_xml->clear();
482 encoded_signatures->clear();
483 encoded_signatures->reserve(forms.size());
485 // Set up the <autofillquery> element and attributes.
486 buzz::XmlElement autofill_request_xml(
487 (buzz::QName(kXMLElementAutofillQuery)));
488 autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
491 // Some badly formatted web sites repeat forms - detect that and encode only
492 // one form as returned data would be the same for all the repeated forms.
493 std::set<std::string> processed_forms;
// NOTE(review): the iterator is declared as
// ScopedVector<FormStructure>::const_iterator although |forms| is a
// std::vector<FormStructure*>; presumably both name the same iterator type —
// confirm, and prefer the std::vector spelling for consistency.
494 for (ScopedVector<FormStructure>::const_iterator it = forms.begin();
497 std::string signature((*it)->FormSignature());
498 if (processed_forms.find(signature) != processed_forms.end())
500 processed_forms.insert(signature);
// Held in a scoped_ptr until it is released into |autofill_request_xml|, so a
// form that fails to encode does not leak its element.
501 scoped_ptr<buzz::XmlElement> encompassing_xml_element(
502 new buzz::XmlElement(buzz::QName(kXMLElementForm)));
503 encompassing_xml_element->SetAttr(buzz::QName(kAttributeSignature),
506 if (!(*it)->EncodeFormRequest(FormStructure::QUERY,
507 encompassing_xml_element.get()))
508 continue; // Malformed form, skip it.
510 autofill_request_xml.AddElement(encompassing_xml_element.release());
511 encoded_signatures->push_back(signature);
514 if (!encoded_signatures->size())
517 autofill_request_xml.SetAttr(buzz::QName(kAttributeAcceptedFeatures),
518 kAcceptedFeaturesExperiment);
520 // Obtain the XML structure as a string.
521 *encoded_xml = kXMLDeclaration;
522 *encoded_xml += autofill_request_xml.Str().c_str();
// Parses the server's query response |response_xml| and applies it to |forms|.
// Every form receives the parsed upload-required value and experiment id.
// Server field infos are matched to fields positionally: fields rejected by
// ShouldSkipField() are passed over, and each remaining field takes its server
// type (and default value, when non-empty) from |current_info|. Finally one
// ServerQueryMetric is logged that summarizes how the server types compared
// with the local heuristics.
// NOTE(review): the parser's remaining constructor arguments, the statements
// after the guard conditions and the advance of |current_info| are missing
// from this listing (numbering jumps 538 -> 541, 543 -> 546, 584 -> 589).
528 void FormStructure::ParseQueryResponse(
529 const std::string& response_xml,
530 const std::vector<FormStructure*>& forms,
531 const AutofillMetrics& metric_logger) {
532 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_RECEIVED);
534 // Parse the field types from the server response to the query.
535 std::vector<AutofillServerFieldInfo> field_infos;
536 UploadRequired upload_required;
537 std::string experiment_id;
538 AutofillQueryXmlParser parse_handler(&field_infos,
541 buzz::XmlParser parser(&parse_handler);
542 parser.Parse(response_xml.c_str(), response_xml.length(), true);
543 if (!parse_handler.succeeded())
546 metric_logger.LogServerQueryMetric(AutofillMetrics::QUERY_RESPONSE_PARSED);
547 metric_logger.LogServerExperimentIdForQuery(experiment_id);
549 bool heuristics_detected_fillable_field = false;
550 bool query_response_overrode_heuristics = false;
552 // Copy the field types into the actual form.
553 std::vector<AutofillServerFieldInfo>::iterator current_info =
555 for (std::vector<FormStructure*>::const_iterator iter = forms.begin();
556 iter != forms.end(); ++iter) {
557 FormStructure* form = *iter;
558 form->upload_required_ = upload_required;
559 form->server_experiment_id_ = experiment_id;
561 for (std::vector<AutofillField*>::iterator field = form->fields_.begin();
562 field != form->fields_.end(); ++field) {
563 if (form->ShouldSkipField(**field))
566 // In some cases *successful* response does not return all the fields.
567 // Quit the update of the types then.
568 if (current_info == field_infos.end())
571 // UNKNOWN_TYPE is reserved for use by the client.
572 DCHECK_NE(current_info->field_type, UNKNOWN_TYPE);
// Read the heuristic type before the server type is set so the two can be
// compared afterwards.
574 ServerFieldType heuristic_type = (*field)->heuristic_type();
575 if (heuristic_type != UNKNOWN_TYPE)
576 heuristics_detected_fillable_field = true;
578 (*field)->set_server_type(current_info->field_type);
579 if (heuristic_type != (*field)->Type().GetStorableType())
580 query_response_overrode_heuristics = true;
582 // Copy default value into the field if available.
583 if (!current_info->default_value.empty())
584 (*field)->set_default_value(current_info->default_value);
589 form->UpdateAutofillCount();
590 form->IdentifySections(false);
593 AutofillMetrics::ServerQueryMetric metric;
594 if (query_response_overrode_heuristics) {
595 if (heuristics_detected_fillable_field) {
596 metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS;
598 metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS;
601 metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS;
603 metric_logger.LogServerQueryMetric(metric);
// Appends one FormDataPredictions entry to |forms| per element of
// |form_structures|. Each entry carries the form's name, URLs, signature and
// experiment id, a copy of every field's data, and for every field its
// signature plus the heuristic, server and overall types as strings.
// NOTE(review): lines are missing from this listing (numbering jumps
// 609 -> 611, 615 -> 617); the target of the "POST"/"GET" assignment is among
// them.
607 void FormStructure::GetFieldTypePredictions(
608 const std::vector<FormStructure*>& form_structures,
609 std::vector<FormDataPredictions>* forms) {
611 forms->reserve(form_structures.size());
612 for (size_t i = 0; i < form_structures.size(); ++i) {
613 FormStructure* form_structure = form_structures[i];
614 FormDataPredictions form;
615 form.data.name = form_structure->form_name_;
617 ASCIIToUTF16((form_structure->method_ == POST) ? "POST" : "GET");
618 form.data.origin = form_structure->source_url_;
619 form.data.action = form_structure->target_url_;
620 form.signature = form_structure->FormSignature();
621 form.experiment_id = form_structure->server_experiment_id_;
623 for (std::vector<AutofillField*>::const_iterator field =
624 form_structure->fields_.begin();
625 field != form_structure->fields_.end(); ++field) {
// |form.data.fields| and |form.fields| are filled in step, one entry each per
// field.
626 form.data.fields.push_back(FormFieldData(**field));
628 FormFieldDataPredictions annotated_field;
629 annotated_field.signature = (*field)->FieldSignature();
630 annotated_field.heuristic_type =
631 AutofillType((*field)->heuristic_type()).ToString();
632 annotated_field.server_type =
633 AutofillType((*field)->server_type()).ToString();
634 annotated_field.overall_type = (*field)->Type().ToString();
635 form.fields.push_back(annotated_field);
638 forms->push_back(form);
// Returns Hash64Bit() of the string
// "<scheme>://<host>&<form name><field names>", where the field names are the
// "&"-prefixed entries accumulated in |form_signature_field_names_|. Scheme and
// host come from the target URL, or from the source URL when either part of
// the target is empty.
642 std::string FormStructure::FormSignature() const {
643 std::string scheme(target_url_.scheme());
644 std::string host(target_url_.host());
646 // If target host or scheme is empty, set scheme and host of source url.
647 // This is done to match the Toolbar's behavior.
648 if (scheme.empty() || host.empty()) {
649 scheme = source_url_.scheme();
650 host = source_url_.host();
653 std::string form_string = scheme + "://" + host + "&" +
654 UTF16ToUTF8(form_name_) +
655 form_signature_field_names_;
657 return Hash64Bit(form_string);
// Returns true for checkable fields; callers in this file use it to decide
// whether a field takes part in the signature and in the server response.
660 bool FormStructure::ShouldSkipField(const FormFieldData& field) const {
661 return field.is_checkable;
// A form is autofillable when it has at least |kRequiredAutofillFields|
// fillable fields and also satisfies ShouldBeParsed(|require_method_post|).
// NOTE(review): the statement guarded by the count check is missing from this
// listing (numbering jumps 665 -> 668) — presumably an early `return false`.
664 bool FormStructure::IsAutofillable(bool require_method_post) const {
665 if (autofill_count() < kRequiredAutofillFields)
668 return ShouldBeParsed(require_method_post);
// Walks all fields and examines each non-null field whose IsFieldFillable() is
// true.
// NOTE(review): the statements that reset and increment the counter are
// missing from this listing (numbering jumps 671 -> 673, 676 -> 681).
671 void FormStructure::UpdateAutofillCount() {
673 for (std::vector<AutofillField*>::const_iterator iter = begin();
674 iter != end(); ++iter) {
675 AutofillField* field = *iter;
676 if (field && field->IsFieldFillable())
// Decides whether the form is worth parsing. The visible checks are: a minimum
// number of active fields, a target path other than "/search", the presence of
// at least one field that is not a "select-one" control and, when
// |require_method_post| is set, a POST method.
// NOTE(review): the statements guarded by the first two checks and the use of
// |has_text_field| after the loop are missing from this listing (numbering
// jumps 682 -> 685, 688 -> 691, 694 -> 699).
681 bool FormStructure::ShouldBeParsed(bool require_method_post) const {
682 if (active_field_count() < kRequiredAutofillFields)
685 // Rule out http(s)://*/search?...
686 // e.g. http://www.google.com/search?q=...
687 // http://search.yahoo.com/search?p=...
688 if (target_url_.path() == "/search")
691 bool has_text_field = false;
// The loop condition stops the scan at the first non-select field.
692 for (std::vector<AutofillField*>::const_iterator it = begin();
693 it != end() && !has_text_field; ++it) {
694 has_text_field |= (*it)->form_control_type != "select-one";
699 return !require_method_post || (method_ == POST);
// True when the form has no author-specified types and passes
// ShouldBeParsed() with the POST requirement enabled.
702 bool FormStructure::ShouldBeCrowdsourced() const {
703 return !has_author_specified_types_ && ShouldBeParsed(true);
// Carries state over from |cached_form|. Fields are matched by field
// signature; a matched field takes the cached heuristic and server types, and
// a matched non-select field whose value equals the cached value has that
// value cleared. The autofill count is then refreshed, and the experiment id
// and |form_signature_field_names_| are copied from |cached_form|.
706 void FormStructure::UpdateFromCache(const FormStructure& cached_form) {
707 // Map from field signatures to cached fields.
// When several cached fields share a signature, the last one wins.
708 std::map<std::string, const AutofillField*> cached_fields;
709 for (size_t i = 0; i < cached_form.field_count(); ++i) {
710 const AutofillField* field = cached_form.field(i);
711 cached_fields[field->FieldSignature()] = field;
714 for (std::vector<AutofillField*>::const_iterator iter = begin();
715 iter != end(); ++iter) {
716 AutofillField* field = *iter;
718 std::map<std::string, const AutofillField*>::const_iterator
719 cached_field = cached_fields.find(field->FieldSignature());
720 if (cached_field != cached_fields.end()) {
721 if (field->form_control_type != "select-one" &&
722 field->value == cached_field->second->value) {
723 // From the perspective of learning user data, text fields containing
724 // default values are equivalent to empty fields.
725 field->value = base::string16();
728 field->set_heuristic_type(cached_field->second->heuristic_type());
729 field->set_server_type(cached_field->second->server_type());
733 UpdateAutofillCount();
735 server_experiment_id_ = cached_form.server_experiment_id();
737 // The form signature should match between query and upload requests to the
738 // server. On many websites, form elements are dynamically added, removed, or
739 // rearranged via JavaScript between page load and form submission, so we
740 // copy over the |form_signature_field_names_| corresponding to the query
742 DCHECK_EQ(cached_form.form_name_, form_name_);
743 DCHECK_EQ(cached_form.source_url_, source_url_);
744 DCHECK_EQ(cached_form.target_url_, target_url_);
745 form_signature_field_names_ = cached_form.form_signature_field_names_;
// Logs submission-time quality metrics through |metric_logger|. For each field
// it records that the field was submitted and, for fields whose possible types
// contain neither EMPTY_TYPE nor UNKNOWN_TYPE, how the heuristic, server and
// overall predictions compare with those possible types. It then logs one
// user-happiness metric for the whole form and, when |load_time| or
// |interaction_time| is set, the elapsed time until |submission_time|.
// NOTE(review): many lines are missing from this listing, among them trailing
// call arguments and the `else`/`continue` lines between branches (e.g.
// numbering jumps 761 -> 764, 768 -> 771, 845 -> 848); read the branch
// structure against the full file.
748 void FormStructure::LogQualityMetrics(
749 const AutofillMetrics& metric_logger,
750 const base::TimeTicks& load_time,
751 const base::TimeTicks& interaction_time,
752 const base::TimeTicks& submission_time) const {
753 std::string experiment_id = server_experiment_id();
754 metric_logger.LogServerExperimentIdForUpload(experiment_id);
756 size_t num_detected_field_types = 0;
757 bool did_autofill_all_possible_fields = true;
758 bool did_autofill_some_possible_fields = false;
759 for (size_t i = 0; i < field_count(); ++i) {
760 const AutofillField* field = this->field(i);
761 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_SUBMITTED,
764 // No further logging for empty fields nor for fields where the entered data
765 // does not appear to already exist in the user's stored Autofill data.
766 const ServerFieldTypeSet& field_types = field->possible_types();
767 DCHECK(!field_types.empty());
768 if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE))
771 ++num_detected_field_types;
772 if (field->is_autofilled)
773 did_autofill_some_possible_fields = true;
775 did_autofill_all_possible_fields = false;
777 // Collapse field types that Chrome treats as identical, e.g. home and
778 // billing address fields.
779 ServerFieldTypeSet collapsed_field_types;
780 for (ServerFieldTypeSet::const_iterator it = field_types.begin();
781 it != field_types.end();
783 // Since we currently only support US phone numbers, the (city code + main
784 // digits) number is almost always identical to the whole phone number.
785 // TODO(isherman): Improve this logic once we add support for
786 // international numbers.
787 if (*it == PHONE_HOME_CITY_AND_NUMBER)
788 collapsed_field_types.insert(PHONE_HOME_WHOLE_NUMBER);
790 collapsed_field_types.insert(AutofillType(*it).GetStorableType());
793 // Capture the field's type, if it is unambiguous.
794 ServerFieldType field_type = UNKNOWN_TYPE;
795 if (collapsed_field_types.size() == 1)
796 field_type = *collapsed_field_types.begin();
798 ServerFieldType heuristic_type =
799 AutofillType(field->heuristic_type()).GetStorableType();
800 ServerFieldType server_type =
801 AutofillType(field->server_type()).GetStorableType();
802 ServerFieldType predicted_type = field->Type().GetStorableType();
804 // Log heuristic, server, and overall type quality metrics, independently of
805 // whether the field was autofilled.
// Matches are tested against the uncollapsed |field_types|; the collapsed
// |field_type| is only what gets reported with the metric.
806 if (heuristic_type == UNKNOWN_TYPE) {
807 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
808 field_type, experiment_id);
809 } else if (field_types.count(heuristic_type)) {
810 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MATCH,
811 field_type, experiment_id);
813 metric_logger.LogHeuristicTypePrediction(AutofillMetrics::TYPE_MISMATCH,
814 field_type, experiment_id);
// For the server prediction the "unknown" sentinel is NO_SERVER_DATA rather
// than UNKNOWN_TYPE.
817 if (server_type == NO_SERVER_DATA) {
818 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
819 field_type, experiment_id);
820 } else if (field_types.count(server_type)) {
821 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MATCH,
822 field_type, experiment_id);
824 metric_logger.LogServerTypePrediction(AutofillMetrics::TYPE_MISMATCH,
825 field_type, experiment_id);
828 if (predicted_type == UNKNOWN_TYPE) {
829 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_UNKNOWN,
830 field_type, experiment_id);
831 } else if (field_types.count(predicted_type)) {
832 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MATCH,
833 field_type, experiment_id);
835 metric_logger.LogOverallTypePrediction(AutofillMetrics::TYPE_MISMATCH,
836 field_type, experiment_id);
839 // TODO(isherman): <select> fields don't support |is_autofilled()|, so we
840 // have to skip them for the remaining metrics.
841 if (field->form_control_type == "select-one")
844 if (field->is_autofilled) {
845 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_AUTOFILLED,
848 metric_logger.LogQualityMetric(AutofillMetrics::FIELD_NOT_AUTOFILLED,
851 if (heuristic_type == UNKNOWN_TYPE) {
852 metric_logger.LogQualityMetric(
853 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_UNKNOWN,
855 } else if (field_types.count(heuristic_type)) {
856 metric_logger.LogQualityMetric(
857 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MATCH,
860 metric_logger.LogQualityMetric(
861 AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MISMATCH,
865 if (server_type == NO_SERVER_DATA) {
866 metric_logger.LogQualityMetric(
867 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_UNKNOWN,
869 } else if (field_types.count(server_type)) {
870 metric_logger.LogQualityMetric(
871 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MATCH,
874 metric_logger.LogQualityMetric(
875 AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MISMATCH,
// Form-level summary: too few detected field types counts as a non-fillable
// form.
881 if (num_detected_field_types < kRequiredAutofillFields) {
882 metric_logger.LogUserHappinessMetric(
883 AutofillMetrics::SUBMITTED_NON_FILLABLE_FORM);
885 if (did_autofill_all_possible_fields) {
886 metric_logger.LogUserHappinessMetric(
887 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_ALL);
888 } else if (did_autofill_some_possible_fields) {
889 metric_logger.LogUserHappinessMetric(
890 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_SOME);
892 metric_logger.LogUserHappinessMetric(
893 AutofillMetrics::SUBMITTED_FILLABLE_FORM_AUTOFILLED_NONE);
896 // Unlike the other times, the |submission_time| should always be available.
897 DCHECK(!submission_time.is_null());
899 // The |load_time| might be unset, in the case that the form was dynamically
901 if (!load_time.is_null()) {
902 // Submission should always chronologically follow form load.
903 DCHECK(submission_time > load_time);
904 base::TimeDelta elapsed = submission_time - load_time;
905 if (did_autofill_some_possible_fields)
906 metric_logger.LogFormFillDurationFromLoadWithAutofill(elapsed);
908 metric_logger.LogFormFillDurationFromLoadWithoutAutofill(elapsed);
911 // The |interaction_time| might be unset, in the case that the user
912 // submitted a blank form.
913 if (!interaction_time.is_null()) {
914 // Submission should always chronologically follow interaction.
915 DCHECK(submission_time > interaction_time);
916 base::TimeDelta elapsed = submission_time - interaction_time;
917 if (did_autofill_some_possible_fields) {
918 metric_logger.LogFormFillDurationFromInteractionWithAutofill(elapsed);
920 metric_logger.LogFormFillDurationFromInteractionWithoutAutofill(
927 const AutofillField* FormStructure::field(size_t index) const {
928 if (index >= fields_.size()) {
933 return fields_[index];
936 AutofillField* FormStructure::field(size_t index) {
937 return const_cast<AutofillField*>(
938 static_cast<const FormStructure*>(this)->field(index));
941 size_t FormStructure::field_count() const {
942 return fields_.size();
945 size_t FormStructure::active_field_count() const {
946 return active_field_count_;
949 std::string FormStructure::server_experiment_id() const {
950 return server_experiment_id_;
953 FormData FormStructure::ToFormData() const {
954 // |data.user_submitted| will always be false.
956 data.name = form_name_;
957 data.origin = source_url_;
958 data.action = target_url_;
959 data.method = ASCIIToUTF16(method_ == POST ? "POST" : "GET");
961 for (size_t i = 0; i < fields_.size(); ++i) {
962 data.fields.push_back(FormFieldData(*fields_[i]));
968 bool FormStructure::operator==(const FormData& form) const {
969 // TODO(jhawkins): Is this enough to differentiate a form?
970 if (form_name_ == form.name &&
971 source_url_ == form.origin &&
972 target_url_ == form.action) {
976 // TODO(jhawkins): Compare field names, IDs and labels once we have labels
982 bool FormStructure::operator!=(const FormData& form) const {
983 return !operator==(form);
986 std::string FormStructure::Hash64Bit(const std::string& str) {
987 std::string hash_bin = base::SHA1HashString(str);
988 DCHECK_EQ(20U, hash_bin.length());
990 uint64 hash64 = (((static_cast<uint64>(hash_bin[0])) & 0xFF) << 56) |
991 (((static_cast<uint64>(hash_bin[1])) & 0xFF) << 48) |
992 (((static_cast<uint64>(hash_bin[2])) & 0xFF) << 40) |
993 (((static_cast<uint64>(hash_bin[3])) & 0xFF) << 32) |
994 (((static_cast<uint64>(hash_bin[4])) & 0xFF) << 24) |
995 (((static_cast<uint64>(hash_bin[5])) & 0xFF) << 16) |
996 (((static_cast<uint64>(hash_bin[6])) & 0xFF) << 8) |
997 ((static_cast<uint64>(hash_bin[7])) & 0xFF);
999 return base::Uint64ToString(hash64);
1002 bool FormStructure::EncodeFormRequest(
1003 FormStructure::EncodeRequestType request_type,
1004 buzz::XmlElement* encompassing_xml_element) const {
1005 if (!field_count()) // Nothing to add.
1008 // Some badly formatted web sites repeat fields - limit number of fields to
1009 // 48, which is far larger than any valid form and XML still fits into 2K.
1010 // Do not send requests for forms with more than this many fields, as they are
1011 // near certainly not valid/auto-fillable.
1012 const size_t kMaxFieldsOnTheForm = 48;
1013 if (field_count() > kMaxFieldsOnTheForm)
1016 // Add the child nodes for the form fields.
1017 for (size_t index = 0; index < field_count(); ++index) {
1018 const AutofillField* field = fields_[index];
1019 switch (request_type) {
1020 case FormStructure::UPLOAD:
1021 EncodeFieldForUpload(*field, encompassing_xml_element);
1023 case FormStructure::QUERY:
1024 if (ShouldSkipField(*field))
1026 EncodeFieldForQuery(*field, encompassing_xml_element);
1028 case FormStructure::FIELD_ASSIGNMENTS:
1029 EncodeFieldForFieldAssignments(*field, encompassing_xml_element);
1036 void FormStructure::ParseFieldTypesFromAutocompleteAttributes(
1038 bool* found_sections) {
1039 const std::string kDefaultSection = "-default";
1041 *found_types = false;
1042 *found_sections = false;
1043 for (std::vector<AutofillField*>::iterator it = fields_.begin();
1044 it != fields_.end(); ++it) {
1045 AutofillField* field = *it;
1047 // To prevent potential section name collisions, add a default suffix for
1048 // other fields. Without this, 'autocomplete' attribute values
1049 // "section--shipping street-address" and "shipping street-address" would be
1050 // parsed identically, given the section handling code below. We do this
1051 // before any validation so that fields with invalid attributes still end up
1052 // in the default section. These default section names will be overridden
1053 // by subsequent heuristic parsing steps if there are no author-specified
1055 field->set_section(kDefaultSection);
1057 // Canonicalize the attribute value by trimming whitespace, collapsing
1058 // non-space characters (e.g. tab) to spaces, and converting to lowercase.
1059 std::string autocomplete_attribute =
1060 CollapseWhitespaceASCII(field->autocomplete_attribute, false);
1061 autocomplete_attribute = StringToLowerASCII(autocomplete_attribute);
1063 // The autocomplete attribute is overloaded: it can specify either a field
1064 // type hint or whether autocomplete should be enabled at all. Ignore the
1065 // latter type of attribute value.
1066 if (autocomplete_attribute.empty() ||
1067 autocomplete_attribute == "on" ||
1068 autocomplete_attribute == "off") {
1072 // Any other value, even it is invalid, is considered to be a type hint.
1073 // This allows a website's author to specify an attribute like
1074 // autocomplete="other" on a field to disable all Autofill heuristics for
1076 *found_types = true;
1078 // Tokenize the attribute value. Per the spec, the tokens are parsed in
1080 std::vector<std::string> tokens;
1081 Tokenize(autocomplete_attribute, " ", &tokens);
1083 // The final token must be the field type.
1084 // If it is not one of the known types, abort.
1085 DCHECK(!tokens.empty());
1086 std::string field_type_token = tokens.back();
1088 HtmlFieldType field_type =
1089 FieldTypeFromAutocompleteAttributeValue(field_type_token, *field);
1090 if (field_type == HTML_TYPE_UNKNOWN)
1093 // The preceding token, if any, may be a type hint.
1094 if (!tokens.empty() && IsContactTypeHint(tokens.back())) {
1095 // If it is, it must match the field type; otherwise, abort.
1096 // Note that an invalid token invalidates the entire attribute value, even
1097 // if the other tokens are valid.
1098 if (!ContactTypeHintMatchesFieldType(tokens.back(), field_type))
1101 // Chrome Autofill ignores these type hints.
1105 // The preceding token, if any, may be a fixed string that is either
1106 // "shipping" or "billing". Chrome Autofill treats these as implicit
1107 // section name suffixes.
1108 DCHECK_EQ(kDefaultSection, field->section());
1109 std::string section = field->section();
1110 HtmlFieldMode mode = HTML_MODE_NONE;
1111 if (!tokens.empty()) {
1112 if (tokens.back() == kShippingMode)
1113 mode = HTML_MODE_SHIPPING;
1114 else if (tokens.back() == kBillingMode)
1115 mode = HTML_MODE_BILLING;
1118 if (mode != HTML_MODE_NONE) {
1119 section = "-" + tokens.back();
1123 // The preceding token, if any, may be a named section.
1124 const std::string kSectionPrefix = "section-";
1125 if (!tokens.empty() &&
1126 StartsWithASCII(tokens.back(), kSectionPrefix, true)) {
1127 // Prepend this section name to the suffix set in the preceding block.
1128 section = tokens.back().substr(kSectionPrefix.size()) + section;
1132 // No other tokens are allowed. If there are any remaining, abort.
1133 if (!tokens.empty())
1136 if (section != kDefaultSection) {
1137 *found_sections = true;
1138 field->set_section(section);
1141 // No errors encountered while parsing!
1142 // Update the |field|'s type based on what was parsed from the attribute.
1143 field->SetHtmlType(field_type, mode);
1147 void FormStructure::IdentifySections(bool has_author_specified_sections) {
1148 if (fields_.empty())
1151 if (!has_author_specified_sections) {
1152 // Name sections after the first field in the section.
1153 base::string16 current_section = fields_.front()->unique_name();
1155 // Keep track of the types we've seen in this section.
1156 std::set<ServerFieldType> seen_types;
1157 ServerFieldType previous_type = UNKNOWN_TYPE;
1159 for (std::vector<AutofillField*>::iterator field = fields_.begin();
1160 field != fields_.end(); ++field) {
1161 const ServerFieldType current_type = (*field)->Type().GetStorableType();
1163 bool already_saw_current_type = seen_types.count(current_type) > 0;
1165 // Forms often ask for multiple phone numbers -- e.g. both a daytime and
1166 // evening phone number. Our phone number detection is also generally a
1167 // little off. Hence, ignore this field type as a signal here.
1168 if (AutofillType(current_type).group() == PHONE_HOME)
1169 already_saw_current_type = false;
1171 // Some forms have adjacent fields of the same type. Two common examples:
1172 // * Forms with two email fields, where the second is meant to "confirm"
1174 // * Forms with a <select> menu for states in some countries, and a
1175 // freeform <input> field for states in other countries. (Usually,
1176 // only one of these two will be visible for any given choice of
1178 // Generally, adjacent fields of the same type belong in the same logical
1180 if (current_type == previous_type)
1181 already_saw_current_type = false;
1183 previous_type = current_type;
1185 if (current_type != UNKNOWN_TYPE && already_saw_current_type) {
1186 // We reached the end of a section, so start a new section.
1188 current_section = (*field)->unique_name();
1191 seen_types.insert(current_type);
1192 (*field)->set_section(UTF16ToUTF8(current_section));
1196 // Ensure that credit card and address fields are in separate sections.
1197 // This simplifies the section-aware logic in autofill_manager.cc.
1198 for (std::vector<AutofillField*>::iterator field = fields_.begin();
1199 field != fields_.end(); ++field) {
1200 FieldTypeGroup field_type_group = (*field)->Type().group();
1201 if (field_type_group == CREDIT_CARD)
1202 (*field)->set_section((*field)->section() + "-cc");
1204 (*field)->set_section((*field)->section() + "-default");
1208 } // namespace autofill