1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/autofill/content/renderer/form_autofill_util.h"
9 #include "base/command_line.h"
10 #include "base/logging.h"
11 #include "base/memory/scoped_vector.h"
12 #include "base/metrics/field_trial.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "components/autofill/core/common/autofill_data_validation.h"
16 #include "components/autofill/core/common/autofill_switches.h"
17 #include "components/autofill/core/common/form_data.h"
18 #include "components/autofill/core/common/form_field_data.h"
19 #include "components/autofill/core/common/web_element_descriptor.h"
20 #include "third_party/WebKit/public/platform/WebString.h"
21 #include "third_party/WebKit/public/platform/WebVector.h"
22 #include "third_party/WebKit/public/web/WebDocument.h"
23 #include "third_party/WebKit/public/web/WebElement.h"
24 #include "third_party/WebKit/public/web/WebElementCollection.h"
25 #include "third_party/WebKit/public/web/WebExceptionCode.h"
26 #include "third_party/WebKit/public/web/WebFormControlElement.h"
27 #include "third_party/WebKit/public/web/WebFormElement.h"
28 #include "third_party/WebKit/public/web/WebInputElement.h"
29 #include "third_party/WebKit/public/web/WebLabelElement.h"
30 #include "third_party/WebKit/public/web/WebLocalFrame.h"
31 #include "third_party/WebKit/public/web/WebNode.h"
32 #include "third_party/WebKit/public/web/WebNodeList.h"
33 #include "third_party/WebKit/public/web/WebOptionElement.h"
34 #include "third_party/WebKit/public/web/WebSelectElement.h"
35 #include "third_party/WebKit/public/web/WebTextAreaElement.h"
37 using blink::WebDocument;
38 using blink::WebElement;
39 using blink::WebElementCollection;
40 using blink::WebExceptionCode;
41 using blink::WebFormControlElement;
42 using blink::WebFormElement;
43 using blink::WebFrame;
44 using blink::WebInputElement;
45 using blink::WebLabelElement;
47 using blink::WebNodeList;
48 using blink::WebOptionElement;
49 using blink::WebSelectElement;
50 using blink::WebTextAreaElement;
51 using blink::WebString;
52 using blink::WebVector;
57 // A bit field mask for FillForm functions to not fill some fields.
// The individual flags occupy distinct bits and may be OR-ed together.
// ForEachMatchingFormField() tests them against an element's isEnabled(),
// isReadOnly() and isFocusable() state respectively.
// NOTE(review): "EDITIABLE" below looks like a misspelling of "EDITABLE";
// renaming it would require updating every user of the enumerator.
58 enum FieldFilterMask {
60 FILTER_DISABLED_ELEMENTS = 1 << 0,
61 FILTER_READONLY_ELEMENTS = 1 << 1,
62 FILTER_NON_FOCUSABLE_ELEMENTS = 1 << 2,
// Convenience mask combining the three filters above.
63 FILTER_ALL_NON_EDITIABLE_ELEMENTS = FILTER_DISABLED_ELEMENTS |
64 FILTER_READONLY_ELEMENTS |
65 FILTER_NON_FOCUSABLE_ELEMENTS,
// Returns true if |element| has the HTML tag name "option".
68 bool IsOptionElement(const WebElement& element) {
69 CR_DEFINE_STATIC_LOCAL(WebString, kOption, ("option"));
70 return element.hasHTMLTagName(kOption);
// Returns true if |element| has the HTML tag name "script".
73 bool IsScriptElement(const WebElement& element) {
74 CR_DEFINE_STATIC_LOCAL(WebString, kScript, ("script"));
75 return element.hasHTMLTagName(kScript);
// Returns true if |element| has the HTML tag name "noscript".
78 bool IsNoScriptElement(const WebElement& element) {
79 CR_DEFINE_STATIC_LOCAL(WebString, kNoScript, ("noscript"));
80 return element.hasHTMLTagName(kNoScript);
// Returns true if |node| is an element node whose HTML tag name is |tag|.
// Non-element nodes (e.g. text nodes) never match.
83 bool HasTagName(const WebNode& node, const blink::WebString& tag) {
84 return node.isElementNode() && node.toConst<WebElement>().hasHTMLTagName(tag);
// Returns true if |element| is one of the control kinds Autofill handles:
// an input accepted by IsAutofillableInputElement(), a select element, or a
// textarea.
87 bool IsAutofillableElement(const WebFormControlElement& element) {
88 const WebInputElement* input_element = toWebInputElement(&element);
89 return IsAutofillableInputElement(input_element) ||
90 IsSelectElement(element) ||
91 IsTextAreaElement(element);
94 // Check whether the given field satisfies the REQUIRE_AUTOCOMPLETE requirement.
// This is simply the value reported by |input_element|.autoComplete().
95 bool SatisfiesRequireAutocomplete(const WebInputElement& input_element) {
96 return input_element.autoComplete();
99 // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed
100 // to a single space. If |force_whitespace| is true, then the resulting string
101 // is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the
102 // result includes a space only if |prefix| has trailing whitespace or |suffix|
103 // has leading whitespace.
// Only the whitespace at the join point is touched: leading whitespace of
// |prefix| and trailing whitespace of |suffix| are preserved.
// Examples:
105 // * CombineAndCollapseWhitespace("foo", "bar", false) -> "foobar"
106 // * CombineAndCollapseWhitespace("foo", "bar", true) -> "foo bar"
107 // * CombineAndCollapseWhitespace("foo ", "bar", false) -> "foo bar"
108 // * CombineAndCollapseWhitespace("foo", " bar", false) -> "foo bar"
109 // * CombineAndCollapseWhitespace("foo", " bar", true) -> "foo bar"
110 // * CombineAndCollapseWhitespace("foo ", " bar", false) -> "foo bar"
111 // * CombineAndCollapseWhitespace(" foo", "bar ", false) -> " foobar "
112 // * CombineAndCollapseWhitespace(" foo", "bar ", true) -> " foo bar "
113 const base::string16 CombineAndCollapseWhitespace(
114 const base::string16& prefix,
115 const base::string16& suffix,
116 bool force_whitespace) {
// Strip trailing whitespace from |prefix|, remembering whether any was found.
117 base::string16 prefix_trimmed;
118 base::TrimPositions prefix_trailing_whitespace =
119 base::TrimWhitespace(prefix, base::TRIM_TRAILING, &prefix_trimmed);
121 // Likewise strip leading whitespace from |suffix|, remembering whether any was found.
122 base::string16 suffix_trimmed;
123 base::TrimPositions suffix_leading_whitespace =
124 base::TrimWhitespace(suffix, base::TRIM_LEADING, &suffix_trimmed);
// Join with exactly one space when a separator is called for; otherwise
// concatenate the trimmed pieces directly.
126 if (prefix_trailing_whitespace || suffix_leading_whitespace ||
128 return prefix_trimmed + base::ASCIIToUTF16(" ") + suffix_trimmed;
130 return prefix_trimmed + suffix_trimmed;
134 // This is a helper function for the FindChildText() function (see below).
135 // Search depth is limited with the |depth| parameter.
// |depth| is decremented on every recursive call, for siblings
// (nextSibling()) as well as for children (firstChild()), so it bounds the
// breadth of the traversal as well as its depth.
136 base::string16 FindChildTextInner(const WebNode& node, int depth) {
// Budget exhausted or nothing to visit.
137 if (depth <= 0 || node.isNull())
138 return base::string16();
140 // Skip over comments.
141 if (node.nodeType() == WebNode::CommentNode)
142 return FindChildTextInner(node.nextSibling(), depth - 1);
// Any other node that is neither an element nor text ends the walk here:
// the function returns before the sibling recursion below, so siblings that
// follow such a node are not visited.
144 if (node.nodeType() != WebNode::ElementNode &&
145 node.nodeType() != WebNode::TextNode)
146 return base::string16();
148 // Ignore elements known not to contain inferable labels.
// As above, returning here also skips the siblings that follow |node|.
149 if (node.isElementNode()) {
150 const WebElement element = node.toConst<WebElement>();
151 if (IsOptionElement(element) ||
152 IsScriptElement(element) ||
153 IsNoScriptElement(element) ||
154 (element.isFormControlElement() &&
155 IsAutofillableElement(element.toConst<WebFormControlElement>()))) {
156 return base::string16();
160 // Extract the text exactly at this node.
161 base::string16 node_text = node.nodeValue();
163 // Recursively compute the children's text.
164 // Preserve inter-element whitespace separation.
165 base::string16 child_text = FindChildTextInner(node.firstChild(), depth - 1);
// An empty text node forces a separating space between neighbours.
166 bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
167 node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space);
169 // Recursively compute the siblings' text.
170 // Again, preserve inter-element whitespace separation.
171 base::string16 sibling_text =
172 FindChildTextInner(node.nextSibling(), depth - 1);
173 add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
174 node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space);
179 // Returns the aggregated values of the descendants of |element| that are
180 // non-empty text nodes. This is a faster alternative to |innerText()| for
181 // performance critical operations. It does a full depth-first search so can be
182 // used when the structure is not directly known. However, unlike with
183 // |innerText()|, the search depth and breadth are limited to a fixed threshold.
184 // Whitespace is trimmed from text accumulated at descendant nodes.
// NOTE(review): the parameter is named |node|, not |element| as written above.
185 base::string16 FindChildText(const WebNode& node) {
// A text node is returned as is; its value is not trimmed on this path.
186 if (node.isTextNode())
187 return node.nodeValue();
189 WebNode child = node.firstChild();
// Traversal budget handed to FindChildTextInner().
191 const int kChildSearchDepth = 10;
192 base::string16 node_text = FindChildTextInner(child, kChildSearchDepth);
// Strip leading and trailing whitespace from the accumulated text.
193 base::TrimWhitespace(node_text, base::TRIM_ALL, &node_text);
197 // Helper for |InferLabelForElement()| that infers a label, if possible, from
198 // a previous sibling of |element|,
199 // e.g. Some Text <input ...>
200 // or Some <span>Text</span> <input ...>
201 // or <p>Some Text</p><input ...>
202 // or <label>Some Text</label> <input ...>
203 // or Some Text <img><input ...>
204 // or <b>Some Text</b><br/> <input ...>.
// The returned label has leading and trailing whitespace trimmed.
205 base::string16 InferLabelFromPrevious(const WebFormControlElement& element) {
206 base::string16 inferred_label;
207 WebNode previous = element;
// Step backwards through the preceding siblings of |element|.
209 previous = previous.previousSibling();
210 if (previous.isNull())
213 // Skip over comments.
214 WebNode::NodeType node_type = previous.nodeType();
215 if (node_type == WebNode::CommentNode)
218 // Otherwise, only consider normal HTML elements and their contents.
219 if (node_type != WebNode::TextNode &&
220 node_type != WebNode::ElementNode)
223 // A label might be split across multiple "lightweight" nodes.
224 // Coalesce any text contained in multiple consecutive
225 // (a) plain text nodes or
226 // (b) inline HTML elements that are essentially equivalent to text nodes.
227 CR_DEFINE_STATIC_LOCAL(WebString, kBold, ("b"));
228 CR_DEFINE_STATIC_LOCAL(WebString, kStrong, ("strong"));
229 CR_DEFINE_STATIC_LOCAL(WebString, kSpan, ("span"));
230 CR_DEFINE_STATIC_LOCAL(WebString, kFont, ("font"));
231 if (previous.isTextNode() ||
232 HasTagName(previous, kBold) || HasTagName(previous, kStrong) ||
233 HasTagName(previous, kSpan) || HasTagName(previous, kFont)) {
234 base::string16 value = FindChildText(previous);
235 // A text node's value will be empty if it is for a line break.
236 bool add_space = previous.isTextNode() && value.empty();
// The walk runs backwards, so the newly found text is placed in front of the
// text collected so far.
238 CombineAndCollapseWhitespace(value, inferred_label, add_space);
242 // If we have identified a partial label and have reached a non-lightweight
243 // element, consider the label to be complete.
244 base::string16 trimmed_label;
245 base::TrimWhitespace(inferred_label, base::TRIM_ALL, &trimmed_label);
246 if (!trimmed_label.empty())
249 // <img> and <br> tags often appear between the input element and its
250 // label text, so skip over them.
251 CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("img"));
252 CR_DEFINE_STATIC_LOCAL(WebString, kBreak, ("br"));
253 if (HasTagName(previous, kImage) || HasTagName(previous, kBreak))
256 // We only expect <p> and <label> tags to contain the full label text.
// Note: |kPage| holds the "p" (paragraph) tag name.
257 CR_DEFINE_STATIC_LOCAL(WebString, kPage, ("p"));
258 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
259 if (HasTagName(previous, kPage) || HasTagName(previous, kLabel))
260 inferred_label = FindChildText(previous);
265 base::TrimWhitespace(inferred_label, base::TRIM_ALL, &inferred_label);
266 return inferred_label;
269 // Helper for |InferLabelForElement()| that infers a label, if possible, from
270 // enclosing list item,
271 // e.g. <li>Some Text<input ...><input ...><input ...></li>
// Returns an empty string when no enclosing <li> is found.
272 base::string16 InferLabelFromListItem(const WebFormControlElement& element) {
273 WebNode parent = element.parentNode();
274 CR_DEFINE_STATIC_LOCAL(WebString, kListItem, ("li"));
// Walk up through element ancestors until one with tag name "li" is found.
275 while (!parent.isNull() && parent.isElementNode() &&
276 !parent.to<WebElement>().hasTagName(kListItem)) {
277 parent = parent.parentNode();
// The loop also stops on a null or non-element node, so re-check the tag.
280 if (!parent.isNull() && HasTagName(parent, kListItem))
281 return FindChildText(parent);
283 return base::string16();
286 // Helper for |InferLabelForElement()| that infers a label, if possible, from
287 // surrounding table structure,
288 // e.g. <tr><td>Some Text</td><td><input ...></td></tr>
289 // or <tr><th>Some Text</th><td><input ...></td></tr>
290 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
291 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr>
292 base::string16 InferLabelFromTableColumn(const WebFormControlElement& element) {
293 CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td"));
// Walk up through element ancestors looking for the enclosing <td>.
294 WebNode parent = element.parentNode();
295 while (!parent.isNull() && parent.isElementNode() &&
296 !parent.to<WebElement>().hasTagName(kTableCell)) {
297 parent = parent.parentNode();
301 return base::string16();
303 // Check all previous siblings, skipping non-element nodes, until we find a
304 // non-empty text block.
// Only <td> and <th> siblings are considered as label sources.
305 base::string16 inferred_label;
306 WebNode previous = parent.previousSibling();
307 CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th"));
308 while (inferred_label.empty() && !previous.isNull()) {
309 if (HasTagName(previous, kTableCell) || HasTagName(previous, kTableHeader))
310 inferred_label = FindChildText(previous);
312 previous = previous.previousSibling();
315 return inferred_label;
318 // Helper for |InferLabelForElement()| that infers a label, if possible, from
319 // surrounding table structure,
320 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
321 base::string16 InferLabelFromTableRow(const WebFormControlElement& element) {
322 CR_DEFINE_STATIC_LOCAL(WebString, kTableRow, ("tr"));
// Walk up through element ancestors looking for the enclosing <tr>.
323 WebNode parent = element.parentNode();
324 while (!parent.isNull() && parent.isElementNode() &&
325 !parent.to<WebElement>().hasTagName(kTableRow)) {
326 parent = parent.parentNode();
330 return base::string16();
332 // Check all previous siblings, skipping non-element nodes, until we find a
333 // non-empty text block.
// Only preceding <tr> siblings are considered as label sources.
334 base::string16 inferred_label;
335 WebNode previous = parent.previousSibling();
336 while (inferred_label.empty() && !previous.isNull()) {
337 if (HasTagName(previous, kTableRow))
338 inferred_label = FindChildText(previous);
340 previous = previous.previousSibling();
343 return inferred_label;
346 // Helper for |InferLabelForElement()| that infers a label, if possible, from
347 // a surrounding div table,
348 // e.g. <div>Some Text<span><input ...></span></div>
349 // e.g. <div>Some Text</div><div><input ...></div>
350 base::string16 InferLabelFromDivTable(const WebFormControlElement& element) {
351 WebNode node = element.parentNode();
// While true the walk moves to the parent node; once a <div> has been seen it
// switches to previous siblings.
352 bool looking_for_parent = true;
354 // Search the sibling and parent <div>s until we find a candidate label.
355 base::string16 inferred_label;
356 CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div"));
357 CR_DEFINE_STATIC_LOCAL(WebString, kTable, ("table"));
358 CR_DEFINE_STATIC_LOCAL(WebString, kFieldSet, ("fieldset"));
359 while (inferred_label.empty() && !node.isNull()) {
360 if (HasTagName(node, kDiv)) {
361 looking_for_parent = false;
362 inferred_label = FindChildText(node);
363 } else if (looking_for_parent &&
364 (HasTagName(node, kTable) || HasTagName(node, kFieldSet))) {
365 // If the element is in a table or fieldset, its label most likely is too.
369 if (node.previousSibling().isNull()) {
370 // If there are no more siblings, continue walking up the tree.
371 looking_for_parent = true;
374 if (looking_for_parent)
375 node = node.parentNode();
377 node = node.previousSibling();
380 return inferred_label;
383 // Helper for |InferLabelForElement()| that infers a label, if possible, from
384 // a surrounding definition list,
385 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl>
386 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl>
// Returns an empty string unless the enclosing <dd> is preceded (ignoring
// text nodes) by a <dt>.
387 base::string16 InferLabelFromDefinitionList(
388 const WebFormControlElement& element) {
389 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionData, ("dd"));
// Walk up through element ancestors looking for the enclosing <dd>.
390 WebNode parent = element.parentNode();
391 while (!parent.isNull() && parent.isElementNode() &&
392 !parent.to<WebElement>().hasTagName(kDefinitionData))
393 parent = parent.parentNode();
395 if (parent.isNull() || !HasTagName(parent, kDefinitionData))
396 return base::string16();
398 // Skip by any intervening text nodes.
399 WebNode previous = parent.previousSibling();
400 while (!previous.isNull() && previous.isTextNode())
401 previous = previous.previousSibling();
// The first non-text sibling before the <dd> must be the <dt>.
403 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt"));
404 if (previous.isNull() || !HasTagName(previous, kDefinitionTag))
405 return base::string16();
407 return FindChildText(previous);
410 // Infers corresponding label for |element| from surrounding context in the DOM,
411 // e.g. the contents of the preceding <p> tag or text element.
// The heuristics are tried in a fixed order and the first non-empty result
// wins: previous sibling, list item, table column, table row, definition
// list, and finally div table (whose result is returned even if empty).
412 base::string16 InferLabelForElement(const WebFormControlElement& element) {
413 base::string16 inferred_label = InferLabelFromPrevious(element);
414 if (!inferred_label.empty())
415 return inferred_label;
417 // If we didn't find a label, check for list item case.
418 inferred_label = InferLabelFromListItem(element);
419 if (!inferred_label.empty())
420 return inferred_label;
422 // If we didn't find a label, check for table cell case.
423 inferred_label = InferLabelFromTableColumn(element);
424 if (!inferred_label.empty())
425 return inferred_label;
427 // If we didn't find a label, check for table row case.
428 inferred_label = InferLabelFromTableRow(element);
429 if (!inferred_label.empty())
430 return inferred_label;
432 // If we didn't find a label, check for definition list case.
433 inferred_label = InferLabelFromDefinitionList(element);
434 if (!inferred_label.empty())
435 return inferred_label;
437 // If we didn't find a label, check for div table case.
438 return InferLabelFromDivTable(element);
441 // Fills |option_values| and |option_contents| with the value() and text() of
// each <option> element among the list items of |select_element|. Both
// vectors are cleared first and are filled in parallel, so entry i of each
// refers to the same option.
443 void GetOptionStringsFromElement(const WebSelectElement& select_element,
444 std::vector<base::string16>* option_values,
445 std::vector<base::string16>* option_contents) {
446 DCHECK(!select_element.isNull());
448 option_values->clear();
449 option_contents->clear();
450 WebVector<WebElement> list_items = select_element.listItems();
452 // Constrain the maximum list length to prevent a malicious site from DOS'ing
453 // the browser, without entirely breaking autocomplete for some extreme
454 // legitimate sites: http://crbug.com/49332 and http://crbug.com/363094
455 if (list_items.size() > kMaxListSize)
458 option_values->reserve(list_items.size());
459 option_contents->reserve(list_items.size());
460 for (size_t i = 0; i < list_items.size(); ++i) {
// List items that are not <option> elements are skipped.
461 if (IsOptionElement(list_items[i])) {
462 const WebOptionElement option = list_items[i].toConst<WebOptionElement>();
463 option_values->push_back(option.value());
464 option_contents->push_back(option.text());
469 // The callback type used by |ForEachMatchingFormField()|.
// Arguments, in order: the field data to apply, whether the element is the
// one that initiated the fill, and the form control element to act on.
// FillFormField() and PreviewFormField() have this signature.
470 typedef void (*Callback)(const FormFieldData&,
471 bool, /* is_initiating_element */
472 blink::WebFormControlElement*);
474 // For each autofillable field in |data| that matches a field in the |form|,
475 // the |callback| is invoked with the corresponding |form| field data.
// |initiating_element| is the element compared against each control to
// compute the callback's is_initiating_element argument. |filters| is a
// bitmask of FieldFilterMask flags tested against each element's enabled,
// read-only and focusable state.
476 void ForEachMatchingFormField(const WebFormElement& form_element,
477 const WebElement& initiating_element,
478 const FormData& data,
479 FieldFilterMask filters,
482 std::vector<WebFormControlElement> control_elements;
483 ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
// Elements and |data.fields| are paired by index below, so a size mismatch
// means the form no longer lines up with the cached data.
486 if (control_elements.size() != data.fields.size()) {
487 // This case should be reachable only for pathological websites and tests,
488 // which add or remove form fields while the user is interacting with the
493 // It's possible that the site has injected fields into the form after the
494 // page has loaded, so we can't assert that the size of the cached control
495 // elements is equal to the size of the fields in |form|. Fortunately, the
496 // one case in the wild where this happens, paypal.com signup form, the fields
497 // are appended to the end of the form and are not visible.
498 for (size_t i = 0; i < control_elements.size(); ++i) {
499 WebFormControlElement* element = &control_elements[i];
501 if (base::string16(element->nameForAutofill()) != data.fields[i].name) {
502 // This case should be reachable only for pathological websites, which
503 // rename form fields while the user is interacting with the Autofill
504 // popup. I (isherman) am not aware of any such websites, and so am
505 // optimistically including a NOTREACHED(). If you ever trip this check,
506 // please file a bug against me.
511 bool is_initiating_element = (*element == initiating_element);
513 // Only autofill empty fields and the field that initiated the filling,
514 // i.e. the field the user is currently editing and interacting with.
515 const WebInputElement* input_element = toWebInputElement(element);
516 if (!force_override && !is_initiating_element &&
517 ((IsAutofillableInputElement(input_element) ||
518 IsTextAreaElement(*element)) &&
519 !element->value().isEmpty()))
// Apply the caller's filters for disabled, read-only and non-focusable
// elements.
522 if (((filters & FILTER_DISABLED_ELEMENTS) && !element->isEnabled()) ||
523 ((filters & FILTER_READONLY_ELEMENTS) && element->isReadOnly()) ||
524 ((filters & FILTER_NON_FOCUSABLE_ELEMENTS) && !element->isFocusable()))
527 callback(data.fields[i], is_initiating_element, element);
531 // Sets the |field|'s value to the value in |data|.
532 // Also sets the "autofilled" attribute, causing the background to be yellow.
// |is_initiating_node| marks the field the fill was started from; for that
// field the caret is moved to the end of the new value.
533 void FillFormField(const FormFieldData& data,
534 bool is_initiating_node,
535 blink::WebFormControlElement* field) {
537 if (data.value.empty())
540 if (!data.is_autofilled)
543 field->setAutofilled(true);
545 WebInputElement* input_element = toWebInputElement(field);
// Checkable inputs take their checked state from |data.is_checked|.
546 if (IsCheckableElement(input_element)) {
547 input_element->setChecked(data.is_checked, true);
549 base::string16 value = data.value;
// Text and month inputs are truncated to the element's maxLength().
550 if (IsTextInput(input_element) || IsMonthInput(input_element)) {
551 // If the maxlength attribute contains a negative value, maxLength()
552 // returns the default maxlength value.
553 value = value.substr(0, input_element->maxLength());
555 field->setValue(value, true);
// For the initiating text-like field, collapse the selection to the end of
// the new value (start == end == length).
558 if (is_initiating_node &&
559 ((IsTextInput(input_element) || IsMonthInput(input_element)) ||
560 IsTextAreaElement(*field))) {
561 int length = field->value().length();
562 field->setSelectionRange(length, length);
563 // Clear the current IME composition (the underline), if there is one.
564 field->document().frame()->unmarkText();
568 // Sets the |field|'s "suggested" (non JS visible) value to the value in |data|.
569 // Also sets the "autofilled" attribute, causing the background to be yellow.
// |is_initiating_node| marks the field the preview was started from; for that
// field the suggested remainder of the text is selected.
570 void PreviewFormField(const FormFieldData& data,
571 bool is_initiating_node,
572 blink::WebFormControlElement* field) {
573 // Nothing to preview.
574 if (data.value.empty())
577 if (!data.is_autofilled)
580 // Preview input, textarea and select fields. For input fields, excludes
581 // checkboxes and radio buttons, as there is no provision for
582 // setSuggestedCheckedValue in WebInputElement.
583 WebInputElement* input_element = toWebInputElement(field);
584 if (IsTextInput(input_element) || IsMonthInput(input_element)) {
585 // If the maxlength attribute contains a negative value, maxLength()
586 // returns the default maxlength value.
587 input_element->setSuggestedValue(
588 data.value.substr(0, input_element->maxLength()));
589 input_element->setAutofilled(true);
590 } else if (IsTextAreaElement(*field) || IsSelectElement(*field)) {
// Textareas and selects receive the suggested value without truncation.
591 field->setSuggestedValue(data.value);
592 field->setAutofilled(true);
// NOTE(review): unlike FillFormField(), month inputs are not included in this
// selection step - confirm whether that is intentional.
595 if (is_initiating_node &&
596 (IsTextInput(input_element) || IsTextAreaElement(*field))) {
597 // Select the part of the text that the user didn't type.
598 int start = field->value().length();
599 int end = field->suggestedValue().length();
600 field->setSelectionRange(start, end);
// Returns a human-readable name for |method| (e.g. "CSS_SELECTOR").
// ClickElement() uses it when logging a failed element lookup.
604 std::string RetrievalMethodToString(
605 const WebElementDescriptor::RetrievalMethod& method) {
607 case WebElementDescriptor::CSS_SELECTOR:
608 return "CSS_SELECTOR";
609 case WebElementDescriptor::ID:
611 case WebElementDescriptor::NONE:
618 // Recursively checks whether |node| or any of its children have a non-empty
619 // bounding box. The recursion depth is bounded by |depth|.
// Each level of recursion into the child nodes passes |depth| - 1.
620 bool IsWebNodeVisibleImpl(const blink::WebNode& node, const int depth) {
623 if (node.hasNonEmptyBoundingBox())
626 // The childNodes method is not a const method. Therefore it cannot be called
627 // on a const reference. Therefore we need a const cast.
628 const blink::WebNodeList& children =
629 const_cast<blink::WebNode&>(node).childNodes();
// Read the child count once, before iterating.
630 size_t length = children.length();
631 for (size_t i = 0; i < length; ++i) {
632 const blink::WebNode& item = children.item(i);
633 if (IsWebNodeVisibleImpl(item, depth - 1))
// Upper bound on the number of fields extracted from a single form.
// WebFormElementToFormData() compares its extracted field count against this
// limit to avoid overly expensive computation.
641 const size_t kMaxParseableFields = 200;
// Returns true if |element| is non-null and its form control type is "month".
643 bool IsMonthInput(const WebInputElement* element) {
644 CR_DEFINE_STATIC_LOCAL(WebString, kMonth, ("month"));
645 return element && element->formControlType() == kMonth;
648 // All text fields, including password fields, should be extracted.
// Returns true if |element| is non-null and reports isTextField().
649 bool IsTextInput(const WebInputElement* element) {
650 return element && element->isTextField();
// Returns true if |element|'s form control type is "select-one".
653 bool IsSelectElement(const WebFormControlElement& element) {
654 // Static for improved performance.
655 CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one"));
656 return element.formControlType() == kSelectOne;
// Returns true if |element|'s form control type is "textarea".
659 bool IsTextAreaElement(const WebFormControlElement& element) {
660 // Static for improved performance.
661 CR_DEFINE_STATIC_LOCAL(WebString, kTextArea, ("textarea"));
662 return element.formControlType() == kTextArea;
// Returns true if |element| is a checkbox or a radio button.
665 bool IsCheckableElement(const WebInputElement* element) {
669 return element->isCheckbox() || element->isRadioButton();
// Returns true if |element| is an input kind Autofill handles: a text input,
// a month input, or a checkable (checkbox / radio button) input.
672 bool IsAutofillableInputElement(const WebInputElement* element) {
673 return IsTextInput(element) ||
674 IsMonthInput(element) ||
675 IsCheckableElement(element);
// Computes an identifier for |form|: its name(), or its "id" attribute when
// the name is empty.
678 const base::string16 GetFormIdentifier(const WebFormElement& form) {
679 base::string16 identifier = form.name();
680 CR_DEFINE_STATIC_LOCAL(WebString, kId, ("id"));
681 if (identifier.empty())
682 identifier = form.getAttribute(kId);
// Returns whether |node|, or one of its descendants within a small fixed
// depth, has a non-empty bounding box (see IsWebNodeVisibleImpl()).
687 bool IsWebNodeVisible(const blink::WebNode& node) {
688 // In the bug http://crbug.com/237216 the form's bounding box is empty
689 // however the form has non empty children. Thus we need to look at the
// node's descendants as well, down to kNodeSearchDepth levels.
691 int kNodeSearchDepth = 2;
692 return IsWebNodeVisibleImpl(node, kNodeSearchDepth);
// Looks up an element in |document| as described by |element_descriptor| -
// by CSS selector via querySelector() or by id via getElementById() - and
// calls simulateClick() on it. Lookup failures are logged at DVLOG(1).
695 bool ClickElement(const WebDocument& document,
696 const WebElementDescriptor& element_descriptor) {
697 WebString web_descriptor = WebString::fromUTF8(element_descriptor.descriptor);
698 blink::WebElement element;
700 switch (element_descriptor.retrieval_method) {
701 case WebElementDescriptor::CSS_SELECTOR: {
// |ec| receives the exception code reported by querySelector().
702 WebExceptionCode ec = 0;
703 element = document.querySelector(web_descriptor, ec);
705 DVLOG(1) << "Query selector failed. Error code: " << ec << ".";
708 case WebElementDescriptor::ID:
709 element = document.getElementById(web_descriptor);
711 case WebElementDescriptor::NONE:
// A null element means the lookup found nothing.
715 if (element.isNull()) {
716 DVLOG(1) << "Could not find "
717 << element_descriptor.descriptor
719 << RetrievalMethodToString(element_descriptor.retrieval_method)
724 element.simulateClick();
728 // Fills |autofillable_elements| with all the auto-fillable form control
729 // elements in |form_element|.
// |autofillable_elements| is cleared first. When |requirements| includes
// REQUIRE_AUTOCOMPLETE, autofillable input elements are additionally checked
// with SatisfiesRequireAutocomplete().
730 void ExtractAutofillableElements(
731 const WebFormElement& form_element,
732 RequirementsMask requirements,
733 std::vector<WebFormControlElement>* autofillable_elements) {
734 WebVector<WebFormControlElement> control_elements;
735 form_element.getFormControlElements(control_elements);
737 autofillable_elements->clear();
738 for (size_t i = 0; i < control_elements.size(); ++i) {
739 WebFormControlElement element = control_elements[i];
740 if (!IsAutofillableElement(element))
743 if (requirements & REQUIRE_AUTOCOMPLETE) {
744 // TODO(isherman): WebKit currently doesn't handle the autocomplete
745 // attribute for select or textarea elements, but it probably should.
746 WebInputElement* input_element = toWebInputElement(&control_elements[i]);
747 if (IsAutofillableInputElement(input_element) &&
748 !SatisfiesRequireAutocomplete(*input_element))
752 autofillable_elements->push_back(element);
// Populates |field| from |element|: name, form control type and autocomplete
// attribute first, then per-type state (autofilled / focusable / direction,
// max length, checked state, select options). |extract_mask| controls the
// optional parts: EXTRACT_OPTIONS (select option strings), EXTRACT_VALUE (the
// current value) and EXTRACT_OPTION_TEXT (use a select's option text as the
// value instead of its option value).
756 void WebFormControlElementToFormField(const WebFormControlElement& element,
757 ExtractMask extract_mask,
758 FormFieldData* field) {
760 DCHECK(!element.isNull());
761 CR_DEFINE_STATIC_LOCAL(WebString, kAutocomplete, ("autocomplete"));
763 // The label is not officially part of a WebFormControlElement; however, the
764 // labels for all form control elements are scraped from the DOM and set in
765 // WebFormElementToFormData.
766 field->name = element.nameForAutofill();
767 field->form_control_type = base::UTF16ToUTF8(element.formControlType());
768 field->autocomplete_attribute =
769 base::UTF16ToUTF8(element.getAttribute(kAutocomplete));
770 if (field->autocomplete_attribute.size() > kMaxDataLength) {
771 // Discard overly long attribute values to avoid DOS-ing the browser
772 // process. However, send over a default string to indicate that the
773 // attribute was present.
774 field->autocomplete_attribute = "x-max-data-length-exceeded";
777 if (!IsAutofillableElement(element))
780 const WebInputElement* input_element = toWebInputElement(&element);
// State recorded for autofillable inputs and textareas only.
781 if (IsAutofillableInputElement(input_element) ||
782 IsTextAreaElement(element)) {
783 field->is_autofilled = element.isAutofilled();
784 field->is_focusable = element.isFocusable();
785 field->should_autocomplete = element.autoComplete();
786 field->text_direction = element.directionForFormData() ==
787 "rtl" ? base::i18n::RIGHT_TO_LEFT : base::i18n::LEFT_TO_RIGHT;
790 if (IsAutofillableInputElement(input_element)) {
// max_length is only recorded for text inputs.
791 if (IsTextInput(input_element))
792 field->max_length = input_element->maxLength();
794 field->is_checkable = IsCheckableElement(input_element);
795 field->is_checked = input_element->isChecked();
796 } else if (IsTextAreaElement(element)) {
797 // Nothing more to do in this case.
798 } else if (extract_mask & EXTRACT_OPTIONS) {
799 // Set option strings on the field if available.
800 DCHECK(IsSelectElement(element));
801 const WebSelectElement select_element = element.toConst<WebSelectElement>();
802 GetOptionStringsFromElement(select_element,
803 &field->option_values,
804 &field->option_contents);
807 if (!(extract_mask & EXTRACT_VALUE))
810 base::string16 value = element.value();
812 if (IsSelectElement(element)) {
813 const WebSelectElement select_element = element.toConst<WebSelectElement>();
814 // Convert the |select_element| value to text if requested.
815 if (extract_mask & EXTRACT_OPTION_TEXT) {
816 WebVector<WebElement> list_items = select_element.listItems();
817 for (size_t i = 0; i < list_items.size(); ++i) {
818 if (IsOptionElement(list_items[i])) {
819 const WebOptionElement option_element =
820 list_items[i].toConst<WebOptionElement>();
// Replace the value with the text of the option whose value matches it.
821 if (option_element.value() == value) {
822 value = option_element.text();
830 // Constrain the maximum data length to prevent a malicious site from DOS'ing
831 // the browser: http://crbug.com/49332
832 if (value.size() > kMaxDataLength)
833 value = value.substr(0, kMaxDataLength);
835 field->value = value;
838 bool WebFormElementToFormData(
839 const blink::WebFormElement& form_element,
840 const blink::WebFormControlElement& form_control_element,
841 RequirementsMask requirements,
842 ExtractMask extract_mask,
844 FormFieldData* field) {
845 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
846 CR_DEFINE_STATIC_LOCAL(WebString, kFor, ("for"));
847 CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));
849 const WebFrame* frame = form_element.document().frame();
853 if (requirements & REQUIRE_AUTOCOMPLETE && !form_element.autoComplete())
856 form->name = GetFormIdentifier(form_element);
857 form->method = form_element.method();
858 form->origin = frame->document().url();
859 form->action = frame->document().completeURL(form_element.action());
860 form->user_submitted = form_element.wasUserSubmitted();
862 // If the completed URL is not valid, just use the action we get from
864 if (!form->action.is_valid())
865 form->action = GURL(form_element.action());
867 // A map from a FormFieldData's name to the FormFieldData itself.
868 std::map<base::string16, FormFieldData*> name_map;
870 // The extracted FormFields. We use pointers so we can store them in
872 ScopedVector<FormFieldData> form_fields;
874 WebVector<WebFormControlElement> control_elements;
875 form_element.getFormControlElements(control_elements);
877 // A vector of bools that indicate whether each field in the form meets the
878 // requirements and thus will be in the resulting |form|.
879 std::vector<bool> fields_extracted(control_elements.size(), false);
881 for (size_t i = 0; i < control_elements.size(); ++i) {
882 const WebFormControlElement& control_element = control_elements[i];
884 if (!IsAutofillableElement(control_element))
887 const WebInputElement* input_element = toWebInputElement(&control_element);
888 if (requirements & REQUIRE_AUTOCOMPLETE &&
889 IsAutofillableInputElement(input_element) &&
890 !SatisfiesRequireAutocomplete(*input_element))
893 // Create a new FormFieldData, fill it out and map it to the field's name.
894 FormFieldData* form_field = new FormFieldData;
895 WebFormControlElementToFormField(control_element, extract_mask, form_field);
896 form_fields.push_back(form_field);
897 // TODO(jhawkins): A label element is mapped to a form control element's id.
898 // field->name() will contain the id only if the name does not exist. Add
899 // an id() method to WebFormControlElement and use that here.
900 name_map[form_field->name] = form_field;
901 fields_extracted[i] = true;
904 // If we failed to extract any fields, give up. Also, to avoid overly
905 // expensive computation, we impose a maximum number of allowable fields.
906 if (form_fields.empty() || form_fields.size() > kMaxParseableFields)
909 // Loop through the label elements inside the form element. For each label
910 // element, get the corresponding form control element, use the form control
911 // element's name as a key into the <name, FormFieldData> map to find the
912 // previously created FormFieldData and set the FormFieldData's label to the
913 // label.firstChild().nodeValue() of the label element.
914 WebElementCollection labels = form_element.getElementsByTagName(kLabel);
915 DCHECK(!labels.isNull());
916 for (WebElement item = labels.firstItem(); !item.isNull();
917 item = labels.nextItem()) {
918 WebLabelElement label = item.to<WebLabelElement>();
919 WebFormControlElement field_element =
920 label.correspondingControl().to<WebFormControlElement>();
922 base::string16 element_name;
923 if (field_element.isNull()) {
924 // Sometimes site authors will incorrectly specify the corresponding
925 // field element's name rather than its id, so we compensate here.
926 element_name = label.getAttribute(kFor);
928 !field_element.isFormControlElement() ||
929 field_element.formControlType() == kHidden) {
932 element_name = field_element.nameForAutofill();
935 std::map<base::string16, FormFieldData*>::iterator iter =
936 name_map.find(element_name);
937 if (iter != name_map.end()) {
938 base::string16 label_text = FindChildText(label);
940 // Concatenate labels because some sites might have multiple label
942 if (!iter->second->label.empty() && !label_text.empty())
943 iter->second->label += base::ASCIIToUTF16(" ");
944 iter->second->label += label_text;
948 // Loop through the form control elements, extracting the label text from
949 // the DOM. We use the |fields_extracted| vector to make sure we assign the
950 // extracted label to the correct field, as it's possible |form_fields| will
951 // not contain all of the elements in |control_elements|.
952 for (size_t i = 0, field_idx = 0;
953 i < control_elements.size() && field_idx < form_fields.size(); ++i) {
954 // This field didn't meet the requirements, so don't try to find a label
956 if (!fields_extracted[i])
959 const WebFormControlElement& control_element = control_elements[i];
960 if (form_fields[field_idx]->label.empty())
961 form_fields[field_idx]->label = InferLabelForElement(control_element);
963 if (field && form_control_element == control_element)
964 *field = *form_fields[field_idx];
969 // Copy the created FormFields into the resulting FormData object.
970 for (ScopedVector<FormFieldData>::const_iterator iter = form_fields.begin();
971 iter != form_fields.end(); ++iter) {
972 form->fields.push_back(**iter);
978 bool FindFormAndFieldForFormControlElement(const WebFormControlElement& element,
980 FormFieldData* field,
981 RequirementsMask requirements) {
982 if (!IsAutofillableElement(element))
985 const WebFormElement form_element = element.form();
986 if (form_element.isNull())
989 ExtractMask extract_mask =
990 static_cast<ExtractMask>(EXTRACT_VALUE | EXTRACT_OPTIONS);
991 return WebFormElementToFormData(form_element,
999 void FillForm(const FormData& form, const WebFormControlElement& element) {
1000 WebFormElement form_element = element.form();
1001 if (form_element.isNull())
1004 ForEachMatchingFormField(form_element,
1007 FILTER_ALL_NON_EDITIABLE_ELEMENTS,
1008 false, /* dont force override */
1012 void FillFormIncludingNonFocusableElements(const FormData& form_data,
1013 const WebFormElement& form_element) {
1014 if (form_element.isNull())
1017 FieldFilterMask filter_mask = static_cast<FieldFilterMask>(
1018 FILTER_DISABLED_ELEMENTS | FILTER_READONLY_ELEMENTS);
1019 ForEachMatchingFormField(form_element,
1023 true, /* force override */
1027 void FillFormForAllElements(const FormData& form_data,
1028 const WebFormElement& form_element) {
1029 if (form_element.isNull())
1032 ForEachMatchingFormField(form_element,
1036 true, /* force override */
1040 void PreviewForm(const FormData& form, const WebFormControlElement& element) {
1041 WebFormElement form_element = element.form();
1042 if (form_element.isNull())
1045 ForEachMatchingFormField(form_element,
1048 FILTER_ALL_NON_EDITIABLE_ELEMENTS,
1049 false, /* dont force override */
1053 bool ClearPreviewedFormWithElement(const WebFormControlElement& element,
1054 bool was_autofilled) {
1055 WebFormElement form_element = element.form();
1056 if (form_element.isNull())
1059 std::vector<WebFormControlElement> control_elements;
1060 ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
1062 for (size_t i = 0; i < control_elements.size(); ++i) {
1063 // There might be unrelated elements in this form which have already been
1064 // auto-filled. For example, the user might have already filled the address
1065 // part of a form and now be dealing with the credit card section. We only
1066 // want to reset the auto-filled status for fields that were previewed.
1067 WebFormControlElement control_element = control_elements[i];
1069 // Only text input, textarea and select elements can be previewed.
1070 WebInputElement* input_element = toWebInputElement(&control_element);
1071 if (!IsTextInput(input_element) &&
1072 !IsMonthInput(input_element) &&
1073 !IsTextAreaElement(control_element) &&
1074 !IsSelectElement(control_element))
1077 // If the element is not auto-filled, we did not preview it,
1078 // so there is nothing to reset.
1079 if(!control_element.isAutofilled())
1082 if ((IsTextInput(input_element) ||
1083 IsMonthInput(input_element) ||
1084 IsTextAreaElement(control_element) ||
1085 IsSelectElement(control_element)) &&
1086 control_element.suggestedValue().isEmpty())
1089 // Clear the suggested value. For the initiating node, also restore the
1091 if (IsTextInput(input_element) || IsMonthInput(input_element) ||
1092 IsTextAreaElement(control_element)) {
1093 control_element.setSuggestedValue(WebString());
1094 bool is_initiating_node = (element == control_element);
1095 if (is_initiating_node) {
1096 control_element.setAutofilled(was_autofilled);
1097 // Clearing the suggested value in the focused node (above) can cause
1098 // selection to be lost. We force selection range to restore the text
1100 int length = control_element.value().length();
1101 control_element.setSelectionRange(length, length);
1103 control_element.setAutofilled(false);
1105 } else if (IsSelectElement(control_element)) {
1106 control_element.setSuggestedValue(WebString());
1107 control_element.setAutofilled(false);
1114 bool FormWithElementIsAutofilled(const WebInputElement& element) {
1115 WebFormElement form_element = element.form();
1116 if (form_element.isNull())
1119 std::vector<WebFormControlElement> control_elements;
1120 ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
1122 for (size_t i = 0; i < control_elements.size(); ++i) {
1123 WebInputElement* input_element = toWebInputElement(&control_elements[i]);
1124 if (!IsAutofillableInputElement(input_element))
1127 if (input_element->isAutofilled())
1134 bool IsWebpageEmpty(const blink::WebFrame* frame) {
1135 blink::WebDocument document = frame->document();
1137 return IsWebElementEmpty(document.head()) &&
1138 IsWebElementEmpty(document.body());
1141 bool IsWebElementEmpty(const blink::WebElement& element) {
1142 // This array contains all tags which can be present in an empty page.
1143 const char* const kAllowedValue[] = {
1148 const size_t kAllowedValueLength = arraysize(kAllowedValue);
1150 if (element.isNull())
1152 // The childNodes method is not a const method. Therefore it cannot be called
1153 // on a const reference. Therefore we need a const cast.
1154 const blink::WebNodeList& children =
1155 const_cast<blink::WebElement&>(element).childNodes();
1156 for (size_t i = 0; i < children.length(); ++i) {
1157 const blink::WebNode& item = children.item(i);
1159 if (item.isTextNode() &&
1160 !base::ContainsOnlyChars(item.nodeValue().utf8(),
1161 base::kWhitespaceASCII))
1164 // We ignore all other items with names which begin with
1165 // the character # because they are not html tags.
1166 if (item.nodeName().utf8()[0] == '#')
1169 bool tag_is_allowed = false;
1170 // Test if the item name is in the kAllowedValue array
1171 for (size_t allowed_value_index = 0;
1172 allowed_value_index < kAllowedValueLength; ++allowed_value_index) {
1173 if (HasTagName(item,
1174 WebString::fromUTF8(kAllowedValue[allowed_value_index]))) {
1175 tag_is_allowed = true;
1179 if (!tag_is_allowed)
1185 gfx::RectF GetScaledBoundingBox(float scale, WebFormControlElement* element) {
1186 gfx::Rect bounding_box(element->boundsInViewportSpace());
1187 return gfx::RectF(bounding_box.x() * scale,
1188 bounding_box.y() * scale,
1189 bounding_box.width() * scale,
1190 bounding_box.height() * scale);
1193 } // namespace autofill