1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
6 #include "base/command_line.h"
7 #include "base/compiler_specific.h"
8 #include "base/containers/hash_tables.h"
9 #include "base/file_util.h"
10 #include "base/files/file_path.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "content/public/common/content_switches.h"
14 #include "content/public/renderer/render_view.h"
15 #include "content/public/renderer/render_view_observer.h"
16 #include "content/public/test/test_utils.h"
17 #include "content/renderer/savable_resources.h"
18 #include "content/shell/browser/shell.h"
19 #include "content/test/content_browser_test.h"
20 #include "content/test/content_browser_test_utils.h"
21 #include "net/base/net_util.h"
22 #include "net/url_request/url_request_context.h"
23 #include "third_party/WebKit/public/platform/WebCString.h"
24 #include "third_party/WebKit/public/platform/WebData.h"
25 #include "third_party/WebKit/public/platform/WebString.h"
26 #include "third_party/WebKit/public/platform/WebURL.h"
27 #include "third_party/WebKit/public/platform/WebVector.h"
28 #include "third_party/WebKit/public/web/WebDocument.h"
29 #include "third_party/WebKit/public/web/WebElement.h"
30 #include "third_party/WebKit/public/web/WebElementCollection.h"
31 #include "third_party/WebKit/public/web/WebFrame.h"
32 #include "third_party/WebKit/public/web/WebNode.h"
33 #include "third_party/WebKit/public/web/WebNodeList.h"
34 #include "third_party/WebKit/public/web/WebPageSerializer.h"
35 #include "third_party/WebKit/public/web/WebPageSerializerClient.h"
36 #include "third_party/WebKit/public/web/WebView.h"
38 using blink::WebCString;
40 using blink::WebDocument;
41 using blink::WebElement;
42 using blink::WebElementCollection;
43 using blink::WebFrame;
45 using blink::WebNodeList;
46 using blink::WebPageSerializer;
47 using blink::WebPageSerializerClient;
48 using blink::WebString;
51 using blink::WebVector;
// Routing ID of the RenderView under test; GetRenderView() passes it to
// RenderView::FromRoutingID().
55 // The first RenderFrame is routing ID 1, and the first RenderView is 2.
56 const int kRenderViewRoutingId = 2;
62 // Walk the frame tree of |web_view|, starting at its main frame, looking for
// a frame whose document URL equals |url|. Pending frames are kept on an
// explicit stack instead of using recursion.
63 WebFrame* FindSubFrameByURL(WebView* web_view, const GURL& url) {
// Guard against a view that has no main frame.
64 if (!web_view->mainFrame())
67 std::vector<WebFrame*> stack;
68 stack.push_back(web_view->mainFrame());
70 while (!stack.empty()) {
// Examine the most recently pushed frame.
71 WebFrame* current_frame = stack.back();
// A frame whose document URL matches |url| is the one being searched for.
73 if (GURL(current_frame->document().url()) == url)
// Otherwise scan every element of the document for child-frame owners.
75 WebElementCollection all = current_frame->document().all();
76 for (WebElement element = all.firstItem();
77 !element.isNull(); element = all.nextItem()) {
78 // Only <frame> and <iframe> elements are of interest here.
79 if (!element.hasTagName("frame") && !element.hasTagName("iframe"))
// Resolve the frame owned by the element and queue it for a later visit.
81 WebFrame* sub_frame = WebFrame::fromFrameOwnerElement(element);
83 stack.push_back(sub_frame);
89 // Helper function that tests whether the first node in |doc| is a document
// type node.
91 bool HasDocType(const WebDocument& doc) {
// Only the first child of the document is inspected.
92 WebNode node = doc.firstChild();
95 return node.nodeType() == WebNode::DocumentTypeNode;
98 // Helper function for checking whether the input node is a META element.
99 // |charset_info| is an output parameter: it is cleared once |node| is known
100 // to be a META element and later receives the parsed charset value.
// While parsing, the placeholder "has-charset-declaration" is stored to signal
// that a charset declaration exists even if its value has not been extracted.
101 bool IsMetaElement(const WebNode& node, std::string& charset_info) {
// Non-element nodes cannot be META tags.
102 if (!node.isElementNode())
104 const WebElement meta = node.toConst<WebElement>();
105 if (!meta.hasTagName("meta"))
// Reset the output parameter before parsing.
107 charset_info.erase(0, charset_info.length());
108 // Check the META charset declaration.
109 WebString httpEquiv = meta.getAttribute("http-equiv");
110 if (LowerCaseEqualsASCII(httpEquiv, "content-type")) {
111 std::string content = meta.getAttribute("content").utf8();
// NOTE(review): std::string::find returns std::string::size_type; storing it
// in an int narrows the value, so any comparison against std::string::npos
// relies on an implicit conversion -- confirm this is intended.
112 int pos = content.find("charset", 0);
114 // Add a dummy charset declaration to charset_info, which indicates this
115 // META tag has charset declaration although we do not get correct value
// yet.
117 charset_info.append("has-charset-declaration");
// 7 is the length of the string "charset".
118 int remaining_length = content.length() - pos - 7;
119 if (!remaining_length)
121 int start_pos = pos + 7;
// Scan forward for the '=' that follows "charset".
// NOTE(review): |content| is a narrow std::string, yet the comparisons below
// use wide literals (L'=' and L';'); plain '=' and ';' would match the element
// type.
123 while (remaining_length--)
124 if (content[start_pos++] == L'=')
126 // Skip beginning space.
127 while (remaining_length) {
// Characters above 0x0020 (space) are treated as the start of the value.
128 if (content[start_pos] > 0x0020)
133 if (!remaining_length)
135 int end_pos = start_pos;
136 // Now we find out the start point of charset info. Search the end point.
137 while (remaining_length--) {
// The value ends at the first space/control character or ';'.
138 if (content[end_pos] <= 0x0020 || content[end_pos] == L';')
142 // Get actual charset info.
143 charset_info = content.substr(start_pos, end_pos - start_pos);
// RenderViewObserver that stores a caller-supplied closure and watches for the
// DidFinishLoad notification of the view's main frame.
// NOTE(review): presumably the closure is run when the main frame finishes
// loading; the statement guarded by the check below is not visible here --
// confirm.
150 class LoadObserver : public RenderViewObserver {
// |render_view| is the view to observe; |quit_closure| is copied into
// quit_closure_.
152 LoadObserver(RenderView* render_view, const base::Closure& quit_closure)
153 : RenderViewObserver(render_view),
154 quit_closure_(quit_closure) {}
// Load-finished notification; only the main frame of the observed view is of
// interest.
156 virtual void DidFinishLoad(blink::WebFrame* frame) OVERRIDE {
157 if (frame == render_view()->GetWebView()->mainFrame())
// Closure handed in at construction time.
162 base::Closure quit_closure_;
// Browser-test fixture for the DOM serializer. It derives from
// WebPageSerializerClient as well, and passes itself to
// WebPageSerializer::serialize() in SerializeDomForURL().
165 class DomSerializerTests : public ContentBrowserTest,
166 public WebPageSerializerClient {
// Nothing has been serialized yet; "./dummy_files/" is the dummy directory
// name handed to the serializer.
169 : serialized_(false),
170 local_directory_name_(FILE_PATH_LITERAL("./dummy_files/")) {}
// Adds the switches these tests need to |command_line|: single-process mode
// and disabled accelerated compositing.
172 virtual void SetUpCommandLine(CommandLine* command_line) OVERRIDE {
// The tests post their work to the in-process renderer
// (see PostTaskToInProcessRendererAndWait in the test bodies).
173 command_line->AppendSwitch(switches::kSingleProcess);
175 // Don't want to try to create a GPU process.
176 command_line->AppendSwitch(switches::kDisableAcceleratedCompositing);
180 // Serializer callback (this class derives from WebPageSerializerClient).
// Receives a chunk of serialized |data| for the frame at |frame_web_url|.
// |status| tells whether the current frame, or all frames, have finished.
// Chunks are accumulated per frame URL in serialized_frame_map_, and the
// per-frame finish state is tracked in serialization_finish_status_.
181 virtual void didSerializeDataForFrame(const WebURL& frame_web_url,
182 const WebCString& data,
183 PageSerializationStatus status) {
185 GURL frame_url(frame_web_url);
186 // If all frames are finished saving, check every frame's finish status.
187 if (status == WebPageSerializerClient::AllFramesAreFinished) {
188 SerializationFinishStatusMap::iterator it =
189 serialization_finish_status_.begin();
// Every frame seen so far must already be marked as finished.
190 for (; it != serialization_finish_status_.end(); ++it)
191 ASSERT_TRUE(it->second);
196 // Check finish status of current frame.
197 SerializationFinishStatusMap::iterator it =
198 serialization_finish_status_.find(frame_url.spec());
199 // New frame, set initial status as false.
200 if (it == serialization_finish_status_.end())
201 serialization_finish_status_[frame_url.spec()] = false;
// Look the entry up again; it must exist at this point.
203 it = serialization_finish_status_.find(frame_url.spec());
204 ASSERT_TRUE(it != serialization_finish_status_.end());
205 // A frame that is still in progress must have a finish status of false.
206 ASSERT_FALSE(it->second);
208 // Add data to corresponding frame's content.
209 serialized_frame_map_[frame_url.spec()] += data.data();
211 // Current frame is completed saving, change the finish status.
212 if (status == WebPageSerializerClient::CurrentFrameIsFinished)
// Returns true if serialized content has been recorded for |frame_url|
// (keyed by the URL's spec()).
216 bool HasSerializedFrame(const GURL& frame_url) {
217 return serialized_frame_map_.find(frame_url.spec()) !=
218 serialized_frame_map_.end();
// Returns a reference to the serialized content recorded for |frame_url|.
// The lookup uses operator[], which with standard map semantics creates an
// empty entry for an unknown URL; callers in this file check
// HasSerializedFrame() first.
221 const std::string& GetSerializedContentForFrame(
222 const GURL& frame_url) {
223 return serialized_frame_map_[frame_url.spec()];
// Returns the RenderView registered under kRenderViewRoutingId.
226 RenderView* GetRenderView() {
227 // We could have the test on the UI thread get the WebContent's routing ID,
228 // but we know this will be the first RV so skip that and just hardcode it.
229 return RenderView::FromRoutingID(kRenderViewRoutingId);
// Returns the WebView of the RenderView under test.
// NOTE(review): the result of GetRenderView() is dereferenced without a null
// check.
232 WebView* GetWebView() {
233 return GetRenderView()->GetWebView();
// Returns the main frame of the WebView under test.
236 WebFrame* GetMainFrame() {
237 return GetWebView()->mainFrame();
240 // Load a web page into the main frame from the in-memory |contents|.
// |base_url| is handed to the loader as the base URL, and |encoding_info|
// names the encoding of |contents| (empty means UTF-8).
// A LoadObserver is created with the quit closure of a MessageLoopRunner.
// NOTE(review): presumably the runner is then run until the load finishes; the
// corresponding statement is not visible here -- confirm.
242 void LoadContents(const std::string& contents,
243 const GURL& base_url,
244 const WebString encoding_info) {
245 scoped_refptr<MessageLoopRunner> runner = new MessageLoopRunner;
246 LoadObserver observer(GetRenderView(), runner->QuitClosure());
248 // If input encoding is empty, use UTF-8 as default encoding.
249 if (encoding_info.isEmpty()) {
250 GetMainFrame()->loadHTMLString(contents, base_url);
// Otherwise wrap the raw bytes so they can be loaded with an explicit
// encoding.
252 WebData data(contents.data(), contents.length());
254 // Do not use WebFrame.LoadHTMLString because it assumes that input
255 // html contents use UTF-8 encoding.
// NOTE(review): the call below already uses loadData, so the TODO on the next
// line may be stale -- confirm.
256 // TODO(darin): This should use WebFrame::loadData.
257 WebFrame* web_frame = GetMainFrame();
259 ASSERT_TRUE(web_frame != NULL);
261 web_frame->loadData(data, "text/html", encoding_info, base_url);
267 // Serialize the DOM of the frame whose URL is |page_url|. The parameter
268 // recursive_serialization indicates whether we will serialize all
// sub-frames as well; it is forwarded to WebPageSerializer::serialize().
// This object is passed as the serializer client, and serialized_ is asserted
// to be true once serialization has run.
270 void SerializeDomForURL(const GURL& page_url,
271 bool recursive_serialization) {
272 // Find corresponding WebFrame according to page_url.
273 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), page_url);
274 ASSERT_TRUE(web_frame != NULL);
// One-element link list holding just |page_url|.
275 WebVector<WebURL> links;
276 links.assign(&page_url, 1);
// Matching one-element list with a dummy local path.
277 WebString file_path =
278 base::FilePath(FILE_PATH_LITERAL("c:\\dummy.htm")).AsUTF16Unsafe();
279 WebVector<WebString> local_paths;
280 local_paths.assign(&file_path, 1);
281 // Start serializing DOM.
282 bool result = WebPageSerializer::serialize(web_frame,
283 recursive_serialization,
284 static_cast<WebPageSerializerClient*>(this),
287 local_directory_name_.AsUTF16Unsafe());
289 ASSERT_TRUE(serialized_);
// Checks that a document type survives a serialize/reload round trip:
// the document at |file_url| must have a doc type, and so must the document
// obtained by loading its serialized contents into the main frame.
292 void SerializeHTMLDOMWithDocTypeOnRenderer(const GURL& file_url) {
293 // Make sure original contents have document type.
294 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
295 ASSERT_TRUE(web_frame != NULL);
296 WebDocument doc = web_frame->document();
297 ASSERT_TRUE(HasDocType(doc));
// Serialize only this frame (non-recursive).
299 SerializeDomForURL(file_url, false);
300 // Load the serialized contents.
301 ASSERT_TRUE(HasSerializedFrame(file_url));
302 const std::string& serialized_contents =
303 GetSerializedContentForFrame(file_url);
// Reload using the encoding reported by the frame's document.
304 LoadContents(serialized_contents, file_url,
305 web_frame->document().encoding());
306 // Make sure serialized contents still have document type.
307 web_frame = GetMainFrame();
308 doc = web_frame->document();
309 ASSERT_TRUE(HasDocType(doc));
// Checks that serialization does not introduce a document type: the document
// at |file_url| must have none, and neither may the document obtained by
// loading its serialized contents into the main frame.
312 void SerializeHTMLDOMWithoutDocTypeOnRenderer(const GURL& file_url) {
313 // Make sure original contents do not have document type.
314 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
315 ASSERT_TRUE(web_frame != NULL);
316 WebDocument doc = web_frame->document();
317 ASSERT_TRUE(!HasDocType(doc));
// Serialize only this frame (non-recursive).
319 SerializeDomForURL(file_url, false);
320 // Load the serialized contents.
321 ASSERT_TRUE(HasSerializedFrame(file_url));
322 const std::string& serialized_contents =
323 GetSerializedContentForFrame(file_url);
// Reload using the encoding reported by the frame's document.
324 LoadContents(serialized_contents, file_url,
325 web_frame->document().encoding());
326 // Make sure serialized contents do not have document type.
327 web_frame = GetMainFrame();
328 doc = web_frame->document();
329 ASSERT_TRUE(!HasDocType(doc));
// Serializes the frame at |xml_file_url| and asserts that the output is
// identical to |original_contents|, which the calling test reads from the XML
// file on disk.
332 void SerializeXMLDocWithBuiltInEntitiesOnRenderer(
333 const GURL& xml_file_url, const std::string& original_contents) {
// Serialize only this frame (non-recursive).
335 SerializeDomForURL(xml_file_url, false);
336 // Compare the serialized contents with original contents.
337 ASSERT_TRUE(HasSerializedFrame(xml_file_url));
338 const std::string& serialized_contents =
339 GetSerializedContentForFrame(xml_file_url);
340 ASSERT_EQ(original_contents, serialized_contents);
// Checks that the serializer adds the Mark of the Web (MOTW) declaration:
// the declaration generated for |file_url| must be absent from
// |original_contents| and present in the serialized output.
343 void SerializeHTMLDOMWithAddingMOTWOnRenderer(
344 const GURL& file_url, const std::string& original_contents) {
345 // Make sure the original contents do not have MOTW.
346 std::string motw_declaration =
347 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8();
348 ASSERT_FALSE(motw_declaration.empty());
349 // The encoding of original contents is ISO-8859-1, so we convert the MOTW
350 // declaration to ASCII and search whether original contents has it or not.
351 ASSERT_TRUE(std::string::npos == original_contents.find(motw_declaration));
// Serialize only this frame (non-recursive).
354 SerializeDomForURL(file_url, false);
355 // Make sure the serialized contents have MOTW.
356 ASSERT_TRUE(HasSerializedFrame(file_url));
357 const std::string& serialized_contents =
358 GetSerializedContentForFrame(file_url);
359 ASSERT_FALSE(std::string::npos ==
360 serialized_contents.find(motw_declaration));
// Checks that a document without any META charset declaration gains exactly
// one when serialized: after reloading the serialized contents, the first
// child of HEAD must be a META whose charset equals the document encoding,
// and no later sibling may declare a charset.
363 void SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer(
364 const GURL& file_url) {
365 // Make sure there is no META charset declaration in original document.
366 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
367 ASSERT_TRUE(web_frame != NULL);
368 WebDocument doc = web_frame->document();
369 ASSERT_TRUE(doc.isHTMLDocument());
370 WebElement head_element = doc.head();
371 ASSERT_TRUE(!head_element.isNull());
372 // Go through all children of HEAD element.
373 for (WebNode child = head_element.firstChild(); !child.isNull();
374 child = child.nextSibling()) {
375 std::string charset_info;
// A META element is allowed, but it must not carry charset information.
376 if (IsMetaElement(child, charset_info))
377 ASSERT_TRUE(charset_info.empty());
// Serialize only this frame (non-recursive).
380 SerializeDomForURL(file_url, false);
382 // Load the serialized contents.
383 ASSERT_TRUE(HasSerializedFrame(file_url));
384 const std::string& serialized_contents =
385 GetSerializedContentForFrame(file_url);
386 LoadContents(serialized_contents, file_url,
387 web_frame->document().encoding());
388 // Make sure the first child of HEAD element is META which has charset
389 // declaration in serialized contents.
390 web_frame = GetMainFrame();
391 ASSERT_TRUE(web_frame != NULL);
392 doc = web_frame->document();
393 ASSERT_TRUE(doc.isHTMLDocument());
394 head_element = doc.head();
395 ASSERT_TRUE(!head_element.isNull());
396 WebNode meta_node = head_element.firstChild();
397 ASSERT_TRUE(!meta_node.isNull());
398 // Get meta charset info.
399 std::string charset_info2;
400 ASSERT_TRUE(IsMetaElement(meta_node, charset_info2));
401 ASSERT_TRUE(!charset_info2.empty());
// The declared charset must match the encoding of the reloaded document.
402 ASSERT_EQ(charset_info2,
403 std::string(web_frame->document().encoding().utf8()));
405 // Make sure there are no additional META tags with a charset declaration.
406 for (WebNode child = meta_node.nextSibling(); !child.isNull();
407 child = child.nextSibling()) {
408 std::string charset_info;
409 if (IsMetaElement(child, charset_info))
410 ASSERT_TRUE(charset_info.empty());
// Checks that a document with several META charset declarations ends up with
// exactly one after serialization: after reloading the serialized contents,
// the first child of HEAD must be a META whose charset equals the document
// encoding, and no later sibling may declare a charset.
414 void SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer(
415 const GURL& file_url) {
416 // Make sure there are multiple META charset declarations in original
// document.
418 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
419 ASSERT_TRUE(web_frame != NULL);
420 WebDocument doc = web_frame->document();
421 ASSERT_TRUE(doc.isHTMLDocument());
422 WebElement head_ele = doc.head();
423 ASSERT_TRUE(!head_ele.isNull());
424 // Go through all children of HEAD element.
425 int charset_declaration_count = 0;
426 for (WebNode child = head_ele.firstChild(); !child.isNull();
427 child = child.nextSibling()) {
428 std::string charset_info;
// Count only META elements that actually carry charset information.
429 if (IsMetaElement(child, charset_info) && !charset_info.empty())
430 charset_declaration_count++;
432 // The original doc has more than one META tag with a charset declaration.
433 ASSERT_TRUE(charset_declaration_count > 1);
// Serialize only this frame (non-recursive).
436 SerializeDomForURL(file_url, false);
438 // Load the serialized contents.
439 ASSERT_TRUE(HasSerializedFrame(file_url));
440 const std::string& serialized_contents =
441 GetSerializedContentForFrame(file_url);
442 LoadContents(serialized_contents, file_url,
443 web_frame->document().encoding());
444 // Make sure only first child of HEAD element is META which has charset
445 // declaration in serialized contents.
446 web_frame = GetMainFrame();
447 ASSERT_TRUE(web_frame != NULL);
448 doc = web_frame->document();
449 ASSERT_TRUE(doc.isHTMLDocument());
450 head_ele = doc.head();
451 ASSERT_TRUE(!head_ele.isNull());
452 WebNode meta_node = head_ele.firstChild();
453 ASSERT_TRUE(!meta_node.isNull());
454 // Get meta charset info.
455 std::string charset_info2;
456 ASSERT_TRUE(IsMetaElement(meta_node, charset_info2));
457 ASSERT_TRUE(!charset_info2.empty());
// The declared charset must match the encoding of the reloaded document.
458 ASSERT_EQ(charset_info2,
459 std::string(web_frame->document().encoding().utf8()));
461 // Make sure there are no additional META tags with a charset declaration.
462 for (WebNode child = meta_node.nextSibling(); !child.isNull();
463 child = child.nextSibling()) {
464 std::string charset_info;
465 if (IsMetaElement(child, charset_info))
466 ASSERT_TRUE(charset_info.empty());
// Loads a small in-memory HTML document whose BODY text contains special
// characters, serializes it, and compares the output with the expected string
// built from the MOTW declaration, the original contents and (when the
// document has a HEAD) a generated <head> section.
470 void SerializeHTMLDOMWithEntitiesInTextOnRenderer() {
471 base::FilePath page_file_path = GetTestFilePath(
472 "dom_serializer", "dom_serializer/htmlentities_in_text.htm");
473 // Get file URL. The URL is dummy URL to identify the following loading
474 // actions. The test content is in constant:original_contents.
475 GURL file_url = net::FilePathToFileURL(page_file_path);
476 ASSERT_TRUE(file_url.SchemeIsFile());
478 static const char* const original_contents =
479 "<html><body>&<>\"\'</body></html>";
480 // Load the test contents.
481 LoadContents(original_contents, file_url, WebString());
483 // Get BODY's text content in DOM.
484 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
485 ASSERT_TRUE(web_frame != NULL);
486 WebDocument doc = web_frame->document();
487 ASSERT_TRUE(doc.isHTMLDocument());
488 WebElement body_ele = doc.body();
489 ASSERT_TRUE(!body_ele.isNull());
490 WebNode text_node = body_ele.firstChild();
491 ASSERT_TRUE(text_node.isTextNode());
492 ASSERT_TRUE(std::string(text_node.createMarkup().utf8()) ==
493 "&<>\"\'");
// Serialize only this frame (non-recursive).
495 SerializeDomForURL(file_url, false);
496 // Compare the serialized contents with original contents.
497 ASSERT_TRUE(HasSerializedFrame(file_url));
498 const std::string& serialized_contents =
499 GetSerializedContentForFrame(file_url);
500 // Compare the serialized contents with original contents to make sure
// they are the same.
502 // Because we add MOTW when serializing DOM, so before comparison, we also
503 // need to add MOTW to original_contents.
504 std::string original_str =
505 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8();
506 original_str += original_contents;
507 // Since WebCore now inserts a new HEAD element if there is no HEAD element
508 // when creating BODY element. (Please see
509 // HTMLParser::bodyCreateErrorCheck.) We need to append the HEAD content and
510 // corresponding META content if we find WebCore-generated HEAD element.
511 if (!doc.head().isNull()) {
512 WebString encoding = web_frame->document().encoding();
// The generated <head> section is inserted right after the "<html>" tag.
513 std::string htmlTag("<html>");
514 std::string::size_type pos = original_str.find(htmlTag);
515 ASSERT_NE(std::string::npos, pos);
516 pos += htmlTag.length();
517 std::string head_part("<head>");
519 WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8();
520 head_part += "</head>";
521 original_str.insert(pos, head_part);
523 ASSERT_EQ(original_str, serialized_contents);
// Loads a small in-memory HTML document whose BODY title attribute contains
// special characters, serializes it, and compares the output with the
// expected string built from the MOTW declaration, the original contents and
// a generated <head> section.
526 void SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer() {
527 base::FilePath page_file_path = GetTestFilePath(
528 "dom_serializer", "dom_serializer/htmlentities_in_attribute_value.htm");
529 // Get file URL. The URL is dummy URL to identify the following loading
530 // actions. The test content is in constant:original_contents.
531 GURL file_url = net::FilePathToFileURL(page_file_path);
532 ASSERT_TRUE(file_url.SchemeIsFile());
// NOTE(review): the quoting of the literal below looks inconsistent
// (an unescaped double quote inside the string) -- confirm against the
// upstream file.
534 static const char* const original_contents =
535 "<html><body title=\"&<>"'\"></body></html>";
536 // Load the test contents.
537 LoadContents(original_contents, file_url, WebString());
538 // Get value of BODY's title attribute in DOM.
539 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
540 ASSERT_TRUE(web_frame != NULL);
541 WebDocument doc = web_frame->document();
542 ASSERT_TRUE(doc.isHTMLDocument());
543 WebElement body_ele = doc.body();
544 ASSERT_TRUE(!body_ele.isNull());
545 WebString value = body_ele.getAttribute("title");
546 ASSERT_TRUE(std::string(value.utf8()) == "&<>\"\'");
// Serialize only this frame (non-recursive).
548 SerializeDomForURL(file_url, false);
549 // Compare the serialized contents with original contents.
550 ASSERT_TRUE(HasSerializedFrame(file_url));
551 const std::string& serialized_contents =
552 GetSerializedContentForFrame(file_url);
553 // Compare the serialized contents with original contents to make sure
// they are the same. The expected string starts with the MOTW declaration.
555 std::string original_str =
556 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8();
557 original_str += original_contents;
559 WebString encoding = web_frame->document().encoding();
// The generated <head> section is inserted right after the "<html>" tag.
560 std::string htmlTag("<html>");
561 std::string::size_type pos = original_str.find(htmlTag);
562 ASSERT_NE(std::string::npos, pos);
563 pos += htmlTag.length();
564 std::string head_part("<head>");
566 WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8();
567 head_part += "</head>";
568 original_str.insert(pos, head_part);
570 ASSERT_EQ(original_str, serialized_contents);
// Checks the parsed value of BODY's title attribute and inner text for the
// document at |file_url|, serializes the frame, and asserts that a set of
// strings does not occur in the serialized output.
// NOTE(review): unlike the sibling helpers, |web_frame| and |body_element|
// are used without a preceding null check.
573 void SerializeHTMLDOMWithNonStandardEntitiesOnRenderer(const GURL& file_url) {
574 // Get value of BODY's title attribute in DOM.
575 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
576 WebDocument doc = web_frame->document();
577 ASSERT_TRUE(doc.isHTMLDocument());
578 WebElement body_element = doc.body();
579 // Unescaped string for "%⊅¹'".
580 static const wchar_t parsed_value[] = {
581 '%', 0x2285, 0x00b9, '\'', 0
583 WebString value = body_element.getAttribute("title");
584 ASSERT_TRUE(base::UTF16ToWide(value) == parsed_value);
585 ASSERT_TRUE(base::UTF16ToWide(body_element.innerText()) == parsed_value);
// Serialize only this frame (non-recursive).
588 SerializeDomForURL(file_url, false);
589 // Check the serialized string.
590 ASSERT_TRUE(HasSerializedFrame(file_url));
591 const std::string& serialized_contents =
592 GetSerializedContentForFrame(file_url);
593 // Confirm that the serialized string has no non-standard HTML entities.
// NOTE(review): the literals searched for below are the plain characters, not
// entity names -- confirm these are the intended search strings.
594 ASSERT_EQ(std::string::npos, serialized_contents.find("%"));
595 ASSERT_EQ(std::string::npos, serialized_contents.find("⊅"));
596 ASSERT_EQ(std::string::npos, serialized_contents.find("¹"));
597 ASSERT_EQ(std::string::npos, serialized_contents.find("'"));
// Checks BASE tag handling for the document at |file_url|.
// Before serialization: the document has kTotalBaseTagCountInTestFile BASE
// tags, every link is relative, and the base URL differs from |path_dir_url|.
// After serializing and reloading: every link is absolute, there is one more
// BASE tag than before, and the base URL equals |path_dir_url|.
600 void SerializeHTMLDOMWithBaseTagOnRenderer(const GURL& file_url,
601 const GURL& path_dir_url) {
602 // There are total 2 available base tags in this test file.
603 const int kTotalBaseTagCountInTestFile = 2;
605 // Since for this test, we assume there is no savable sub-resource links for
606 // this test file, also all links are relative URLs in this test file, so we
607 // need to check those relative URLs and make sure document has BASE tag.
608 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
609 ASSERT_TRUE(web_frame != NULL);
610 WebDocument doc = web_frame->document();
611 ASSERT_TRUE(doc.isHTMLDocument());
612 // Go through all descendant elements.
613 WebElementCollection all = doc.all();
614 int original_base_tag_count = 0;
615 for (WebElement element = all.firstItem(); !element.isNull();
616 element = all.nextItem()) {
617 if (element.hasTagName("base")) {
618 original_base_tag_count++;
// For other elements, pick up the sub-resource link, falling back to the
// href attribute of anchors.
621 WebString value = GetSubResourceLinkFromElement(element);
622 if (value.isNull() && element.hasTagName("a")) {
623 value = element.getAttribute("href");
627 // Each link is relative link.
628 if (!value.isNull()) {
629 GURL link(value.utf8());
630 ASSERT_TRUE(link.scheme().empty());
634 ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile);
635 // Make sure in original document, the base URL is not equal to the
// |path_dir_url|.
637 GURL original_base_url(doc.baseURL());
638 ASSERT_NE(original_base_url, path_dir_url);
// Serialize only this frame (non-recursive).
641 SerializeDomForURL(file_url, false);
643 // Load the serialized contents.
644 ASSERT_TRUE(HasSerializedFrame(file_url));
645 const std::string& serialized_contents =
646 GetSerializedContentForFrame(file_url);
647 LoadContents(serialized_contents, file_url,
648 web_frame->document().encoding());
650 // Make sure all links are absolute URLs and that there are some number of
651 // BASE tags in serialized HTML data. Each of those BASE tags have same base
652 // URL which is as same as URL of current test file.
653 web_frame = GetMainFrame();
654 ASSERT_TRUE(web_frame != NULL);
655 doc = web_frame->document();
656 ASSERT_TRUE(doc.isHTMLDocument());
657 // Go through all descendant elements.
659 int new_base_tag_count = 0;
660 for (WebNode node = all.firstItem(); !node.isNull();
661 node = all.nextItem()) {
662 if (!node.isElementNode())
664 WebElement element = node.to<WebElement>();
665 if (element.hasTagName("base")) {
666 new_base_tag_count++;
// Same link lookup as in the first pass.
669 WebString value = GetSubResourceLinkFromElement(element);
670 if (value.isNull() && element.hasTagName("a")) {
671 value = element.getAttribute("href");
675 // Each link is absolute link.
676 if (!value.isNull()) {
677 GURL link(std::string(value.utf8()));
678 ASSERT_FALSE(link.scheme().empty());
682 // We have one more added BASE tag which is generated by JavaScript.
683 ASSERT_EQ(new_base_tag_count, original_base_tag_count + 1);
684 // Make sure in new document, the base URL is equal with the |path_dir_url|.
685 GURL new_base_url(doc.baseURL());
686 ASSERT_EQ(new_base_url, path_dir_url);
// Loads an in-memory document with an empty HEAD, serializes it, reloads the
// serialized contents and checks that HEAD now holds exactly one child: a
// META element whose charset equals the document encoding. Also checks that
// the BODY text is still "hello world".
689 void SerializeHTMLDOMWithEmptyHeadOnRenderer() {
690 base::FilePath page_file_path = GetTestFilePath(
691 "dom_serializer", "empty_head.htm");
692 GURL file_url = net::FilePathToFileURL(page_file_path);
693 ASSERT_TRUE(file_url.SchemeIsFile());
695 // Load the test html content.
696 static const char* const empty_head_contents =
697 "<html><head></head><body>hello world</body></html>";
698 LoadContents(empty_head_contents, file_url, WebString());
700 // Make sure the head tag is empty.
701 WebFrame* web_frame = GetMainFrame();
702 ASSERT_TRUE(web_frame != NULL);
703 WebDocument doc = web_frame->document();
704 ASSERT_TRUE(doc.isHTMLDocument());
705 WebElement head_element = doc.head();
706 ASSERT_TRUE(!head_element.isNull());
707 ASSERT_TRUE(!head_element.hasChildNodes());
708 ASSERT_TRUE(head_element.childNodes().length() == 0);
// Serialize only this frame (non-recursive).
711 SerializeDomForURL(file_url, false);
712 // Make sure the serialized contents have META.
713 ASSERT_TRUE(HasSerializedFrame(file_url));
714 const std::string& serialized_contents =
715 GetSerializedContentForFrame(file_url);
717 // Reload serialized contents and make sure there is only one META tag.
718 LoadContents(serialized_contents, file_url,
719 web_frame->document().encoding());
720 web_frame = GetMainFrame();
721 ASSERT_TRUE(web_frame != NULL);
722 doc = web_frame->document();
723 ASSERT_TRUE(doc.isHTMLDocument());
724 head_element = doc.head();
725 ASSERT_TRUE(!head_element.isNull());
726 ASSERT_TRUE(head_element.hasChildNodes());
727 ASSERT_TRUE(head_element.childNodes().length() == 1);
728 WebNode meta_node = head_element.firstChild();
729 ASSERT_TRUE(!meta_node.isNull());
730 // Get meta charset info.
731 std::string charset_info;
732 ASSERT_TRUE(IsMetaElement(meta_node, charset_info));
733 ASSERT_TRUE(!charset_info.empty());
// The declared charset must match the encoding of the reloaded document.
734 ASSERT_EQ(charset_info,
735 std::string(web_frame->document().encoding().utf8()));
737 // Check the body's first node is text node and its contents are
// "hello world".
739 WebElement body_element = doc.body();
740 ASSERT_TRUE(!body_element.isNull());
741 WebNode text_node = body_element.firstChild();
742 ASSERT_TRUE(text_node.isTextNode());
743 WebString text_node_contents = text_node.nodeValue();
744 ASSERT_TRUE(std::string(text_node_contents.utf8()) == "hello world");
// Serializes the frame at |file_url| together with its sub-frames. No output
// is inspected here.
747 void SerializeDocumentWithDownloadedIFrameOnRenderer(const GURL& file_url) {
748 // Do a recursive serialization. We pass if we don't crash.
749 SerializeDomForURL(file_url, true);
// Takes the last child of BODY in the document at |file_url|, asserts that it
// is an element, and expects GetSubResourceLinkFromElement() to return a null
// string for it.
752 void SubResourceForElementsInNonHTMLNamespaceOnRenderer(
753 const GURL& file_url) {
754 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url);
755 ASSERT_TRUE(web_frame != NULL);
756 WebDocument doc = web_frame->document();
757 WebNode lastNodeInBody = doc.body().lastChild();
// The node must be an element before it is converted with to<WebElement>().
758 ASSERT_EQ(WebNode::ElementNode, lastNodeInBody.nodeType());
759 WebString uri = GetSubResourceLinkFromElement(
760 lastNodeInBody.to<WebElement>());
761 EXPECT_TRUE(uri.isNull());
765 // Map frame_url to corresponding serialized_content. Keys are GURL::spec()
// strings; values accumulate the chunks received in
// didSerializeDataForFrame().
766 typedef base::hash_map<std::string, std::string> SerializedFrameContentMap;
767 SerializedFrameContentMap serialized_frame_map_;
768 // Map frame_url to corresponding status of serialization finish.
769 typedef base::hash_map<std::string, bool> SerializationFinishStatusMap;
770 SerializationFinishStatusMap serialization_finish_status_;
771 // Flag indicates whether the process of serializing DOM is finished or not.
773 // The local_directory_name_ is a dummy relative path of the directory that
774 // contains all saved auxiliary files, including all sub frames and resources.
775 const base::FilePath local_directory_name_;
778 // If original contents have document type, the serialized contents also have
// document type.
780 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithDocType) {
781 base::FilePath page_file_path =
782 GetTestFilePath("dom_serializer", "youtube_1.htm");
783 GURL file_url = net::FilePathToFileURL(page_file_path);
784 ASSERT_TRUE(file_url.SchemeIsFile());
785 // Load the test file.
786 NavigateToURL(shell(), file_url);
// Run the actual checks in the in-process renderer and wait for them.
788 PostTaskToInProcessRendererAndWait(
789 base::Bind(&DomSerializerTests::SerializeHTMLDOMWithDocTypeOnRenderer,
790 base::Unretained(this), file_url));
793 // If original contents do not have document type, the serialized contents
794 // also do not have document type.
795 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithoutDocType) {
796 base::FilePath page_file_path =
797 GetTestFilePath("dom_serializer", "youtube_2.htm");
798 GURL file_url = net::FilePathToFileURL(page_file_path);
799 ASSERT_TRUE(file_url.SchemeIsFile());
800 // Load the test file.
801 NavigateToURL(shell(), file_url);
// Run the actual checks in the in-process renderer and wait for them.
803 PostTaskToInProcessRendererAndWait(
805 &DomSerializerTests::SerializeHTMLDOMWithoutDocTypeOnRenderer,
806 base::Unretained(this), file_url));
809 // Serialize XML document which has all 5 built-in entities. After
810 // finishing serialization, the serialized contents should be the same
811 // as the original XML document.
813 // TODO(tiger@opera.com): Disabled in preparation of page serializer merge --
814 // XML headers are handled differently in the merged serializer.
815 // Bug: http://crbug.com/328354
816 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
817 DISABLED_SerializeXMLDocWithBuiltInEntities) {
// note.html is the page that gets navigated to; note.xml is the document
// whose serialization is compared.
818 base::FilePath page_file_path =
819 GetTestFilePath("dom_serializer", "note.html");
820 base::FilePath xml_file_path = GetTestFilePath("dom_serializer", "note.xml");
821 // Read original contents for later comparison.
822 std::string original_contents;
823 ASSERT_TRUE(base::ReadFileToString(xml_file_path, &original_contents));
825 GURL file_url = net::FilePathToFileURL(page_file_path);
826 GURL xml_file_url = net::FilePathToFileURL(xml_file_path);
827 ASSERT_TRUE(file_url.SchemeIsFile());
828 // Load the test file.
829 NavigateToURL(shell(), file_url);
// Run the actual checks in the in-process renderer and wait for them.
831 PostTaskToInProcessRendererAndWait(
833 &DomSerializerTests::SerializeXMLDocWithBuiltInEntitiesOnRenderer,
834 base::Unretained(this), xml_file_url, original_contents));
837 // When serializing DOM, we add MOTW declaration before html tag.
838 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithAddingMOTW) {
839 base::FilePath page_file_path =
840 GetTestFilePath("dom_serializer", "youtube_2.htm");
841 // Read original contents for later comparison.
842 std::string original_contents;
843 ASSERT_TRUE(base::ReadFileToString(page_file_path, &original_contents));
845 GURL file_url = net::FilePathToFileURL(page_file_path);
846 ASSERT_TRUE(file_url.SchemeIsFile());
848 // Load the test file.
849 NavigateToURL(shell(), file_url);
// Run the actual checks in the in-process renderer and wait for them.
851 PostTaskToInProcessRendererAndWait(
853 &DomSerializerTests::SerializeHTMLDOMWithAddingMOTWOnRenderer,
854 base::Unretained(this), file_url, original_contents));
857 // When serializing DOM, we will add the META which has the correct charset
858 // declaration as first child of HEAD element for resolving WebKit bug:
859 // http://bugs.webkit.org/show_bug.cgi?id=16621 even if the original document
860 // does not have META charset declaration.
861 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
862 SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc) {
863 base::FilePath page_file_path =
864 GetTestFilePath("dom_serializer", "youtube_1.htm");
866 GURL file_url = net::FilePathToFileURL(page_file_path);
867 ASSERT_TRUE(file_url.SchemeIsFile());
868 // Load the test file.
869 NavigateToURL(shell(), file_url);
// Run the actual checks in the in-process renderer and wait for them.
871 PostTaskToInProcessRendererAndWait(
873 &DomSerializerTests::
874 SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer,
875 base::Unretained(this), file_url));
878 // When serializing DOM, if the original document has multiple META charset
879 // declaration, we will add the META which have correct charset declaration
880 // as first child of HEAD element and remove all original META charset
882 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
883 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDoc) {
884 base::FilePath page_file_path =
885 GetTestFilePath("dom_serializer", "youtube_2.htm");
887 GURL file_url = net::FilePathToFileURL(page_file_path);
888 ASSERT_TRUE(file_url.SchemeIsFile());
889 // Load the test file.
890 NavigateToURL(shell(), file_url);
892 PostTaskToInProcessRendererAndWait(
894 &DomSerializerTests::
895 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer,
896 base::Unretained(this), file_url));
899 // Test situation of html entities in text when serializing HTML DOM.
900 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInText) {
901 // Need to spin up the renderer and also navigate to a file url so that the
902 // renderer code doesn't attempt a fork when it sees a load to file scheme
903 // from non-file scheme.
904 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html"));
906 PostTaskToInProcessRendererAndWait(
908 &DomSerializerTests::SerializeHTMLDOMWithEntitiesInTextOnRenderer,
909 base::Unretained(this)));
912 // Test situation of html entities in attribute value when serializing
914 // This test started to fail at WebKit r65388. See http://crbug.com/52279.
916 // TODO(tiger@opera.com): Disabled in preparation of page serializer merge --
917 // Some attributes are handled differently in the merged serializer.
918 // Bug: http://crbug.com/328354
919 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
920 DISABLED_SerializeHTMLDOMWithEntitiesInAttributeValue) {
921 // Need to spin up the renderer and also navigate to a file url so that the
922 // renderer code doesn't attempt a fork when it sees a load to file scheme
923 // from non-file scheme.
924 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html"));
926 PostTaskToInProcessRendererAndWait(
928 &DomSerializerTests::
929 SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer,
930 base::Unretained(this)));
933 // Test situation of non-standard HTML entities when serializing HTML DOM.
934 // This test started to fail at WebKit r65351. See http://crbug.com/52279.
935 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
936 SerializeHTMLDOMWithNonStandardEntities) {
937 // Make a test file URL and load it.
938 base::FilePath page_file_path = GetTestFilePath(
939 "dom_serializer", "nonstandard_htmlentities.htm");
940 GURL file_url = net::FilePathToFileURL(page_file_path);
941 NavigateToURL(shell(), file_url);
943 PostTaskToInProcessRendererAndWait(
945 &DomSerializerTests::
946 SerializeHTMLDOMWithNonStandardEntitiesOnRenderer,
947 base::Unretained(this), file_url));
950 // Test situation of BASE tag in original document when serializing HTML DOM.
951 // When serializing, we should comment the BASE tag, append a new BASE tag.
952 // rewrite all the savable URLs to relative local path, and change other URLs
955 // TODO(tiger@opera.com): Disabled in preparation of page serializer merge --
956 // Base tags are handled a bit different in merged version.
957 // Bug: http://crbug.com/328354
958 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
959 DISABLED_SerializeHTMLDOMWithBaseTag) {
960 base::FilePath page_file_path = GetTestFilePath(
961 "dom_serializer", "html_doc_has_base_tag.htm");
963 // Get page dir URL which is base URL of this file.
964 base::FilePath dir_name = page_file_path.DirName();
965 dir_name = dir_name.Append(
966 base::FilePath::StringType(base::FilePath::kSeparators[0], 1));
967 GURL path_dir_url = net::FilePathToFileURL(dir_name);
970 GURL file_url = net::FilePathToFileURL(page_file_path);
971 ASSERT_TRUE(file_url.SchemeIsFile());
972 // Load the test file.
973 NavigateToURL(shell(), file_url);
975 PostTaskToInProcessRendererAndWait(
977 &DomSerializerTests::SerializeHTMLDOMWithBaseTagOnRenderer,
978 base::Unretained(this), file_url, path_dir_url));
981 // Serializing page which has an empty HEAD tag.
982 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithEmptyHead) {
983 // Need to spin up the renderer and also navigate to a file url so that the
984 // renderer code doesn't attempt a fork when it sees a load to file scheme
985 // from non-file scheme.
986 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html"));
988 PostTaskToInProcessRendererAndWait(
989 base::Bind(&DomSerializerTests::SerializeHTMLDOMWithEmptyHeadOnRenderer,
990 base::Unretained(this)));
993 // Test that we don't crash when the page contains an iframe that
994 // was handled as a download (http://crbug.com/42212).
995 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
996 SerializeDocumentWithDownloadedIFrame) {
997 base::FilePath page_file_path = GetTestFilePath(
998 "dom_serializer", "iframe-src-is-exe.htm");
999 GURL file_url = net::FilePathToFileURL(page_file_path);
1000 ASSERT_TRUE(file_url.SchemeIsFile());
1001 // Load the test file.
1002 NavigateToURL(shell(), file_url);
1004 PostTaskToInProcessRendererAndWait(
1006 &DomSerializerTests::
1007 SerializeDocumentWithDownloadedIFrameOnRenderer,
1008 base::Unretained(this), file_url));
1011 IN_PROC_BROWSER_TEST_F(DomSerializerTests,
1012 SubResourceForElementsInNonHTMLNamespace) {
1013 base::FilePath page_file_path = GetTestFilePath(
1014 "dom_serializer", "non_html_namespace.htm");
1015 GURL file_url = net::FilePathToFileURL(page_file_path);
1016 NavigateToURL(shell(), file_url);
1018 PostTaskToInProcessRendererAndWait(
1020 &DomSerializerTests::
1021 SubResourceForElementsInNonHTMLNamespaceOnRenderer,
1022 base::Unretained(this), file_url));
1025 } // namespace content