Upstream version 10.39.225.0
[platform/framework/web/crosswalk.git] / src / components / dom_distiller / content / distiller_page_web_contents_browsertest.cc
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/memory/weak_ptr.h"
6 #include "base/path_service.h"
7 #include "base/run_loop.h"
8 #include "base/values.h"
9 #include "components/dom_distiller/content/distiller_page_web_contents.h"
10 #include "components/dom_distiller/content/web_contents_main_frame_observer.h"
11 #include "components/dom_distiller/core/distiller_page.h"
12 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
13 #include "components/dom_distiller/core/proto/distilled_page.pb.h"
14 #include "components/dom_distiller/core/viewer.h"
15 #include "content/public/browser/browser_context.h"
16 #include "content/public/browser/navigation_controller.h"
17 #include "content/public/browser/render_frame_host.h"
18 #include "content/public/browser/web_contents_observer.h"
19 #include "content/public/test/content_browser_test.h"
20 #include "content/shell/browser/shell.h"
21 #include "grit/components_strings.h"
22 #include "net/test/embedded_test_server/embedded_test_server.h"
23 #include "testing/gmock/include/gmock/gmock.h"
24 #include "third_party/dom_distiller_js/dom_distiller.pb.h"
25 #include "ui/base/l10n/l10n_util.h"
26 #include "ui/base/resource/resource_bundle.h"
27
28 using content::ContentBrowserTest;
29 using testing::ContainsRegex;
30 using testing::HasSubstr;
31 using testing::Not;
32
33 namespace dom_distiller {
34
// Paths, relative to the embedded test server root, of the articles that the
// tests below distill. Declared as constant arrays (rather than mutable
// pointers to string literals) so the names themselves cannot be re-pointed.
const char kSimpleArticlePath[] = "/simple_article.html";
const char kVideoArticlePath[] = "/video_article.html";
37
38 class DistillerPageWebContentsTest : public ContentBrowserTest {
39  public:
40   // ContentBrowserTest:
41   virtual void SetUpOnMainThread() OVERRIDE {
42     AddComponentsResources();
43     SetUpTestServer();
44     ContentBrowserTest::SetUpOnMainThread();
45   }
46
47   void DistillPage(const base::Closure& quit_closure, const std::string& url) {
48     quit_closure_ = quit_closure;
49     distiller_page_->DistillPage(
50         embedded_test_server()->GetURL(url),
51         dom_distiller::proto::DomDistillerOptions(),
52         base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished,
53                    this));
54   }
55
56   void OnPageDistillationFinished(
57       scoped_ptr<proto::DomDistillerResult> distiller_result,
58       bool distillation_successful) {
59     distiller_result_ = distiller_result.Pass();
60     quit_closure_.Run();
61   }
62
63  private:
64   void AddComponentsResources() {
65     base::FilePath pak_file;
66     base::FilePath pak_dir;
67     PathService::Get(base::DIR_MODULE, &pak_dir);
68     pak_file = pak_dir.Append(FILE_PATH_LITERAL("components_resources.pak"));
69     ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
70         pak_file, ui::SCALE_FACTOR_NONE);
71   }
72
73   void SetUpTestServer() {
74     base::FilePath path;
75     PathService::Get(base::DIR_SOURCE_ROOT, &path);
76     path = path.AppendASCII("components/test/data/dom_distiller");
77     embedded_test_server()->ServeFilesFromDirectory(path);
78     ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
79   }
80
81  protected:
82   void RunUseCurrentWebContentsTest(const std::string& url,
83                                     bool expect_new_web_contents,
84                                     bool setup_main_frame_observer,
85                                     bool wait_for_document_loaded);
86
87   DistillerPageWebContents* distiller_page_;
88   base::Closure quit_closure_;
89   scoped_ptr<proto::DomDistillerResult> distiller_result_;
90 };
91
92 // Use this class to be able to leak the WebContents, which is needed for when
93 // the current WebContents is used for distillation.
94 class TestDistillerPageWebContents : public DistillerPageWebContents {
95  public:
96   TestDistillerPageWebContents(
97       content::BrowserContext* browser_context,
98       const gfx::Size& render_view_size,
99       scoped_ptr<SourcePageHandleWebContents> optional_web_contents_handle,
100       bool expect_new_web_contents)
101       : DistillerPageWebContents(browser_context, render_view_size,
102                                  optional_web_contents_handle.Pass()),
103         expect_new_web_contents_(expect_new_web_contents),
104         new_web_contents_created_(false) {}
105
106   virtual void CreateNewWebContents(const GURL& url) OVERRIDE {
107     ASSERT_EQ(true, expect_new_web_contents_);
108     new_web_contents_created_ = true;
109     // DistillerPageWebContents::CreateNewWebContents resets the scoped_ptr to
110     // the WebContents, so intentionally leak WebContents here, since it is
111     // owned by the shell.
112     content::WebContents* web_contents = web_contents_.release();
113     web_contents->GetLastCommittedURL();
114     DistillerPageWebContents::CreateNewWebContents(url);
115   }
116
117   virtual ~TestDistillerPageWebContents() {
118     if (!expect_new_web_contents_) {
119       // Intentionally leaking WebContents, since it is owned by the shell.
120       content::WebContents* web_contents = web_contents_.release();
121       web_contents->GetLastCommittedURL();
122     }
123   }
124
125   bool new_web_contents_created() { return new_web_contents_created_; }
126
127  private:
128   bool expect_new_web_contents_;
129   bool new_web_contents_created_;
130 };
131
132 // Helper class to know how far in the loading process the current WebContents
133 // has come. It will call the callback either after
134 // DidCommitProvisionalLoadForFrame or DocumentLoadedInFrame is called for the
135 // main frame, based on the value of |wait_for_document_loaded|.
136 class WebContentsMainFrameHelper : public content::WebContentsObserver {
137  public:
138   WebContentsMainFrameHelper(content::WebContents* web_contents,
139                              const base::Closure& callback,
140                              bool wait_for_document_loaded)
141       : WebContentsObserver(web_contents),
142         callback_(callback),
143         wait_for_document_loaded_(wait_for_document_loaded) {}
144
145   virtual void DidCommitProvisionalLoadForFrame(
146       content::RenderFrameHost* render_frame_host,
147       const GURL& url,
148       ui::PageTransition transition_type) OVERRIDE {
149     if (wait_for_document_loaded_)
150       return;
151     if (!render_frame_host->GetParent())
152       callback_.Run();
153   }
154
155   virtual void DocumentLoadedInFrame(
156       content::RenderFrameHost* render_frame_host) OVERRIDE {
157     if (wait_for_document_loaded_) {
158       if (!render_frame_host->GetParent())
159         callback_.Run();
160     }
161   }
162
163  private:
164   base::Closure callback_;
165   bool wait_for_document_loaded_;
166 };
167
168 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, BasicDistillationWorks) {
169   DistillerPageWebContents distiller_page(
170       shell()->web_contents()->GetBrowserContext(),
171       shell()->web_contents()->GetContainerBounds().size(),
172       scoped_ptr<SourcePageHandleWebContents>());
173   distiller_page_ = &distiller_page;
174
175   base::RunLoop run_loop;
176   DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
177   run_loop.Run();
178
179   EXPECT_EQ("Test Page Title", distiller_result_->title());
180   EXPECT_THAT(distiller_result_->distilled_content().html(),
181               HasSubstr("Lorem ipsum"));
182   EXPECT_THAT(distiller_result_->distilled_content().html(),
183               Not(HasSubstr("questionable content")));
184   EXPECT_EQ("", distiller_result_->pagination_info().next_page());
185   EXPECT_EQ("", distiller_result_->pagination_info().prev_page());
186 }
187
188 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeLinks) {
189   DistillerPageWebContents distiller_page(
190       shell()->web_contents()->GetBrowserContext(),
191       shell()->web_contents()->GetContainerBounds().size(),
192       scoped_ptr<SourcePageHandleWebContents>());
193   distiller_page_ = &distiller_page;
194
195   base::RunLoop run_loop;
196   DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
197   run_loop.Run();
198
199   // A relative link should've been updated.
200   EXPECT_THAT(distiller_result_->distilled_content().html(),
201               ContainsRegex("href=\"http://127.0.0.1:.*/relativelink.html\""));
202   EXPECT_THAT(distiller_result_->distilled_content().html(),
203               HasSubstr("href=\"http://www.google.com/absolutelink.html\""));
204 }
205
206 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeImages) {
207   DistillerPageWebContents distiller_page(
208       shell()->web_contents()->GetBrowserContext(),
209       shell()->web_contents()->GetContainerBounds().size(),
210       scoped_ptr<SourcePageHandleWebContents>());
211   distiller_page_ = &distiller_page;
212
213   base::RunLoop run_loop;
214   DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
215   run_loop.Run();
216
217   // A relative link should've been updated.
218   EXPECT_THAT(distiller_result_->distilled_content().html(),
219               ContainsRegex("src=\"http://127.0.0.1:.*/relativeimage.png\""));
220   EXPECT_THAT(distiller_result_->distilled_content().html(),
221               HasSubstr("src=\"http://www.google.com/absoluteimage.png\""));
222 }
223
224
225 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeVideos) {
226   DistillerPageWebContents distiller_page(
227       shell()->web_contents()->GetBrowserContext(),
228       shell()->web_contents()->GetContainerBounds().size(),
229       scoped_ptr<SourcePageHandleWebContents>());
230   distiller_page_ = &distiller_page;
231
232   base::RunLoop run_loop;
233   DistillPage(run_loop.QuitClosure(), kVideoArticlePath);
234   run_loop.Run();
235
236   // A relative source/track should've been updated.
237   EXPECT_THAT(distiller_result_->distilled_content().html(),
238               ContainsRegex("src=\"http://127.0.0.1:.*/relative_video.mp4\""));
239   EXPECT_THAT(
240       distiller_result_->distilled_content().html(),
241       ContainsRegex("src=\"http://127.0.0.1:.*/relative_track_en.vtt\""));
242   EXPECT_THAT(distiller_result_->distilled_content().html(),
243               HasSubstr("src=\"http://www.google.com/absolute_video.ogg\""));
244   EXPECT_THAT(distiller_result_->distilled_content().html(),
245               HasSubstr("src=\"http://www.google.com/absolute_track_fr.vtt\""));
246 }
247
248 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, VisibilityDetection) {
249   DistillerPageWebContents distiller_page(
250       shell()->web_contents()->GetBrowserContext(),
251       shell()->web_contents()->GetContainerBounds().size(),
252       scoped_ptr<SourcePageHandleWebContents>());
253   distiller_page_ = &distiller_page;
254
255   // visble_style.html and invisible_style.html only differ by the visibility
256   // internal stylesheet.
257
258   {
259     base::RunLoop run_loop;
260     DistillPage(run_loop.QuitClosure(), "/visible_style.html");
261     run_loop.Run();
262     EXPECT_THAT(distiller_result_->distilled_content().html(),
263                 HasSubstr("Lorem ipsum"));
264   }
265
266   {
267     base::RunLoop run_loop;
268     DistillPage(run_loop.QuitClosure(), "/invisible_style.html");
269     run_loop.Run();
270     EXPECT_THAT(distiller_result_->distilled_content().html(),
271                 Not(HasSubstr("Lorem ipsum")));
272   }
273 }
274
275 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
276                        UsingCurrentWebContentsWrongUrl) {
277   std::string url("/bogus");
278   bool expect_new_web_contents = true;
279   bool setup_main_frame_observer = true;
280   bool wait_for_document_loaded = true;
281   RunUseCurrentWebContentsTest(url,
282                                expect_new_web_contents,
283                                setup_main_frame_observer,
284                                wait_for_document_loaded);
285 }
286
287 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
288                        UsingCurrentWebContentsNoMainFrameObserver) {
289   std::string url(kSimpleArticlePath);
290   bool expect_new_web_contents = true;
291   bool setup_main_frame_observer = false;
292   bool wait_for_document_loaded = true;
293   RunUseCurrentWebContentsTest(url,
294                                expect_new_web_contents,
295                                setup_main_frame_observer,
296                                wait_for_document_loaded);
297 }
298
299 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
300                        UsingCurrentWebContentsNotFinishedLoadingYet) {
301   std::string url(kSimpleArticlePath);
302   bool expect_new_web_contents = false;
303   bool setup_main_frame_observer = true;
304   bool wait_for_document_loaded = false;
305   RunUseCurrentWebContentsTest(url,
306                                expect_new_web_contents,
307                                setup_main_frame_observer,
308                                wait_for_document_loaded);
309 }
310
311 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
312                        UsingCurrentWebContentsReadyForDistillation) {
313   std::string url(kSimpleArticlePath);
314   bool expect_new_web_contents = false;
315   bool setup_main_frame_observer = true;
316   bool wait_for_document_loaded = true;
317   RunUseCurrentWebContentsTest(url,
318                                expect_new_web_contents,
319                                setup_main_frame_observer,
320                                wait_for_document_loaded);
321 }
322
323 void DistillerPageWebContentsTest::RunUseCurrentWebContentsTest(
324     const std::string& url,
325     bool expect_new_web_contents,
326     bool setup_main_frame_observer,
327     bool wait_for_document_loaded) {
328   content::WebContents* current_web_contents = shell()->web_contents();
329   if (setup_main_frame_observer) {
330     dom_distiller::WebContentsMainFrameObserver::CreateForWebContents(
331         current_web_contents);
332   }
333   base::RunLoop url_loaded_runner;
334   WebContentsMainFrameHelper main_frame_loaded(current_web_contents,
335                                                url_loaded_runner.QuitClosure(),
336                                                wait_for_document_loaded);
337   current_web_contents->GetController().LoadURL(
338       embedded_test_server()->GetURL(url),
339       content::Referrer(),
340       ui::PAGE_TRANSITION_TYPED,
341       std::string());
342   url_loaded_runner.Run();
343
344   scoped_ptr<content::WebContents> old_web_contents_sptr(current_web_contents);
345   scoped_ptr<SourcePageHandleWebContents> source_page_handle(
346       new SourcePageHandleWebContents(old_web_contents_sptr.Pass()));
347
348   TestDistillerPageWebContents distiller_page(
349       shell()->web_contents()->GetBrowserContext(),
350       shell()->web_contents()->GetContainerBounds().size(),
351       source_page_handle.Pass(),
352       expect_new_web_contents);
353   distiller_page_ = &distiller_page;
354
355   base::RunLoop run_loop;
356   DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
357   run_loop.Run();
358
359   // Sanity check of distillation process.
360   EXPECT_EQ(expect_new_web_contents, distiller_page.new_web_contents_created());
361   EXPECT_EQ("Test Page Title", distiller_result_->title());
362 }
363
364 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, MarkupInfo) {
365   DistillerPageWebContents distiller_page(
366       shell()->web_contents()->GetBrowserContext(),
367       shell()->web_contents()->GetContainerBounds().size(),
368       scoped_ptr<SourcePageHandleWebContents>());
369   distiller_page_ = &distiller_page;
370
371   base::RunLoop run_loop;
372   DistillPage(run_loop.QuitClosure(), "/markup_article.html");
373   run_loop.Run();
374
375   EXPECT_THAT(distiller_result_->distilled_content().html(),
376               HasSubstr("Lorem ipsum"));
377   EXPECT_EQ("Marked-up Markup Test Page Title", distiller_result_->title());
378
379   const proto::MarkupInfo markup_info = distiller_result_->markup_info();
380   EXPECT_EQ("Marked-up Markup Test Page Title", markup_info.title());
381   EXPECT_EQ("Article", markup_info.type());
382   EXPECT_EQ("http://test/markup.html", markup_info.url());
383   EXPECT_EQ("This page tests Markup Info.", markup_info.description());
384   EXPECT_EQ("Whoever Published", markup_info.publisher());
385   EXPECT_EQ("Copyright 2000-2014 Whoever Copyrighted", markup_info.copyright());
386   EXPECT_EQ("Whoever Authored", markup_info.author());
387
388   const proto::MarkupArticle markup_article = markup_info.article();
389   EXPECT_EQ("Whatever Section", markup_article.section());
390   EXPECT_EQ("July 23, 2014", markup_article.published_time());
391   EXPECT_EQ("2014-07-23T23:59", markup_article.modified_time());
392   EXPECT_EQ("", markup_article.expiration_time());
393   ASSERT_EQ(1, markup_article.authors_size());
394   EXPECT_EQ("Whoever Authored", markup_article.authors(0));
395
396   ASSERT_EQ(2, markup_info.images_size());
397
398   const proto::MarkupImage markup_image1 = markup_info.images(0);
399   EXPECT_EQ("http://test/markup1.jpeg", markup_image1.url());
400   EXPECT_EQ("https://test/markup1.jpeg", markup_image1.secure_url());
401   EXPECT_EQ("jpeg", markup_image1.type());
402   EXPECT_EQ("", markup_image1.caption());
403   EXPECT_EQ(600, markup_image1.width());
404   EXPECT_EQ(400, markup_image1.height());
405
406   const proto::MarkupImage markup_image2 = markup_info.images(1);
407   EXPECT_EQ("http://test/markup2.gif", markup_image2.url());
408   EXPECT_EQ("https://test/markup2.gif", markup_image2.secure_url());
409   EXPECT_EQ("gif", markup_image2.type());
410   EXPECT_EQ("", markup_image2.caption());
411   EXPECT_EQ(1000, markup_image2.width());
412   EXPECT_EQ(600, markup_image2.height());
413 }
414
415 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
416                        TestTitleAndContentAreNeverEmpty) {
417   const std::string some_title = "some title";
418   const std::string some_content = "some content";
419   const std::string no_title =
420       l10n_util::GetStringUTF8(IDS_DOM_DISTILLER_VIEWER_NO_DATA_TITLE);
421   const std::string no_content =
422       l10n_util::GetStringUTF8(IDS_DOM_DISTILLER_VIEWER_NO_DATA_CONTENT);
423
424   {  // Test non-empty title and content for article.
425     scoped_ptr<DistilledArticleProto> article_proto(
426         new DistilledArticleProto());
427     article_proto->set_title(some_title);
428     (*(article_proto->add_pages())).set_html(some_content);
429     std::string html = viewer::GetUnsafeArticleHtml(article_proto.get(),
430         DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
431     EXPECT_THAT(html, HasSubstr(some_title));
432     EXPECT_THAT(html, HasSubstr(some_content));
433     EXPECT_THAT(html, Not(HasSubstr(no_title)));
434     EXPECT_THAT(html, Not(HasSubstr(no_content)));
435   }
436
437   {  // Test empty title and content for article.
438     scoped_ptr<DistilledArticleProto> article_proto(
439         new DistilledArticleProto());
440     article_proto->set_title("");
441     (*(article_proto->add_pages())).set_html("");
442     std::string html = viewer::GetUnsafeArticleHtml(article_proto.get(),
443         DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
444     EXPECT_THAT(html, HasSubstr(no_title));
445     EXPECT_THAT(html, HasSubstr(no_content));
446     EXPECT_THAT(html, Not(HasSubstr(some_title)));
447     EXPECT_THAT(html, Not(HasSubstr(some_content)));
448   }
449
450   {  // Test missing title and non-empty content for article.
451     scoped_ptr<DistilledArticleProto> article_proto(
452         new DistilledArticleProto());
453     (*(article_proto->add_pages())).set_html(some_content);
454     std::string html = viewer::GetUnsafeArticleHtml(article_proto.get(),
455         DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
456     EXPECT_THAT(html, HasSubstr(no_title));
457     EXPECT_THAT(html, HasSubstr(no_content));
458     EXPECT_THAT(html, Not(HasSubstr(some_title)));
459     EXPECT_THAT(html, Not(HasSubstr(some_content)));
460   }
461
462   {  // Test non-empty title and missing content for article.
463     scoped_ptr<DistilledArticleProto> article_proto(
464         new DistilledArticleProto());
465     article_proto->set_title(some_title);
466     std::string html = viewer::GetUnsafeArticleHtml(article_proto.get(),
467         DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
468     EXPECT_THAT(html, HasSubstr(no_title));
469     EXPECT_THAT(html, HasSubstr(no_content));
470     EXPECT_THAT(html, Not(HasSubstr(some_title)));
471     EXPECT_THAT(html, Not(HasSubstr(some_content)));
472   }
473
474   {  // Test non-empty title and content for page.
475     scoped_ptr<DistilledPageProto> page_proto(new DistilledPageProto());
476     page_proto->set_title(some_title);
477     page_proto->set_html(some_content);
478     std::string html = viewer::GetUnsafePartialArticleHtml(page_proto.get(),
479         DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
480     EXPECT_THAT(html, HasSubstr(some_title));
481     EXPECT_THAT(html, HasSubstr(some_content));
482     EXPECT_THAT(html, Not(HasSubstr(no_title)));
483     EXPECT_THAT(html, Not(HasSubstr(no_content)));
484   }
485
486   {  // Test empty title and content for page.
487     scoped_ptr<DistilledPageProto> page_proto(new DistilledPageProto());
488     page_proto->set_title("");
489     page_proto->set_html("");
490     std::string html = viewer::GetUnsafePartialArticleHtml(page_proto.get(),
491         DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
492     EXPECT_THAT(html, HasSubstr(no_title));
493     EXPECT_THAT(html, HasSubstr(no_content));
494     EXPECT_THAT(html, Not(HasSubstr(some_title)));
495     EXPECT_THAT(html, Not(HasSubstr(some_content)));
496   }
497
498   {  // Test missing title and non-empty content for page.
499     scoped_ptr<DistilledPageProto> page_proto(new DistilledPageProto());
500     page_proto->set_html(some_content);
501     std::string html = viewer::GetUnsafePartialArticleHtml(page_proto.get(),
502         DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
503     EXPECT_THAT(html, HasSubstr(no_title));
504     EXPECT_THAT(html, HasSubstr(some_content));
505     EXPECT_THAT(html, Not(HasSubstr(some_title)));
506     EXPECT_THAT(html, Not(HasSubstr(no_content)));
507   }
508
509   {  // Test non-empty title and missing content for page.
510     scoped_ptr<DistilledPageProto> page_proto(new DistilledPageProto());
511     page_proto->set_title(some_title);
512     std::string html = viewer::GetUnsafePartialArticleHtml(page_proto.get(),
513         DistilledPagePrefs::LIGHT, DistilledPagePrefs::SERIF);
514     EXPECT_THAT(html, HasSubstr(some_title));
515     EXPECT_THAT(html, HasSubstr(no_content));
516     EXPECT_THAT(html, Not(HasSubstr(no_title)));
517     EXPECT_THAT(html, Not(HasSubstr(some_content)));
518   }
519 }
520
521 }  // namespace dom_distiller