af3b96d1df6b9619f645f6c3f179874717a2289f
[platform/framework/web/crosswalk.git] / src / components / dom_distiller / content / distiller_page_web_contents_browsertest.cc
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/memory/weak_ptr.h"
6 #include "base/path_service.h"
7 #include "base/run_loop.h"
8 #include "base/values.h"
9 #include "components/dom_distiller/content/distiller_page_web_contents.h"
10 #include "components/dom_distiller/content/web_contents_main_frame_observer.h"
11 #include "components/dom_distiller/core/distiller_page.h"
12 #include "content/public/browser/browser_context.h"
13 #include "content/public/browser/navigation_controller.h"
14 #include "content/public/browser/render_frame_host.h"
15 #include "content/public/browser/web_contents_observer.h"
16 #include "content/public/test/content_browser_test.h"
17 #include "content/shell/browser/shell.h"
18 #include "grit/component_resources.h"
19 #include "net/test/embedded_test_server/embedded_test_server.h"
20 #include "testing/gmock/include/gmock/gmock.h"
21 #include "ui/base/resource/resource_bundle.h"
22
23 using content::ContentBrowserTest;
24 using testing::ContainsRegex;
25 using testing::HasSubstr;
26 using testing::Not;
27
28 namespace dom_distiller {
29
// Server-relative paths of the test pages requested through the embedded
// test server. Declared as const arrays (not `const char*`) so the names
// cannot be re-pointed at other strings and have internal linkage.
const char kSimpleArticlePath[] = "/simple_article.html";
const char kVideoArticlePath[] = "/video_article.html";
32
33 class DistillerPageWebContentsTest : public ContentBrowserTest {
34  public:
35   // ContentBrowserTest:
36   virtual void SetUpOnMainThread() OVERRIDE {
37     AddComponentsResources();
38     SetUpTestServer();
39     ContentBrowserTest::SetUpOnMainThread();
40   }
41
42   void DistillPage(const base::Closure& quit_closure, const std::string& url) {
43     quit_closure_ = quit_closure;
44     distiller_page_->DistillPage(
45         embedded_test_server()->GetURL(url),
46         dom_distiller::proto::DomDistillerOptions(),
47         base::Bind(&DistillerPageWebContentsTest::OnPageDistillationFinished,
48                    this));
49   }
50
51   void OnPageDistillationFinished(scoped_ptr<DistilledPageInfo> distilled_page,
52                                   bool distillation_successful) {
53     page_info_ = distilled_page.Pass();
54     quit_closure_.Run();
55   }
56
57  private:
58   void AddComponentsResources() {
59     base::FilePath pak_file;
60     base::FilePath pak_dir;
61     PathService::Get(base::DIR_MODULE, &pak_dir);
62     pak_file = pak_dir.Append(FILE_PATH_LITERAL("components_resources.pak"));
63     ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
64         pak_file, ui::SCALE_FACTOR_NONE);
65   }
66
67   void SetUpTestServer() {
68     base::FilePath path;
69     PathService::Get(base::DIR_SOURCE_ROOT, &path);
70     path = path.AppendASCII("components/test/data/dom_distiller");
71     embedded_test_server()->ServeFilesFromDirectory(path);
72     ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
73   }
74
75  protected:
76   void RunUseCurrentWebContentsTest(const std::string& url,
77                                     bool expect_new_web_contents,
78                                     bool setup_main_frame_observer,
79                                     bool wait_for_document_loaded);
80
81   DistillerPageWebContents* distiller_page_;
82   base::Closure quit_closure_;
83   scoped_ptr<DistilledPageInfo> page_info_;
84 };
85
86 // Use this class to be able to leak the WebContents, which is needed for when
87 // the current WebContents is used for distillation.
88 class TestDistillerPageWebContents : public DistillerPageWebContents {
89  public:
90   TestDistillerPageWebContents(
91       content::BrowserContext* browser_context,
92       const gfx::Size& render_view_size,
93       scoped_ptr<SourcePageHandleWebContents> optional_web_contents_handle,
94       bool expect_new_web_contents)
95       : DistillerPageWebContents(browser_context, render_view_size,
96                                  optional_web_contents_handle.Pass()),
97         expect_new_web_contents_(expect_new_web_contents),
98         new_web_contents_created_(false) {}
99
100   virtual void CreateNewWebContents(const GURL& url) OVERRIDE {
101     ASSERT_EQ(true, expect_new_web_contents_);
102     new_web_contents_created_ = true;
103     // DistillerPageWebContents::CreateNewWebContents resets the scoped_ptr to
104     // the WebContents, so intentionally leak WebContents here, since it is
105     // owned by the shell.
106     content::WebContents* web_contents = web_contents_.release();
107     web_contents->GetLastCommittedURL();
108     DistillerPageWebContents::CreateNewWebContents(url);
109   }
110
111   virtual ~TestDistillerPageWebContents() {
112     if (!expect_new_web_contents_) {
113       // Intentionally leaking WebContents, since it is owned by the shell.
114       content::WebContents* web_contents = web_contents_.release();
115       web_contents->GetLastCommittedURL();
116     }
117   }
118
119   bool new_web_contents_created() { return new_web_contents_created_; }
120
121  private:
122   bool expect_new_web_contents_;
123   bool new_web_contents_created_;
124 };
125
126 // Helper class to know how far in the loading process the current WebContents
127 // has come. It will call the callback either after
128 // DidCommitProvisionalLoadForFrame or DocumentLoadedInFrame is called for the
129 // main frame, based on the value of |wait_for_document_loaded|.
130 class WebContentsMainFrameHelper : public content::WebContentsObserver {
131  public:
132   WebContentsMainFrameHelper(content::WebContents* web_contents,
133                              const base::Closure& callback,
134                              bool wait_for_document_loaded)
135       : WebContentsObserver(web_contents),
136         callback_(callback),
137         wait_for_document_loaded_(wait_for_document_loaded) {}
138
139   virtual void DidCommitProvisionalLoadForFrame(
140       content::RenderFrameHost* render_frame_host,
141       const GURL& url,
142       content::PageTransition transition_type) OVERRIDE {
143     if (wait_for_document_loaded_)
144       return;
145     if (!render_frame_host->GetParent())
146       callback_.Run();
147   }
148
149   virtual void DocumentLoadedInFrame(
150       content::RenderFrameHost* render_frame_host) OVERRIDE {
151     if (wait_for_document_loaded_) {
152       if (!render_frame_host->GetParent())
153         callback_.Run();
154     }
155   }
156
157  private:
158   base::Closure callback_;
159   bool wait_for_document_loaded_;
160 };
161
162 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, BasicDistillationWorks) {
163   DistillerPageWebContents distiller_page(
164       shell()->web_contents()->GetBrowserContext(),
165       shell()->web_contents()->GetContainerBounds().size(),
166       scoped_ptr<SourcePageHandleWebContents>());
167   distiller_page_ = &distiller_page;
168
169   base::RunLoop run_loop;
170   DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
171   run_loop.Run();
172
173   EXPECT_EQ("Test Page Title", page_info_.get()->title);
174   EXPECT_THAT(page_info_.get()->html, HasSubstr("Lorem ipsum"));
175   EXPECT_THAT(page_info_.get()->html, Not(HasSubstr("questionable content")));
176   EXPECT_EQ("", page_info_.get()->next_page_url);
177   EXPECT_EQ("", page_info_.get()->prev_page_url);
178 }
179
180 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeLinks) {
181   DistillerPageWebContents distiller_page(
182       shell()->web_contents()->GetBrowserContext(),
183       shell()->web_contents()->GetContainerBounds().size(),
184       scoped_ptr<SourcePageHandleWebContents>());
185   distiller_page_ = &distiller_page;
186
187   base::RunLoop run_loop;
188   DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
189   run_loop.Run();
190
191   // A relative link should've been updated.
192   EXPECT_THAT(page_info_.get()->html,
193               ContainsRegex("href=\"http://127.0.0.1:.*/relativelink.html\""));
194   EXPECT_THAT(page_info_.get()->html,
195               HasSubstr("href=\"http://www.google.com/absolutelink.html\""));
196 }
197
198 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeImages) {
199   DistillerPageWebContents distiller_page(
200       shell()->web_contents()->GetBrowserContext(),
201       shell()->web_contents()->GetContainerBounds().size(),
202       scoped_ptr<SourcePageHandleWebContents>());
203   distiller_page_ = &distiller_page;
204
205   base::RunLoop run_loop;
206   DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
207   run_loop.Run();
208
209   // A relative link should've been updated.
210   EXPECT_THAT(page_info_.get()->html,
211               ContainsRegex("src=\"http://127.0.0.1:.*/relativeimage.png\""));
212   EXPECT_THAT(page_info_.get()->html,
213               HasSubstr("src=\"http://www.google.com/absoluteimage.png\""));
214 }
215
216
217 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, HandlesRelativeVideos) {
218   DistillerPageWebContents distiller_page(
219       shell()->web_contents()->GetBrowserContext(),
220       shell()->web_contents()->GetContainerBounds().size(),
221       scoped_ptr<SourcePageHandleWebContents>());
222   distiller_page_ = &distiller_page;
223
224   base::RunLoop run_loop;
225   DistillPage(run_loop.QuitClosure(), kVideoArticlePath);
226   run_loop.Run();
227
228   // A relative source/track should've been updated.
229   EXPECT_THAT(
230       page_info_.get()->html,
231       ContainsRegex("src=\"http://127.0.0.1:.*/relative_video.mp4\""));
232   EXPECT_THAT(
233       page_info_.get()->html,
234       ContainsRegex("src=\"http://127.0.0.1:.*/relative_track_en.vtt\""));
235   EXPECT_THAT(
236       page_info_.get()->html,
237       HasSubstr("src=\"http://www.google.com/absolute_video.ogg\""));
238   EXPECT_THAT(
239       page_info_.get()->html,
240       HasSubstr("src=\"http://www.google.com/absolute_track_fr.vtt\""));
241 }
242
243 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, VisibilityDetection) {
244   DistillerPageWebContents distiller_page(
245       shell()->web_contents()->GetBrowserContext(),
246       shell()->web_contents()->GetContainerBounds().size(),
247       scoped_ptr<SourcePageHandleWebContents>());
248   distiller_page_ = &distiller_page;
249
250   // visble_style.html and invisible_style.html only differ by the visibility
251   // internal stylesheet.
252
253   {
254     base::RunLoop run_loop;
255     DistillPage(run_loop.QuitClosure(), "/visible_style.html");
256     run_loop.Run();
257     EXPECT_THAT(page_info_.get()->html, HasSubstr("Lorem ipsum"));
258   }
259
260   {
261     base::RunLoop run_loop;
262     DistillPage(run_loop.QuitClosure(), "/invisible_style.html");
263     run_loop.Run();
264     EXPECT_THAT(page_info_.get()->html, Not(HasSubstr("Lorem ipsum")));
265   }
266 }
267
268 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
269                        UsingCurrentWebContentsWrongUrl) {
270   std::string url("/bogus");
271   bool expect_new_web_contents = true;
272   bool setup_main_frame_observer = true;
273   bool wait_for_document_loaded = true;
274   RunUseCurrentWebContentsTest(url,
275                                expect_new_web_contents,
276                                setup_main_frame_observer,
277                                wait_for_document_loaded);
278 }
279
280 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
281                        UsingCurrentWebContentsNoMainFrameObserver) {
282   std::string url(kSimpleArticlePath);
283   bool expect_new_web_contents = true;
284   bool setup_main_frame_observer = false;
285   bool wait_for_document_loaded = true;
286   RunUseCurrentWebContentsTest(url,
287                                expect_new_web_contents,
288                                setup_main_frame_observer,
289                                wait_for_document_loaded);
290 }
291
292 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
293                        UsingCurrentWebContentsNotFinishedLoadingYet) {
294   std::string url(kSimpleArticlePath);
295   bool expect_new_web_contents = false;
296   bool setup_main_frame_observer = true;
297   bool wait_for_document_loaded = false;
298   RunUseCurrentWebContentsTest(url,
299                                expect_new_web_contents,
300                                setup_main_frame_observer,
301                                wait_for_document_loaded);
302 }
303
304 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest,
305                        UsingCurrentWebContentsReadyForDistillation) {
306   std::string url(kSimpleArticlePath);
307   bool expect_new_web_contents = false;
308   bool setup_main_frame_observer = true;
309   bool wait_for_document_loaded = true;
310   RunUseCurrentWebContentsTest(url,
311                                expect_new_web_contents,
312                                setup_main_frame_observer,
313                                wait_for_document_loaded);
314 }
315
316 void DistillerPageWebContentsTest::RunUseCurrentWebContentsTest(
317     const std::string& url,
318     bool expect_new_web_contents,
319     bool setup_main_frame_observer,
320     bool wait_for_document_loaded) {
321   content::WebContents* current_web_contents = shell()->web_contents();
322   if (setup_main_frame_observer) {
323     dom_distiller::WebContentsMainFrameObserver::CreateForWebContents(
324         current_web_contents);
325   }
326   base::RunLoop url_loaded_runner;
327   WebContentsMainFrameHelper main_frame_loaded(current_web_contents,
328                                                url_loaded_runner.QuitClosure(),
329                                                wait_for_document_loaded);
330   current_web_contents->GetController().LoadURL(
331       embedded_test_server()->GetURL(url),
332       content::Referrer(),
333       content::PAGE_TRANSITION_TYPED,
334       std::string());
335   url_loaded_runner.Run();
336
337   scoped_ptr<content::WebContents> old_web_contents_sptr(current_web_contents);
338   scoped_ptr<SourcePageHandleWebContents> source_page_handle(
339       new SourcePageHandleWebContents(old_web_contents_sptr.Pass()));
340
341   TestDistillerPageWebContents distiller_page(
342       shell()->web_contents()->GetBrowserContext(),
343       shell()->web_contents()->GetContainerBounds().size(),
344       source_page_handle.Pass(),
345       expect_new_web_contents);
346   distiller_page_ = &distiller_page;
347
348   base::RunLoop run_loop;
349   DistillPage(run_loop.QuitClosure(), kSimpleArticlePath);
350   run_loop.Run();
351
352   // Sanity check of distillation process.
353   EXPECT_EQ(expect_new_web_contents, distiller_page.new_web_contents_created());
354   EXPECT_EQ("Test Page Title", page_info_.get()->title);
355 }
356
357 IN_PROC_BROWSER_TEST_F(DistillerPageWebContentsTest, MarkupInfo) {
358   DistillerPageWebContents distiller_page(
359       shell()->web_contents()->GetBrowserContext(),
360       shell()->web_contents()->GetContainerBounds().size(),
361       scoped_ptr<SourcePageHandleWebContents>());
362   distiller_page_ = &distiller_page;
363
364   base::RunLoop run_loop;
365   DistillPage(run_loop.QuitClosure(), "/markup_article.html");
366   run_loop.Run();
367
368   EXPECT_THAT(page_info_.get()->html, HasSubstr("Lorem ipsum"));
369   EXPECT_EQ("Marked-up Markup Test Page Title", page_info_.get()->title);
370
371   const DistilledPageInfo::MarkupInfo& markup_info = page_info_->markup_info;
372   EXPECT_EQ("Marked-up Markup Test Page Title", markup_info.title);
373   EXPECT_EQ("Article", markup_info.type);
374   EXPECT_EQ("http://test/markup.html", markup_info.url);
375   EXPECT_EQ("This page tests Markup Info.", markup_info.description);
376   EXPECT_EQ("Whoever Published", markup_info.publisher);
377   EXPECT_EQ("Copyright 2000-2014 Whoever Copyrighted", markup_info.copyright);
378   EXPECT_EQ("Whoever Authored", markup_info.author);
379
380   const DistilledPageInfo::MarkupArticle& markup_article = markup_info.article;
381   EXPECT_EQ("Whatever Section", markup_article.section);
382   EXPECT_EQ("July 23, 2014", markup_article.published_time);
383   EXPECT_EQ("2014-07-23T23:59", markup_article.modified_time);
384   EXPECT_EQ("", markup_article.expiration_time);
385   ASSERT_EQ(1U, markup_article.authors.size());
386   EXPECT_EQ("Whoever Authored", markup_article.authors[0]);
387
388   ASSERT_EQ(2U, markup_info.images.size());
389
390   const DistilledPageInfo::MarkupImage& markup_image1 = markup_info.images[0];
391   EXPECT_EQ("http://test/markup1.jpeg", markup_image1.url);
392   EXPECT_EQ("https://test/markup1.jpeg", markup_image1.secure_url);
393   EXPECT_EQ("jpeg", markup_image1.type);
394   EXPECT_EQ("", markup_image1.caption);
395   EXPECT_EQ(600, markup_image1.width);
396   EXPECT_EQ(400, markup_image1.height);
397
398   const DistilledPageInfo::MarkupImage& markup_image2 = markup_info.images[1];
399   EXPECT_EQ("http://test/markup2.gif", markup_image2.url);
400   EXPECT_EQ("https://test/markup2.gif", markup_image2.secure_url);
401   EXPECT_EQ("gif", markup_image2.type);
402   EXPECT_EQ("", markup_image2.caption);
403   EXPECT_EQ(1000, markup_image2.width);
404   EXPECT_EQ(600, markup_image2.height);
405 }
406
407 }  // namespace dom_distiller