Upstream version 5.34.104.0
[platform/framework/web/crosswalk.git] / src / components / dom_distiller / core / distiller_unittest.cc
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <map>
6 #include <string>
7 #include <vector>
8
9 #include "base/bind.h"
10 #include "base/bind_helpers.h"
11 #include "base/location.h"
12 #include "base/memory/scoped_ptr.h"
13 #include "base/message_loop/message_loop.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/values.h"
16 #include "components/dom_distiller/core/distiller.h"
17 #include "components/dom_distiller/core/distiller_page.h"
18 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
19 #include "components/dom_distiller/core/proto/distilled_page.pb.h"
20 #include "net/url_request/url_request_context_getter.h"
21 #include "testing/gmock/include/gmock/gmock.h"
22 #include "testing/gtest/include/gtest/gtest.h"
23
24 using std::vector;
25 using std::string;
26 using::testing::Invoke;
27 using::testing::Return;
28 using::testing::_;
29
30 namespace {
31   const char kTitle[] = "Title";
32   const char kContent[] = "Content";
33   const char kURL[] = "http://a.com/";
34   const size_t kTotalImages = 2;
35   const char* kImageURLs[kTotalImages] = {"http://a.com/img1.jpg",
36                                           "http://a.com/img2.jpg"};
37   const char* kImageData[kTotalImages] = {"abcde", "12345"};
38
39   const string GetImageName(int page_num, int image_num) {
40     return base::IntToString(page_num) + "_" + base::IntToString(image_num);
41   }
42
43   scoped_ptr<base::ListValue> CreateDistilledValueReturnedFromJS(
44       const string& title,
45       const string& content,
46       const vector<int>& image_indices,
47       const string& next_page_url,
48       const string& prev_page_url = "") {
49     scoped_ptr<base::ListValue> list(new base::ListValue());
50
51     list->AppendString(title);
52     list->AppendString(content);
53     list->AppendString(next_page_url);
54     list->AppendString(prev_page_url);
55     for (size_t i = 0; i < image_indices.size(); ++i) {
56       list->AppendString(kImageURLs[image_indices[i]]);
57     }
58     return list.Pass();
59   }
60
61 }  // namespace
62
63 namespace dom_distiller {
64
65 class TestDistillerURLFetcher : public DistillerURLFetcher {
66  public:
67   TestDistillerURLFetcher() : DistillerURLFetcher(NULL) {
68     responses_[kImageURLs[0]] = string(kImageData[0]);
69     responses_[kImageURLs[1]] = string(kImageData[1]);
70   }
71
72   void CallCallback(string url, const URLFetcherCallback& callback) {
73     callback.Run(responses_[url]);
74   }
75
76   virtual void FetchURL(const string& url,
77                         const URLFetcherCallback& callback) OVERRIDE {
78     ASSERT_TRUE(base::MessageLoop::current());
79     base::MessageLoop::current()->PostTask(
80         FROM_HERE,
81         base::Bind(&TestDistillerURLFetcher::CallCallback,
82                    base::Unretained(this), url, callback));
83   }
84
85   std::map<string, string> responses_;
86 };
87
88
89 class TestDistillerURLFetcherFactory : public DistillerURLFetcherFactory {
90  public:
91   TestDistillerURLFetcherFactory() : DistillerURLFetcherFactory(NULL) {}
92   virtual ~TestDistillerURLFetcherFactory() {}
93   virtual DistillerURLFetcher* CreateDistillerURLFetcher() const OVERRIDE {
94     return new TestDistillerURLFetcher();
95   }
96 };
97
98
99 class MockDistillerPage : public DistillerPage {
100  public:
101   MOCK_METHOD0(InitImpl, void());
102   MOCK_METHOD1(LoadURLImpl, void(const GURL& gurl));
103   MOCK_METHOD1(ExecuteJavaScriptImpl, void(const string& script));
104
105   explicit MockDistillerPage(DistillerPage::Delegate* delegate)
106       : DistillerPage(delegate) {}
107 };
108
109
110 class MockDistillerPageFactory : public DistillerPageFactory {
111  public:
112   MOCK_CONST_METHOD1(
113       CreateDistillerPageMock,
114       DistillerPage*(DistillerPage::Delegate* delegate));
115
116   virtual scoped_ptr<DistillerPage> CreateDistillerPage(
117       DistillerPage::Delegate* delegate) const OVERRIDE {
118     return scoped_ptr<DistillerPage>(CreateDistillerPageMock(delegate));
119   }
120 };
121
122 class DistillerTest : public testing::Test {
123  public:
124   virtual ~DistillerTest() {}
125   void OnDistillPageDone(scoped_ptr<DistilledArticleProto> proto) {
126     article_proto_ = proto.Pass();
127   }
128
129  protected:
130   scoped_ptr<DistillerImpl> distiller_;
131   scoped_ptr<DistilledArticleProto> article_proto_;
132   MockDistillerPageFactory page_factory_;
133   TestDistillerURLFetcherFactory url_fetcher_factory_;
134 };
135
136 ACTION_P3(DistillerPageOnExecuteJavaScriptDone, distiller_page, url, list) {
137   distiller_page->OnExecuteJavaScriptDone(url, list);
138 }
139
140 ACTION_P2(CreateMockDistillerPage, list, kurl) {
141   DistillerPage::Delegate* delegate = arg0;
142   MockDistillerPage* distiller_page = new MockDistillerPage(delegate);
143   EXPECT_CALL(*distiller_page, InitImpl());
144   EXPECT_CALL(*distiller_page, LoadURLImpl(kurl))
145       .WillOnce(testing::InvokeWithoutArgs(distiller_page,
146                                            &DistillerPage::OnLoadURLDone));
147   EXPECT_CALL(*distiller_page, ExecuteJavaScriptImpl(_)).WillOnce(
148       DistillerPageOnExecuteJavaScriptDone(distiller_page, kurl, list));
149   return distiller_page;
150 }
151
152 ACTION_P4(CreateMockDistillerPages, lists, kurls, num_pages, start_page_num) {
153   DistillerPage::Delegate* delegate = arg0;
154   MockDistillerPage* distiller_page = new MockDistillerPage(delegate);
155   EXPECT_CALL(*distiller_page, InitImpl());
156   {
157     testing::InSequence s;
158     // Distiller prefers distilling past pages first. E.g. when distillation
159     // starts on page 2 then pages are distilled in the order: 2, 1, 0, 3, 4.
160     vector<int> page_nums;
161     for (int page = start_page_num; page >= 0; --page)
162       page_nums.push_back(page);
163     for (int page = start_page_num + 1; page < num_pages; ++page)
164       page_nums.push_back(page);
165
166     for (size_t page_num = 0; page_num < page_nums.size(); ++page_num) {
167       int page = page_nums[page_num];
168       GURL url = GURL(kurls[page]);
169       EXPECT_CALL(*distiller_page, LoadURLImpl(url))
170           .WillOnce(testing::InvokeWithoutArgs(distiller_page,
171                                                &DistillerPage::OnLoadURLDone));
172       EXPECT_CALL(*distiller_page, ExecuteJavaScriptImpl(_))
173           .WillOnce(DistillerPageOnExecuteJavaScriptDone(
174               distiller_page, url, lists[page].get()));
175     }
176   }
177   return distiller_page;
178 }
179
180 TEST_F(DistillerTest, DistillPage) {
181   base::MessageLoopForUI loop;
182   scoped_ptr<base::ListValue> list =
183       CreateDistilledValueReturnedFromJS(kTitle, kContent, vector<int>(), "");
184   EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
185       .WillOnce(CreateMockDistillerPage(list.get(), GURL(kURL)));
186   distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
187   distiller_->Init();
188   distiller_->DistillPage(
189       GURL(kURL),
190       base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
191   base::MessageLoop::current()->RunUntilIdle();
192   EXPECT_EQ(kTitle, article_proto_->title());
193   EXPECT_EQ(article_proto_->pages_size(), 1);
194   const DistilledPageProto& first_page = article_proto_->pages(0);
195   EXPECT_EQ(kContent, first_page.html());
196   EXPECT_EQ(kURL, first_page.url());
197 }
198
199 TEST_F(DistillerTest, DistillPageWithImages) {
200   base::MessageLoopForUI loop;
201   vector<int> image_indices;
202   image_indices.push_back(0);
203   image_indices.push_back(1);
204   scoped_ptr<base::ListValue> list =
205       CreateDistilledValueReturnedFromJS(kTitle, kContent, image_indices, "");
206   EXPECT_CALL(page_factory_,
207               CreateDistillerPageMock(_)).WillOnce(
208                   CreateMockDistillerPage(list.get(), GURL(kURL)));
209   distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
210   distiller_->Init();
211   distiller_->DistillPage(
212       GURL(kURL),
213       base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
214   base::MessageLoop::current()->RunUntilIdle();
215   EXPECT_EQ(kTitle, article_proto_->title());
216   EXPECT_EQ(article_proto_->pages_size(), 1);
217   const DistilledPageProto& first_page = article_proto_->pages(0);
218   EXPECT_EQ(kContent, first_page.html());
219   EXPECT_EQ(kURL, first_page.url());
220   EXPECT_EQ(2, first_page.image_size());
221   EXPECT_EQ(kImageData[0], first_page.image(0).data());
222   EXPECT_EQ(GetImageName(1, 0), first_page.image(0).name());
223   EXPECT_EQ(kImageData[1], first_page.image(1).data());
224   EXPECT_EQ(GetImageName(1, 1), first_page.image(1).name());
225 }
226
227 TEST_F(DistillerTest, DistillMultiplePages) {
228   base::MessageLoopForUI loop;
229   const int kNumPages = 8;
230   vector<int> image_indices[kNumPages];
231   string content[kNumPages];
232   string page_urls[kNumPages];
233   scoped_ptr<base::ListValue> list[kNumPages];
234
235   int next_image_number = 0;
236
237   for (int page_num = 0; page_num < kNumPages; ++page_num) {
238     // Each page has different number of images.
239     int tot_images = (page_num + kTotalImages) % (kTotalImages + 1);
240     for (int img_num = 0; img_num < tot_images; img_num++) {
241       image_indices[page_num].push_back(next_image_number);
242       next_image_number = (next_image_number + 1) % kTotalImages;
243     }
244
245     page_urls[page_num] = "http://a.com/" + base::IntToString(page_num);
246     content[page_num] = "Content for page:" + base::IntToString(page_num);
247   }
248   for (int i = 0; i < kNumPages; ++i) {
249     string next_page_url = "";
250     if (i + 1 < kNumPages)
251       next_page_url = page_urls[i + 1];
252
253     list[i] = CreateDistilledValueReturnedFromJS(
254         kTitle, content[i], image_indices[i], next_page_url);
255   }
256
257   EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
258       .WillOnce(CreateMockDistillerPages(list, page_urls, kNumPages, 0));
259
260   distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
261   distiller_->Init();
262   distiller_->DistillPage(
263       GURL(page_urls[0]),
264       base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
265   base::MessageLoop::current()->RunUntilIdle();
266   EXPECT_EQ(kTitle, article_proto_->title());
267   EXPECT_EQ(article_proto_->pages_size(), kNumPages);
268   for (int page_num = 0; page_num < kNumPages; ++page_num) {
269     const DistilledPageProto& page = article_proto_->pages(page_num);
270     EXPECT_EQ(content[page_num], page.html());
271     EXPECT_EQ(page_urls[page_num], page.url());
272     EXPECT_EQ(image_indices[page_num].size(),
273               static_cast<size_t>(page.image_size()));
274     for (size_t img_num = 0; img_num < image_indices[page_num].size();
275          ++img_num) {
276       EXPECT_EQ(kImageData[image_indices[page_num][img_num]],
277                 page.image(img_num).data());
278       EXPECT_EQ(GetImageName(page_num + 1, img_num),
279                 page.image(img_num).name());
280     }
281   }
282 }
283
284 TEST_F(DistillerTest, DistillLinkLoop) {
285   base::MessageLoopForUI loop;
286   // Create a loop, the next page is same as the current page. This could
287   // happen if javascript misparses a next page link.
288   scoped_ptr<base::ListValue> list =
289       CreateDistilledValueReturnedFromJS(kTitle, kContent, vector<int>(), kURL);
290   EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
291       .WillOnce(CreateMockDistillerPage(list.get(), GURL(kURL)));
292   distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
293   distiller_->Init();
294   distiller_->DistillPage(
295       GURL(kURL),
296       base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
297   base::MessageLoop::current()->RunUntilIdle();
298   EXPECT_EQ(kTitle, article_proto_->title());
299   EXPECT_EQ(article_proto_->pages_size(), 1);
300 }
301
302 TEST_F(DistillerTest, CheckMaxPageLimit) {
303   base::MessageLoopForUI loop;
304   const size_t kMaxPagesInArticle = 10;
305   string page_urls[kMaxPagesInArticle];
306   scoped_ptr<base::ListValue> list[kMaxPagesInArticle];
307
308   // Note: Next page url of the last page of article is set. So distiller will
309   // try to do kMaxPagesInArticle + 1 calls if the max article limit does not
310   // work.
311   string url_prefix = "http://a.com/";
312   for (size_t page_num = 0; page_num < kMaxPagesInArticle; ++page_num) {
313     page_urls[page_num] = url_prefix + base::IntToString(page_num + 1);
314     string content = "Content for page:" + base::IntToString(page_num);
315     string next_page_url = url_prefix + base::IntToString(page_num + 2);
316     list[page_num] = CreateDistilledValueReturnedFromJS(
317         kTitle, content, vector<int>(), next_page_url);
318   }
319
320   EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
321       .WillOnce(CreateMockDistillerPages(
322           list, page_urls, static_cast<int>(kMaxPagesInArticle), 0));
323
324   distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
325
326   distiller_->SetMaxNumPagesInArticle(kMaxPagesInArticle);
327
328   distiller_->Init();
329   distiller_->DistillPage(
330       GURL(page_urls[0]),
331       base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
332   base::MessageLoop::current()->RunUntilIdle();
333   EXPECT_EQ(kTitle, article_proto_->title());
334   EXPECT_EQ(kMaxPagesInArticle,
335             static_cast<size_t>(article_proto_->pages_size()));
336
337   // Now check if distilling an article with exactly the page limit works by
338   // resetting the next page url of the last page of the article.
339   list[kMaxPagesInArticle - 1] =
340       CreateDistilledValueReturnedFromJS(kTitle, "Content", vector<int>(), "");
341   EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
342       .WillOnce(CreateMockDistillerPages(
343           list, page_urls, static_cast<int>(kMaxPagesInArticle), 0));
344
345   distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
346   distiller_->SetMaxNumPagesInArticle(kMaxPagesInArticle);
347
348   distiller_->Init();
349   distiller_->DistillPage(
350       GURL(page_urls[0]),
351       base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
352   base::MessageLoop::current()->RunUntilIdle();
353   EXPECT_EQ(kTitle, article_proto_->title());
354   EXPECT_EQ(kMaxPagesInArticle,
355             static_cast<size_t>(article_proto_->pages_size()));
356
357   // Now check if distilling an article with exactly the page limit works by
358   // resetting the next page url of the last page of the article.
359   list[kMaxPagesInArticle - 1] =
360       CreateDistilledValueReturnedFromJS(kTitle, "Content", vector<int>(), "");
361   EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
362       .WillOnce(CreateMockDistillerPages(
363           list, page_urls, static_cast<int>(kMaxPagesInArticle), 0));
364
365   distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
366   distiller_->SetMaxNumPagesInArticle(kMaxPagesInArticle);
367
368   distiller_->Init();
369   distiller_->DistillPage(
370       GURL(page_urls[0]),
371       base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
372   base::MessageLoop::current()->RunUntilIdle();
373   EXPECT_EQ(kTitle, article_proto_->title());
374   EXPECT_EQ(kMaxPagesInArticle,
375             static_cast<size_t>(article_proto_->pages_size()));
376 }
377
378 TEST_F(DistillerTest, SinglePageDistillationFailure) {
379   base::MessageLoopForUI loop;
380   // To simulate failure return a null value.
381   scoped_ptr<base::Value> nullValue(base::Value::CreateNullValue());
382   EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
383       .WillOnce(CreateMockDistillerPage(nullValue.get(), GURL(kURL)));
384   distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
385   distiller_->Init();
386   distiller_->DistillPage(
387       GURL(kURL),
388       base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
389   base::MessageLoop::current()->RunUntilIdle();
390   EXPECT_EQ("", article_proto_->title());
391   EXPECT_EQ(0, article_proto_->pages_size());
392 }
393
394 TEST_F(DistillerTest, MultiplePagesDistillationFailure) {
395   base::MessageLoopForUI loop;
396   const int kNumPages = 8;
397   string content[kNumPages];
398   string page_urls[kNumPages];
399   scoped_ptr<base::Value> distilled_values[kNumPages];
400   // The page number of the failed page.
401   int failed_page_num = 3;
402   string url_prefix = "http://a.com/";
403   for (int page_num = 0; page_num < kNumPages; ++page_num) {
404     page_urls[page_num] = url_prefix + base::IntToString(page_num);
405     content[page_num] = "Content for page:" + base::IntToString(page_num);
406     string next_page_url = url_prefix + base::IntToString(page_num + 1);
407     if (page_num != failed_page_num) {
408       distilled_values[page_num] = CreateDistilledValueReturnedFromJS(
409           kTitle, content[page_num], vector<int>(), next_page_url);
410     } else {
411       distilled_values[page_num].reset(base::Value::CreateNullValue());
412     }
413   }
414
415   // Expect only calls till the failed page number.
416   EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
417       .WillOnce(CreateMockDistillerPages(
418           distilled_values, page_urls, failed_page_num + 1, 0));
419
420   distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
421   distiller_->Init();
422   distiller_->DistillPage(
423       GURL(page_urls[0]),
424       base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
425   base::MessageLoop::current()->RunUntilIdle();
426   EXPECT_EQ(kTitle, article_proto_->title());
427   EXPECT_EQ(article_proto_->pages_size(), failed_page_num);
428   for (int page_num = 0; page_num < failed_page_num; ++page_num) {
429     const DistilledPageProto& page = article_proto_->pages(page_num);
430     EXPECT_EQ(content[page_num], page.html());
431     EXPECT_EQ(page_urls[page_num], page.url());
432   }
433 }
434
435 TEST_F(DistillerTest, DistillPreviousPage) {
436   base::MessageLoopForUI loop;
437   const int kNumPages = 8;
438   string content[kNumPages];
439   string page_urls[kNumPages];
440   scoped_ptr<base::Value> distilled_values[kNumPages];
441
442   // The page number of the article on which distillation starts.
443   int start_page_number = 3;
444   string url_prefix = "http://a.com/";
445   for (int page_no = 0; page_no < kNumPages; ++page_no) {
446     page_urls[page_no] = url_prefix + base::IntToString(page_no);
447     content[page_no] = "Content for page:" + base::IntToString(page_no);
448     string next_page_url = (page_no + 1 < kNumPages)
449                                ? url_prefix + base::IntToString(page_no + 1)
450                                : "";
451     string prev_page_url = (page_no > 0) ? page_urls[page_no - 1] : "";
452     distilled_values[page_no] = CreateDistilledValueReturnedFromJS(
453         kTitle, content[page_no], vector<int>(), next_page_url, prev_page_url);
454   }
455
456   EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
457       .WillOnce(CreateMockDistillerPages(
458           distilled_values, page_urls, kNumPages, start_page_number));
459
460   distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
461   distiller_->Init();
462   distiller_->DistillPage(
463       GURL(page_urls[start_page_number]),
464       base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
465   base::MessageLoop::current()->RunUntilIdle();
466   EXPECT_EQ(kTitle, article_proto_->title());
467   EXPECT_EQ(kNumPages, article_proto_->pages_size());
468   for (int page_no = 0; page_no < kNumPages; ++page_no) {
469     const DistilledPageProto& page = article_proto_->pages(page_no);
470     EXPECT_EQ(content[page_no], page.html());
471     EXPECT_EQ(page_urls[page_no], page.url());
472   }
473 }
474
475 }  // namespace dom_distiller