1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
10 #include "base/bind_helpers.h"
11 #include "base/location.h"
12 #include "base/memory/scoped_ptr.h"
13 #include "base/message_loop/message_loop.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/values.h"
16 #include "components/dom_distiller/core/distiller.h"
17 #include "components/dom_distiller/core/distiller_page.h"
18 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
19 #include "components/dom_distiller/core/proto/distilled_page.pb.h"
20 #include "net/url_request/url_request_context_getter.h"
21 #include "testing/gmock/include/gmock/gmock.h"
22 #include "testing/gtest/include/gtest/gtest.h"
26 using::testing::Invoke;
27 using::testing::Return;
// Canned article data shared by the tests in this file.
const char kTitle[] = "Title";
const char kContent[] = "Content";
const char kURL[] = "http://a.com/";

// Number of entries in kImageURLs / kImageData. The two arrays are parallel:
// kImageData[i] is the payload served for kImageURLs[i].
const size_t kTotalImages = 2;

// The pointers themselves are const as well as the characters they point to,
// so the fixtures cannot be reseated by accident and have internal linkage.
const char* const kImageURLs[kTotalImages] = {"http://a.com/img1.jpg",
                                              "http://a.com/img2.jpg"};
const char* const kImageData[kTotalImages] = {"abcde", "12345"};
39 const string GetImageName(int page_num, int image_num) {
40 return base::IntToString(page_num) + "_" + base::IntToString(image_num);
43 scoped_ptr<base::ListValue> CreateDistilledValueReturnedFromJS(
45 const string& content,
46 const vector<int>& image_indices,
47 const string& next_page_url,
48 const string& prev_page_url = "") {
49 scoped_ptr<base::ListValue> list(new base::ListValue());
51 list->AppendString(title);
52 list->AppendString(content);
53 list->AppendString(next_page_url);
54 list->AppendString(prev_page_url);
55 for (size_t i = 0; i < image_indices.size(); ++i) {
56 list->AppendString(kImageURLs[image_indices[i]]);
63 namespace dom_distiller {
65 class TestDistillerURLFetcher : public DistillerURLFetcher {
67 TestDistillerURLFetcher() : DistillerURLFetcher(NULL) {
68 responses_[kImageURLs[0]] = string(kImageData[0]);
69 responses_[kImageURLs[1]] = string(kImageData[1]);
72 void CallCallback(string url, const URLFetcherCallback& callback) {
73 callback.Run(responses_[url]);
76 virtual void FetchURL(const string& url,
77 const URLFetcherCallback& callback) OVERRIDE {
78 ASSERT_TRUE(base::MessageLoop::current());
79 base::MessageLoop::current()->PostTask(
81 base::Bind(&TestDistillerURLFetcher::CallCallback,
82 base::Unretained(this), url, callback));
85 std::map<string, string> responses_;
89 class TestDistillerURLFetcherFactory : public DistillerURLFetcherFactory {
91 TestDistillerURLFetcherFactory() : DistillerURLFetcherFactory(NULL) {}
92 virtual ~TestDistillerURLFetcherFactory() {}
93 virtual DistillerURLFetcher* CreateDistillerURLFetcher() const OVERRIDE {
94 return new TestDistillerURLFetcher();
99 class MockDistillerPage : public DistillerPage {
101 MOCK_METHOD0(InitImpl, void());
102 MOCK_METHOD1(LoadURLImpl, void(const GURL& gurl));
103 MOCK_METHOD1(ExecuteJavaScriptImpl, void(const string& script));
105 explicit MockDistillerPage(DistillerPage::Delegate* delegate)
106 : DistillerPage(delegate) {}
110 class MockDistillerPageFactory : public DistillerPageFactory {
113 CreateDistillerPageMock,
114 DistillerPage*(DistillerPage::Delegate* delegate));
116 virtual scoped_ptr<DistillerPage> CreateDistillerPage(
117 DistillerPage::Delegate* delegate) const OVERRIDE {
118 return scoped_ptr<DistillerPage>(CreateDistillerPageMock(delegate));
122 class DistillerTest : public testing::Test {
124 virtual ~DistillerTest() {}
125 void OnDistillPageDone(scoped_ptr<DistilledArticleProto> proto) {
126 article_proto_ = proto.Pass();
130 scoped_ptr<DistillerImpl> distiller_;
131 scoped_ptr<DistilledArticleProto> article_proto_;
132 MockDistillerPageFactory page_factory_;
133 TestDistillerURLFetcherFactory url_fetcher_factory_;
136 ACTION_P3(DistillerPageOnExecuteJavaScriptDone, distiller_page, url, list) {
137 distiller_page->OnExecuteJavaScriptDone(url, list);
140 ACTION_P2(CreateMockDistillerPage, list, kurl) {
141 DistillerPage::Delegate* delegate = arg0;
142 MockDistillerPage* distiller_page = new MockDistillerPage(delegate);
143 EXPECT_CALL(*distiller_page, InitImpl());
144 EXPECT_CALL(*distiller_page, LoadURLImpl(kurl))
145 .WillOnce(testing::InvokeWithoutArgs(distiller_page,
146 &DistillerPage::OnLoadURLDone));
147 EXPECT_CALL(*distiller_page, ExecuteJavaScriptImpl(_)).WillOnce(
148 DistillerPageOnExecuteJavaScriptDone(distiller_page, kurl, list));
149 return distiller_page;
152 ACTION_P4(CreateMockDistillerPages, lists, kurls, num_pages, start_page_num) {
153 DistillerPage::Delegate* delegate = arg0;
154 MockDistillerPage* distiller_page = new MockDistillerPage(delegate);
155 EXPECT_CALL(*distiller_page, InitImpl());
157 testing::InSequence s;
158 // Distiller prefers distilling past pages first. E.g. when distillation
159 // starts on page 2 then pages are distilled in the order: 2, 1, 0, 3, 4.
160 vector<int> page_nums;
161 for (int page = start_page_num; page >= 0; --page)
162 page_nums.push_back(page);
163 for (int page = start_page_num + 1; page < num_pages; ++page)
164 page_nums.push_back(page);
166 for (size_t page_num = 0; page_num < page_nums.size(); ++page_num) {
167 int page = page_nums[page_num];
168 GURL url = GURL(kurls[page]);
169 EXPECT_CALL(*distiller_page, LoadURLImpl(url))
170 .WillOnce(testing::InvokeWithoutArgs(distiller_page,
171 &DistillerPage::OnLoadURLDone));
172 EXPECT_CALL(*distiller_page, ExecuteJavaScriptImpl(_))
173 .WillOnce(DistillerPageOnExecuteJavaScriptDone(
174 distiller_page, url, lists[page].get()));
177 return distiller_page;
180 TEST_F(DistillerTest, DistillPage) {
181 base::MessageLoopForUI loop;
182 scoped_ptr<base::ListValue> list =
183 CreateDistilledValueReturnedFromJS(kTitle, kContent, vector<int>(), "");
184 EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
185 .WillOnce(CreateMockDistillerPage(list.get(), GURL(kURL)));
186 distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
188 distiller_->DistillPage(
190 base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
191 base::MessageLoop::current()->RunUntilIdle();
192 EXPECT_EQ(kTitle, article_proto_->title());
193 EXPECT_EQ(article_proto_->pages_size(), 1);
194 const DistilledPageProto& first_page = article_proto_->pages(0);
195 EXPECT_EQ(kContent, first_page.html());
196 EXPECT_EQ(kURL, first_page.url());
199 TEST_F(DistillerTest, DistillPageWithImages) {
200 base::MessageLoopForUI loop;
201 vector<int> image_indices;
202 image_indices.push_back(0);
203 image_indices.push_back(1);
204 scoped_ptr<base::ListValue> list =
205 CreateDistilledValueReturnedFromJS(kTitle, kContent, image_indices, "");
206 EXPECT_CALL(page_factory_,
207 CreateDistillerPageMock(_)).WillOnce(
208 CreateMockDistillerPage(list.get(), GURL(kURL)));
209 distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
211 distiller_->DistillPage(
213 base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
214 base::MessageLoop::current()->RunUntilIdle();
215 EXPECT_EQ(kTitle, article_proto_->title());
216 EXPECT_EQ(article_proto_->pages_size(), 1);
217 const DistilledPageProto& first_page = article_proto_->pages(0);
218 EXPECT_EQ(kContent, first_page.html());
219 EXPECT_EQ(kURL, first_page.url());
220 EXPECT_EQ(2, first_page.image_size());
221 EXPECT_EQ(kImageData[0], first_page.image(0).data());
222 EXPECT_EQ(GetImageName(1, 0), first_page.image(0).name());
223 EXPECT_EQ(kImageData[1], first_page.image(1).data());
224 EXPECT_EQ(GetImageName(1, 1), first_page.image(1).name());
227 TEST_F(DistillerTest, DistillMultiplePages) {
228 base::MessageLoopForUI loop;
229 const int kNumPages = 8;
230 vector<int> image_indices[kNumPages];
231 string content[kNumPages];
232 string page_urls[kNumPages];
233 scoped_ptr<base::ListValue> list[kNumPages];
235 int next_image_number = 0;
237 for (int page_num = 0; page_num < kNumPages; ++page_num) {
238 // Each page has different number of images.
239 int tot_images = (page_num + kTotalImages) % (kTotalImages + 1);
240 for (int img_num = 0; img_num < tot_images; img_num++) {
241 image_indices[page_num].push_back(next_image_number);
242 next_image_number = (next_image_number + 1) % kTotalImages;
245 page_urls[page_num] = "http://a.com/" + base::IntToString(page_num);
246 content[page_num] = "Content for page:" + base::IntToString(page_num);
248 for (int i = 0; i < kNumPages; ++i) {
249 string next_page_url = "";
250 if (i + 1 < kNumPages)
251 next_page_url = page_urls[i + 1];
253 list[i] = CreateDistilledValueReturnedFromJS(
254 kTitle, content[i], image_indices[i], next_page_url);
257 EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
258 .WillOnce(CreateMockDistillerPages(list, page_urls, kNumPages, 0));
260 distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
262 distiller_->DistillPage(
264 base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
265 base::MessageLoop::current()->RunUntilIdle();
266 EXPECT_EQ(kTitle, article_proto_->title());
267 EXPECT_EQ(article_proto_->pages_size(), kNumPages);
268 for (int page_num = 0; page_num < kNumPages; ++page_num) {
269 const DistilledPageProto& page = article_proto_->pages(page_num);
270 EXPECT_EQ(content[page_num], page.html());
271 EXPECT_EQ(page_urls[page_num], page.url());
272 EXPECT_EQ(image_indices[page_num].size(),
273 static_cast<size_t>(page.image_size()));
274 for (size_t img_num = 0; img_num < image_indices[page_num].size();
276 EXPECT_EQ(kImageData[image_indices[page_num][img_num]],
277 page.image(img_num).data());
278 EXPECT_EQ(GetImageName(page_num + 1, img_num),
279 page.image(img_num).name());
284 TEST_F(DistillerTest, DistillLinkLoop) {
285 base::MessageLoopForUI loop;
286 // Create a loop, the next page is same as the current page. This could
287 // happen if javascript misparses a next page link.
288 scoped_ptr<base::ListValue> list =
289 CreateDistilledValueReturnedFromJS(kTitle, kContent, vector<int>(), kURL);
290 EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
291 .WillOnce(CreateMockDistillerPage(list.get(), GURL(kURL)));
292 distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
294 distiller_->DistillPage(
296 base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
297 base::MessageLoop::current()->RunUntilIdle();
298 EXPECT_EQ(kTitle, article_proto_->title());
299 EXPECT_EQ(article_proto_->pages_size(), 1);
302 TEST_F(DistillerTest, CheckMaxPageLimit) {
303 base::MessageLoopForUI loop;
304 const size_t kMaxPagesInArticle = 10;
305 string page_urls[kMaxPagesInArticle];
306 scoped_ptr<base::ListValue> list[kMaxPagesInArticle];
308 // Note: Next page url of the last page of article is set. So distiller will
309 // try to do kMaxPagesInArticle + 1 calls if the max article limit does not
311 string url_prefix = "http://a.com/";
312 for (size_t page_num = 0; page_num < kMaxPagesInArticle; ++page_num) {
313 page_urls[page_num] = url_prefix + base::IntToString(page_num + 1);
314 string content = "Content for page:" + base::IntToString(page_num);
315 string next_page_url = url_prefix + base::IntToString(page_num + 2);
316 list[page_num] = CreateDistilledValueReturnedFromJS(
317 kTitle, content, vector<int>(), next_page_url);
320 EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
321 .WillOnce(CreateMockDistillerPages(
322 list, page_urls, static_cast<int>(kMaxPagesInArticle), 0));
324 distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
326 distiller_->SetMaxNumPagesInArticle(kMaxPagesInArticle);
329 distiller_->DistillPage(
331 base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
332 base::MessageLoop::current()->RunUntilIdle();
333 EXPECT_EQ(kTitle, article_proto_->title());
334 EXPECT_EQ(kMaxPagesInArticle,
335 static_cast<size_t>(article_proto_->pages_size()));
337 // Now check if distilling an article with exactly the page limit works by
338 // resetting the next page url of the last page of the article.
339 list[kMaxPagesInArticle - 1] =
340 CreateDistilledValueReturnedFromJS(kTitle, "Content", vector<int>(), "");
341 EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
342 .WillOnce(CreateMockDistillerPages(
343 list, page_urls, static_cast<int>(kMaxPagesInArticle), 0));
345 distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
346 distiller_->SetMaxNumPagesInArticle(kMaxPagesInArticle);
349 distiller_->DistillPage(
351 base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
352 base::MessageLoop::current()->RunUntilIdle();
353 EXPECT_EQ(kTitle, article_proto_->title());
354 EXPECT_EQ(kMaxPagesInArticle,
355 static_cast<size_t>(article_proto_->pages_size()));
357 // Now check if distilling an article with exactly the page limit works by
358 // resetting the next page url of the last page of the article.
359 list[kMaxPagesInArticle - 1] =
360 CreateDistilledValueReturnedFromJS(kTitle, "Content", vector<int>(), "");
361 EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
362 .WillOnce(CreateMockDistillerPages(
363 list, page_urls, static_cast<int>(kMaxPagesInArticle), 0));
365 distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
366 distiller_->SetMaxNumPagesInArticle(kMaxPagesInArticle);
369 distiller_->DistillPage(
371 base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
372 base::MessageLoop::current()->RunUntilIdle();
373 EXPECT_EQ(kTitle, article_proto_->title());
374 EXPECT_EQ(kMaxPagesInArticle,
375 static_cast<size_t>(article_proto_->pages_size()));
378 TEST_F(DistillerTest, SinglePageDistillationFailure) {
379 base::MessageLoopForUI loop;
380 // To simulate failure return a null value.
381 scoped_ptr<base::Value> nullValue(base::Value::CreateNullValue());
382 EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
383 .WillOnce(CreateMockDistillerPage(nullValue.get(), GURL(kURL)));
384 distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
386 distiller_->DistillPage(
388 base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
389 base::MessageLoop::current()->RunUntilIdle();
390 EXPECT_EQ("", article_proto_->title());
391 EXPECT_EQ(0, article_proto_->pages_size());
394 TEST_F(DistillerTest, MultiplePagesDistillationFailure) {
395 base::MessageLoopForUI loop;
396 const int kNumPages = 8;
397 string content[kNumPages];
398 string page_urls[kNumPages];
399 scoped_ptr<base::Value> distilled_values[kNumPages];
400 // The page number of the failed page.
401 int failed_page_num = 3;
402 string url_prefix = "http://a.com/";
403 for (int page_num = 0; page_num < kNumPages; ++page_num) {
404 page_urls[page_num] = url_prefix + base::IntToString(page_num);
405 content[page_num] = "Content for page:" + base::IntToString(page_num);
406 string next_page_url = url_prefix + base::IntToString(page_num + 1);
407 if (page_num != failed_page_num) {
408 distilled_values[page_num] = CreateDistilledValueReturnedFromJS(
409 kTitle, content[page_num], vector<int>(), next_page_url);
411 distilled_values[page_num].reset(base::Value::CreateNullValue());
415 // Expect only calls till the failed page number.
416 EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
417 .WillOnce(CreateMockDistillerPages(
418 distilled_values, page_urls, failed_page_num + 1, 0));
420 distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
422 distiller_->DistillPage(
424 base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
425 base::MessageLoop::current()->RunUntilIdle();
426 EXPECT_EQ(kTitle, article_proto_->title());
427 EXPECT_EQ(article_proto_->pages_size(), failed_page_num);
428 for (int page_num = 0; page_num < failed_page_num; ++page_num) {
429 const DistilledPageProto& page = article_proto_->pages(page_num);
430 EXPECT_EQ(content[page_num], page.html());
431 EXPECT_EQ(page_urls[page_num], page.url());
435 TEST_F(DistillerTest, DistillPreviousPage) {
436 base::MessageLoopForUI loop;
437 const int kNumPages = 8;
438 string content[kNumPages];
439 string page_urls[kNumPages];
440 scoped_ptr<base::Value> distilled_values[kNumPages];
442 // The page number of the article on which distillation starts.
443 int start_page_number = 3;
444 string url_prefix = "http://a.com/";
445 for (int page_no = 0; page_no < kNumPages; ++page_no) {
446 page_urls[page_no] = url_prefix + base::IntToString(page_no);
447 content[page_no] = "Content for page:" + base::IntToString(page_no);
448 string next_page_url = (page_no + 1 < kNumPages)
449 ? url_prefix + base::IntToString(page_no + 1)
451 string prev_page_url = (page_no > 0) ? page_urls[page_no - 1] : "";
452 distilled_values[page_no] = CreateDistilledValueReturnedFromJS(
453 kTitle, content[page_no], vector<int>(), next_page_url, prev_page_url);
456 EXPECT_CALL(page_factory_, CreateDistillerPageMock(_))
457 .WillOnce(CreateMockDistillerPages(
458 distilled_values, page_urls, kNumPages, start_page_number));
460 distiller_.reset(new DistillerImpl(page_factory_, url_fetcher_factory_));
462 distiller_->DistillPage(
463 GURL(page_urls[start_page_number]),
464 base::Bind(&DistillerTest::OnDistillPageDone, base::Unretained(this)));
465 base::MessageLoop::current()->RunUntilIdle();
466 EXPECT_EQ(kTitle, article_proto_->title());
467 EXPECT_EQ(kNumPages, article_proto_->pages_size());
468 for (int page_no = 0; page_no < kNumPages; ++page_no) {
469 const DistilledPageProto& page = article_proto_->pages(page_no);
470 EXPECT_EQ(content[page_no], page.html());
471 EXPECT_EQ(page_urls[page_no], page.url());
475 } // namespace dom_distiller