Upstream version 6.35.121.0
[platform/framework/web/crosswalk.git] / src / components / dom_distiller / standalone / content_extractor.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <sstream>
6
7 #include "base/command_line.h"
8 #include "base/files/scoped_temp_dir.h"
9 #include "base/message_loop/message_loop.h"
10 #include "base/path_service.h"
11 #include "base/run_loop.h"
12 #include "components/dom_distiller/content/distiller_page_web_contents.h"
13 #include "components/dom_distiller/core/distiller.h"
14 #include "components/dom_distiller/core/dom_distiller_database.h"
15 #include "components/dom_distiller/core/dom_distiller_service.h"
16 #include "components/dom_distiller/core/dom_distiller_store.h"
17 #include "components/dom_distiller/core/proto/distilled_article.pb.h"
18 #include "components/dom_distiller/core/proto/distilled_page.pb.h"
19 #include "components/dom_distiller/core/task_tracker.h"
20 #include "content/public/browser/browser_context.h"
21 #include "content/public/browser/browser_thread.h"
22 #include "content/public/test/content_browser_test.h"
23 #include "content/shell/browser/shell.h"
24 #include "net/dns/mock_host_resolver.h"
25 #include "ui/base/resource/resource_bundle.h"
26
27 using content::ContentBrowserTest;
28
29 namespace dom_distiller {
30
31 namespace {
32
// Name of the command-line switch that carries the URL to distill. Declared
// as a const array so the constant itself cannot be reseated at runtime.
const char kUrlSwitch[] = "url";
34
35 scoped_ptr<DomDistillerService> CreateDomDistillerService(
36     content::BrowserContext* context,
37     const base::FilePath& db_path) {
38   scoped_refptr<base::SequencedTaskRunner> background_task_runner =
39       content::BrowserThread::GetBlockingPool()->GetSequencedTaskRunner(
40           content::BrowserThread::GetBlockingPool()->GetSequenceToken());
41
42   // TODO(cjhopman): use an in-memory database instead of an on-disk one with
43   // temporary directory.
44   scoped_ptr<DomDistillerDatabase> db(
45       new DomDistillerDatabase(background_task_runner));
46   scoped_ptr<DomDistillerStore> dom_distiller_store(new DomDistillerStore(
47       db.PassAs<DomDistillerDatabaseInterface>(), db_path));
48
49   scoped_ptr<DistillerPageFactory> distiller_page_factory(
50       new DistillerPageWebContentsFactory(context));
51   scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory(
52       new DistillerURLFetcherFactory(context->GetRequestContext()));
53   scoped_ptr<DistillerFactory> distiller_factory(new DistillerFactoryImpl(
54       distiller_page_factory.Pass(), distiller_url_fetcher_factory.Pass()));
55
56   return scoped_ptr<DomDistillerService>(new DomDistillerService(
57       dom_distiller_store.PassAs<DomDistillerStoreInterface>(),
58       distiller_factory.Pass()));
59 }
60
61 void AddComponentsResources() {
62   base::FilePath pak_file;
63   base::FilePath pak_dir;
64   PathService::Get(base::DIR_MODULE, &pak_dir);
65   pak_file = pak_dir.Append(FILE_PATH_LITERAL("components_resources.pak"));
66   ui::ResourceBundle::GetSharedInstance().AddDataPackFromPath(
67       pak_file, ui::SCALE_FACTOR_NONE);
68 }
69
70 void LogArticle(const DistilledArticleProto& article_proto) {
71   std::stringstream output;
72   output << "Article Title: " << article_proto.title() << std::endl;
73   output << "# of pages: " << article_proto.pages_size() << std::endl;
74   for (int i = 0; i < article_proto.pages_size(); ++i) {
75     const DistilledPageProto& page = article_proto.pages(i);
76     output << "Page " << i << std::endl;
77     output << "URL: " << page.url() << std::endl;
78     output << "Content: " << page.html() << std::endl;
79   }
80   VLOG(0) << output.str();
81 }
82
83 }  // namespace
84
85 class ContentExtractionRequest : public ViewRequestDelegate {
86  public:
87   void Start(DomDistillerService* service, base::Closure finished_callback) {
88     finished_callback_ = finished_callback;
89     viewer_handle_ = service->ViewUrl(this, url_);
90   }
91
92   DistilledArticleProto GetArticleCopy() {
93     return *article_proto_;
94   }
95
96   static scoped_ptr<ContentExtractionRequest> CreateForCommandLine(
97       const CommandLine& command_line) {
98     GURL url;
99     if (command_line.HasSwitch(kUrlSwitch)) {
100       std::string url_string = command_line.GetSwitchValueASCII(kUrlSwitch);
101       url = GURL(url_string);
102     }
103     if (!url.is_valid()) {
104       ADD_FAILURE() << "No valid url provided";
105       return scoped_ptr<ContentExtractionRequest>();
106     }
107     return scoped_ptr<ContentExtractionRequest>(
108         new ContentExtractionRequest(url));
109   }
110
111  private:
112   ContentExtractionRequest(const GURL& url) : url_(url) {}
113
114   virtual void OnArticleUpdated(ArticleDistillationUpdate article_update)
115       OVERRIDE {}
116
117   virtual void OnArticleReady(const DistilledArticleProto* article_proto)
118       OVERRIDE {
119     article_proto_ = article_proto;
120     base::MessageLoop::current()->PostTask(
121         FROM_HERE,
122         finished_callback_);
123   }
124
125   const DistilledArticleProto* article_proto_;
126   scoped_ptr<ViewerHandle> viewer_handle_;
127   GURL url_;
128   base::Closure finished_callback_;
129 };
130
131 class ContentExtractor : public ContentBrowserTest {
132   // Change behavior of the default host resolver to avoid DNS lookup errors, so
133   // we can make network calls.
134   virtual void SetUpOnMainThread() OVERRIDE {
135     EnableDNSLookupForThisTest();
136     CHECK(db_dir_.CreateUniqueTempDir());
137     AddComponentsResources();
138   }
139
140   virtual void TearDownOnMainThread() OVERRIDE {
141     DisableDNSLookupForThisTest();
142   }
143
144  protected:
145   // Creates the DomDistillerService and creates and starts the extraction
146   // request.
147   void Start() {
148     content::BrowserContext* context =
149         shell()->web_contents()->GetBrowserContext();
150     service_ = CreateDomDistillerService(context,
151                                          db_dir_.path());
152     const CommandLine& command_line = *CommandLine::ForCurrentProcess();
153     request_ = ContentExtractionRequest::CreateForCommandLine(command_line);
154     request_->Start(
155         service_.get(),
156         base::Bind(&ContentExtractor::Finish, base::Unretained(this)));
157   }
158
159  private:
160   // Change behavior of the default host resolver to allow DNS lookup
161   // to proceed instead of being blocked by the test infrastructure.
162   void EnableDNSLookupForThisTest() {
163     // mock_host_resolver_override_ takes ownership of the resolver.
164     scoped_refptr<net::RuleBasedHostResolverProc> resolver =
165         new net::RuleBasedHostResolverProc(host_resolver());
166     resolver->AllowDirectLookup("*");
167     mock_host_resolver_override_.reset(
168         new net::ScopedDefaultHostResolverProc(resolver.get()));
169   }
170
171   // We need to reset the DNS lookup when we finish, or the test will fail.
172   void DisableDNSLookupForThisTest() {
173     mock_host_resolver_override_.reset();
174   }
175
176   void Finish() {
177     LogArticle(request_->GetArticleCopy());
178     request_.reset();
179     service_.reset();
180     base::MessageLoop::current()->PostTask(
181         FROM_HERE, base::MessageLoop::QuitWhenIdleClosure());
182   }
183
184   base::ScopedTempDir db_dir_;
185   scoped_ptr<net::ScopedDefaultHostResolverProc> mock_host_resolver_override_;
186   scoped_ptr<DomDistillerService> service_;
187   scoped_ptr<ContentExtractionRequest> request_;
188 };
189
190 IN_PROC_BROWSER_TEST_F(ContentExtractor, MANUAL_ExtractUrl) {
191   Start();
192   base::RunLoop().Run();
193 }
194
195 }  // namespace dom_distiller