1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/translate/content/renderer/translate_helper.h"
8 #include "base/compiler_specific.h"
9 #include "base/logging.h"
10 #include "base/message_loop/message_loop.h"
11 #include "base/metrics/histogram.h"
12 #include "base/strings/string16.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "components/translate/content/common/translate_messages.h"
16 #include "components/translate/core/common/translate_constants.h"
17 #include "components/translate/core/common/translate_metrics.h"
18 #include "components/translate/core/common/translate_util.h"
19 #include "components/translate/core/language_detection/language_detection_util.h"
20 #include "content/public/common/content_constants.h"
21 #include "content/public/common/url_constants.h"
22 #include "content/public/renderer/render_thread.h"
23 #include "content/public/renderer/render_view.h"
24 #include "ipc/ipc_platform_file.h"
25 #include "third_party/WebKit/public/web/WebDocument.h"
26 #include "third_party/WebKit/public/web/WebElement.h"
27 #include "third_party/WebKit/public/web/WebFrame.h"
28 #include "third_party/WebKit/public/web/WebNode.h"
29 #include "third_party/WebKit/public/web/WebNodeList.h"
30 #include "third_party/WebKit/public/web/WebScriptSource.h"
31 #include "third_party/WebKit/public/web/WebView.h"
32 #include "third_party/WebKit/public/web/WebWidget.h"
34 #include "v8/include/v8.h"
36 using base::ASCIIToUTF16;
37 using blink::WebDocument;
38 using blink::WebElement;
39 using blink::WebFrame;
41 using blink::WebNodeList;
42 using blink::WebScriptSource;
43 using blink::WebSecurityOrigin;
44 using blink::WebString;
45 using blink::WebVector;
50 // The delay in milliseconds that we'll wait before checking to see if the
51 // translate library injected in the page is ready.
// TranslatePageImpl() multiplies this value by the attempt count, so the wait
// grows with each retry.
52 const int kTranslateInitCheckDelayMs = 150;
54 // The maximum number of times we'll check to see if the translate library
55 // injected in the page is ready.
// Once this many checks have failed, TranslatePageImpl() reports
// TranslateErrors::INITIALIZATION_ERROR to the browser.
56 const int kMaxTranslateInitCheckAttempts = 5;
58 // The delay we wait in milliseconds before checking whether the translation has
// finished or failed (the interval between CheckTranslateStatus() runs).
60 const int kTranslateStatusCheckDelayMs = 400;
62 // Language name passed to the Translate element for it to detect the language.
63 const char kAutoDetectionLanguage[] = "auto";
65 // Content-security-policy applied to the isolated world that runs the translate script.
66 const char kContentSecurityPolicy[] = "script-src 'self' 'unsafe-eval'";
68 // Whether or not we have set the CLD callback yet.
// This is a namespace-scope variable rather than a TranslateHelper member, so
// it is shared by all TranslateHelper instances; it is only ever set to true
// (in SendCldDataRequest()).
// NOTE(review): the callback registered under this flag is bound to one
// instance's weak pointer and registered on that instance's
// |cld_data_provider_|. It looks like later instances, or the same instance
// after InvalidateWeakPtrs(), would never re-register - confirm.
69 bool g_cld_callback_set = false;
75 ////////////////////////////////////////////////////////////////////////////////
76 // TranslateHelper, public:
// Constructs a helper that observes |render_view|.
// The initial state is: no translation pending, CLD data polling neither
// started nor canceled, and no deferred page capture (|deferred_page_seq_no_|
// uses -1 as its "nothing deferred" value, see PageCapturedImpl()).
// A CLD data provider is created for this instance via
// CreateRendererCldDataProviderFor(this).
// NOTE(review): the parameters between |render_view| and |extension_scheme|
// (e.g. the source of |extension_group|) and some member initializers are not
// visible in this listing.
78 TranslateHelper::TranslateHelper(content::RenderView* render_view,
81 const std::string& extension_scheme)
82 : content::RenderViewObserver(render_view),
84 translation_pending_(false),
85 cld_data_provider_(translate::CreateRendererCldDataProviderFor(this)),
86 cld_data_polling_started_(false),
87 cld_data_polling_canceled_(false),
88 deferred_page_capture_(false),
89 deferred_page_seq_no_(-1),
91 extension_group_(extension_group),
92 extension_scheme_(extension_scheme),
93 weak_method_factory_(this) {
// Cancels any pending translation and stops CLD data polling on destruction.
// CancelPendingTranslation() already calls CancelCldDataPolling(), so the
// second call below only sets the same flag again.
96 TranslateHelper::~TranslateHelper() {
97 CancelPendingTranslation();
98 CancelCldDataPolling();
// Prepares the helper for a page at |url|: sends |page_seq_no_| to the browser,
// discards any page capture deferred for a previous page, and starts polling
// for CLD data unless polling was already started or |url| uses one of the
// schemes checked below.
// NOTE(review): the bodies of the |if| statements below (and any statement that
// updates |page_seq_no_| before it is sent) are not visible in this listing;
// each guard presumably returns early - confirm against the full file.
101 void TranslateHelper::PrepareForUrl(const GURL& url) {
103 Send(new ChromeViewHostMsg_TranslateAssignedSequenceNumber(
104 routing_id(), page_seq_no_));
// Reset the deferred-capture state to the same values the constructor uses.
105 deferred_page_capture_ = false;
106 deferred_page_seq_no_ = -1;
107 deferred_contents_.clear();
108 if (cld_data_polling_started_)
111 // TODO(andrewhayden): Refactor translate_manager.cc's IsTranslatableURL to
112 // components/translate/core/common/translate_util.cc, and ignore any URL
113 // that fails that check. This will require moving unit tests and rewiring
114 // other function calls as well, so for now replicate the logic here.
117 if (url.SchemeIs(content::kChromeUIScheme))
119 if (url.SchemeIs(content::kChromeDevToolsScheme))
121 if (url.SchemeIs(url::kFtpScheme))
123 if (url.SchemeIs(extension_scheme_.c_str()))
126 // Start polling for CLD data.
127 cld_data_polling_started_ = true;
// First argument is the delay before the next poll, second is
// |next_delay_millis| (see SendCldDataRequest()).
128 TranslateHelper::SendCldDataRequest(0, 1000);
// Entry point for newly captured page text: forwards |contents| to
// PageCapturedImpl() tagged with the current |page_seq_no_|.
131 void TranslateHelper::PageCaptured(const base::string16& contents) {
132 PageCapturedImpl(page_seq_no_, contents);
// Determines the language of the captured page text |contents| and reports it
// to the browser.
//   |page_seq_no| - sequence number the capture belongs to; it is compared
//                   against the current |page_seq_no_|.
//   |contents|    - captured page text handed to DeterminePageLanguage().
// If CLD data is not available yet, the capture is stored in the deferred_*
// members so that OnCldDataAvailable() can replay it later.
// NOTE(review): several lines of this function (guard bodies, closing braces,
// some message arguments) are not visible in this listing.
135 void TranslateHelper::PageCapturedImpl(int page_seq_no,
136 const base::string16& contents) {
137 // Get the document language as set by WebKit from the http-equiv
138 // meta tag for "content-language". This may or may not also
139 // have a value derived from the actual Content-Language HTTP
140 // header. The two actually have different meanings (despite the
141 // original intent of http-equiv to be an equivalent) with the former
142 // being the language of the document and the latter being the
143 // language of the intended audience (a distinction really only
144 // relevant for things like language textbooks). This distinction
145 // shouldn't affect translation.
146 WebFrame* main_frame = GetMainFrame();
147 if (!main_frame || page_seq_no_ != page_seq_no)
150 if (!cld_data_provider_->IsCldDataAvailable()) {
151 // We're in dynamic mode and CLD data isn't loaded. Retry when CLD data
152 // is loaded, if ever.
153 deferred_page_capture_ = true;
154 deferred_page_seq_no_ = page_seq_no;
155 deferred_contents_ = contents;
156 RecordLanguageDetectionTiming(DEFERRED);
// |deferred_page_seq_no_| is still set while OnCldDataAvailable() replays a
// deferred capture (it resets the value to -1 only after calling this
// function), which is what separates ON_TIME from RESUMED below.
160 if (deferred_page_seq_no_ == -1) {
161 // CLD data was available before language detection was requested.
162 RecordLanguageDetectionTiming(ON_TIME);
164 // This is a request that was triggered because CLD data is now available
165 // and was previously deferred.
166 RecordLanguageDetectionTiming(RESUMED);
169 WebDocument document = main_frame->document();
170 std::string content_language = document.contentLanguage().utf8();
171 WebElement html_element = document.documentElement();
172 std::string html_lang;
173 // |html_element| can be null element, e.g. in
174 // BrowserTest.WindowOpenClose.
175 if (!html_element.isNull())
176 html_lang = html_element.getAttribute("lang").utf8();
177 std::string cld_language;
// NOTE(review): |is_cld_reliable| is not initialized here; this relies on
// DeterminePageLanguage() always writing it - confirm.
178 bool is_cld_reliable;
179 std::string language = DeterminePageLanguage(
180 content_language, html_lang, contents, &cld_language, &is_cld_reliable);
182 if (language.empty())
// Remembered so OnTranslatePage() can report the time between language
// detection and the translate request.
185 language_determined_time_ = base::TimeTicks::Now();
187 GURL url(document.url());
188 LanguageDetectionDetails details;
189 details.time = base::Time::Now();
191 details.content_language = content_language;
192 details.cld_language = cld_language;
193 details.is_cld_reliable = is_cld_reliable;
194 details.html_root_language = html_lang;
195 details.adopted_language = language;
197 // TODO(hajimehoshi): If this affects performance, it should be set only if
198 // translate-internals tab exists.
199 details.contents = contents;
// The last visible argument tells the browser whether the page may be
// translated: the page must not opt out (IsTranslationAllowed()) and a
// language must have been determined.
201 Send(new ChromeViewHostMsg_TranslateLanguageDetermined(
204 IsTranslationAllowed(&document) && !language.empty()));
// Abandons any translation in progress: invalidates every weak pointer handed
// out by |weak_method_factory_|, clears the pending flag and both language
// codes, and cancels CLD data polling.
// The delayed tasks in this file (TranslatePageImpl, CheckTranslateStatus,
// SendCldDataRequest) and the CLD-available callback are all bound through
// that factory, so invalidation is expected to turn them into no-ops.
207 void TranslateHelper::CancelPendingTranslation() {
208 weak_method_factory_.InvalidateWeakPtrs();
209 translation_pending_ = false;
210 source_lang_.clear();
211 target_lang_.clear();
212 CancelCldDataPolling();
215 ////////////////////////////////////////////////////////////////////////////////
216 // TranslateHelper, protected:
// Returns the result of a script that checks whether |cr|,
// |cr.googleTranslate| and the function |cr.googleTranslate.translate| are all
// defined in the isolated world, i.e. whether the translate script has been
// injected.
// NOTE(review): the second argument is presumably the fallback returned when
// the script yields no boolean - the parameter's declaration is not visible in
// this listing.
218 bool TranslateHelper::IsTranslateLibAvailable() {
219 return ExecuteScriptAndGetBoolResult(
220 "typeof cr != 'undefined' && typeof cr.googleTranslate != 'undefined' && "
221 "typeof cr.googleTranslate.translate == 'function'", false);
// Returns the boolean value of |cr.googleTranslate.libReady| as evaluated in
// the isolated world (second argument: presumably the fallback value, false).
224 bool TranslateHelper::IsTranslateLibReady() {
225 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.libReady", false);
// Returns the boolean value of |cr.googleTranslate.finished| as evaluated in
// the isolated world.
// NOTE(review): the second argument is |true| here, presumably so that a script
// which cannot be evaluated counts as "finished" instead of being polled
// forever - confirm.
228 bool TranslateHelper::HasTranslationFinished() {
229 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.finished", true);
// Returns the boolean value of |cr.googleTranslate.error| as evaluated in the
// isolated world.
// NOTE(review): the second argument is |true| here, presumably so that a script
// which cannot be evaluated is treated as a failure - confirm.
232 bool TranslateHelper::HasTranslationFailed() {
233 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.error", true);
// Builds a call to |cr.googleTranslate.translate(...)| and returns the boolean
// result of running it in the isolated world.
// NOTE(review): the lines that append the call's arguments are not visible in
// this listing; presumably they are |source_lang_| and |target_lang_|. Those
// values are spliced into a quoted JavaScript string, so confirm they cannot
// contain quote characters.
236 bool TranslateHelper::StartTranslation() {
237 std::string script = "cr.googleTranslate.translate('" +
242 return ExecuteScriptAndGetBoolResult(script, false);
// Returns the string value of |cr.googleTranslate.sourceLang| as evaluated in
// the isolated world. ExecuteScriptAndGetStringResult() returns an empty string
// when the script does not produce a single string.
245 std::string TranslateHelper::GetOriginalPageLanguage() {
246 return ExecuteScriptAndGetStringResult("cr.googleTranslate.sourceLang");
// Converts |delayInMs| (milliseconds) into the base::TimeDelta used for the
// delayed TranslatePageImpl() and CheckTranslateStatus() tasks.
249 base::TimeDelta TranslateHelper::AdjustDelay(int delayInMs) {
250 // Just converts |delayInMs| without any modification in practical cases.
251 // Tests will override this function to return modified value.
252 return base::TimeDelta::FromMilliseconds(delayInMs);
// Runs |script| in the main frame's isolated world |world_id_| and ignores any
// result.
// NOTE(review): the handling of a null |main_frame| is not visible in this
// listing. |script| is converted with ASCIIToUTF16(), which presumably requires
// ASCII-only input - confirm.
255 void TranslateHelper::ExecuteScript(const std::string& script) {
256 WebFrame* main_frame = GetMainFrame();
260 WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
261 main_frame->executeScriptInIsolatedWorld(
262 world_id_, &source, 1, extension_group_);
// Runs |script| in the main frame's isolated world |world_id_| and returns its
// result as a bool.
// The result is only used when the script produced exactly one non-empty value
// that is a boolean.
// NOTE(review): the second parameter, the null-frame handling and the body of
// the result check are not visible in this listing; callers pass true/false as
// a second argument, presumably a fallback returned in those cases.
265 bool TranslateHelper::ExecuteScriptAndGetBoolResult(const std::string& script,
267 WebFrame* main_frame = GetMainFrame();
// The handle scope must be created before |results| receives v8 handles.
271 v8::HandleScope handle_scope(v8::Isolate::GetCurrent());
272 WebVector<v8::Local<v8::Value> > results;
273 WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
274 main_frame->executeScriptInIsolatedWorld(
275 world_id_, &source, 1, extension_group_, &results);
276 if (results.size() != 1 || results[0].IsEmpty() || !results[0]->IsBoolean()) {
281 return results[0]->BooleanValue();
// Runs |script| in the main frame's isolated world |world_id_| and returns its
// result as a UTF-8 std::string.
// Returns an empty string unless the script produced exactly one non-empty
// value that is a string.
// NOTE(review): the condition guarding the first |return std::string()| is not
// visible in this listing; presumably it is the null-|main_frame| check.
284 std::string TranslateHelper::ExecuteScriptAndGetStringResult(
285 const std::string& script) {
286 WebFrame* main_frame = GetMainFrame();
288 return std::string();
290 v8::HandleScope handle_scope(v8::Isolate::GetCurrent());
291 WebVector<v8::Local<v8::Value> > results;
292 WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
293 main_frame->executeScriptInIsolatedWorld(
294 world_id_, &source, 1, extension_group_, &results);
295 if (results.size() != 1 || results[0].IsEmpty() || !results[0]->IsString()) {
297 return std::string();
300 v8::Local<v8::String> v8_str = results[0]->ToString();
// One extra byte beyond the UTF-8 length leaves room for a terminating NUL.
301 int length = v8_str->Utf8Length() + 1;
302 scoped_ptr<char[]> str(new char[length]);
303 v8_str->WriteUtf8(str.get(), length);
// Constructed from a C string, so the result stops at the first NUL byte.
304 return std::string(str.get());
// Runs |script| in the main frame's isolated world |world_id_| and returns its
// result as a double.
// The result is only used when the script produced exactly one non-empty value
// that is a number.
// NOTE(review): the null-frame handling and the value returned when the result
// check fails are not visible in this listing.
307 double TranslateHelper::ExecuteScriptAndGetDoubleResult(
308 const std::string& script) {
309 WebFrame* main_frame = GetMainFrame();
313 v8::HandleScope handle_scope(v8::Isolate::GetCurrent());
314 WebVector<v8::Local<v8::Value> > results;
315 WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
316 main_frame->executeScriptInIsolatedWorld(
317 world_id_, &source, 1, extension_group_, &results);
318 if (results.size() != 1 || results[0].IsEmpty() || !results[0]->IsNumber()) {
323 return results[0]->NumberValue();
326 ////////////////////////////////////////////////////////////////////////////////
327 // TranslateHelper, private:
// Inspects |document| for a translation opt-out: a <meta> element among the
// child nodes of <head> whose "name" attribute equals "google" and whose
// "value" attribute (or "content" attribute if "value" is absent) matches
// "notranslate".
// NOTE(review): every return/continue statement of this function is missing
// from this listing, so the value returned on each path cannot be confirmed
// here; presumably a matching tag yields false and everything else true.
331 bool TranslateHelper::IsTranslationAllowed(WebDocument* document) {
332 WebElement head = document->head();
333 if (head.isNull() || !head.hasChildNodes())
336 const WebString meta(ASCIIToUTF16("meta"));
337 const WebString name(ASCIIToUTF16("name"));
338 const WebString google(ASCIIToUTF16("google"));
339 const WebString value(ASCIIToUTF16("value"));
340 const WebString content(ASCIIToUTF16("content"));
// Only the nodes returned by head.childNodes() are examined.
342 WebNodeList children = head.childNodes();
343 for (size_t i = 0; i < children.length(); ++i) {
344 WebNode node = children.item(i);
345 if (!node.isElementNode())
347 WebElement element = node.to<WebElement>();
348 // Check if a tag is <meta>.
349 if (!element.hasHTMLTagName(meta))
351 // Check if the tag contains name="google".
352 WebString attribute = element.getAttribute(name);
353 if (attribute.isNull() || attribute != google)
355 // Check if the tag contains value="notranslate", or content="notranslate".
356 attribute = element.getAttribute(value);
357 if (attribute.isNull())
358 attribute = element.getAttribute(content);
359 if (attribute.isNull())
// Presumably a case-insensitive comparison, going by the helper's name.
361 if (LowerCaseEqualsASCII(attribute, "notranslate"))
// Dispatches an incoming IPC |message|: ChromeViewMsg_TranslatePage goes to
// OnTranslatePage() and ChromeViewMsg_RevertTranslation to
// OnRevertTranslation(). Any other message sets |handled| to false, and the
// message is also offered to |cld_data_provider_|.
// NOTE(review): the declaration of |handled|, the condition (if any) guarding
// the provider call, and the return statement are not visible in this listing.
367 bool TranslateHelper::OnMessageReceived(const IPC::Message& message) {
369 IPC_BEGIN_MESSAGE_MAP(TranslateHelper, message)
370 IPC_MESSAGE_HANDLER(ChromeViewMsg_TranslatePage, OnTranslatePage)
371 IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation, OnRevertTranslation)
372 IPC_MESSAGE_UNHANDLED(handled = false)
373 IPC_END_MESSAGE_MAP()
375 handled = cld_data_provider_->OnMessageReceived(message);
// Handles a browser request to translate the current page.
//   |page_seq_no|      - sequence number the request was issued for; requests
//                        for a different page are ignored.
//   |translate_script| - script that defines the translate library; it is
//                        executed only if the library is not already available.
//   |source_lang|      - page language; kUnknownLanguageCode selects
//                        auto-detection.
//   |target_lang|      - language to translate into.
// Cancels any other pending translation, configures the isolated world and
// then starts the first TranslatePageImpl() attempt.
// NOTE(review): some lines (e.g. the body of the "similar translation" guard)
// are not visible in this listing.
380 void TranslateHelper::OnTranslatePage(int page_seq_no,
381 const std::string& translate_script,
382 const std::string& source_lang,
383 const std::string& target_lang) {
384 WebFrame* main_frame = GetMainFrame();
385 if (!main_frame || page_seq_no_ != page_seq_no)
386 return; // We navigated away, nothing to do.
388 // A similar translation is already under way, nothing to do.
389 if (translation_pending_ && target_lang_ == target_lang)
392 // Any pending translation is now irrelevant.
// Note that this also cancels CLD data polling (see CancelPendingTranslation()).
393 CancelPendingTranslation();
396 translation_pending_ = true;
398 // If the source language is undetermined, we'll let the translate element
// detect it.
400 source_lang_ = (source_lang != kUnknownLanguageCode) ? source_lang
401 : kAutoDetectionLanguage;
402 target_lang_ = target_lang;
// |language_determined_time_| is set in PageCapturedImpl().
404 ReportUserActionDuration(language_determined_time_, base::TimeTicks::Now());
406 GURL url(main_frame->document().url());
407 ReportPageScheme(url.scheme());
409 // Set up v8 isolated world with proper content-security-policy and
// security-origin.
411 WebFrame* frame = GetMainFrame();
413 frame->setIsolatedWorldContentSecurityPolicy(
414 world_id_, WebString::fromUTF8(kContentSecurityPolicy));
416 GURL security_origin = GetTranslateSecurityOrigin();
417 frame->setIsolatedWorldSecurityOrigin(
418 world_id_, WebSecurityOrigin::create(security_origin));
421 if (!IsTranslateLibAvailable()) {
422 // Evaluate the script to add the translation related method to the global
423 // context of the page.
424 ExecuteScript(translate_script);
425 DCHECK(IsTranslateLibAvailable());
// Attempt counter starts at 0.
428 TranslatePageImpl(page_seq_no, 0);
// Handles a browser request to undo the translation of the page identified by
// |page_seq_no|: cancels any pending translation and calls
// |cr.googleTranslate.revert()| in the isolated world.
// NOTE(review): the body of the "library not available" branch is not visible
// in this listing.
431 void TranslateHelper::OnRevertTranslation(int page_seq_no) {
432 if (page_seq_no_ != page_seq_no)
433 return; // We navigated away, nothing to do.
435 if (!IsTranslateLibAvailable()) {
440 CancelPendingTranslation();
442 ExecuteScript("cr.googleTranslate.revert()");
// Polls the translate library for the outcome of the translation started for
// |page_seq_no|. Reports failure or success to the browser, or re-posts itself
// after kTranslateStatusCheckDelayMs while the translation is still running.
// NOTE(review): several lines (returns, else-branches, closing braces and some
// message arguments) are not visible in this listing.
445 void TranslateHelper::CheckTranslateStatus(int page_seq_no) {
446 // If this is not the same page, the translation has been canceled. If the
447 // view is gone, the page is closing.
448 if (page_seq_no_ != page_seq_no || !render_view()->GetWebView())
451 // First check if there was an error.
452 if (HasTranslationFailed()) {
453 // TODO(toyoshim): Check |errorCode| of translate.js and notify it here.
454 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR);
455 return; // There was an error.
458 if (HasTranslationFinished()) {
459 std::string actual_source_lang;
460 // Translation was successful, if it was auto, retrieve the source
461 // language the Translate Element detected.
462 if (source_lang_ == kAutoDetectionLanguage) {
463 actual_source_lang = GetOriginalPageLanguage();
464 if (actual_source_lang.empty()) {
465 NotifyBrowserTranslationFailed(TranslateErrors::UNKNOWN_LANGUAGE);
467 } else if (actual_source_lang == target_lang_) {
468 NotifyBrowserTranslationFailed(TranslateErrors::IDENTICAL_LANGUAGES);
472 actual_source_lang = source_lang_;
475 if (!translation_pending_) {
480 translation_pending_ = false;
482 // Check JavaScript performance counters for UMA reports.
483 ReportTimeToTranslate(
484 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.translationTime"));
486 // Notify the browser we are done.
488 new ChromeViewHostMsg_PageTranslated(render_view()->GetRoutingID(),
491 TranslateErrors::NONE));
495 // The translation is still pending, check again later.
// Bound through a weak pointer so CancelPendingTranslation() can drop the task.
496 base::MessageLoop::current()->PostDelayedTask(
498 base::Bind(&TranslateHelper::CheckTranslateStatus,
499 weak_method_factory_.GetWeakPtr(), page_seq_no),
500 AdjustDelay(kTranslateStatusCheckDelayMs));
// Starts the translation for |page_seq_no| once the translate library reports
// that it is ready.
//   |count| - number of readiness checks that have already failed; it must be
//             below kMaxTranslateInitCheckAttempts.
// While the library is not ready the function re-posts itself with a delay of
// |count| * kTranslateInitCheckDelayMs, and gives up with INITIALIZATION_ERROR
// once the attempt limit is reached.
// NOTE(review): several lines (returns, closing braces, the arguments of the
// re-posted task and the names of the two report calls) are not visible in
// this listing.
503 void TranslateHelper::TranslatePageImpl(int page_seq_no, int count) {
504 DCHECK_LT(count, kMaxTranslateInitCheckAttempts);
505 if (page_seq_no_ != page_seq_no || !render_view()->GetWebView())
508 if (!IsTranslateLibReady()) {
509 // The library is not ready, try again later, unless we have tried several
510 // times unsuccessfully already.
511 if (++count >= kMaxTranslateInitCheckAttempts) {
512 NotifyBrowserTranslationFailed(TranslateErrors::INITIALIZATION_ERROR);
515 base::MessageLoop::current()->PostDelayedTask(
517 base::Bind(&TranslateHelper::TranslatePageImpl,
518 weak_method_factory_.GetWeakPtr(),
// |count| was already incremented above, so the delay grows with each retry.
520 AdjustDelay(count * kTranslateInitCheckDelayMs));
524 // The library is loaded, and ready for translation now.
525 // Check JavaScript performance counters for UMA reports.
527 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.readyTime"));
529 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.loadTime"));
531 if (!StartTranslation()) {
532 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR);
535 // Check the status of the translation.
536 base::MessageLoop::current()->PostDelayedTask(
538 base::Bind(&TranslateHelper::CheckTranslateStatus,
539 weak_method_factory_.GetWeakPtr(), page_seq_no),
540 AdjustDelay(kTranslateStatusCheckDelayMs));
// Marks the translation as no longer pending and sends a
// ChromeViewHostMsg_PageTranslated message carrying |error| together with the
// current |source_lang_| and |target_lang_| to the browser.
543 void TranslateHelper::NotifyBrowserTranslationFailed(
544 TranslateErrors::Type error) {
545 translation_pending_ = false;
546 // Notify the browser there was an error.
547 render_view()->Send(new ChromeViewHostMsg_PageTranslated(
548 render_view()->GetRoutingID(), source_lang_, target_lang_, error));
// Returns the main frame of this render view's WebView.
// Callers in this file check the result for NULL before using it.
// NOTE(review): the null-|web_view| check described by the comment below is not
// visible in this listing; presumably it returns NULL.
551 WebFrame* TranslateHelper::GetMainFrame() {
552 WebView* web_view = render_view()->GetWebView();
554 // When the tab is going to be closed, the web_view can be NULL.
558 return web_view->mainFrame();
// Requests that CLD data polling stop: SendCldDataRequest() checks this flag
// before doing any work.
// NOTE(review): nothing in this listing resets the flag to false, so polling
// appears to stay disabled for the rest of this helper's lifetime - confirm
// that this is intended.
561 void TranslateHelper::CancelCldDataPolling() {
562 cld_data_polling_canceled_ = true;
// One step of the CLD data polling chain: unless polling was canceled or the
// data is already available, asks |cld_data_provider_| for the data and
// schedules the next step.
//   |delay_millis|      - delay, in milliseconds, before the next step runs.
//   |next_delay_millis| - presumably the delay passed on to the next step; the
//                         arguments of the re-posted task are not visible in
//                         this listing.
// NOTE(review): the bodies of the two early-exit guards are not visible here.
565 void TranslateHelper::SendCldDataRequest(const int delay_millis,
566 const int next_delay_millis) {
567 // Terminate immediately if told to stop polling.
568 if (cld_data_polling_canceled_)
571 // Terminate immediately if data is already loaded.
572 if (cld_data_provider_->IsCldDataAvailable())
// Register OnCldDataAvailable() with the provider the first time through.
// NOTE(review): |g_cld_callback_set| is shared by all instances and is never
// cleared, while the callback is bound to this instance's weak pointer; a
// second helper, or this helper after InvalidateWeakPtrs(), would apparently
// not be notified - confirm.
575 if (!g_cld_callback_set) {
576 g_cld_callback_set = true;
577 cld_data_provider_->SetCldAvailableCallback(
578 base::Bind(&TranslateHelper::OnCldDataAvailable,
579 weak_method_factory_.GetWeakPtr()));
582 // Else, make an asynchronous request to get the data we need.
583 cld_data_provider_->SendCldDataRequest();
585 // ... and enqueue another delayed task to call again. This will start a
586 // chain of polling that will last until the CLD data becomes available (or
587 // polling is canceled), which is the right thing to do.
588 // NB: In the great majority of cases, the data file will be available and
589 // the very first delayed task will be a no-op that terminates the chain.
590 // It's only while downloading the file that this will chain for a
591 // nontrivial amount of time.
592 // Use a weak pointer to avoid keeping this helper object around forever.
593 base::MessageLoop::current()->PostDelayedTask(
595 base::Bind(&TranslateHelper::SendCldDataRequest,
596 weak_method_factory_.GetWeakPtr(),
599 base::TimeDelta::FromMilliseconds(delay_millis));
// Callback registered with |cld_data_provider_| in SendCldDataRequest(). If a
// page capture was deferred because CLD data was missing, replays it through
// PageCapturedImpl() and then clears the deferred state.
602 void TranslateHelper::OnCldDataAvailable() {
603 if (deferred_page_capture_) {
604 deferred_page_capture_ = false; // Don't do this a second time.
// |deferred_page_seq_no_| must still be set during this call so that
// PageCapturedImpl() records the detection as RESUMED rather than ON_TIME.
605 PageCapturedImpl(deferred_page_seq_no_, deferred_contents_);
606 deferred_page_seq_no_ = -1; // Clean up for sanity
607 deferred_contents_.clear(); // Clean up for sanity
// Records |timing| (ON_TIME, DEFERRED or RESUMED, as passed by
// PageCapturedImpl()) in the "Translate.LanguageDetectionTiming" histogram and
// then asks the render thread to update histograms.
611 void TranslateHelper::RecordLanguageDetectionTiming(
612 LanguageDetectionTiming timing) {
613 // The following comment is copied from page_load_histograms.cc, and applies
614 // just as equally here:
616 // Since there are currently no guarantees that renderer histograms will be
617 // sent to the browser, we initiate a PostTask here to be sure that we send
618 // the histograms we generated. Without this call, pages that don't have an
619 // on-close-handler might generate data that is lost when the renderer is
620 // shutdown abruptly (perchance because the user closed the tab).
// NOTE(review): the copied comment above speaks of a PostTask, but the visible
// code calls UpdateHistograms() directly at the end of this function.
621 DVLOG(1) << "Language detection timing: " << timing;
622 UMA_HISTOGRAM_ENUMERATION("Translate.LanguageDetectionTiming", timing,
623 LANGUAGE_DETECTION_TIMING_MAX_VALUE);
625 // Note on performance: Under normal circumstances, this should get called
626 // once per page load. The code will either manage to do it ON_TIME or will
627 // be DEFERRED until CLD is ready. In the latter case, CLD is in dynamic mode
628 // and may eventually become available, triggering the RESUMED event; after
629 // this, everything should start being ON_TIME. This should never run more
630 // than twice in a page load, under any conditions.
631 // Also note that language detection is triggered off of a delay AFTER the
632 // page load completed event has fired, making this very much off the critical
// path.
634 content::RenderThread::Get()->UpdateHistograms(
635 content::kHistogramSynchronizerReservedSequenceNumber);
638 } // namespace translate