1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/renderer/translate/translate_helper.h"
8 #include "base/compiler_specific.h"
9 #include "base/logging.h"
10 #include "base/message_loop/message_loop.h"
11 #include "base/metrics/histogram.h"
12 #include "base/strings/string16.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "chrome/renderer/isolated_world_ids.h"
16 #include "components/translate/content/common/translate_messages.h"
17 #include "components/translate/core/common/translate_constants.h"
18 #include "components/translate/core/common/translate_metrics.h"
19 #include "components/translate/core/common/translate_util.h"
20 #include "components/translate/core/language_detection/language_detection_util.h"
21 #include "content/public/common/content_constants.h"
22 #include "content/public/renderer/render_thread.h"
23 #include "content/public/renderer/render_view.h"
24 #include "extensions/common/constants.h"
25 #include "extensions/renderer/extension_groups.h"
26 #include "ipc/ipc_platform_file.h"
27 #include "content/public/common/url_constants.h"
28 #include "third_party/WebKit/public/web/WebDocument.h"
29 #include "third_party/WebKit/public/web/WebElement.h"
30 #include "third_party/WebKit/public/web/WebFrame.h"
31 #include "third_party/WebKit/public/web/WebNode.h"
32 #include "third_party/WebKit/public/web/WebNodeList.h"
33 #include "third_party/WebKit/public/web/WebScriptSource.h"
34 #include "third_party/WebKit/public/web/WebView.h"
35 #include "third_party/WebKit/public/web/WebWidget.h"
37 #include "v8/include/v8.h"
39 using base::ASCIIToUTF16;
40 using blink::WebDocument;
41 using blink::WebElement;
42 using blink::WebFrame;
44 using blink::WebNodeList;
45 using blink::WebScriptSource;
46 using blink::WebSecurityOrigin;
47 using blink::WebString;
48 using blink::WebVector;
53 // The delay in milliseconds that we'll wait before checking to see if the
54 // translate library injected in the page is ready.
// TranslatePageImpl() multiplies this by the attempt count, so successive
// readiness checks are spaced further apart.
55 const int kTranslateInitCheckDelayMs = 150;
57 // The maximum number of times we'll check to see if the translate library
58 // injected in the page is ready.
59 const int kMaxTranslateInitCheckAttempts = 5;
61 // The delay we wait in milliseconds before checking whether the translation has
// finished. The same delay is used between subsequent status checks.
63 const int kTranslateStatusCheckDelayMs = 400;
65 // Language name passed to the Translate element for it to detect the language.
66 const char kAutoDetectionLanguage[] = "auto";
68 // Content-security-policy applied to the translate isolated world.
69 const char kContentSecurityPolicy[] = "script-src 'self' 'unsafe-eval'";
71 // Whether or not we have set the CLD callback yet.
// This is a file-level variable rather than a member, so the flag is shared by
// every TranslateHelper instance.
72 bool g_cld_callback_set = false;
77 ////////////////////////////////////////////////////////////////////////////////
78 // TranslateHelper, public:
// Constructs a helper that observes |render_view|. No translation is pending,
// CLD data polling is neither started nor canceled, and no page capture is
// deferred. The CLD data provider is created for this helper via
// translate::CreateRendererCldDataProviderFor().
80 TranslateHelper::TranslateHelper(content::RenderView* render_view)
81 : content::RenderViewObserver(render_view),
83 translation_pending_(false),
84 weak_method_factory_(this),
85 cld_data_provider_(translate::CreateRendererCldDataProviderFor(this)),
86 cld_data_polling_started_(false),
87 cld_data_polling_canceled_(false),
88 deferred_page_capture_(false),
// -1 is the "nothing deferred" sentinel that PageCapturedImpl() tests for.
89 deferred_page_seq_no_(-1) {
// Cancels any pending translation and stops CLD data polling on destruction.
// CancelPendingTranslation() already calls CancelCldDataPolling(), so the
// second call below only sets the same flag again.
92 TranslateHelper::~TranslateHelper() {
93 CancelPendingTranslation();
94 CancelCldDataPolling();
// Resets per-page translate state for |url| and kicks off polling for CLD
// data.
//
// Steps visible here: send ChromeViewHostMsg_TranslateAssignedSequenceNumber
// carrying page_seq_no_, clear any deferred page capture, test |url| against
// the chrome UI, devtools, FTP and extension schemes, then mark polling as
// started and issue the first CLD data request.
// NOTE(review): the statements guarded by the conditions below are not present
// in this listing; the TODO suggests the scheme checks reject URLs that are
// not translatable -- confirm against the full file.
97 void TranslateHelper::PrepareForUrl(const GURL& url) {
99 Send(new ChromeViewHostMsg_TranslateAssignedSequenceNumber(
100 routing_id(), page_seq_no_));
// Forget any page capture that was waiting for CLD data.
101 deferred_page_capture_ = false;
102 deferred_page_seq_no_ = -1;
103 deferred_contents_.clear();
104 if (cld_data_polling_started_)
107 // TODO(andrewhayden): Refactor translate_manager.cc's IsTranslatableURL to
108 // components/translate/core/common/translate_util.cc, and ignore any URL
109 // that fails that check. This will require moving unit tests and rewiring
110 // other function calls as well, so for now replicate the logic here.
113 if (url.SchemeIs(content::kChromeUIScheme))
115 if (url.SchemeIs(content::kChromeDevToolsScheme))
117 if (url.SchemeIs(url::kFtpScheme))
119 if (url.SchemeIs(extensions::kExtensionScheme))
122 // Start polling for CLD data.
123 cld_data_polling_started_ = true;
// First request: delay_millis is 0 and next_delay_millis is 1000.
124 TranslateHelper::SendCldDataRequest(0, 1000);
// Handles captured page text by forwarding |contents| to PageCapturedImpl()
// tagged with the current page sequence number.
127 void TranslateHelper::PageCaptured(const base::string16& contents) {
128 PageCapturedImpl(page_seq_no_, contents);
// Determines the language of the page from |contents| plus the document's
// content-language and <html lang> values, then sends a
// ChromeViewHostMsg_TranslateLanguageDetermined message with the details.
//
// |page_seq_no| is compared against page_seq_no_ together with the main-frame
// check. If CLD data is not available yet, the request is stashed in the
// deferred_* members so that OnCldDataAvailable() can replay it.
// NOTE(review): several statements (early returns, closing braces, some
// message arguments) are not present in this listing.
131 void TranslateHelper::PageCapturedImpl(int page_seq_no,
132 const base::string16& contents) {
133 // Get the document language as set by WebKit from the http-equiv
134 // meta tag for "content-language". This may or may not also
135 // have a value derived from the actual Content-Language HTTP
136 // header. The two actually have different meanings (despite the
137 // original intent of http-equiv to be an equivalent) with the former
138 // being the language of the document and the latter being the
139 // language of the intended audience (a distinction really only
140 // relevant for things like language textbooks). This distinction
141 // shouldn't affect translation.
142 WebFrame* main_frame = GetMainFrame();
143 if (!main_frame || page_seq_no_ != page_seq_no)
146 if (!cld_data_provider_->IsCldDataAvailable()) {
147 // We're in dynamic mode and CLD data isn't loaded. Retry when CLD data
148 // is loaded, if ever.
149 deferred_page_capture_ = true;
150 deferred_page_seq_no_ = page_seq_no;
151 deferred_contents_ = contents;
152 RecordLanguageDetectionTiming(DEFERRED);
// deferred_page_seq_no_ stays at -1 unless a capture was deferred above.
156 if (deferred_page_seq_no_ == -1) {
157 // CLD data was available before language detection was requested.
158 RecordLanguageDetectionTiming(ON_TIME);
160 // This is a request that was triggered because CLD data is now available
161 // and was previously deferred.
162 RecordLanguageDetectionTiming(RESUMED);
165 WebDocument document = main_frame->document();
166 std::string content_language = document.contentLanguage().utf8();
167 WebElement html_element = document.documentElement();
168 std::string html_lang;
169 // |html_element| can be null element, e.g. in
170 // BrowserTest.WindowOpenClose.
171 if (!html_element.isNull())
172 html_lang = html_element.getAttribute("lang").utf8();
// |cld_language| and |is_cld_reliable| are passed by address to
// DeterminePageLanguage() and copied into |details| below.
173 std::string cld_language;
174 bool is_cld_reliable;
175 std::string language = translate::DeterminePageLanguage(
176 content_language, html_lang, contents, &cld_language, &is_cld_reliable);
178 if (language.empty())
// Remembered so OnTranslatePage() can report the time until the user acts.
181 language_determined_time_ = base::TimeTicks::Now();
183 GURL url(document.url());
184 translate::LanguageDetectionDetails details;
185 details.time = base::Time::Now();
187 details.content_language = content_language;
188 details.cld_language = cld_language;
189 details.is_cld_reliable = is_cld_reliable;
190 details.html_root_language = html_lang;
191 details.adopted_language = language;
193 // TODO(hajimehoshi): If this affects performance, it should be set only if
194 // translate-internals tab exists.
195 details.contents = contents;
197 Send(new ChromeViewHostMsg_TranslateLanguageDetermined(
// The final argument is true only when the page does not opt out of
// translation and a language was determined.
200 IsTranslationAllowed(&document) && !language.empty()));
// Abandons any translation in progress: invalidates the weak pointers handed
// out by weak_method_factory_, clears the pending flag and both language
// codes, and cancels CLD data polling.
// NOTE(review): the CLD-available callback and the CLD polling task are bound
// through the same weak pointer factory (see SendCldDataRequest()), so they
// are invalidated here as well -- confirm that this is intended.
203 void TranslateHelper::CancelPendingTranslation() {
204 weak_method_factory_.InvalidateWeakPtrs();
205 translation_pending_ = false;
206 source_lang_.clear();
207 target_lang_.clear();
208 CancelCldDataPolling();
211 ////////////////////////////////////////////////////////////////////////////////
212 // TranslateHelper, protected:
// Evaluates a script that checks whether cr.googleTranslate.translate is
// defined as a function, and returns the boolean result.
// The second argument (false) is presumably the value returned when the
// script does not produce a boolean -- confirm in
// ExecuteScriptAndGetBoolResult().
214 bool TranslateHelper::IsTranslateLibAvailable() {
215 return ExecuteScriptAndGetBoolResult(
216 "typeof cr != 'undefined' && typeof cr.googleTranslate != 'undefined' && "
217 "typeof cr.googleTranslate.translate == 'function'", false);
// Returns the boolean value of cr.googleTranslate.libReady as evaluated in the
// page; false is passed as the second (presumably fallback) argument.
220 bool TranslateHelper::IsTranslateLibReady() {
221 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.libReady", false);
// Returns the boolean value of cr.googleTranslate.finished as evaluated in the
// page; true is passed as the second (presumably fallback) argument.
224 bool TranslateHelper::HasTranslationFinished() {
225 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.finished", true);
// Returns the boolean value of cr.googleTranslate.error as evaluated in the
// page; true is passed as the second (presumably fallback) argument.
228 bool TranslateHelper::HasTranslationFailed() {
229 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.error", true);
// Builds a cr.googleTranslate.translate(...) call and returns the boolean
// result of evaluating it.
// NOTE(review): the rest of the script expression (the arguments appended
// after the opening quote) is not present in this listing.
232 bool TranslateHelper::StartTranslation() {
233 std::string script = "cr.googleTranslate.translate('" +
238 return ExecuteScriptAndGetBoolResult(script, false);
// Returns the string value of cr.googleTranslate.sourceLang as evaluated in
// the page. CheckTranslateStatus() uses it as the detected source language
// when the translation was requested with automatic language detection.
241 std::string TranslateHelper::GetOriginalPageLanguage() {
242 return ExecuteScriptAndGetStringResult("cr.googleTranslate.sourceLang");
// Converts |delayInMs| (milliseconds) into a base::TimeDelta for use as the
// delay of posted tasks.
245 base::TimeDelta TranslateHelper::AdjustDelay(int delayInMs) {
246 // Just converts |delayInMs| without any modification in practical cases.
247 // Tests will override this function to return modified value.
248 return base::TimeDelta::FromMilliseconds(delayInMs);
// Runs |script| on the main frame inside the translate isolated world
// (chrome::ISOLATED_WORLD_ID_TRANSLATE). No result is retrieved here.
// |script| is converted with ASCIIToUTF16.
// NOTE(review): the check of |main_frame| and some call arguments are not
// present in this listing.
251 void TranslateHelper::ExecuteScript(const std::string& script) {
252 WebFrame* main_frame = GetMainFrame();
256 WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
257 main_frame->executeScriptInIsolatedWorld(
258 chrome::ISOLATED_WORLD_ID_TRANSLATE,
261 extensions::EXTENSION_GROUP_INTERNAL_TRANSLATE_SCRIPTS);
// Runs |script| on the main frame inside the translate isolated world and
// returns its result as a bool.
// The result is used only when the script produced exactly one non-empty
// value that is a boolean.
// NOTE(review): the second parameter, the check of |main_frame| and the body
// of the failure branch are not present in this listing; callers pass a bool
// that is presumably returned on failure -- confirm.
264 bool TranslateHelper::ExecuteScriptAndGetBoolResult(const std::string& script,
266 WebFrame* main_frame = GetMainFrame();
// Scope for the V8 handles collected in |results|.
270 v8::HandleScope handle_scope(v8::Isolate::GetCurrent());
271 WebVector<v8::Local<v8::Value> > results;
272 WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
273 main_frame->executeScriptInIsolatedWorld(
274 chrome::ISOLATED_WORLD_ID_TRANSLATE,
277 extensions::EXTENSION_GROUP_INTERNAL_TRANSLATE_SCRIPTS,
279 if (results.size() != 1 || results[0].IsEmpty() || !results[0]->IsBoolean()) {
284 return results[0]->BooleanValue();
// Runs |script| on the main frame inside the translate isolated world and
// returns its result as a UTF-8 std::string.
// Returns an empty string unless the script produced exactly one non-empty
// value that is a string.
// NOTE(review): the condition guarding the first early return is not present
// in this listing; presumably it is a null check of |main_frame|.
287 std::string TranslateHelper::ExecuteScriptAndGetStringResult(
288 const std::string& script) {
289 WebFrame* main_frame = GetMainFrame();
291 return std::string();
// Scope for the V8 handles collected in |results|.
293 v8::HandleScope handle_scope(v8::Isolate::GetCurrent());
294 WebVector<v8::Local<v8::Value> > results;
295 WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
296 main_frame->executeScriptInIsolatedWorld(
297 chrome::ISOLATED_WORLD_ID_TRANSLATE,
300 extensions::EXTENSION_GROUP_INTERNAL_TRANSLATE_SCRIPTS,
302 if (results.size() != 1 || results[0].IsEmpty() || !results[0]->IsString()) {
304 return std::string();
307 v8::Local<v8::String> v8_str = results[0]->ToString();
// One extra byte beyond the UTF-8 length, since std::string(const char*)
// below reads up to a terminating NUL.
308 int length = v8_str->Utf8Length() + 1;
309 scoped_ptr<char[]> str(new char[length]);
310 v8_str->WriteUtf8(str.get(), length);
// NOTE(review): a result containing an embedded NUL would be cut short by
// this constructor.
311 return std::string(str.get());
// Runs |script| on the main frame inside the translate isolated world and
// returns its result as a double.
// The result is used only when the script produced exactly one non-empty
// value that is a number.
// NOTE(review): the check of |main_frame| and the body of the failure branch
// are not present in this listing, so the value returned on failure cannot
// be stated here.
314 double TranslateHelper::ExecuteScriptAndGetDoubleResult(
315 const std::string& script) {
316 WebFrame* main_frame = GetMainFrame();
// Scope for the V8 handles collected in |results|.
320 v8::HandleScope handle_scope(v8::Isolate::GetCurrent());
321 WebVector<v8::Local<v8::Value> > results;
322 WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
323 main_frame->executeScriptInIsolatedWorld(
324 chrome::ISOLATED_WORLD_ID_TRANSLATE,
327 extensions::EXTENSION_GROUP_INTERNAL_TRANSLATE_SCRIPTS,
329 if (results.size() != 1 || results[0].IsEmpty() || !results[0]->IsNumber()) {
334 return results[0]->NumberValue();
337 ////////////////////////////////////////////////////////////////////////////////
338 // TranslateHelper, private:
// Inspects the direct children of |document|'s <head> for a
// <meta name="google"> element whose "value" attribute (or, when that is
// absent, its "content" attribute) matches "notranslate" via
// LowerCaseEqualsASCII.
// NOTE(review): the return statements and the statements that skip
// non-matching nodes are not present in this listing; presumably a matching
// tag yields false and everything else yields true -- confirm.
342 bool TranslateHelper::IsTranslationAllowed(WebDocument* document) {
343 WebElement head = document->head();
344 if (head.isNull() || !head.hasChildNodes())
// Tag, attribute and value names used by the scan below.
347 const WebString meta(ASCIIToUTF16("meta"));
348 const WebString name(ASCIIToUTF16("name"));
349 const WebString google(ASCIIToUTF16("google"));
350 const WebString value(ASCIIToUTF16("value"));
351 const WebString content(ASCIIToUTF16("content"));
353 WebNodeList children = head.childNodes();
354 for (size_t i = 0; i < children.length(); ++i) {
355 WebNode node = children.item(i);
356 if (!node.isElementNode())
358 WebElement element = node.to<WebElement>();
359 // Check if a tag is <meta>.
360 if (!element.hasHTMLTagName(meta))
362 // Check if the tag contains name="google".
363 WebString attribute = element.getAttribute(name);
364 if (attribute.isNull() || attribute != google)
366 // Check if the tag contains value="notranslate", or content="notranslate".
367 attribute = element.getAttribute(value);
368 if (attribute.isNull())
369 attribute = element.getAttribute(content);
370 if (attribute.isNull())
372 if (LowerCaseEqualsASCII(attribute, "notranslate"))
// Dispatches incoming IPC messages: ChromeViewMsg_TranslatePage goes to
// OnTranslatePage() and ChromeViewMsg_RevertTranslation goes to
// OnRevertTranslation(). |message| is also offered to cld_data_provider_.
// NOTE(review): the declaration of |handled|, the condition around the
// provider call and the return statement are not present in this listing.
378 bool TranslateHelper::OnMessageReceived(const IPC::Message& message) {
380 IPC_BEGIN_MESSAGE_MAP(TranslateHelper, message)
381 IPC_MESSAGE_HANDLER(ChromeViewMsg_TranslatePage, OnTranslatePage)
382 IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation, OnRevertTranslation)
383 IPC_MESSAGE_UNHANDLED(handled = false)
384 IPC_END_MESSAGE_MAP()
386 handled = cld_data_provider_->OnMessageReceived(message);
// Handles a request to translate the current page.
//
// |page_seq_no|      must match page_seq_no_, otherwise the request is ignored.
// |translate_script| script executed when the translate library is not yet
//                    available in the page.
// |source_lang|      source language; translate::kUnknownLanguageCode is
//                    replaced by kAutoDetectionLanguage.
// |target_lang|      language to translate into.
//
// Cancels any previously pending translation, records metrics, configures
// the translate isolated world and then calls TranslatePageImpl().
391 void TranslateHelper::OnTranslatePage(int page_seq_no,
392 const std::string& translate_script,
393 const std::string& source_lang,
394 const std::string& target_lang) {
395 WebFrame* main_frame = GetMainFrame();
396 if (!main_frame || page_seq_no_ != page_seq_no)
397 return; // We navigated away, nothing to do.
399 // A similar translation is already under way, nothing to do.
400 if (translation_pending_ && target_lang_ == target_lang)
403 // Any pending translation is now irrelevant.
404 CancelPendingTranslation();
407 translation_pending_ = true;
409 // If the source language is undetermined, we'll let the translate element
// detect it.
411 source_lang_ = (source_lang != translate::kUnknownLanguageCode) ?
412 source_lang : kAutoDetectionLanguage;
413 target_lang_ = target_lang;
// language_determined_time_ was recorded by PageCapturedImpl().
415 translate::ReportUserActionDuration(language_determined_time_,
416 base::TimeTicks::Now());
418 GURL url(main_frame->document().url());
419 translate::ReportPageScheme(url.scheme());
421 // Set up v8 isolated world with proper content-security-policy and
// security-origin.
423 WebFrame* frame = GetMainFrame();
425 frame->setIsolatedWorldContentSecurityPolicy(
426 chrome::ISOLATED_WORLD_ID_TRANSLATE,
427 WebString::fromUTF8(kContentSecurityPolicy));
429 GURL security_origin = translate::GetTranslateSecurityOrigin();
430 frame->setIsolatedWorldSecurityOrigin(
431 chrome::ISOLATED_WORLD_ID_TRANSLATE,
432 WebSecurityOrigin::create(security_origin));
435 if (!IsTranslateLibAvailable()) {
436 // Evaluate the script to add the translation related method to the global
437 // context of the page.
438 ExecuteScript(translate_script);
439 DCHECK(IsTranslateLibAvailable());
// Start with attempt count 0 for the library-readiness checks.
442 TranslatePageImpl(page_seq_no, 0);
// Handles a request to revert the page to its original text. Ignored when
// |page_seq_no| does not match page_seq_no_. Otherwise cancels any pending
// translation and runs cr.googleTranslate.revert() in the page.
// NOTE(review): the body of the "library not available" branch is not present
// in this listing.
445 void TranslateHelper::OnRevertTranslation(int page_seq_no) {
446 if (page_seq_no_ != page_seq_no)
447 return; // We navigated away, nothing to do.
449 if (!IsTranslateLibAvailable()) {
454 CancelPendingTranslation();
456 ExecuteScript("cr.googleTranslate.revert()");
// Checks the translate library for the outcome of the translation started for
// |page_seq_no|. On error it notifies the browser of the failure. On
// completion it resolves the actual source language, reports timing metrics
// and sends ChromeViewHostMsg_PageTranslated. Otherwise it re-posts itself
// after kTranslateStatusCheckDelayMs.
// NOTE(review): several early returns, closing braces and message arguments
// are not present in this listing.
459 void TranslateHelper::CheckTranslateStatus(int page_seq_no) {
460 // If this is not the same page, the translation has been canceled. If the
461 // view is gone, the page is closing.
462 if (page_seq_no_ != page_seq_no || !render_view()->GetWebView())
465 // First check if there was an error.
466 if (HasTranslationFailed()) {
467 // TODO(toyoshim): Check |errorCode| of translate.js and notify it here.
468 NotifyBrowserTranslationFailed(
469 translate::TranslateErrors::TRANSLATION_ERROR);
470 return; // There was an error.
473 if (HasTranslationFinished()) {
474 std::string actual_source_lang;
475 // Translation was successful. If it was auto, retrieve the source
476 // language the Translate Element detected.
477 if (source_lang_ == kAutoDetectionLanguage) {
478 actual_source_lang = GetOriginalPageLanguage();
479 if (actual_source_lang.empty()) {
480 NotifyBrowserTranslationFailed(
481 translate::TranslateErrors::UNKNOWN_LANGUAGE);
483 } else if (actual_source_lang == target_lang_) {
484 NotifyBrowserTranslationFailed(
485 translate::TranslateErrors::IDENTICAL_LANGUAGES);
489 actual_source_lang = source_lang_;
492 if (!translation_pending_) {
497 translation_pending_ = false;
499 // Check JavaScript performance counters for UMA reports.
500 translate::ReportTimeToTranslate(
501 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.translationTime"));
503 // Notify the browser we are done.
505 new ChromeViewHostMsg_PageTranslated(render_view()->GetRoutingID(),
508 translate::TranslateErrors::NONE));
512 // The translation is still pending, check again later.
// The task is bound to a weak pointer from weak_method_factory_, which
// CancelPendingTranslation() invalidates.
513 base::MessageLoop::current()->PostDelayedTask(
515 base::Bind(&TranslateHelper::CheckTranslateStatus,
516 weak_method_factory_.GetWeakPtr(), page_seq_no),
517 AdjustDelay(kTranslateStatusCheckDelayMs));
// Starts the translation for |page_seq_no| once the translate library reports
// ready. |count| is the number of readiness checks already made.
//
// While the library is not ready, the function re-posts itself with a delay
// of count * kTranslateInitCheckDelayMs (after incrementing |count|). It
// reports INITIALIZATION_ERROR once the incremented |count| reaches
// kMaxTranslateInitCheckAttempts. When ready, it reports load and ready
// timings, starts the translation and schedules CheckTranslateStatus().
// NOTE(review): early returns, closing braces and one Bind argument are not
// present in this listing.
520 void TranslateHelper::TranslatePageImpl(int page_seq_no, int count) {
521 DCHECK_LT(count, kMaxTranslateInitCheckAttempts);
522 if (page_seq_no_ != page_seq_no || !render_view()->GetWebView())
525 if (!IsTranslateLibReady()) {
526 // The library is not ready, try again later, unless we have tried several
527 // times unsuccessfully already.
528 if (++count >= kMaxTranslateInitCheckAttempts) {
529 NotifyBrowserTranslationFailed(
530 translate::TranslateErrors::INITIALIZATION_ERROR);
533 base::MessageLoop::current()->PostDelayedTask(
535 base::Bind(&TranslateHelper::TranslatePageImpl,
536 weak_method_factory_.GetWeakPtr(),
// The retry delay grows with the number of attempts made so far.
538 AdjustDelay(count * kTranslateInitCheckDelayMs));
542 // The library is loaded, and ready for translation now.
543 // Check JavaScript performance counters for UMA reports.
544 translate::ReportTimeToBeReady(
545 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.readyTime"));
546 translate::ReportTimeToLoad(
547 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.loadTime"));
549 if (!StartTranslation()) {
550 NotifyBrowserTranslationFailed(
551 translate::TranslateErrors::TRANSLATION_ERROR);
554 // Check the status of the translation.
555 base::MessageLoop::current()->PostDelayedTask(
557 base::Bind(&TranslateHelper::CheckTranslateStatus,
558 weak_method_factory_.GetWeakPtr(), page_seq_no),
559 AdjustDelay(kTranslateStatusCheckDelayMs));
// Marks the translation as no longer pending and sends
// ChromeViewHostMsg_PageTranslated carrying the current source and target
// languages together with |error|.
562 void TranslateHelper::NotifyBrowserTranslationFailed(
563 translate::TranslateErrors::Type error) {
564 translation_pending_ = false;
565 // Notify the browser there was an error.
566 render_view()->Send(new ChromeViewHostMsg_PageTranslated(
567 render_view()->GetRoutingID(), source_lang_, target_lang_, error));
// Returns the main frame of the observed render view's WebView.
// NOTE(review): the null check on |web_view| that the comment below refers to
// is not present in this listing; callers test the returned pointer for
// NULL.
570 WebFrame* TranslateHelper::GetMainFrame() {
571 WebView* web_view = render_view()->GetWebView();
573 // When the tab is going to be closed, the web_view can be NULL.
577 return web_view->mainFrame();
// Requests that CLD data polling stop. This only sets a flag, which
// SendCldDataRequest() checks on entry.
580 void TranslateHelper::CancelCldDataPolling() {
581 cld_data_polling_canceled_ = true;
// One step of the CLD data polling chain. Unless polling was canceled or the
// data is already available, it registers the CLD-available callback if the
// flag says none has been set, asks the provider for the data, and posts
// itself again after |delay_millis| milliseconds.
//
// |delay_millis|      delay before the next poll posted by this call.
// |next_delay_millis| not used in the lines visible here; presumably forwarded
//                     to the next poll -- confirm against the full file.
// NOTE(review): the early returns, closing braces and the arguments bound for
// the next poll are not present in this listing.
584 void TranslateHelper::SendCldDataRequest(const int delay_millis,
585 const int next_delay_millis) {
586 // Terminate immediately if told to stop polling.
587 if (cld_data_polling_canceled_)
590 // Terminate immediately if data is already loaded.
591 if (cld_data_provider_->IsCldDataAvailable())
// NOTE(review): g_cld_callback_set is a file-level flag, while the callback
// is bound to this instance's weak pointer. Once any instance sets the flag,
// other instances skip this registration -- confirm that this is intended.
594 if (!g_cld_callback_set) {
595 g_cld_callback_set = true;
596 cld_data_provider_->SetCldAvailableCallback(
597 base::Bind(&TranslateHelper::OnCldDataAvailable,
598 weak_method_factory_.GetWeakPtr()));
601 // Else, make an asynchronous request to get the data we need.
602 cld_data_provider_->SendCldDataRequest();
604 // ... and enqueue another delayed task to call again. This will start a
605 // chain of polling that will last until the pointer stops being NULL,
606 // which is the right thing to do.
607 // NB: In the great majority of cases, the data file will be available and
608 // the very first delayed task will be a no-op that terminates the chain.
609 // It's only while downloading the file that this will chain for a
610 // nontrivial amount of time.
611 // Use a weak pointer to avoid keeping this helper object around forever.
612 base::MessageLoop::current()->PostDelayedTask(
614 base::Bind(&TranslateHelper::SendCldDataRequest,
615 weak_method_factory_.GetWeakPtr(),
618 base::TimeDelta::FromMilliseconds(delay_millis));
// Callback registered with the CLD data provider in SendCldDataRequest().
// If a page capture was deferred, replays it through PageCapturedImpl() with
// the stored sequence number and contents, then clears the deferred state.
621 void TranslateHelper::OnCldDataAvailable() {
622 if (deferred_page_capture_) {
623 deferred_page_capture_ = false; // Don't do this a second time.
// deferred_page_seq_no_ is still set during this call, which is how
// PageCapturedImpl() distinguishes a resumed request from an on-time one.
624 PageCapturedImpl(deferred_page_seq_no_, deferred_contents_);
625 deferred_page_seq_no_ = -1; // Clean up for sanity
626 deferred_contents_.clear(); // Clean up for sanity
// Records |timing| in the "Translate.LanguageDetectionTiming" enumeration
// histogram and then asks the render thread to update histograms.
// PageCapturedImpl() calls this with DEFERRED, ON_TIME or RESUMED.
630 void TranslateHelper::RecordLanguageDetectionTiming(
631 LanguageDetectionTiming timing) {
632 // The following comment is copied from page_load_histograms.cc, and applies
633 // just as equally here:
635 // Since there are currently no guarantees that renderer histograms will be
636 // sent to the browser, we initiate a PostTask here to be sure that we send
637 // the histograms we generated. Without this call, pages that don't have an
638 // on-close-handler might generate data that is lost when the renderer is
639 // shutdown abruptly (perchance because the user closed the tab).
640 DVLOG(1) << "Language detection timing: " << timing;
641 UMA_HISTOGRAM_ENUMERATION("Translate.LanguageDetectionTiming", timing,
642 LANGUAGE_DETECTION_TIMING_MAX_VALUE);
644 // Note on performance: Under normal circumstances, this should get called
645 // once per page load. The code will either manage to do it ON_TIME or will
646 // be DEFERRED until CLD is ready. In the latter case, CLD is in dynamic mode
647 // and may eventually become available, triggering the RESUMED event; after
648 // this, everything should start being ON_TIME. This should never run more
649 // than twice in a page load, under any conditions.
650 // Also note that language detection is triggered off of a delay AFTER the
651 // page load completed event has fired, making this very much off the critical
// path.
653 content::RenderThread::Get()->UpdateHistograms(
654 content::kHistogramSynchronizerReservedSequenceNumber);