1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/renderer/translate/translate_helper.h"
7 #if defined(CLD2_DYNAMIC_MODE)
11 #include "base/bind.h"
12 #include "base/compiler_specific.h"
13 #if defined(CLD2_DYNAMIC_MODE)
14 #include "base/files/memory_mapped_file.h"
16 #include "base/logging.h"
17 #include "base/message_loop/message_loop.h"
18 #include "base/strings/string16.h"
19 #include "base/strings/string_util.h"
20 #include "base/strings/utf_string_conversions.h"
21 #include "chrome/renderer/isolated_world_ids.h"
22 #include "components/translate/content/common/translate_messages.h"
23 #include "components/translate/core/common/translate_constants.h"
24 #include "components/translate/core/common/translate_metrics.h"
25 #include "components/translate/core/common/translate_util.h"
26 #include "components/translate/core/language_detection/language_detection_util.h"
27 #include "content/public/renderer/render_view.h"
28 #include "extensions/common/constants.h"
29 #include "extensions/renderer/extension_groups.h"
30 #include "ipc/ipc_platform_file.h"
31 #if defined(CLD2_DYNAMIC_MODE)
32 #include "content/public/common/url_constants.h"
33 #include "third_party/cld_2/src/public/compact_lang_det.h"
35 #include "third_party/WebKit/public/web/WebDocument.h"
36 #include "third_party/WebKit/public/web/WebElement.h"
37 #include "third_party/WebKit/public/web/WebFrame.h"
38 #include "third_party/WebKit/public/web/WebNode.h"
39 #include "third_party/WebKit/public/web/WebNodeList.h"
40 #include "third_party/WebKit/public/web/WebScriptSource.h"
41 #include "third_party/WebKit/public/web/WebView.h"
42 #include "third_party/WebKit/public/web/WebWidget.h"
44 #include "v8/include/v8.h"
46 using base::ASCIIToUTF16;
47 using blink::WebDocument;
48 using blink::WebElement;
49 using blink::WebFrame;
51 using blink::WebNodeList;
52 using blink::WebScriptSource;
53 using blink::WebSecurityOrigin;
54 using blink::WebString;
55 using blink::WebVector;
60 // The delay in milliseconds that we'll wait before checking to see if the
61 // translate library injected in the page is ready.
// TranslatePageImpl() multiplies this by the attempt count, so each retry
// waits longer than the previous one.
62 const int kTranslateInitCheckDelayMs = 150;
64 // The maximum number of times we'll check to see if the translate library
65 // injected in the page is ready.
// Once this many attempts have been made, TranslatePageImpl() reports
// TranslateErrors::INITIALIZATION_ERROR to the browser.
66 const int kMaxTranslateInitCheckAttempts = 5;
68 // The delay we wait in milliseconds before checking whether the translation has
// finished or failed (see CheckTranslateStatus()).
70 const int kTranslateStatusCheckDelayMs = 400;
72 // Language name passed to the Translate element for it to detect the language.
// OnTranslatePage() substitutes it when the browser passes
// translate::kUnknownLanguageCode as the source language.
73 const char kAutoDetectionLanguage[] = "auto";
75 // Content-security-policy that OnTranslatePage() installs on the translate
// isolated world (chrome::ISOLATED_WORLD_ID_TRANSLATE).
76 const char kContentSecurityPolicy[] = "script-src 'self' 'unsafe-eval'";
80 #if defined(CLD2_DYNAMIC_MODE)
81 // The mmap for the CLD2 data must be held forever once it is available in the
82 // process. This is declared static in the translate_helper.h.
// LoadCLDDData() stores the base::MemoryMappedFile pointer in the wrapper's
// |value| member. The instance uses the Leaky LazyInstance flavour, which
// presumably means it is never destroyed at shutdown -- consistent with the
// "held forever" requirement above.
83 base::LazyInstance<TranslateHelper::CLDMmapWrapper>::Leaky
84 TranslateHelper::s_cld_mmap_ = LAZY_INSTANCE_INITIALIZER;
87 ////////////////////////////////////////////////////////////////////////////////
88 // TranslateHelper, public:
// Creates a helper that observes |render_view|. No translation is pending
// initially, and weak_method_factory_ is bound to this object so that delayed
// tasks can be posted against weak pointers.
90 TranslateHelper::TranslateHelper(content::RenderView* render_view)
91 : content::RenderViewObserver(render_view),
93 translation_pending_(false),
94 weak_method_factory_(this)
95 #if defined(CLD2_DYNAMIC_MODE)
// Dynamic-mode state: CLD2 data-file polling has neither started nor been
// canceled, and no page capture is deferred.
96 ,cld2_data_file_polling_started_(false),
97 cld2_data_file_polling_canceled_(false),
98 deferred_page_capture_(false),
99 deferred_page_id_(-1),
100 deferred_contents_(ASCIIToUTF16(""))
// Cancels any in-flight translation before the helper goes away.
105 TranslateHelper::~TranslateHelper() {
106 CancelPendingTranslation();
107 #if defined(CLD2_DYNAMIC_MODE)
// NOTE(review): CancelPendingTranslation() already calls
// CancelCLD2DataFilePolling() in dynamic mode, so this call looks redundant
// (harmless: it only sets a flag).
108 CancelCLD2DataFilePolling();
// Prepares the helper for |url|. In CLD2 dynamic mode this discards any page
// capture that was deferred earlier and starts polling the browser for the
// CLD2 data file. The guards below are expected to bail out early when polling
// has already been started or when |url| uses a scheme that is not translated
// (chrome UI, devtools, ftp and, on Chrome OS, the file manager extension).
112 void TranslateHelper::PrepareForUrl(const GURL& url) {
113 #if defined(CLD2_DYNAMIC_MODE)
// Forget any capture deferred by DeferPageCaptured().
114 deferred_page_capture_ = false;
115 deferred_contents_.clear();
// Polling is only ever started once per helper.
116 if (cld2_data_file_polling_started_)
119 // TODO(andrewhayden): Refactor translate_manager.cc's IsTranslatableURL to
120 // components/translate/core/common/translate_util.cc, and ignore any URL
121 // that fails that check. This will require moving unit tests and rewiring
122 // other function calls as well, so for now replicate the logic here.
125 if (url.SchemeIs(content::kChromeUIScheme))
127 if (url.SchemeIs(content::kChromeDevToolsScheme))
129 if (url.SchemeIs(url::kFtpScheme))
131 #if defined(OS_CHROMEOS)
132 if (url.SchemeIs(extensions::kExtensionScheme) &&
133 url.DomainIs(file_manager::kFileManagerAppId))
137 // Start polling for CLD data.
138 cld2_data_file_polling_started_ = true;
// The request is sent right away; the follow-up is posted with a delay of 0 ms
// and every later one with a delay of 1000 ms (see SendCLD2DataFileRequest()).
139 TranslateHelper::SendCLD2DataFileRequest(0, 1000);
143 #if defined(CLD2_DYNAMIC_MODE)
// Remembers a page capture that could not be processed yet. The stored
// |page_id| and |contents| are replayed through PageCaptured() by
// OnCLDDataAvailable() once the CLD2 data has been loaded. Only the most
// recent capture is kept; a later call overwrites the earlier one.
144 void TranslateHelper::DeferPageCaptured(const int page_id,
145 const base::string16& contents) {
146 deferred_page_capture_ = true;
147 deferred_page_id_ = page_id;
148 deferred_contents_ = contents;
// Determines the language of the captured page and reports it to the browser
// with a ChromeViewHostMsg_TranslateLanguageDetermined message.
//   page_id:  id of the page the text was captured from; it is compared with
//             the render view's current page id.
//   contents: captured page text, passed on to language detection and copied
//             into the detection details.
152 void TranslateHelper::PageCaptured(int page_id,
153 const base::string16& contents) {
154 // Get the document language as set by WebKit from the http-equiv
155 // meta tag for "content-language". This may or may not also
156 // have a value derived from the actual Content-Language HTTP
157 // header. The two actually have different meanings (despite the
158 // original intent of http-equiv to be an equivalent) with the former
159 // being the language of the document and the latter being the
160 // language of the intended audience (a distinction really only
161 // relevant for things like language textbooks). This distinction
162 // shouldn't affect translation.
163 WebFrame* main_frame = GetMainFrame();
// Nothing to do without a main frame or when the capture belongs to a page
// other than the current one.
164 if (!main_frame || render_view()->GetPageId() != page_id)
167 // TODO(andrewhayden): UMA insertion point here: Track if data is available.
168 // TODO(andrewhayden): Retry insertion point here, retry till data available.
169 #if defined(CLD2_DYNAMIC_MODE)
170 if (!CLD2::isDataLoaded()) {
171 // We're in dynamic mode and CLD data isn't loaded. Retry when CLD data
172 // is loaded, if ever.
173 TranslateHelper::DeferPageCaptured(page_id, contents);
// Collect the language hints declared by the document itself.
178 WebDocument document = main_frame->document();
179 std::string content_language = document.contentLanguage().utf8();
180 WebElement html_element = document.documentElement();
181 std::string html_lang;
182 // |html_element| can be null element, e.g. in
183 // BrowserTest.WindowOpenClose.
184 if (!html_element.isNull())
185 html_lang = html_element.getAttribute("lang").utf8();
// |cld_language| and |is_cld_reliable| are passed by address to
// DeterminePageLanguage() and copied into |details| below.
186 std::string cld_language;
187 bool is_cld_reliable;
188 std::string language = translate::DeterminePageLanguage(
189 content_language, html_lang, contents, &cld_language, &is_cld_reliable);
191 if (language.empty())
// Remember when detection finished; OnTranslatePage() reports the time elapsed
// since this point via translate::ReportUserActionDuration().
194 language_determined_time_ = base::TimeTicks::Now();
196 GURL url(document.url());
197 LanguageDetectionDetails details;
198 details.time = base::Time::Now();
200 details.content_language = content_language;
201 details.cld_language = cld_language;
202 details.is_cld_reliable = is_cld_reliable;
203 details.html_root_language = html_lang;
204 details.adopted_language = language;
206 // TODO(hajimehoshi): If this affects performance, it should be set only if
207 // translate-internals tab exists.
208 details.contents = contents;
// The last message argument tells the browser whether translation is allowed
// for this page: no opt-out meta tag and a non-empty detected language.
210 Send(new ChromeViewHostMsg_TranslateLanguageDetermined(
213 IsTranslationAllowed(&document) && !language.empty()));
// Abandons the translation in progress, if any, and clears the remembered
// source/target languages.
216 void TranslateHelper::CancelPendingTranslation() {
// Invalidating the weak pointers is what stops the delayed
// CheckTranslateStatus()/TranslatePageImpl() tasks bound to them.
// NOTE(review): the CLD2 polling task in SendCLD2DataFileRequest() is bound to
// the same factory, so it is presumably dropped here as well -- confirm that
// this is intended.
217 weak_method_factory_.InvalidateWeakPtrs();
218 translation_pending_ = false;
219 source_lang_.clear();
220 target_lang_.clear();
221 #if defined(CLD2_DYNAMIC_MODE)
222 CancelCLD2DataFilePolling();
226 ////////////////////////////////////////////////////////////////////////////////
227 // TranslateHelper, protected:
// Returns whether the translate script has been injected, i.e. whether
// cr.googleTranslate.translate is defined as a function in the translate
// isolated world. The second argument is presumably the value returned when
// the script cannot be evaluated -- confirm against
// ExecuteScriptAndGetBoolResult().
229 bool TranslateHelper::IsTranslateLibAvailable() {
230 return ExecuteScriptAndGetBoolResult(
231 "typeof cr != 'undefined' && typeof cr.googleTranslate != 'undefined' && "
232 "typeof cr.googleTranslate.translate == 'function'", false);
// Returns the value of cr.googleTranslate.libReady evaluated in the translate
// isolated world (false is passed as the second argument, presumably the
// fallback value).
235 bool TranslateHelper::IsTranslateLibReady() {
236 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.libReady", false);
// Returns the value of cr.googleTranslate.finished evaluated in the translate
// isolated world. Note that true is passed as the second argument here
// (presumably the fallback value), unlike the library-state checks above.
239 bool TranslateHelper::HasTranslationFinished() {
240 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.finished", true);
// Returns the value of cr.googleTranslate.error evaluated in the translate
// isolated world. true is passed as the second argument (presumably the
// fallback value, so an unreadable state would count as a failure).
243 bool TranslateHelper::HasTranslationFailed() {
244 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.error", true);
// Builds a cr.googleTranslate.translate(...) call and evaluates it in the
// translate isolated world, returning the script's boolean result.
// NOTE(review): the script is assembled by string concatenation; presumably
// the arguments are source_lang_ and target_lang_ -- confirm they can never
// contain a quote character.
247 bool TranslateHelper::StartTranslation() {
248 std::string script = "cr.googleTranslate.translate('" +
253 return ExecuteScriptAndGetBoolResult(script, false);
// Returns cr.googleTranslate.sourceLang as reported by the translate script.
// The result is an empty string when the script does not yield a single string
// value (see ExecuteScriptAndGetStringResult()).
256 std::string TranslateHelper::GetOriginalPageLanguage() {
257 return ExecuteScriptAndGetStringResult("cr.googleTranslate.sourceLang");
// Converts |delayInMs| (milliseconds) into the base::TimeDelta used when
// posting the delayed status/initialization checks.
260 base::TimeDelta TranslateHelper::AdjustDelay(int delayInMs) {
261 // Just converts |delayInMs| without any modification in practical cases.
262 // Tests will override this function to return modified value.
263 return base::TimeDelta::FromMilliseconds(delayInMs);
// Runs |script| on the main frame inside the translate isolated world
// (chrome::ISOLATED_WORLD_ID_TRANSLATE) without collecting a result.
// |script| is converted with ASCIIToUTF16(), so it is expected to be ASCII.
266 void TranslateHelper::ExecuteScript(const std::string& script) {
267 WebFrame* main_frame = GetMainFrame();
271 WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
272 main_frame->executeScriptInIsolatedWorld(
273 chrome::ISOLATED_WORLD_ID_TRANSLATE,
276 extensions::EXTENSION_GROUP_INTERNAL_TRANSLATE_SCRIPTS);
// Runs |script| on the main frame inside the translate isolated world and
// returns its result as a bool. The script must produce exactly one non-empty
// value of JavaScript boolean type; any other outcome takes the error branch
// below.
279 bool TranslateHelper::ExecuteScriptAndGetBoolResult(const std::string& script,
281 WebFrame* main_frame = GetMainFrame();
// The handle scope keeps the v8 handles in |results| valid while they are
// inspected.
285 v8::HandleScope handle_scope(v8::Isolate::GetCurrent());
286 WebVector<v8::Local<v8::Value> > results;
287 WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
288 main_frame->executeScriptInIsolatedWorld(
289 chrome::ISOLATED_WORLD_ID_TRANSLATE,
292 extensions::EXTENSION_GROUP_INTERNAL_TRANSLATE_SCRIPTS,
// Reject anything other than a single boolean result.
294 if (results.size() != 1 || results[0].IsEmpty() || !results[0]->IsBoolean()) {
299 return results[0]->BooleanValue();
// Runs |script| on the main frame inside the translate isolated world and
// returns its result as a UTF-8 std::string. An empty string is returned when
// the script does not produce exactly one non-empty JavaScript string value.
302 std::string TranslateHelper::ExecuteScriptAndGetStringResult(
303 const std::string& script) {
304 WebFrame* main_frame = GetMainFrame();
306 return std::string();
// The handle scope keeps the v8 handles in |results| valid while they are
// inspected.
308 v8::HandleScope handle_scope(v8::Isolate::GetCurrent());
309 WebVector<v8::Local<v8::Value> > results;
310 WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
311 main_frame->executeScriptInIsolatedWorld(
312 chrome::ISOLATED_WORLD_ID_TRANSLATE,
315 extensions::EXTENSION_GROUP_INTERNAL_TRANSLATE_SCRIPTS,
// Reject anything other than a single string result.
317 if (results.size() != 1 || results[0].IsEmpty() || !results[0]->IsString()) {
319 return std::string();
// Copy the v8 string into a temporary buffer sized for its UTF-8 encoding plus
// one extra byte (presumably for the terminating NUL written by WriteUtf8()).
322 v8::Local<v8::String> v8_str = results[0]->ToString();
323 int length = v8_str->Utf8Length() + 1;
324 scoped_ptr<char[]> str(new char[length]);
325 v8_str->WriteUtf8(str.get(), length);
// NOTE(review): constructing from a char* stops at the first NUL byte, so a
// result containing an embedded NUL would be truncated.
326 return std::string(str.get());
// Runs |script| on the main frame inside the translate isolated world and
// returns its result as a double. The script must produce exactly one
// non-empty value of JavaScript number type; any other outcome takes the error
// branch below.
329 double TranslateHelper::ExecuteScriptAndGetDoubleResult(
330 const std::string& script) {
331 WebFrame* main_frame = GetMainFrame();
// The handle scope keeps the v8 handles in |results| valid while they are
// inspected.
335 v8::HandleScope handle_scope(v8::Isolate::GetCurrent());
336 WebVector<v8::Local<v8::Value> > results;
337 WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
338 main_frame->executeScriptInIsolatedWorld(
339 chrome::ISOLATED_WORLD_ID_TRANSLATE,
342 extensions::EXTENSION_GROUP_INTERNAL_TRANSLATE_SCRIPTS,
// Reject anything other than a single numeric result.
344 if (results.size() != 1 || results[0].IsEmpty() || !results[0]->IsNumber()) {
349 return results[0]->NumberValue();
352 ////////////////////////////////////////////////////////////////////////////////
353 // TranslateHelper, private:
// Checks whether |document| opts out of translation. The direct children of
// its <head> are scanned for a <meta name="google"> element whose "value"
// attribute (or, when that is absent, "content" attribute) equals
// "notranslate", compared with LowerCaseEqualsASCII().
// Presumably returns false when such a tag is found and true otherwise
// (including when there is no <head> or it has no children) -- confirm.
357 bool TranslateHelper::IsTranslationAllowed(WebDocument* document) {
358 WebElement head = document->head();
359 if (head.isNull() || !head.hasChildNodes())
// Tag and attribute names are built once, outside the loop.
362 const WebString meta(ASCIIToUTF16("meta"));
363 const WebString name(ASCIIToUTF16("name"));
364 const WebString google(ASCIIToUTF16("google"));
365 const WebString value(ASCIIToUTF16("value"));
366 const WebString content(ASCIIToUTF16("content"));
368 WebNodeList children = head.childNodes();
369 for (size_t i = 0; i < children.length(); ++i) {
370 WebNode node = children.item(i);
// Only element nodes can be <meta> tags.
371 if (!node.isElementNode())
373 WebElement element = node.to<WebElement>();
374 // Check if a tag is <meta>.
375 if (!element.hasTagName(meta))
377 // Check if the tag contains name="google".
378 WebString attribute = element.getAttribute(name);
379 if (attribute.isNull() || attribute != google)
381 // Check if the tag contains value="notranslate", or content="notranslate".
382 attribute = element.getAttribute(value);
383 if (attribute.isNull())
384 attribute = element.getAttribute(content);
385 if (attribute.isNull())
387 if (LowerCaseEqualsASCII(attribute, "notranslate"))
// Dispatches IPC messages addressed to this helper: translate-page and
// revert-translation requests and, in CLD2 dynamic mode, CLD data availability
// notifications. For any other message |handled| is set to false.
393 bool TranslateHelper::OnMessageReceived(const IPC::Message& message) {
395 IPC_BEGIN_MESSAGE_MAP(TranslateHelper, message)
396 IPC_MESSAGE_HANDLER(ChromeViewMsg_TranslatePage, OnTranslatePage)
397 IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation, OnRevertTranslation)
398 #if defined(CLD2_DYNAMIC_MODE)
// NOTE(review): the trailing ';' below is inconsistent with the other handler
// entries.
399 IPC_MESSAGE_HANDLER(ChromeViewMsg_CLDDataAvailable, OnCLDDataAvailable);
401 IPC_MESSAGE_UNHANDLED(handled = false)
402 IPC_END_MESSAGE_MAP()
// Handles the browser's request to translate the current page.
//   page_id:          page the request was issued for; the request is ignored
//                     if it no longer matches the current page.
//   translate_script: script that defines cr.googleTranslate; it is injected
//                     only if the library is not already available.
//   source_lang:      source language, or translate::kUnknownLanguageCode to
//                     let the translate element detect it.
//   target_lang:      language to translate into.
406 void TranslateHelper::OnTranslatePage(int page_id,
407 const std::string& translate_script,
408 const std::string& source_lang,
409 const std::string& target_lang) {
410 WebFrame* main_frame = GetMainFrame();
412 page_id_ != page_id ||
413 render_view()->GetPageId() != page_id)
414 return; // We navigated away, nothing to do.
416 // A similar translation is already under way, nothing to do.
417 if (translation_pending_ && target_lang_ == target_lang)
420 // Any pending translation is now irrelevant.
421 CancelPendingTranslation();
424 translation_pending_ = true;
426 // If the source language is undetermined, we'll let the translate element
// detect it.
428 source_lang_ = (source_lang != translate::kUnknownLanguageCode) ?
429 source_lang : kAutoDetectionLanguage;
430 target_lang_ = target_lang;
// UMA: time between language detection (recorded in PageCaptured()) and this
// translate request, plus the scheme of the page being translated.
432 translate::ReportUserActionDuration(language_determined_time_,
433 base::TimeTicks::Now());
435 GURL url(main_frame->document().url());
436 translate::ReportPageScheme(url.scheme());
438 // Set up v8 isolated world with proper content-security-policy and
// security origin.
// NOTE(review): |frame| is fetched with a second GetMainFrame() call although
// |main_frame| above already holds the result of the same call.
440 WebFrame* frame = GetMainFrame();
442 frame->setIsolatedWorldContentSecurityPolicy(
443 chrome::ISOLATED_WORLD_ID_TRANSLATE,
444 WebString::fromUTF8(kContentSecurityPolicy));
446 GURL security_origin = translate::GetTranslateSecurityOrigin();
447 frame->setIsolatedWorldSecurityOrigin(
448 chrome::ISOLATED_WORLD_ID_TRANSLATE,
449 WebSecurityOrigin::create(security_origin));
452 if (!IsTranslateLibAvailable()) {
453 // Evaluate the script to add the translation related method to the global
454 // context of the page.
455 ExecuteScript(translate_script);
456 DCHECK(IsTranslateLibAvailable());
// Start the readiness polling with an attempt count of zero.
459 TranslatePageImpl(0);
// Handles the browser's request to restore the original (untranslated) page.
// The request is ignored when |page_id| no longer matches the current page.
462 void TranslateHelper::OnRevertTranslation(int page_id) {
463 if (page_id_ != page_id || render_view()->GetPageId() != page_id)
464 return; // We navigated away, nothing to do.
// Reverting relies on the injected translate library.
466 if (!IsTranslateLibAvailable()) {
// Stop any translation still in progress before reverting.
471 CancelPendingTranslation();
473 ExecuteScript("cr.googleTranslate.revert()");
// Polls the translate script for completion. Reports a failure or a success to
// the browser when one is detected; otherwise re-posts itself to run again
// after kTranslateStatusCheckDelayMs.
476 void TranslateHelper::CheckTranslateStatus() {
477 // If this is not the same page, the translation has been canceled. If the
478 // view is gone, the page is closing.
479 if (page_id_ != render_view()->GetPageId() || !render_view()->GetWebView())
482 // First check if there was an error.
483 if (HasTranslationFailed()) {
484 // TODO(toyoshim): Check |errorCode| of translate.js and notify it here.
485 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR);
486 return; // There was an error.
489 if (HasTranslationFinished()) {
490 std::string actual_source_lang;
491 // Translation was successful, if it was auto, retrieve the source
492 // language the Translate Element detected.
493 if (source_lang_ == kAutoDetectionLanguage) {
494 actual_source_lang = GetOriginalPageLanguage();
// An undetected language, or one identical to the target, is reported to the
// browser as a failure.
495 if (actual_source_lang.empty()) {
496 NotifyBrowserTranslationFailed(TranslateErrors::UNKNOWN_LANGUAGE);
498 } else if (actual_source_lang == target_lang_) {
499 NotifyBrowserTranslationFailed(TranslateErrors::IDENTICAL_LANGUAGES);
503 actual_source_lang = source_lang_;
506 if (!translation_pending_) {
511 translation_pending_ = false;
513 // Check JavaScript performance counters for UMA reports.
514 translate::ReportTimeToTranslate(
515 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.translationTime"));
517 // Notify the browser we are done.
518 render_view()->Send(new ChromeViewHostMsg_PageTranslated(
519 render_view()->GetRoutingID(), render_view()->GetPageId(),
520 actual_source_lang, target_lang_, TranslateErrors::NONE));
524 // The translation is still pending, check again later.
// The task is bound to a weak pointer, so CancelPendingTranslation() can
// invalidate it.
525 base::MessageLoop::current()->PostDelayedTask(
527 base::Bind(&TranslateHelper::CheckTranslateStatus,
528 weak_method_factory_.GetWeakPtr()),
529 AdjustDelay(kTranslateStatusCheckDelayMs));
// Starts the translation once the injected translate library reports that it
// is ready. |count| is the number of readiness checks already made; while the
// library is not ready the function re-posts itself, giving up with
// TranslateErrors::INITIALIZATION_ERROR once kMaxTranslateInitCheckAttempts
// is reached.
532 void TranslateHelper::TranslatePageImpl(int count) {
533 DCHECK_LT(count, kMaxTranslateInitCheckAttempts);
// Stop if the page changed or the view is gone.
534 if (page_id_ != render_view()->GetPageId() || !render_view()->GetWebView())
537 if (!IsTranslateLibReady()) {
538 // The library is not ready, try again later, unless we have tried several
539 // times unsuccessfully already.
540 if (++count >= kMaxTranslateInitCheckAttempts) {
541 NotifyBrowserTranslationFailed(TranslateErrors::INITIALIZATION_ERROR);
// The retry delay grows with the attempt number:
// count * kTranslateInitCheckDelayMs.
544 base::MessageLoop::current()->PostDelayedTask(
546 base::Bind(&TranslateHelper::TranslatePageImpl,
547 weak_method_factory_.GetWeakPtr(),
549 AdjustDelay(count * kTranslateInitCheckDelayMs));
553 // The library is loaded, and ready for translation now.
554 // Check JavaScript performance counters for UMA reports.
555 translate::ReportTimeToBeReady(
556 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.readyTime"));
557 translate::ReportTimeToLoad(
558 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.loadTime"));
560 if (!StartTranslation()) {
561 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR);
564 // Check the status of the translation.
565 base::MessageLoop::current()->PostDelayedTask(
567 base::Bind(&TranslateHelper::CheckTranslateStatus,
568 weak_method_factory_.GetWeakPtr()),
569 AdjustDelay(kTranslateStatusCheckDelayMs));
// Marks the translation as no longer pending and tells the browser that it
// failed with |error|, via a ChromeViewHostMsg_PageTranslated message carrying
// the stored page id and source/target languages.
572 void TranslateHelper::NotifyBrowserTranslationFailed(
573 TranslateErrors::Type error) {
574 translation_pending_ = false;
575 // Notify the browser there was an error.
576 render_view()->Send(new ChromeViewHostMsg_PageTranslated(
577 render_view()->GetRoutingID(), page_id_, source_lang_,
578 target_lang_, error));
// Returns the main frame of the observed render view's WebView. Callers in
// this file check the result for NULL, since the WebView itself may be gone.
581 WebFrame* TranslateHelper::GetMainFrame() {
582 WebView* web_view = render_view()->GetWebView();
584 // When the tab is going to be closed, the web_view can be NULL.
588 return web_view->mainFrame();
591 #if defined(CLD2_DYNAMIC_MODE)
// Requests that CLD2 data-file polling stop. This only sets a flag, which
// SendCLD2DataFileRequest() and LoadCLDDData() check before doing any work.
// Nothing visible in this file resets the flag afterwards.
592 void TranslateHelper::CancelCLD2DataFilePolling() {
593 cld2_data_file_polling_canceled_ = true;
// Asks the browser process for the CLD2 data file and schedules the next
// request.
//   delay_millis:      delay, in milliseconds, before the follow-up call runs.
//   next_delay_millis: value passed for both parameters of the follow-up call,
//                      so every later request uses this delay.
// The chain ends once polling is canceled or the CLD2 data has been loaded.
596 void TranslateHelper::SendCLD2DataFileRequest(const int delay_millis,
597 const int next_delay_millis) {
598 // Terminate immediately if told to stop polling.
599 if (cld2_data_file_polling_canceled_)
602 // Terminate immediately if data is already loaded.
603 if (CLD2::isDataLoaded())
606 // Else, send the IPC message to the browser process requesting the data...
607 Send(new ChromeViewHostMsg_NeedCLDData(routing_id()));
609 // ... and enqueue another delayed task to call again. This will start a
610 // chain of polling that will last until the pointer stops being NULL,
611 // which is the right thing to do.
612 // NB: In the great majority of cases, the data file will be available and
613 // the very first delayed task will be a no-op that terminates the chain.
614 // It's only while downloading the file that this will chain for a
615 // nontrivial amount of time.
616 // Use a weak pointer to avoid keeping this helper object around forever.
617 base::MessageLoop::current()->PostDelayedTask(
619 base::Bind(&TranslateHelper::SendCLD2DataFileRequest,
620 weak_method_factory_.GetWeakPtr(),
621 next_delay_millis, next_delay_millis),
622 base::TimeDelta::FromMilliseconds(delay_millis));
// Handles the browser's notification that the CLD2 data file is available.
//   ipc_file_handle: handle to the data file, converted to a base::File.
//   data_offset:     offset of the CLD2 data within the file.
//   data_length:     length of the CLD2 data.
// After attempting to load the data, replays the page capture that
// PageCaptured() deferred, if there is one and the data is now loaded.
625 void TranslateHelper::OnCLDDataAvailable(
626 const IPC::PlatformFileForTransit ipc_file_handle,
627 const uint64 data_offset,
628 const uint64 data_length) {
629 LoadCLDDData(IPC::PlatformFileForTransitToFile(ipc_file_handle), data_offset,
631 if (deferred_page_capture_ && CLD2::isDataLoaded()) {
632 deferred_page_capture_ = false; // Don't do this a second time.
633 PageCaptured(deferred_page_id_, deferred_contents_);
634 deferred_page_id_ = -1; // Clean up for sanity
635 deferred_contents_.clear(); // Clean up for sanity
// Memory-maps the CLD2 data file and hands the mapped region to CLD2.
//   data_offset: offset of the CLD2 data within the mapped file.
//   data_length: length of the CLD2 data; values above the int32 maximum are
//                rejected.
// Does nothing when polling was canceled or the data is already loaded. When
// mapping or validation fails, an error is logged and the mmap object is
// deleted, leaving s_cld_mmap_'s value NULL.
639 void TranslateHelper::LoadCLDDData(
641 const uint64 data_offset,
642 const uint64 data_length) {
643 // Terminate immediately if told to stop polling.
644 if (cld2_data_file_polling_canceled_)
647 // Terminate immediately if data is already loaded.
648 if (CLD2::isDataLoaded())
651 if (!file.IsValid()) {
652 LOG(ERROR) << "Can't find the CLD data file.";
// Map the file. The mapping is stored in the process-wide s_cld_mmap_ so that
// it outlives this helper.
657 s_cld_mmap_.Get().value = new base::MemoryMappedFile();
658 bool initialized = s_cld_mmap_.Get().value->Initialize(file.Pass());
660 LOG(ERROR) << "mmap initialization failed";
661 delete s_cld_mmap_.Get().value;
662 s_cld_mmap_.Get().value = NULL;
// Validate that the requested region lies inside the mapping.
// NOTE(review): |data_length + data_offset| is computed in uint64 and could
// wrap around for a very large |data_offset|, which would let an out-of-range
// region pass this check -- consider comparing against length() - data_offset
// instead.
667 uint64 max_int32 = std::numeric_limits<int32>::max();
668 if (data_length + data_offset > s_cld_mmap_.Get().value->length()
669 || data_length > max_int32) { // max signed 32 bit integer
670 LOG(ERROR) << "Illegal mmap config: data_offset="
671 << data_offset << ", data_length=" << data_length
672 << ", mmap->length()=" << s_cld_mmap_.Get().value->length();
673 delete s_cld_mmap_.Get().value;
674 s_cld_mmap_.Get().value = NULL;
678 // Initialize the CLD subsystem... and it's all done!
679 const uint8* data_ptr = s_cld_mmap_.Get().value->data() + data_offset;
680 CLD2::loadDataFromRawAddress(data_ptr, data_length);
681 DCHECK(CLD2::isDataLoaded()) << "Failed to load CLD data from mmap";