Update To 11.40.268.0
[platform/framework/web/crosswalk.git] / src / chrome / browser / metrics / thread_watcher.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/metrics/thread_watcher.h"
6
7 #include <math.h>  // ceil
8
9 #include "base/bind.h"
10 #include "base/compiler_specific.h"
11 #include "base/debug/alias.h"
12 #include "base/debug/debugger.h"
13 #include "base/debug/dump_without_crashing.h"
14 #include "base/lazy_instance.h"
15 #include "base/metrics/field_trial.h"
16 #include "base/strings/string_number_conversions.h"
17 #include "base/strings/string_split.h"
18 #include "base/strings/string_tokenizer.h"
19 #include "base/strings/stringprintf.h"
20 #include "base/threading/thread_restrictions.h"
21 #include "build/build_config.h"
22 #include "chrome/browser/chrome_notification_types.h"
23 #include "chrome/common/chrome_switches.h"
24 #include "chrome/common/chrome_version_info.h"
25 #include "chrome/common/logging_chrome.h"
26 #include "content/public/browser/notification_service.h"
27
28 #if defined(OS_WIN)
29 #include "base/win/windows_version.h"
30 #endif
31
32 using content::BrowserThread;
33
34 namespace {
35
36 // The following are unique function names for forcing the crash when a thread
37 // is unresponsive. This makes it possible to tell from the callstack alone what
38 // thread was unresponsive.
39 //
40 // We disable optimizations for this block of functions so the compiler doesn't
41 // merge them all together.
42 MSVC_DISABLE_OPTIMIZE()
43 MSVC_PUSH_DISABLE_WARNING(4748)
44
45 void ReportThreadHang() {
46 #if defined(NDEBUG)
47   base::debug::DumpWithoutCrashing();
48 #else
49   base::debug::BreakDebugger();
50 #endif
51 }
52
53 #if !defined(OS_ANDROID) || !defined(NDEBUG)
54 // TODO(rtenneti): Enabled crashing, after getting data.
55 NOINLINE void StartupHang() {
56   ReportThreadHang();
57 }
58 #endif  // OS_ANDROID
59
60 NOINLINE void ShutdownHang() {
61   ReportThreadHang();
62 }
63
64 NOINLINE void ThreadUnresponsive_UI() {
65   ReportThreadHang();
66 }
67
68 NOINLINE void ThreadUnresponsive_DB() {
69   ReportThreadHang();
70 }
71
72 NOINLINE void ThreadUnresponsive_FILE() {
73   ReportThreadHang();
74 }
75
76 NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() {
77   ReportThreadHang();
78 }
79
80 NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() {
81   ReportThreadHang();
82 }
83
84 NOINLINE void ThreadUnresponsive_CACHE() {
85   ReportThreadHang();
86 }
87
88 NOINLINE void ThreadUnresponsive_IO() {
89   ReportThreadHang();
90 }
91
92 void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) {
93   base::debug::Alias(&thread_id);
94
95   switch (thread_id) {
96     case BrowserThread::UI:
97       return ThreadUnresponsive_UI();
98     case BrowserThread::DB:
99       return ThreadUnresponsive_DB();
100     case BrowserThread::FILE:
101       return ThreadUnresponsive_FILE();
102     case BrowserThread::FILE_USER_BLOCKING:
103       return ThreadUnresponsive_FILE_USER_BLOCKING();
104     case BrowserThread::PROCESS_LAUNCHER:
105       return ThreadUnresponsive_PROCESS_LAUNCHER();
106     case BrowserThread::CACHE:
107       return ThreadUnresponsive_CACHE();
108     case BrowserThread::IO:
109       return ThreadUnresponsive_IO();
110     case BrowserThread::ID_COUNT:
111       CHECK(false);  // This shouldn't actually be reached!
112       break;
113
114     // Omission of the default hander is intentional -- that way the compiler
115     // should warn if our switch becomes outdated.
116   }
117
118   CHECK(false) << "Unknown thread was unresponsive.";  // Shouldn't be reached.
119 }
120
121 MSVC_POP_WARNING()
122 MSVC_ENABLE_OPTIMIZE();
123
124 }  // namespace
125
126 // ThreadWatcher methods and members.
127 ThreadWatcher::ThreadWatcher(const WatchingParams& params)
128     : thread_id_(params.thread_id),
129       thread_name_(params.thread_name),
130       watched_loop_(
131           BrowserThread::GetMessageLoopProxyForThread(params.thread_id)),
132       sleep_time_(params.sleep_time),
133       unresponsive_time_(params.unresponsive_time),
134       ping_time_(base::TimeTicks::Now()),
135       pong_time_(ping_time_),
136       ping_sequence_number_(0),
137       active_(false),
138       ping_count_(params.unresponsive_threshold),
139       response_time_histogram_(NULL),
140       unresponsive_time_histogram_(NULL),
141       unresponsive_count_(0),
142       hung_processing_complete_(false),
143       unresponsive_threshold_(params.unresponsive_threshold),
144       crash_on_hang_(params.crash_on_hang),
145       live_threads_threshold_(params.live_threads_threshold),
146       weak_ptr_factory_(this) {
147   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
148   Initialize();
149 }
150
151 ThreadWatcher::~ThreadWatcher() {}
152
153 // static
154 void ThreadWatcher::StartWatching(const WatchingParams& params) {
155   DCHECK_GE(params.sleep_time.InMilliseconds(), 0);
156   DCHECK_GE(params.unresponsive_time.InMilliseconds(),
157             params.sleep_time.InMilliseconds());
158
159   // If we are not on WatchDogThread, then post a task to call StartWatching on
160   // WatchDogThread.
161   if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
162     WatchDogThread::PostTask(
163         FROM_HERE,
164         base::Bind(&ThreadWatcher::StartWatching, params));
165     return;
166   }
167
168   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
169
170   // Create a new thread watcher object for the given thread and activate it.
171   ThreadWatcher* watcher = new ThreadWatcher(params);
172
173   DCHECK(watcher);
174   // If we couldn't register the thread watcher object, we are shutting down,
175   // then don't activate thread watching.
176   if (!ThreadWatcherList::IsRegistered(params.thread_id))
177     return;
178   watcher->ActivateThreadWatching();
179 }
180
181 void ThreadWatcher::ActivateThreadWatching() {
182   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
183   if (active_) return;
184   active_ = true;
185   ping_count_ = unresponsive_threshold_;
186   ResetHangCounters();
187   base::MessageLoop::current()->PostTask(
188       FROM_HERE,
189       base::Bind(&ThreadWatcher::PostPingMessage,
190                  weak_ptr_factory_.GetWeakPtr()));
191 }
192
193 void ThreadWatcher::DeActivateThreadWatching() {
194   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
195   active_ = false;
196   ping_count_ = 0;
197   weak_ptr_factory_.InvalidateWeakPtrs();
198 }
199
200 void ThreadWatcher::WakeUp() {
201   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
202   // There is some user activity, PostPingMessage task of thread watcher if
203   // needed.
204   if (!active_) return;
205
206   // Throw away the previous |unresponsive_count_| and start over again. Just
207   // before going to sleep, |unresponsive_count_| could be very close to
208   // |unresponsive_threshold_| and when user becomes active,
209   // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no
210   // response for ping messages. Reset |unresponsive_count_| to start measuring
211   // the unresponsiveness of the threads when system becomes active.
212   unresponsive_count_ = 0;
213
214   if (ping_count_ <= 0) {
215     ping_count_ = unresponsive_threshold_;
216     ResetHangCounters();
217     PostPingMessage();
218   } else {
219     ping_count_ = unresponsive_threshold_;
220   }
221 }
222
223 void ThreadWatcher::PostPingMessage() {
224   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
225   // If we have stopped watching or if the user is idle, then stop sending
226   // ping messages.
227   if (!active_ || ping_count_ <= 0)
228     return;
229
230   // Save the current time when we have sent ping message.
231   ping_time_ = base::TimeTicks::Now();
232
233   // Send a ping message to the watched thread. Callback will be called on
234   // the WatchDogThread.
235   base::Closure callback(
236       base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(),
237                  ping_sequence_number_));
238   if (watched_loop_->PostTask(
239           FROM_HERE,
240           base::Bind(&ThreadWatcher::OnPingMessage, thread_id_,
241                      callback))) {
242       // Post a task to check the responsiveness of watched thread.
243       base::MessageLoop::current()->PostDelayedTask(
244           FROM_HERE,
245           base::Bind(&ThreadWatcher::OnCheckResponsiveness,
246                      weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
247           unresponsive_time_);
248   } else {
249     // Watched thread might have gone away, stop watching it.
250     DeActivateThreadWatching();
251   }
252 }
253
254 void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) {
255   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
256
257   // Record watched thread's response time.
258   base::TimeTicks now = base::TimeTicks::Now();
259   base::TimeDelta response_time = now - ping_time_;
260   response_time_histogram_->AddTime(response_time);
261
262   // Save the current time when we have got pong message.
263   pong_time_ = now;
264
265   // Check if there are any extra pings in flight.
266   DCHECK_EQ(ping_sequence_number_, ping_sequence_number);
267   if (ping_sequence_number_ != ping_sequence_number)
268     return;
269
270   // Increment sequence number for the next ping message to indicate watched
271   // thread is responsive.
272   ++ping_sequence_number_;
273
274   // If we have stopped watching or if the user is idle, then stop sending
275   // ping messages.
276   if (!active_ || --ping_count_ <= 0)
277     return;
278
279   base::MessageLoop::current()->PostDelayedTask(
280       FROM_HERE,
281       base::Bind(&ThreadWatcher::PostPingMessage,
282                  weak_ptr_factory_.GetWeakPtr()),
283       sleep_time_);
284 }
285
286 void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) {
287   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
288   // If we have stopped watching then consider thread as responding.
289   if (!active_) {
290     responsive_ = true;
291     return;
292   }
293   // If the latest ping_sequence_number_ is not same as the ping_sequence_number
294   // that is passed in, then we can assume OnPongMessage was called.
295   // OnPongMessage increments ping_sequence_number_.
296   if (ping_sequence_number_ != ping_sequence_number) {
297     // Reset unresponsive_count_ to zero because we got a response from the
298     // watched thread.
299     ResetHangCounters();
300
301     responsive_ = true;
302     return;
303   }
304   // Record that we got no response from watched thread.
305   GotNoResponse();
306
307   // Post a task to check the responsiveness of watched thread.
308   base::MessageLoop::current()->PostDelayedTask(
309       FROM_HERE,
310       base::Bind(&ThreadWatcher::OnCheckResponsiveness,
311                  weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
312       unresponsive_time_);
313   responsive_ = false;
314 }
315
316 void ThreadWatcher::Initialize() {
317   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
318   ThreadWatcherList::Register(this);
319
320   const std::string response_time_histogram_name =
321       "ThreadWatcher.ResponseTime." + thread_name_;
322   response_time_histogram_ = base::Histogram::FactoryTimeGet(
323       response_time_histogram_name,
324       base::TimeDelta::FromMilliseconds(1),
325       base::TimeDelta::FromSeconds(100), 50,
326       base::Histogram::kUmaTargetedHistogramFlag);
327
328   const std::string unresponsive_time_histogram_name =
329       "ThreadWatcher.Unresponsive." + thread_name_;
330   unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet(
331       unresponsive_time_histogram_name,
332       base::TimeDelta::FromMilliseconds(1),
333       base::TimeDelta::FromSeconds(100), 50,
334       base::Histogram::kUmaTargetedHistogramFlag);
335
336   const std::string responsive_count_histogram_name =
337       "ThreadWatcher.ResponsiveThreads." + thread_name_;
338   responsive_count_histogram_ = base::LinearHistogram::FactoryGet(
339       responsive_count_histogram_name, 1, 10, 11,
340       base::Histogram::kUmaTargetedHistogramFlag);
341
342   const std::string unresponsive_count_histogram_name =
343       "ThreadWatcher.UnresponsiveThreads." + thread_name_;
344   unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet(
345       unresponsive_count_histogram_name, 1, 10, 11,
346       base::Histogram::kUmaTargetedHistogramFlag);
347 }
348
349 // static
350 void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id,
351                                   const base::Closure& callback_task) {
352   // This method is called on watched thread.
353   DCHECK(BrowserThread::CurrentlyOn(thread_id));
354   WatchDogThread::PostTask(FROM_HERE, callback_task);
355 }
356
357 void ThreadWatcher::ResetHangCounters() {
358   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
359   unresponsive_count_ = 0;
360   hung_processing_complete_ = false;
361 }
362
363 void ThreadWatcher::GotNoResponse() {
364   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
365
366   ++unresponsive_count_;
367   if (!IsVeryUnresponsive())
368     return;
369
370   // Record total unresponsive_time since last pong message.
371   base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_;
372   unresponsive_time_histogram_->AddTime(unresponse_time);
373
374   // We have already collected stats for the non-responding watched thread.
375   if (hung_processing_complete_)
376     return;
377
378   // Record how other threads are responding.
379   uint32 responding_thread_count = 0;
380   uint32 unresponding_thread_count = 0;
381   ThreadWatcherList::GetStatusOfThreads(&responding_thread_count,
382                                         &unresponding_thread_count);
383
384   // Record how many watched threads are responding.
385   responsive_count_histogram_->Add(responding_thread_count);
386
387   // Record how many watched threads are not responding.
388   unresponsive_count_histogram_->Add(unresponding_thread_count);
389
390   // Crash the browser if the watched thread is to be crashed on hang and if the
391   // number of other threads responding is less than or equal to
392   // live_threads_threshold_ and at least one other thread is responding.
393   if (crash_on_hang_ &&
394       responding_thread_count > 0 &&
395       responding_thread_count <= live_threads_threshold_) {
396     static bool crashed_once = false;
397     if (!crashed_once) {
398       crashed_once = true;
399       CrashBecauseThreadWasUnresponsive(thread_id_);
400     }
401   }
402
403   hung_processing_complete_ = true;
404 }
405
406 bool ThreadWatcher::IsVeryUnresponsive() {
407   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
408   return unresponsive_count_ >= unresponsive_threshold_;
409 }
410
411 // ThreadWatcherList methods and members.
412 //
413 // static
414 ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL;
415 // static
416 bool ThreadWatcherList::g_stopped_ = false;
417 // static
418 const int ThreadWatcherList::kSleepSeconds = 1;
419 // static
420 const int ThreadWatcherList::kUnresponsiveSeconds = 2;
421 // static
422 const int ThreadWatcherList::kUnresponsiveCount = 9;
423 // static
424 const int ThreadWatcherList::kLiveThreadsThreshold = 2;
425 // static, non-const for tests.
426 int ThreadWatcherList::g_initialize_delay_seconds = 120;
427
428 ThreadWatcherList::CrashDataThresholds::CrashDataThresholds(
429     uint32 live_threads_threshold,
430     uint32 unresponsive_threshold)
431     : live_threads_threshold(live_threads_threshold),
432       unresponsive_threshold(unresponsive_threshold) {
433 }
434
435 ThreadWatcherList::CrashDataThresholds::CrashDataThresholds()
436     : live_threads_threshold(kLiveThreadsThreshold),
437       unresponsive_threshold(kUnresponsiveCount) {
438 }
439
440 // static
441 void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) {
442   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
443   uint32 unresponsive_threshold;
444   CrashOnHangThreadMap crash_on_hang_threads;
445   ParseCommandLine(command_line,
446                    &unresponsive_threshold,
447                    &crash_on_hang_threads);
448
449   ThreadWatcherObserver::SetupNotifications(
450       base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold));
451
452   WatchDogThread::PostTask(
453       FROM_HERE,
454       base::Bind(&ThreadWatcherList::SetStopped, false));
455
456   if (!WatchDogThread::PostDelayedTask(
457           FROM_HERE,
458           base::Bind(&ThreadWatcherList::InitializeAndStartWatching,
459                      unresponsive_threshold,
460                      crash_on_hang_threads),
461           base::TimeDelta::FromSeconds(g_initialize_delay_seconds))) {
462     // Disarm() the startup timebomb, if we couldn't post the task to start the
463     // ThreadWatcher (becasue WatchDog thread is not running).
464     StartupTimeBomb::DisarmStartupTimeBomb();
465   }
466 }
467
468 // static
469 void ThreadWatcherList::StopWatchingAll() {
470   // TODO(rtenneti): Enable ThreadWatcher.
471   ThreadWatcherObserver::RemoveNotifications();
472   DeleteAll();
473 }
474
475 // static
476 void ThreadWatcherList::Register(ThreadWatcher* watcher) {
477   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
478   if (!g_thread_watcher_list_)
479     return;
480   DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id()));
481   g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher;
482 }
483
484 // static
485 bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) {
486   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
487   return NULL != ThreadWatcherList::Find(thread_id);
488 }
489
490 // static
491 void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count,
492                                            uint32* unresponding_thread_count) {
493   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
494   *responding_thread_count = 0;
495   *unresponding_thread_count = 0;
496   if (!g_thread_watcher_list_)
497     return;
498
499   for (RegistrationList::iterator it =
500            g_thread_watcher_list_->registered_.begin();
501        g_thread_watcher_list_->registered_.end() != it;
502        ++it) {
503     if (it->second->IsVeryUnresponsive())
504       ++(*unresponding_thread_count);
505     else
506       ++(*responding_thread_count);
507   }
508 }
509
510 // static
511 void ThreadWatcherList::WakeUpAll() {
512   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
513   if (!g_thread_watcher_list_)
514     return;
515
516   for (RegistrationList::iterator it =
517            g_thread_watcher_list_->registered_.begin();
518        g_thread_watcher_list_->registered_.end() != it;
519        ++it)
520     it->second->WakeUp();
521 }
522
523 ThreadWatcherList::ThreadWatcherList() {
524   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
525   CHECK(!g_thread_watcher_list_);
526   g_thread_watcher_list_ = this;
527 }
528
529 ThreadWatcherList::~ThreadWatcherList() {
530   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
531   DCHECK(this == g_thread_watcher_list_);
532   g_thread_watcher_list_ = NULL;
533 }
534
535 // static
536 void ThreadWatcherList::ParseCommandLine(
537     const CommandLine& command_line,
538     uint32* unresponsive_threshold,
539     CrashOnHangThreadMap* crash_on_hang_threads) {
540   // Initialize |unresponsive_threshold| to a default value.
541   // TODO(rtenneti): Changed the default value to 4 times, until we can triage
542   // hangs automatically (and to reduce the crash dumps).
543   *unresponsive_threshold = kUnresponsiveCount * 4;
544
545   // Increase the unresponsive_threshold on the Stable and Beta channels to
546   // reduce the number of crashes due to ThreadWatcher.
547   chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
548   if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
549     *unresponsive_threshold *= 4;
550   } else if (channel == chrome::VersionInfo::CHANNEL_BETA) {
551     *unresponsive_threshold *= 2;
552   }
553
554 #if defined(OS_WIN)
555   // For Windows XP (old systems), double the unresponsive_threshold to give
556   // the OS a chance to schedule UI/IO threads a time slice to respond with a
557   // pong message (to get around limitations with the OS).
558   if (base::win::GetVersion() <= base::win::VERSION_XP)
559     *unresponsive_threshold *= 2;
560 #endif
561
562   uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds;
563   std::string crash_on_hang_thread_names;
564   bool has_command_line_overwrite = false;
565   if (command_line.HasSwitch(switches::kCrashOnHangThreads)) {
566     crash_on_hang_thread_names =
567         command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads);
568     has_command_line_overwrite = true;
569   } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) {
570     // Default to crashing the browser if UI or IO or FILE threads are not
571     // responsive except in stable channel.
572     crash_on_hang_thread_names = base::StringPrintf(
573         "UI:%d:%d,IO:%d:%d,FILE:%d:%d",
574         kLiveThreadsThreshold, crash_seconds,
575         kLiveThreadsThreshold, crash_seconds,
576         kLiveThreadsThreshold, crash_seconds * 5);
577   }
578
579   ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names,
580                                      kLiveThreadsThreshold,
581                                      crash_seconds,
582                                      crash_on_hang_threads);
583
584   if (channel != chrome::VersionInfo::CHANNEL_CANARY ||
585       has_command_line_overwrite) {
586     return;
587   }
588
589   const char* kFieldTrialName = "ThreadWatcher";
590
591   // Nothing else to be done if the trial has already been set (i.e., when
592   // StartWatchingAll() has been already called once).
593   if (base::FieldTrialList::TrialExists(kFieldTrialName))
594     return;
595
596   // Set up a field trial for 100% of the users to crash if either UI or IO
597   // thread is not responsive for 30 seconds (or 15 pings).
598   scoped_refptr<base::FieldTrial> field_trial(
599       base::FieldTrialList::FactoryGetFieldTrial(
600           kFieldTrialName, 100, "default_hung_threads",
601           2014, 10, 30, base::FieldTrial::SESSION_RANDOMIZED, NULL));
602   int hung_thread_group = field_trial->AppendGroup("hung_thread", 100);
603   if (field_trial->group() == hung_thread_group) {
604     for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin();
605          crash_on_hang_threads->end() != it;
606          ++it) {
607       if (it->first == "FILE")
608         continue;
609       it->second.live_threads_threshold = INT_MAX;
610       if (it->first == "UI") {
611         // TODO(rtenneti): set unresponsive threshold to 120 seconds to catch
612         // the worst UI hangs and for fewer crashes due to ThreadWatcher. Reduce
613         // it to a more reasonable time ala IO thread.
614         it->second.unresponsive_threshold = 60;
615       } else {
616         it->second.unresponsive_threshold = 15;
617       }
618     }
619   }
620 }
621
622 // static
623 void ThreadWatcherList::ParseCommandLineCrashOnHangThreads(
624     const std::string& crash_on_hang_thread_names,
625     uint32 default_live_threads_threshold,
626     uint32 default_crash_seconds,
627     CrashOnHangThreadMap* crash_on_hang_threads) {
628   base::StringTokenizer tokens(crash_on_hang_thread_names, ",");
629   std::vector<std::string> values;
630   while (tokens.GetNext()) {
631     const std::string& token = tokens.token();
632     base::SplitString(token, ':', &values);
633     std::string thread_name = values[0];
634
635     uint32 live_threads_threshold = default_live_threads_threshold;
636     uint32 crash_seconds = default_crash_seconds;
637     if (values.size() >= 2 &&
638         (!base::StringToUint(values[1], &live_threads_threshold))) {
639       continue;
640     }
641     if (values.size() >= 3 &&
642         (!base::StringToUint(values[2], &crash_seconds))) {
643       continue;
644     }
645     uint32 unresponsive_threshold = static_cast<uint32>(
646         ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds));
647
648     CrashDataThresholds crash_data(live_threads_threshold,
649                                    unresponsive_threshold);
650     // Use the last specifier.
651     (*crash_on_hang_threads)[thread_name] = crash_data;
652   }
653 }
654
655 // static
656 void ThreadWatcherList::InitializeAndStartWatching(
657     uint32 unresponsive_threshold,
658     const CrashOnHangThreadMap& crash_on_hang_threads) {
659   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
660
661   // Disarm the startup timebomb, even if stop has been called.
662   BrowserThread::PostTask(
663       BrowserThread::UI,
664       FROM_HERE,
665       base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb));
666
667   // This method is deferred in relationship to its StopWatchingAll()
668   // counterpart. If a previous initialization has already happened, or if
669   // stop has been called, there's nothing left to do here.
670   if (g_thread_watcher_list_ || g_stopped_)
671     return;
672
673   ThreadWatcherList* thread_watcher_list = new ThreadWatcherList();
674   CHECK(thread_watcher_list);
675
676   const base::TimeDelta kSleepTime =
677       base::TimeDelta::FromSeconds(kSleepSeconds);
678   const base::TimeDelta kUnresponsiveTime =
679       base::TimeDelta::FromSeconds(kUnresponsiveSeconds);
680
681   StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime,
682                 unresponsive_threshold, crash_on_hang_threads);
683   StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime,
684                 unresponsive_threshold, crash_on_hang_threads);
685   StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime,
686                 unresponsive_threshold, crash_on_hang_threads);
687   StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime,
688                 unresponsive_threshold, crash_on_hang_threads);
689   StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime,
690                 unresponsive_threshold, crash_on_hang_threads);
691 }
692
693 // static
694 void ThreadWatcherList::StartWatching(
695     const BrowserThread::ID& thread_id,
696     const std::string& thread_name,
697     const base::TimeDelta& sleep_time,
698     const base::TimeDelta& unresponsive_time,
699     uint32 unresponsive_threshold,
700     const CrashOnHangThreadMap& crash_on_hang_threads) {
701   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
702
703   CrashOnHangThreadMap::const_iterator it =
704       crash_on_hang_threads.find(thread_name);
705   bool crash_on_hang = false;
706   uint32 live_threads_threshold = 0;
707   if (it != crash_on_hang_threads.end()) {
708     crash_on_hang = true;
709     live_threads_threshold = it->second.live_threads_threshold;
710     unresponsive_threshold = it->second.unresponsive_threshold;
711   }
712
713   ThreadWatcher::StartWatching(
714       ThreadWatcher::WatchingParams(thread_id,
715                                     thread_name,
716                                     sleep_time,
717                                     unresponsive_time,
718                                     unresponsive_threshold,
719                                     crash_on_hang,
720                                     live_threads_threshold));
721 }
722
723 // static
724 void ThreadWatcherList::DeleteAll() {
725   if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
726     WatchDogThread::PostTask(
727         FROM_HERE,
728         base::Bind(&ThreadWatcherList::DeleteAll));
729     return;
730   }
731
732   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
733
734   SetStopped(true);
735
736   if (!g_thread_watcher_list_)
737     return;
738
739   // Delete all thread watcher objects.
740   while (!g_thread_watcher_list_->registered_.empty()) {
741     RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin();
742     delete it->second;
743     g_thread_watcher_list_->registered_.erase(it);
744   }
745
746   delete g_thread_watcher_list_;
747 }
748
749 // static
750 ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) {
751   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
752   if (!g_thread_watcher_list_)
753     return NULL;
754   RegistrationList::iterator it =
755       g_thread_watcher_list_->registered_.find(thread_id);
756   if (g_thread_watcher_list_->registered_.end() == it)
757     return NULL;
758   return it->second;
759 }
760
761 // static
762 void ThreadWatcherList::SetStopped(bool stopped) {
763   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
764   g_stopped_ = stopped;
765 }
766
767 // ThreadWatcherObserver methods and members.
768 //
769 // static
770 ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL;
771
772 ThreadWatcherObserver::ThreadWatcherObserver(
773     const base::TimeDelta& wakeup_interval)
774     : last_wakeup_time_(base::TimeTicks::Now()),
775       wakeup_interval_(wakeup_interval) {
776   CHECK(!g_thread_watcher_observer_);
777   g_thread_watcher_observer_ = this;
778 }
779
780 ThreadWatcherObserver::~ThreadWatcherObserver() {
781   DCHECK(this == g_thread_watcher_observer_);
782   g_thread_watcher_observer_ = NULL;
783 }
784
785 // static
786 void ThreadWatcherObserver::SetupNotifications(
787     const base::TimeDelta& wakeup_interval) {
788   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
789   ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval);
790   observer->registrar_.Add(
791       observer,
792       chrome::NOTIFICATION_BROWSER_OPENED,
793       content::NotificationService::AllBrowserContextsAndSources());
794   observer->registrar_.Add(observer,
795                            chrome::NOTIFICATION_BROWSER_CLOSED,
796                            content::NotificationService::AllSources());
797   observer->registrar_.Add(observer,
798                            chrome::NOTIFICATION_TAB_PARENTED,
799                            content::NotificationService::AllSources());
800   observer->registrar_.Add(observer,
801                            chrome::NOTIFICATION_TAB_CLOSING,
802                            content::NotificationService::AllSources());
803   observer->registrar_.Add(observer,
804                            content::NOTIFICATION_LOAD_START,
805                            content::NotificationService::AllSources());
806   observer->registrar_.Add(observer,
807                            content::NOTIFICATION_LOAD_STOP,
808                            content::NotificationService::AllSources());
809   observer->registrar_.Add(observer,
810                            content::NOTIFICATION_RENDERER_PROCESS_CLOSED,
811                            content::NotificationService::AllSources());
812   observer->registrar_.Add(observer,
813                            content::NOTIFICATION_RENDER_WIDGET_HOST_HANG,
814                            content::NotificationService::AllSources());
815   observer->registrar_.Add(observer,
816                            chrome::NOTIFICATION_OMNIBOX_OPENED_URL,
817                            content::NotificationService::AllSources());
818 }
819
820 // static
821 void ThreadWatcherObserver::RemoveNotifications() {
822   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
823   if (!g_thread_watcher_observer_)
824     return;
825   g_thread_watcher_observer_->registrar_.RemoveAll();
826   delete g_thread_watcher_observer_;
827 }
828
829 void ThreadWatcherObserver::Observe(
830     int type,
831     const content::NotificationSource& source,
832     const content::NotificationDetails& details) {
833   // There is some user activity, see if thread watchers are to be awakened.
834   base::TimeTicks now = base::TimeTicks::Now();
835   if ((now - last_wakeup_time_) < wakeup_interval_)
836     return;
837   last_wakeup_time_ = now;
838   WatchDogThread::PostTask(
839       FROM_HERE,
840       base::Bind(&ThreadWatcherList::WakeUpAll));
841 }
842
843 // WatchDogThread methods and members.
844
845 // This lock protects g_watchdog_thread.
846 static base::LazyInstance<base::Lock>::Leaky
847     g_watchdog_lock = LAZY_INSTANCE_INITIALIZER;
848
849 // The singleton of this class.
850 static WatchDogThread* g_watchdog_thread = NULL;
851
852 WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") {
853 }
854
855 WatchDogThread::~WatchDogThread() {
856   Stop();
857 }
858
859 // static
860 bool WatchDogThread::CurrentlyOnWatchDogThread() {
861   base::AutoLock lock(g_watchdog_lock.Get());
862   return g_watchdog_thread &&
863       g_watchdog_thread->message_loop() == base::MessageLoop::current();
864 }
865
866 // static
867 bool WatchDogThread::PostTask(const tracked_objects::Location& from_here,
868                               const base::Closure& task) {
869   return PostTaskHelper(from_here, task, base::TimeDelta());
870 }
871
872 // static
873 bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here,
874                                      const base::Closure& task,
875                                      base::TimeDelta delay) {
876   return PostTaskHelper(from_here, task, delay);
877 }
878
879 // static
880 bool WatchDogThread::PostTaskHelper(
881     const tracked_objects::Location& from_here,
882     const base::Closure& task,
883     base::TimeDelta delay) {
884   {
885     base::AutoLock lock(g_watchdog_lock.Get());
886
887     base::MessageLoop* message_loop = g_watchdog_thread ?
888         g_watchdog_thread->message_loop() : NULL;
889     if (message_loop) {
890       message_loop->PostDelayedTask(from_here, task, delay);
891       return true;
892     }
893   }
894
895   return false;
896 }
897
898 void WatchDogThread::Init() {
899   // This thread shouldn't be allowed to perform any blocking disk I/O.
900   base::ThreadRestrictions::SetIOAllowed(false);
901
902   base::AutoLock lock(g_watchdog_lock.Get());
903   CHECK(!g_watchdog_thread);
904   g_watchdog_thread = this;
905 }
906
907 void WatchDogThread::CleanUp() {
908   base::AutoLock lock(g_watchdog_lock.Get());
909   g_watchdog_thread = NULL;
910 }
911
912 namespace {
913
914 // StartupWatchDogThread methods and members.
915 //
916 // Class for detecting hangs during startup.
917 class StartupWatchDogThread : public base::Watchdog {
918  public:
919   // Constructor specifies how long the StartupWatchDogThread will wait before
920   // alarming.
921   explicit StartupWatchDogThread(const base::TimeDelta& duration)
922       : base::Watchdog(duration, "Startup watchdog thread", true) {
923   }
924
925   // Alarm is called if the time expires after an Arm() without someone calling
926   // Disarm(). When Alarm goes off, in release mode we get the crash dump
927   // without crashing and in debug mode we break into the debugger.
928   void Alarm() override {
929 #if !defined(NDEBUG)
930     StartupHang();
931     return;
932 #elif !defined(OS_ANDROID)
933     WatchDogThread::PostTask(FROM_HERE, base::Bind(&StartupHang));
934     return;
935 #else
936     // TODO(rtenneti): Enable crashing for Android.
937 #endif  // OS_ANDROID
938   }
939
940  private:
941   DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread);
942 };
943
944 // ShutdownWatchDogThread methods and members.
945 //
946 // Class for detecting hangs during shutdown.
947 class ShutdownWatchDogThread : public base::Watchdog {
948  public:
949   // Constructor specifies how long the ShutdownWatchDogThread will wait before
950   // alarming.
951   explicit ShutdownWatchDogThread(const base::TimeDelta& duration)
952       : base::Watchdog(duration, "Shutdown watchdog thread", true) {
953   }
954
955   // Alarm is called if the time expires after an Arm() without someone calling
956   // Disarm(). We crash the browser if this method is called.
957   void Alarm() override { ShutdownHang(); }
958
959  private:
960   DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread);
961 };
962 }  // namespace
963
964 // StartupTimeBomb methods and members.
965 //
966 // static
967 StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL;
968
969 StartupTimeBomb::StartupTimeBomb()
970     : startup_watchdog_(NULL),
971       thread_id_(base::PlatformThread::CurrentId()) {
972   CHECK(!g_startup_timebomb_);
973   g_startup_timebomb_ = this;
974 }
975
976 StartupTimeBomb::~StartupTimeBomb() {
977   DCHECK(this == g_startup_timebomb_);
978   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
979   if (startup_watchdog_)
980     Disarm();
981   g_startup_timebomb_ = NULL;
982 }
983
984 void StartupTimeBomb::Arm(const base::TimeDelta& duration) {
985   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
986   DCHECK(!startup_watchdog_);
987   startup_watchdog_ = new StartupWatchDogThread(duration);
988   startup_watchdog_->Arm();
989   return;
990 }
991
992 void StartupTimeBomb::Disarm() {
993   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
994   if (startup_watchdog_) {
995     startup_watchdog_->Disarm();
996     startup_watchdog_->Cleanup();
997     DeleteStartupWatchdog();
998   }
999 }
1000
1001 void StartupTimeBomb::DeleteStartupWatchdog() {
1002   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
1003   if (startup_watchdog_->IsJoinable()) {
1004     // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns
1005     // very fast.
1006     base::ThreadRestrictions::SetIOAllowed(true);
1007     delete startup_watchdog_;
1008     startup_watchdog_ = NULL;
1009     return;
1010   }
1011   base::MessageLoop::current()->PostDelayedTask(
1012       FROM_HERE,
1013       base::Bind(&StartupTimeBomb::DeleteStartupWatchdog,
1014                  base::Unretained(this)),
1015       base::TimeDelta::FromSeconds(10));
1016 }
1017
1018 // static
1019 void StartupTimeBomb::DisarmStartupTimeBomb() {
1020   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
1021   if (g_startup_timebomb_)
1022     g_startup_timebomb_->Disarm();
1023 }
1024
1025 // ShutdownWatcherHelper methods and members.
1026 //
1027 // ShutdownWatcherHelper is a wrapper class for detecting hangs during
1028 // shutdown.
1029 ShutdownWatcherHelper::ShutdownWatcherHelper()
1030     : shutdown_watchdog_(NULL),
1031       thread_id_(base::PlatformThread::CurrentId()) {
1032 }
1033
1034 ShutdownWatcherHelper::~ShutdownWatcherHelper() {
1035   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
1036   if (shutdown_watchdog_) {
1037     shutdown_watchdog_->Disarm();
1038     delete shutdown_watchdog_;
1039     shutdown_watchdog_ = NULL;
1040   }
1041 }
1042
1043 void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) {
1044   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
1045   DCHECK(!shutdown_watchdog_);
1046   base::TimeDelta actual_duration = duration;
1047
1048   chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
1049   if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
1050     actual_duration *= 20;
1051   } else if (channel == chrome::VersionInfo::CHANNEL_BETA ||
1052              channel == chrome::VersionInfo::CHANNEL_DEV) {
1053     actual_duration *= 10;
1054   }
1055
1056 #if defined(OS_WIN)
1057   // On Windows XP, give twice the time for shutdown.
1058   if (base::win::GetVersion() <= base::win::VERSION_XP)
1059     actual_duration *= 2;
1060 #endif
1061
1062   shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration);
1063   shutdown_watchdog_->Arm();
1064 }