- add sources.
[platform/framework/web/crosswalk.git] / src / chrome / browser / metrics / thread_watcher.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/metrics/thread_watcher.h"
6
7 #include <math.h>  // ceil
8
9 #include "base/bind.h"
10 #include "base/compiler_specific.h"
11 #include "base/debug/alias.h"
12 #include "base/lazy_instance.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_split.h"
15 #include "base/strings/string_tokenizer.h"
16 #include "base/strings/stringprintf.h"
17 #include "base/threading/thread_restrictions.h"
18 #include "build/build_config.h"
19 #include "chrome/browser/metrics/metrics_service.h"
20 #include "chrome/common/chrome_switches.h"
21 #include "chrome/common/chrome_version_info.h"
22 #include "chrome/common/dump_without_crashing.h"
23 #include "chrome/common/logging_chrome.h"
24
25 #if defined(OS_WIN)
26 #include "base/win/windows_version.h"
27 #endif
28
29 using content::BrowserThread;
30
31 namespace {
32
33 // The following are unique function names for forcing the crash when a thread
34 // is unresponsive. This makes it possible to tell from the callstack alone what
35 // thread was unresponsive.
36 //
37 // We disable optimizations for this block of functions so the compiler doesn't
38 // merge them all together.
39 MSVC_DISABLE_OPTIMIZE()
40 MSVC_PUSH_DISABLE_WARNING(4748)
41
// Returns a null int pointer. NullPointerCrash() writes through it.
42 int* NullPointer() {
43   return reinterpret_cast<int*>(NULL);
44 }
45
// When NDEBUG is not defined, stores |line_number| through the null pointer
// to bring the process down. When NDEBUG is defined, calls
// logging::DumpWithoutCrashing() instead of writing through the pointer.
46 void NullPointerCrash(int line_number) {
47 #ifndef NDEBUG
48   *NullPointer() = line_number;  // Crash.
49 #else
50   logging::DumpWithoutCrashing();
51 #endif
52 }
53
// Each NOINLINE function below only forwards its own __LINE__ to
// NullPointerCrash(); the distinct function names are what identify the
// unresponsive thread (or the shutdown hang) in the call stack.
54 NOINLINE void ShutdownCrash() {
55   NullPointerCrash(__LINE__);
56 }
57
58 NOINLINE void ThreadUnresponsive_UI() {
59   NullPointerCrash(__LINE__);
60 }
61
62 NOINLINE void ThreadUnresponsive_DB() {
63   NullPointerCrash(__LINE__);
64 }
65
66 NOINLINE void ThreadUnresponsive_FILE() {
67   NullPointerCrash(__LINE__);
68 }
69
70 NOINLINE void ThreadUnresponsive_FILE_USER_BLOCKING() {
71   NullPointerCrash(__LINE__);
72 }
73
74 NOINLINE void ThreadUnresponsive_PROCESS_LAUNCHER() {
75   NullPointerCrash(__LINE__);
76 }
77
78 NOINLINE void ThreadUnresponsive_CACHE() {
79   NullPointerCrash(__LINE__);
80 }
81
82 NOINLINE void ThreadUnresponsive_IO() {
83   NullPointerCrash(__LINE__);
84 }
85
// End of the region compiled with optimizations disabled.
86 MSVC_POP_WARNING()
87 MSVC_ENABLE_OPTIMIZE();
88
89 void CrashBecauseThreadWasUnresponsive(BrowserThread::ID thread_id) {
90   base::debug::Alias(&thread_id);
91
92   switch (thread_id) {
93     case BrowserThread::UI:
94       return ThreadUnresponsive_UI();
95     case BrowserThread::DB:
96       return ThreadUnresponsive_DB();
97     case BrowserThread::FILE:
98       return ThreadUnresponsive_FILE();
99     case BrowserThread::FILE_USER_BLOCKING:
100       return ThreadUnresponsive_FILE_USER_BLOCKING();
101     case BrowserThread::PROCESS_LAUNCHER:
102       return ThreadUnresponsive_PROCESS_LAUNCHER();
103     case BrowserThread::CACHE:
104       return ThreadUnresponsive_CACHE();
105     case BrowserThread::IO:
106       return ThreadUnresponsive_IO();
107     case BrowserThread::ID_COUNT:
108       CHECK(false);  // This shouldn't actually be reached!
109       break;
110
111     // Omission of the default hander is intentional -- that way the compiler
112     // should warn if our switch becomes outdated.
113   }
114
115   CHECK(false) << "Unknown thread was unresponsive.";  // Shouldn't be reached.
116 }
117
118 }  // namespace
119
120 // ThreadWatcher methods and members.
121 ThreadWatcher::ThreadWatcher(const WatchingParams& params)
122     : thread_id_(params.thread_id),
123       thread_name_(params.thread_name),
124       watched_loop_(
125           BrowserThread::GetMessageLoopProxyForThread(params.thread_id)),
126       sleep_time_(params.sleep_time),
127       unresponsive_time_(params.unresponsive_time),
128       ping_time_(base::TimeTicks::Now()),
129       pong_time_(ping_time_),
130       ping_sequence_number_(0),
131       active_(false),
132       ping_count_(params.unresponsive_threshold),
133       response_time_histogram_(NULL),
134       unresponsive_time_histogram_(NULL),
135       unresponsive_count_(0),
136       hung_processing_complete_(false),
137       unresponsive_threshold_(params.unresponsive_threshold),
138       crash_on_hang_(params.crash_on_hang),
139       live_threads_threshold_(params.live_threads_threshold),
140       weak_ptr_factory_(this) {
141   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
142   Initialize();
143 }
144
145 ThreadWatcher::~ThreadWatcher() {}
146
147 // static
148 void ThreadWatcher::StartWatching(const WatchingParams& params) {
149   DCHECK_GE(params.sleep_time.InMilliseconds(), 0);
150   DCHECK_GE(params.unresponsive_time.InMilliseconds(),
151             params.sleep_time.InMilliseconds());
152
153   // If we are not on WatchDogThread, then post a task to call StartWatching on
154   // WatchDogThread.
155   if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
156     WatchDogThread::PostTask(
157         FROM_HERE,
158         base::Bind(&ThreadWatcher::StartWatching, params));
159     return;
160   }
161
162   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
163
164   // Create a new thread watcher object for the given thread and activate it.
165   ThreadWatcher* watcher = new ThreadWatcher(params);
166
167   DCHECK(watcher);
168   // If we couldn't register the thread watcher object, we are shutting down,
169   // then don't activate thread watching.
170   if (!ThreadWatcherList::IsRegistered(params.thread_id))
171     return;
172   watcher->ActivateThreadWatching();
173 }
174
175 void ThreadWatcher::ActivateThreadWatching() {
176   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
177   if (active_) return;
178   active_ = true;
179   ping_count_ = unresponsive_threshold_;
180   ResetHangCounters();
181   base::MessageLoop::current()->PostTask(
182       FROM_HERE,
183       base::Bind(&ThreadWatcher::PostPingMessage,
184                  weak_ptr_factory_.GetWeakPtr()));
185 }
186
187 void ThreadWatcher::DeActivateThreadWatching() {
188   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
189   active_ = false;
190   ping_count_ = 0;
191   weak_ptr_factory_.InvalidateWeakPtrs();
192 }
193
194 void ThreadWatcher::WakeUp() {
195   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
196   // There is some user activity, PostPingMessage task of thread watcher if
197   // needed.
198   if (!active_) return;
199
200   // Throw away the previous |unresponsive_count_| and start over again. Just
201   // before going to sleep, |unresponsive_count_| could be very close to
202   // |unresponsive_threshold_| and when user becomes active,
203   // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no
204   // response for ping messages. Reset |unresponsive_count_| to start measuring
205   // the unresponsiveness of the threads when system becomes active.
206   unresponsive_count_ = 0;
207
208   if (ping_count_ <= 0) {
209     ping_count_ = unresponsive_threshold_;
210     ResetHangCounters();
211     PostPingMessage();
212   } else {
213     ping_count_ = unresponsive_threshold_;
214   }
215 }
216
217 void ThreadWatcher::PostPingMessage() {
218   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
219   // If we have stopped watching or if the user is idle, then stop sending
220   // ping messages.
221   if (!active_ || ping_count_ <= 0)
222     return;
223
224   // Save the current time when we have sent ping message.
225   ping_time_ = base::TimeTicks::Now();
226
227   // Send a ping message to the watched thread. Callback will be called on
228   // the WatchDogThread.
229   base::Closure callback(
230       base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(),
231                  ping_sequence_number_));
232   if (watched_loop_->PostTask(
233           FROM_HERE,
234           base::Bind(&ThreadWatcher::OnPingMessage, thread_id_,
235                      callback))) {
236       // Post a task to check the responsiveness of watched thread.
237       base::MessageLoop::current()->PostDelayedTask(
238           FROM_HERE,
239           base::Bind(&ThreadWatcher::OnCheckResponsiveness,
240                      weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
241           unresponsive_time_);
242   } else {
243     // Watched thread might have gone away, stop watching it.
244     DeActivateThreadWatching();
245   }
246 }
247
248 void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) {
249   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
250
251   // Record watched thread's response time.
252   base::TimeTicks now = base::TimeTicks::Now();
253   base::TimeDelta response_time = now - ping_time_;
254   response_time_histogram_->AddTime(response_time);
255
256   // Save the current time when we have got pong message.
257   pong_time_ = now;
258
259   // Check if there are any extra pings in flight.
260   DCHECK_EQ(ping_sequence_number_, ping_sequence_number);
261   if (ping_sequence_number_ != ping_sequence_number)
262     return;
263
264   // Increment sequence number for the next ping message to indicate watched
265   // thread is responsive.
266   ++ping_sequence_number_;
267
268   // If we have stopped watching or if the user is idle, then stop sending
269   // ping messages.
270   if (!active_ || --ping_count_ <= 0)
271     return;
272
273   base::MessageLoop::current()->PostDelayedTask(
274       FROM_HERE,
275       base::Bind(&ThreadWatcher::PostPingMessage,
276                  weak_ptr_factory_.GetWeakPtr()),
277       sleep_time_);
278 }
279
280 void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) {
281   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
282   // If we have stopped watching then consider thread as responding.
283   if (!active_) {
284     responsive_ = true;
285     return;
286   }
287   // If the latest ping_sequence_number_ is not same as the ping_sequence_number
288   // that is passed in, then we can assume OnPongMessage was called.
289   // OnPongMessage increments ping_sequence_number_.
290   if (ping_sequence_number_ != ping_sequence_number) {
291     // Reset unresponsive_count_ to zero because we got a response from the
292     // watched thread.
293     ResetHangCounters();
294
295     responsive_ = true;
296     return;
297   }
298   // Record that we got no response from watched thread.
299   GotNoResponse();
300
301   // Post a task to check the responsiveness of watched thread.
302   base::MessageLoop::current()->PostDelayedTask(
303       FROM_HERE,
304       base::Bind(&ThreadWatcher::OnCheckResponsiveness,
305                  weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
306       unresponsive_time_);
307   responsive_ = false;
308 }
309
310 void ThreadWatcher::Initialize() {
311   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
312   ThreadWatcherList::Register(this);
313
314   const std::string response_time_histogram_name =
315       "ThreadWatcher.ResponseTime." + thread_name_;
316   response_time_histogram_ = base::Histogram::FactoryTimeGet(
317       response_time_histogram_name,
318       base::TimeDelta::FromMilliseconds(1),
319       base::TimeDelta::FromSeconds(100), 50,
320       base::Histogram::kUmaTargetedHistogramFlag);
321
322   const std::string unresponsive_time_histogram_name =
323       "ThreadWatcher.Unresponsive." + thread_name_;
324   unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet(
325       unresponsive_time_histogram_name,
326       base::TimeDelta::FromMilliseconds(1),
327       base::TimeDelta::FromSeconds(100), 50,
328       base::Histogram::kUmaTargetedHistogramFlag);
329
330   const std::string responsive_count_histogram_name =
331       "ThreadWatcher.ResponsiveThreads." + thread_name_;
332   responsive_count_histogram_ = base::LinearHistogram::FactoryGet(
333       responsive_count_histogram_name, 1, 10, 11,
334       base::Histogram::kUmaTargetedHistogramFlag);
335
336   const std::string unresponsive_count_histogram_name =
337       "ThreadWatcher.UnresponsiveThreads." + thread_name_;
338   unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet(
339       unresponsive_count_histogram_name, 1, 10, 11,
340       base::Histogram::kUmaTargetedHistogramFlag);
341 }
342
343 // static
344 void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id,
345                                   const base::Closure& callback_task) {
346   // This method is called on watched thread.
347   DCHECK(BrowserThread::CurrentlyOn(thread_id));
348   WatchDogThread::PostTask(FROM_HERE, callback_task);
349 }
350
351 void ThreadWatcher::ResetHangCounters() {
352   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
353   unresponsive_count_ = 0;
354   hung_processing_complete_ = false;
355 }
356
357 void ThreadWatcher::GotNoResponse() {
358   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
359
360   ++unresponsive_count_;
361   if (!IsVeryUnresponsive())
362     return;
363
364   // Record total unresponsive_time since last pong message.
365   base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_;
366   unresponsive_time_histogram_->AddTime(unresponse_time);
367
368   // We have already collected stats for the non-responding watched thread.
369   if (hung_processing_complete_)
370     return;
371
372   // Record how other threads are responding.
373   uint32 responding_thread_count = 0;
374   uint32 unresponding_thread_count = 0;
375   ThreadWatcherList::GetStatusOfThreads(&responding_thread_count,
376                                         &unresponding_thread_count);
377
378   // Record how many watched threads are responding.
379   responsive_count_histogram_->Add(responding_thread_count);
380
381   // Record how many watched threads are not responding.
382   unresponsive_count_histogram_->Add(unresponding_thread_count);
383
384   // Crash the browser if the watched thread is to be crashed on hang and if the
385   // number of other threads responding is less than or equal to
386   // live_threads_threshold_ and at least one other thread is responding.
387   if (crash_on_hang_ &&
388       responding_thread_count > 0 &&
389       responding_thread_count <= live_threads_threshold_) {
390     static bool crashed_once = false;
391     if (!crashed_once) {
392       crashed_once = true;
393       CrashBecauseThreadWasUnresponsive(thread_id_);
394     }
395   }
396
397   hung_processing_complete_ = true;
398 }
399
400 bool ThreadWatcher::IsVeryUnresponsive() {
401   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
402   return unresponsive_count_ >= unresponsive_threshold_;
403 }
404
405 // ThreadWatcherList methods and members.
406 //
407 // static
// The single ThreadWatcherList instance; set by the constructor and cleared
// by the destructor.
408 ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL;
409 // static
// Seconds used as each watcher's sleep time in InitializeAndStartWatching().
410 const int ThreadWatcherList::kSleepSeconds = 1;
411 // static
// Seconds used as each watcher's unresponsive time in
// InitializeAndStartWatching().
412 const int ThreadWatcherList::kUnresponsiveSeconds = 2;
413 // static
// Default unresponsive threshold; ParseCommandLine() may scale it up.
414 const int ThreadWatcherList::kUnresponsiveCount = 9;
415 // static
// Default live-threads threshold used for crash-on-hang decisions.
416 const int ThreadWatcherList::kLiveThreadsThreshold = 2;
417
418 ThreadWatcherList::CrashDataThresholds::CrashDataThresholds(
419     uint32 live_threads_threshold,
420     uint32 unresponsive_threshold)
421     : live_threads_threshold(live_threads_threshold),
422       unresponsive_threshold(unresponsive_threshold) {
423 }
424
425 ThreadWatcherList::CrashDataThresholds::CrashDataThresholds()
426     : live_threads_threshold(kLiveThreadsThreshold),
427       unresponsive_threshold(kUnresponsiveCount) {
428 }
429
430 // static
431 void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) {
432   // TODO(rtenneti): Enable ThreadWatcher.
433   uint32 unresponsive_threshold;
434   CrashOnHangThreadMap crash_on_hang_threads;
435   ParseCommandLine(command_line,
436                    &unresponsive_threshold,
437                    &crash_on_hang_threads);
438
439   ThreadWatcherObserver::SetupNotifications(
440       base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold));
441
442   WatchDogThread::PostDelayedTask(
443       FROM_HERE,
444       base::Bind(&ThreadWatcherList::InitializeAndStartWatching,
445                  unresponsive_threshold,
446                  crash_on_hang_threads),
447       base::TimeDelta::FromSeconds(120));
448 }
449
450 // static
451 void ThreadWatcherList::StopWatchingAll() {
452   // TODO(rtenneti): Enable ThreadWatcher.
453   ThreadWatcherObserver::RemoveNotifications();
454   DeleteAll();
455 }
456
457 // static
458 void ThreadWatcherList::Register(ThreadWatcher* watcher) {
459   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
460   if (!g_thread_watcher_list_)
461     return;
462   DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id()));
463   g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher;
464 }
465
466 // static
467 bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) {
468   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
469   return NULL != ThreadWatcherList::Find(thread_id);
470 }
471
472 // static
473 void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count,
474                                            uint32* unresponding_thread_count) {
475   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
476   *responding_thread_count = 0;
477   *unresponding_thread_count = 0;
478   if (!g_thread_watcher_list_)
479     return;
480
481   for (RegistrationList::iterator it =
482            g_thread_watcher_list_->registered_.begin();
483        g_thread_watcher_list_->registered_.end() != it;
484        ++it) {
485     if (it->second->IsVeryUnresponsive())
486       ++(*unresponding_thread_count);
487     else
488       ++(*responding_thread_count);
489   }
490 }
491
492 // static
493 void ThreadWatcherList::WakeUpAll() {
494   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
495   if (!g_thread_watcher_list_)
496     return;
497
498   for (RegistrationList::iterator it =
499            g_thread_watcher_list_->registered_.begin();
500        g_thread_watcher_list_->registered_.end() != it;
501        ++it)
502     it->second->WakeUp();
503 }
504
505 ThreadWatcherList::ThreadWatcherList() {
506   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
507   CHECK(!g_thread_watcher_list_);
508   g_thread_watcher_list_ = this;
509 }
510
511 ThreadWatcherList::~ThreadWatcherList() {
512   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
513   DCHECK(this == g_thread_watcher_list_);
514   g_thread_watcher_list_ = NULL;
515 }
516
517 // static
518 void ThreadWatcherList::ParseCommandLine(
519     const CommandLine& command_line,
520     uint32* unresponsive_threshold,
521     CrashOnHangThreadMap* crash_on_hang_threads) {
522   // Initialize |unresponsive_threshold| to a default value.
523   *unresponsive_threshold = kUnresponsiveCount;
524
525   // Increase the unresponsive_threshold on the Stable and Beta channels to
526   // reduce the number of crashes due to ThreadWatcher.
527   chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
528   if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
529     *unresponsive_threshold *= 4;
530   } else if (channel == chrome::VersionInfo::CHANNEL_BETA) {
531     *unresponsive_threshold *= 2;
532   }
533
534 #if defined(OS_WIN)
535   // For Windows XP (old systems), double the unresponsive_threshold to give
536   // the OS a chance to schedule UI/IO threads a time slice to respond with a
537   // pong message (to get around limitations with the OS).
538   if (base::win::GetVersion() <= base::win::VERSION_XP)
539     *unresponsive_threshold *= 2;
540 #endif
541
542   uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds;
543   std::string crash_on_hang_thread_names;
544   bool has_command_line_overwrite = false;
545   if (command_line.HasSwitch(switches::kCrashOnHangThreads)) {
546     crash_on_hang_thread_names =
547         command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads);
548     has_command_line_overwrite = true;
549   } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) {
550     // Default to crashing the browser if UI or IO or FILE threads are not
551     // responsive except in stable channel.
552     crash_on_hang_thread_names = base::StringPrintf(
553         "UI:%d:%d,IO:%d:%d,FILE:%d:%d",
554         kLiveThreadsThreshold, crash_seconds,
555         kLiveThreadsThreshold, crash_seconds,
556         kLiveThreadsThreshold, crash_seconds * 5);
557   }
558
559   ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names,
560                                      kLiveThreadsThreshold,
561                                      crash_seconds,
562                                      crash_on_hang_threads);
563
564   if (channel != chrome::VersionInfo::CHANNEL_CANARY ||
565       has_command_line_overwrite) {
566     return;
567   }
568
569   // Set up a field trial for 100% of the users to crash if either UI or IO
570   // thread is not responsive for 30 seconds (or 15 pings).
571   scoped_refptr<base::FieldTrial> field_trial(
572       base::FieldTrialList::FactoryGetFieldTrial(
573           "ThreadWatcher", 100, "default_hung_threads",
574           2014, 10, 30, base::FieldTrial::SESSION_RANDOMIZED, NULL));
575   int hung_thread_group = field_trial->AppendGroup("hung_thread", 100);
576   if (field_trial->group() == hung_thread_group) {
577     for (CrashOnHangThreadMap::iterator it = crash_on_hang_threads->begin();
578          crash_on_hang_threads->end() != it;
579          ++it) {
580       if (it->first == "FILE")
581         continue;
582       it->second.live_threads_threshold = INT_MAX;
583       if (it->first == "UI") {
584         // TODO(rtenneti): set unresponsive threshold to 120 seconds to catch
585         // the worst UI hangs and for fewer crashes due to ThreadWatcher. Reduce
586         // it to a more reasonable time ala IO thread.
587         it->second.unresponsive_threshold = 60;
588       } else {
589         it->second.unresponsive_threshold = 15;
590       }
591     }
592   }
593 }
594
595 // static
596 void ThreadWatcherList::ParseCommandLineCrashOnHangThreads(
597     const std::string& crash_on_hang_thread_names,
598     uint32 default_live_threads_threshold,
599     uint32 default_crash_seconds,
600     CrashOnHangThreadMap* crash_on_hang_threads) {
601   base::StringTokenizer tokens(crash_on_hang_thread_names, ",");
602   std::vector<std::string> values;
603   while (tokens.GetNext()) {
604     const std::string& token = tokens.token();
605     base::SplitString(token, ':', &values);
606     std::string thread_name = values[0];
607
608     uint32 live_threads_threshold = default_live_threads_threshold;
609     uint32 crash_seconds = default_crash_seconds;
610     if (values.size() >= 2 &&
611         (!base::StringToUint(values[1], &live_threads_threshold))) {
612       continue;
613     }
614     if (values.size() >= 3 &&
615         (!base::StringToUint(values[2], &crash_seconds))) {
616       continue;
617     }
618     uint32 unresponsive_threshold = static_cast<uint32>(
619         ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds));
620
621     CrashDataThresholds crash_data(live_threads_threshold,
622                                    unresponsive_threshold);
623     // Use the last specifier.
624     (*crash_on_hang_threads)[thread_name] = crash_data;
625   }
626 }
627
628 // static
629 void ThreadWatcherList::InitializeAndStartWatching(
630     uint32 unresponsive_threshold,
631     const CrashOnHangThreadMap& crash_on_hang_threads) {
632   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
633
634   ThreadWatcherList* thread_watcher_list = new ThreadWatcherList();
635   CHECK(thread_watcher_list);
636
637   BrowserThread::PostTask(
638       BrowserThread::UI,
639       FROM_HERE,
640       base::Bind(&StartupTimeBomb::DisarmStartupTimeBomb));
641
642   const base::TimeDelta kSleepTime =
643       base::TimeDelta::FromSeconds(kSleepSeconds);
644   const base::TimeDelta kUnresponsiveTime =
645       base::TimeDelta::FromSeconds(kUnresponsiveSeconds);
646
647   StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime,
648                 unresponsive_threshold, crash_on_hang_threads);
649   StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime,
650                 unresponsive_threshold, crash_on_hang_threads);
651   StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime,
652                 unresponsive_threshold, crash_on_hang_threads);
653   StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime,
654                 unresponsive_threshold, crash_on_hang_threads);
655   StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime,
656                 unresponsive_threshold, crash_on_hang_threads);
657 }
658
659 // static
660 void ThreadWatcherList::StartWatching(
661     const BrowserThread::ID& thread_id,
662     const std::string& thread_name,
663     const base::TimeDelta& sleep_time,
664     const base::TimeDelta& unresponsive_time,
665     uint32 unresponsive_threshold,
666     const CrashOnHangThreadMap& crash_on_hang_threads) {
667   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
668
669   CrashOnHangThreadMap::const_iterator it =
670       crash_on_hang_threads.find(thread_name);
671   bool crash_on_hang = false;
672   uint32 live_threads_threshold = 0;
673   if (it != crash_on_hang_threads.end()) {
674     crash_on_hang = true;
675     live_threads_threshold = it->second.live_threads_threshold;
676     unresponsive_threshold = it->second.unresponsive_threshold;
677   }
678
679   ThreadWatcher::StartWatching(
680       ThreadWatcher::WatchingParams(thread_id,
681                                     thread_name,
682                                     sleep_time,
683                                     unresponsive_time,
684                                     unresponsive_threshold,
685                                     crash_on_hang,
686                                     live_threads_threshold));
687 }
688
689 // static
690 void ThreadWatcherList::DeleteAll() {
691   if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
692     WatchDogThread::PostTask(
693         FROM_HERE,
694         base::Bind(&ThreadWatcherList::DeleteAll));
695     return;
696   }
697
698   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
699   if (!g_thread_watcher_list_)
700     return;
701
702   // Delete all thread watcher objects.
703   while (!g_thread_watcher_list_->registered_.empty()) {
704     RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin();
705     delete it->second;
706     g_thread_watcher_list_->registered_.erase(it);
707   }
708
709   delete g_thread_watcher_list_;
710 }
711
712 // static
713 ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) {
714   DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
715   if (!g_thread_watcher_list_)
716     return NULL;
717   RegistrationList::iterator it =
718       g_thread_watcher_list_->registered_.find(thread_id);
719   if (g_thread_watcher_list_->registered_.end() == it)
720     return NULL;
721   return it->second;
722 }
723
724 // ThreadWatcherObserver methods and members.
725 //
726 // static
727 ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL;
728
729 ThreadWatcherObserver::ThreadWatcherObserver(
730     const base::TimeDelta& wakeup_interval)
731     : last_wakeup_time_(base::TimeTicks::Now()),
732       wakeup_interval_(wakeup_interval) {
733   CHECK(!g_thread_watcher_observer_);
734   g_thread_watcher_observer_ = this;
735 }
736
737 ThreadWatcherObserver::~ThreadWatcherObserver() {
738   DCHECK(this == g_thread_watcher_observer_);
739   g_thread_watcher_observer_ = NULL;
740 }
741
742 // static
743 void ThreadWatcherObserver::SetupNotifications(
744     const base::TimeDelta& wakeup_interval) {
745   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
746   ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval);
747   MetricsService::SetUpNotifications(&observer->registrar_, observer);
748 }
749
750 // static
751 void ThreadWatcherObserver::RemoveNotifications() {
752   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
753   if (!g_thread_watcher_observer_)
754     return;
755   g_thread_watcher_observer_->registrar_.RemoveAll();
756   delete g_thread_watcher_observer_;
757 }
758
759 void ThreadWatcherObserver::Observe(
760     int type,
761     const content::NotificationSource& source,
762     const content::NotificationDetails& details) {
763   // There is some user activity, see if thread watchers are to be awakened.
764   base::TimeTicks now = base::TimeTicks::Now();
765   if ((now - last_wakeup_time_) < wakeup_interval_)
766     return;
767   last_wakeup_time_ = now;
768   WatchDogThread::PostTask(
769       FROM_HERE,
770       base::Bind(&ThreadWatcherList::WakeUpAll));
771 }
772
773 // WatchDogThread methods and members.
774
775 // This lock protects g_watchdog_thread.
776 static base::LazyInstance<base::Lock>::Leaky
777     g_watchdog_lock = LAZY_INSTANCE_INITIALIZER;
778
779 // The singleton of this class.
// Set in WatchDogThread::Init() and reset to NULL in
// WatchDogThread::CleanUp(), both while holding |g_watchdog_lock|.
780 static WatchDogThread* g_watchdog_thread = NULL;
781
// Names the underlying thread "BrowserWatchdog".
782 WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") {
783 }
784
// Calls Stop() on destruction.
785 WatchDogThread::~WatchDogThread() {
786   Stop();
787 }
788
789 // static
790 bool WatchDogThread::CurrentlyOnWatchDogThread() {
791   base::AutoLock lock(g_watchdog_lock.Get());
792   return g_watchdog_thread &&
793       g_watchdog_thread->message_loop() == base::MessageLoop::current();
794 }
795
796 // static
797 bool WatchDogThread::PostTask(const tracked_objects::Location& from_here,
798                               const base::Closure& task) {
799   return PostTaskHelper(from_here, task, base::TimeDelta());
800 }
801
802 // static
803 bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here,
804                                      const base::Closure& task,
805                                      base::TimeDelta delay) {
806   return PostTaskHelper(from_here, task, delay);
807 }
808
809 // static
810 bool WatchDogThread::PostTaskHelper(
811     const tracked_objects::Location& from_here,
812     const base::Closure& task,
813     base::TimeDelta delay) {
814   {
815     base::AutoLock lock(g_watchdog_lock.Get());
816
817     base::MessageLoop* message_loop = g_watchdog_thread ?
818         g_watchdog_thread->message_loop() : NULL;
819     if (message_loop) {
820       message_loop->PostDelayedTask(from_here, task, delay);
821       return true;
822     }
823   }
824
825   return false;
826 }
827
828 void WatchDogThread::Init() {
829   // This thread shouldn't be allowed to perform any blocking disk I/O.
830   base::ThreadRestrictions::SetIOAllowed(false);
831
832   base::AutoLock lock(g_watchdog_lock.Get());
833   CHECK(!g_watchdog_thread);
834   g_watchdog_thread = this;
835 }
836
837 void WatchDogThread::CleanUp() {
838   base::AutoLock lock(g_watchdog_lock.Get());
839   g_watchdog_thread = NULL;
840 }
841
842 namespace {
843
844 // StartupWatchDogThread methods and members.
845 //
846 // Class for detecting hangs during startup.
847 class StartupWatchDogThread : public base::Watchdog {
848  public:
849   // Constructor specifies how long the StartupWatchDogThread will wait before
850   // alarming.
851   explicit StartupWatchDogThread(const base::TimeDelta& duration)
852       : base::Watchdog(duration, "Startup watchdog thread", true) {
853   }
854
855   // Alarm is called if the time expires after an Arm() without someone calling
856   // Disarm(). When Alarm goes off, in release mode we get the crash dump
857   // without crashing and in debug mode we break into the debugger.
858   virtual void Alarm() OVERRIDE {
859 #ifndef NDEBUG
860     DCHECK(false);
861 #else
862     logging::DumpWithoutCrashing();
863 #endif
864   }
865
866   DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread);
867 };
868
869 // ShutdownWatchDogThread methods and members.
870 //
871 // Class for detecting hangs during shutdown.
872 class ShutdownWatchDogThread : public base::Watchdog {
873  public:
874   // Constructor specifies how long the ShutdownWatchDogThread will wait before
875   // alarming.
876   explicit ShutdownWatchDogThread(const base::TimeDelta& duration)
877       : base::Watchdog(duration, "Shutdown watchdog thread", true) {
878   }
879
880   // Alarm is called if the time expires after an Arm() without someone calling
881   // Disarm(). We crash the browser if this method is called.
882   virtual void Alarm() OVERRIDE {
883     ShutdownCrash();
884   }
885
886   DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread);
887 };
888 }  // namespace
889
890 // StartupTimeBomb methods and members.
891 //
892 // static
893 StartupTimeBomb* StartupTimeBomb::g_startup_timebomb_ = NULL;
894
895 StartupTimeBomb::StartupTimeBomb()
896     : startup_watchdog_(NULL),
897       thread_id_(base::PlatformThread::CurrentId()) {
898   CHECK(!g_startup_timebomb_);
899   g_startup_timebomb_ = this;
900 }
901
902 StartupTimeBomb::~StartupTimeBomb() {
903   DCHECK(this == g_startup_timebomb_);
904   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
905   if (startup_watchdog_)
906     Disarm();
907   g_startup_timebomb_ = NULL;
908 }
909
910 void StartupTimeBomb::Arm(const base::TimeDelta& duration) {
911   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
912   DCHECK(!startup_watchdog_);
913   startup_watchdog_ = new StartupWatchDogThread(duration);
914   startup_watchdog_->Arm();
915   return;
916 }
917
918 void StartupTimeBomb::Disarm() {
919   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
920   if (startup_watchdog_) {
921     startup_watchdog_->Disarm();
922     startup_watchdog_->Cleanup();
923     DeleteStartupWatchdog();
924   }
925 }
926
927 void StartupTimeBomb::DeleteStartupWatchdog() {
928   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
929   if (startup_watchdog_->IsJoinable()) {
930     // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns
931     // very fast.
932     base::ThreadRestrictions::SetIOAllowed(true);
933     delete startup_watchdog_;
934     startup_watchdog_ = NULL;
935     return;
936   }
937   base::MessageLoop::current()->PostDelayedTask(
938       FROM_HERE,
939       base::Bind(&StartupTimeBomb::DeleteStartupWatchdog,
940                  base::Unretained(this)),
941       base::TimeDelta::FromSeconds(10));
942 }
943
944 // static
945 void StartupTimeBomb::DisarmStartupTimeBomb() {
946   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
947   if (g_startup_timebomb_)
948     g_startup_timebomb_->Disarm();
949 }
950
951 // ShutdownWatcherHelper methods and members.
952 //
953 // ShutdownWatcherHelper is a wrapper class for detecting hangs during
954 // shutdown.
955 ShutdownWatcherHelper::ShutdownWatcherHelper()
956     : shutdown_watchdog_(NULL),
957       thread_id_(base::PlatformThread::CurrentId()) {
958 }
959
960 ShutdownWatcherHelper::~ShutdownWatcherHelper() {
961   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
962   if (shutdown_watchdog_) {
963     shutdown_watchdog_->Disarm();
964     delete shutdown_watchdog_;
965     shutdown_watchdog_ = NULL;
966   }
967 }
968
969 void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) {
970   DCHECK_EQ(thread_id_, base::PlatformThread::CurrentId());
971   DCHECK(!shutdown_watchdog_);
972   base::TimeDelta actual_duration = duration;
973
974   chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
975   if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
976     actual_duration *= 20;
977   } else if (channel == chrome::VersionInfo::CHANNEL_BETA ||
978              channel == chrome::VersionInfo::CHANNEL_DEV) {
979     actual_duration *= 10;
980   }
981
982 #if defined(OS_WIN)
983   // On Windows XP, give twice the time for shutdown.
984   if (base::win::GetVersion() <= base::win::VERSION_XP)
985     actual_duration *= 2;
986 #endif
987
988   shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration);
989   shutdown_watchdog_->Arm();
990 }