lockup_detector: Remove old softlockup code
[profile/ivi/kernel-x86-ivi.git] / kernel / nmi_watchdog.c
index 36817b2..a79d211 100644 (file)
@@ -30,6 +30,8 @@ static DEFINE_PER_CPU(struct perf_event *, nmi_watchdog_ev);
 static DEFINE_PER_CPU(int, nmi_watchdog_touch);
 static DEFINE_PER_CPU(long, alert_counter);
 
+static int panic_on_timeout;
+
 void touch_nmi_watchdog(void)
 {
        __raw_get_cpu_var(nmi_watchdog_touch) = 1;
@@ -46,42 +48,34 @@ void touch_all_nmi_watchdog(void)
        touch_softlockup_watchdog();
 }
 
-#ifdef CONFIG_SYSCTL
-/*
- * proc handler for /proc/sys/kernel/nmi_watchdog
- */
-int proc_nmi_enabled(struct ctl_table *table, int write,
-                    void __user *buffer, size_t *length, loff_t *ppos)
+static int __init setup_nmi_watchdog(char *str)
 {
-       int cpu;
-
-       if (per_cpu(nmi_watchdog_ev, smp_processor_id()) == NULL)
-               nmi_watchdog_enabled = 0;
-       else
-               nmi_watchdog_enabled = 1;
-
-       touch_all_nmi_watchdog();
-       proc_dointvec(table, write, buffer, length, ppos);
-       if (nmi_watchdog_enabled)
-               for_each_online_cpu(cpu)
-                       perf_event_enable(per_cpu(nmi_watchdog_ev, cpu));
-       else
-               for_each_online_cpu(cpu)
-                       perf_event_disable(per_cpu(nmi_watchdog_ev, cpu));
-       return 0;
+       /*
+        * Only a leading "panic" is recognised: a detected lockup then
+        * panics instead of warning.  The rest of the option is ignored.
+        */
+       if (!strncmp(str, "panic", 5))
+               panic_on_timeout = 1;
+
+       return 1;
 }
+__setup("nmi_watchdog=", setup_nmi_watchdog);
 
-#endif /* CONFIG_SYSCTL */
-
-struct perf_event_attr wd_attr = {
-       .type = PERF_TYPE_HARDWARE,
-       .config = PERF_COUNT_HW_CPU_CYCLES,
-       .size = sizeof(struct perf_event_attr),
-       .pinned = 1,
-       .disabled = 1,
+struct perf_event_attr wd_hw_attr = {
+       .type           = PERF_TYPE_HARDWARE,
+       .config         = PERF_COUNT_HW_CPU_CYCLES,
+       .size           = sizeof(struct perf_event_attr),
+       .pinned         = 1,
+       .disabled       = 1,
 };
 
-static int panic_on_timeout;
+struct perf_event_attr wd_sw_attr = {
+       .type           = PERF_TYPE_SOFTWARE,
+       .config         = PERF_COUNT_SW_CPU_CLOCK,
+       .size           = sizeof(struct perf_event_attr),
+       .pinned         = 1,
+       .disabled       = 1,
+};
 
 void wd_overflow(struct perf_event *event, int nmi,
                 struct perf_sample_data *data,
@@ -101,21 +95,104 @@ void wd_overflow(struct perf_event *event, int nmi,
                 * Ayiee, looks like this CPU is stuck ...
                 * wait a few IRQs (5 seconds) before doing the oops ...
                 */
-               per_cpu(alert_counter,cpu) += 1;
-               if (per_cpu(alert_counter,cpu) == 5) {
-                       /*
-                        * die_nmi will return ONLY if NOTIFY_STOP happens..
-                        */
-                       die_nmi("BUG: NMI Watchdog detected LOCKUP",
-                               regs, panic_on_timeout);
+               per_cpu(alert_counter, cpu) += 1;
+               if (per_cpu(alert_counter, cpu) == 5) {
+                       if (panic_on_timeout)
+                               panic("NMI Watchdog detected LOCKUP on cpu %d", cpu);
+                       else
+                               WARN(1, "NMI Watchdog detected LOCKUP on cpu %d", cpu);
                }
        } else {
-               per_cpu(alert_counter,cpu) = 0;
+               per_cpu(alert_counter, cpu) = 0;
        }
 
        return;
 }
 
+/* Create (hardware first, software fallback) and enable cpu's watchdog event; returns 0 or -1 */
+static int enable_nmi_watchdog(int cpu)
+{
+       struct perf_event *ev = per_cpu(nmi_watchdog_ev, cpu);
+
+       /* Event already exists: enable it unless it is already active */
+       if (ev) {
+               if (ev->state <= PERF_EVENT_STATE_OFF)
+                       perf_event_enable(ev);
+               return 0;
+       }
+
+       /* Try to register using hardware perf events first */
+       wd_hw_attr.sample_period = hw_nmi_get_sample_period();
+       ev = perf_event_create_kernel_counter(&wd_hw_attr, cpu, -1, wd_overflow);
+       if (IS_ERR(ev)) {
+               /* hardware doesn't exist or not supported, fallback to software events */
+               printk(KERN_INFO "nmi_watchdog: hardware not available, trying software events\n");
+               wd_sw_attr.sample_period = NSEC_PER_SEC;
+               ev = perf_event_create_kernel_counter(&wd_sw_attr, cpu, -1, wd_overflow);
+               if (IS_ERR(ev)) {
+                       printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", cpu, ev);
+                       return -1;
+               }
+       }
+
+       per_cpu(nmi_watchdog_ev, cpu) = ev;
+       perf_event_enable(ev);
+       return 0;
+}
+
+/* Disable and release cpu's watchdog perf event, if one exists */
+static void disable_nmi_watchdog(int cpu)
+{
+       struct perf_event *ev = per_cpu(nmi_watchdog_ev, cpu);
+
+       if (!ev)
+               return;
+       perf_event_disable(ev);
+       per_cpu(nmi_watchdog_ev, cpu) = NULL;
+       perf_event_release_kernel(ev);
+}
+
+#ifdef CONFIG_SYSCTL
+/*
+ * proc handler for /proc/sys/kernel/nmi_watchdog.  A read recomputes the flag
+ * from the live per-cpu events; a write enables/disables every online cpu.
+ */
+int nmi_watchdog_enabled;
+
+int proc_nmi_enabled(struct ctl_table *table, int write,
+                    void __user *buffer, size_t *length, loff_t *ppos)
+{
+       struct perf_event *event;
+       int cpu, ret;
+
+       if (!write) {
+               nmi_watchdog_enabled = 0;
+               for_each_online_cpu(cpu) {
+                       event = per_cpu(nmi_watchdog_ev, cpu);
+                       if (event && event->state > PERF_EVENT_STATE_OFF) {
+                               nmi_watchdog_enabled = 1;
+                               break;
+                       }
+               }
+               return proc_dointvec(table, write, buffer, length, ppos);
+       }
+
+       touch_all_nmi_watchdog();
+       ret = proc_dointvec(table, write, buffer, length, ppos);
+       if (ret)
+               return ret;
+       for_each_online_cpu(cpu) {
+               if (!nmi_watchdog_enabled)
+                       disable_nmi_watchdog(cpu);
+               else if (enable_nmi_watchdog(cpu))
+                       printk(KERN_ERR "NMI watchdog failed configuration, "
+                               " can not be enabled\n");
+       }
+       return 0;
+}
+
+#endif /* CONFIG_SYSCTL */
+
 /*
  * Create/destroy watchdog threads as CPUs come and go:
  */
@@ -123,7 +200,6 @@ static int __cpuinit
 cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
        int hotcpu = (unsigned long)hcpu;
-       struct perf_event *event;
 
        switch (action) {
        case CPU_UP_PREPARE:
@@ -132,25 +208,15 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
                break;
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
-               /* originally wanted the below chunk to be in CPU_UP_PREPARE, but caps is unpriv for non-CPU0 */
-               wd_attr.sample_period = cpu_khz * 1000;
-               event = perf_event_create_kernel_counter(&wd_attr, hotcpu, -1, wd_overflow);
-               if (IS_ERR(event)) {
-                       printk(KERN_ERR "nmi watchdog failed to create perf event on %i: %p\n", hotcpu, event);
+               if (enable_nmi_watchdog(hotcpu))
                        return NOTIFY_BAD;
-               }
-               per_cpu(nmi_watchdog_ev, hotcpu) = event;
-               perf_event_enable(per_cpu(nmi_watchdog_ev, hotcpu));
                break;
 #ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
-               perf_event_disable(per_cpu(nmi_watchdog_ev, hotcpu));
+               disable_nmi_watchdog(hotcpu);
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
-               event = per_cpu(nmi_watchdog_ev, hotcpu);
-               per_cpu(nmi_watchdog_ev, hotcpu) = NULL;
-               perf_event_release_kernel(event);
                break;
 #endif /* CONFIG_HOTPLUG_CPU */
        }
@@ -178,6 +244,8 @@ static int __init spawn_nmi_watchdog_task(void)
        if (nonmi_watchdog)
                return 0;
 
+       printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
+
        err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
        if (err == NOTIFY_BAD) {
                BUG();