Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux
author Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Mon, 18 Feb 2013 21:34:11 +0000 (22:34 +0100)
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Mon, 18 Feb 2013 21:34:11 +0000 (22:34 +0100)
* 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux: (35 commits)
  PM idle: remove global declaration of pm_idle
  unicore32 idle: delete stray pm_idle comment
  openrisc idle: delete pm_idle
  mn10300 idle: delete pm_idle
  microblaze idle: delete pm_idle
  m32r idle: delete pm_idle, and other dead idle code
  ia64 idle: delete pm_idle
  cris idle: delete idle and pm_idle
  ARM64 idle: delete pm_idle
  ARM idle: delete pm_idle
  blackfin idle: delete pm_idle
  sparc idle: rename pm_idle to sparc_idle
  sh idle: rename global pm_idle to static sh_idle
  x86 idle: rename global pm_idle to static x86_idle
  APM idle: register apm_cpu_idle via cpuidle
  tools/power turbostat: display SMI count by default
  intel_idle: export both C1 and C1E
  cpuidle: remove vestage definition of cpuidle_state_usage.driver_data
  x86 idle: remove 32-bit-only "no-hlt" parameter, hlt_works_ok flag
  x86 idle: remove mwait_idle() and "idle=mwait" cmdline param
  ...

Conflicts:
arch/x86/kernel/process.c (with PM / tracing commit 43720bd)
drivers/acpi/processor_idle.c (with ACPICA commit 4f84291)

35 files changed:
Documentation/kernel-parameters.txt
arch/arm/kernel/process.c
arch/arm/mach-davinci/cpuidle.c
arch/arm64/kernel/process.c
arch/blackfin/kernel/process.c
arch/cris/kernel/process.c
arch/ia64/kernel/process.c
arch/ia64/kernel/setup.c
arch/m32r/kernel/process.c
arch/microblaze/kernel/process.c
arch/mn10300/kernel/process.c
arch/openrisc/kernel/idle.c
arch/sh/kernel/idle.c
arch/sparc/include/asm/processor_32.h
arch/sparc/kernel/apc.c
arch/sparc/kernel/leon_pmc.c
arch/sparc/kernel/pmc.c
arch/sparc/kernel/process_32.c
arch/unicore32/kernel/process.c
arch/x86/Kconfig
arch/x86/include/asm/mwait.h
arch/x86/include/asm/processor.h
arch/x86/include/uapi/asm/msr-index.h
arch/x86/kernel/apm_32.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/proc.c
arch/x86/kernel/process.c
arch/x86/kernel/smpboot.c
arch/x86/xen/setup.c
drivers/acpi/processor_idle.c
drivers/idle/intel_idle.c
include/linux/cpuidle.h
include/linux/pm.h
tools/power/x86/turbostat/turbostat.8
tools/power/x86/turbostat/turbostat.c

index 41c5d9e..4c5b3f9 100644 (file)
@@ -1039,16 +1039,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        Claim all unknown PCI IDE storage controllers.
 
        idle=           [X86]
-                       Format: idle=poll, idle=mwait, idle=halt, idle=nomwait
+                       Format: idle=poll, idle=halt, idle=nomwait
                        Poll forces a polling idle loop that can slightly
                        improve the performance of waking up a idle CPU, but
                        will use a lot of power and make the system run hot.
                        Not recommended.
-                       idle=mwait: On systems which support MONITOR/MWAIT but
-                       the kernel chose to not use it because it doesn't save
-                       as much power as a normal idle loop, use the
-                       MONITOR/MWAIT idle loop anyways. Performance should be
-                       the same as idle=poll.
                        idle=halt: Halt is forced to be used for CPU idle.
                        In such case C2/C3 won't be used again.
                        idle=nomwait: Disable mwait for CPU C-states
@@ -1891,10 +1886,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        wfi(ARM) instruction doesn't work correctly and not to
                        use it. This is also useful when using JTAG debugger.
 
-       no-hlt          [BUGS=X86-32] Tells the kernel that the hlt
-                       instruction doesn't work correctly and not to
-                       use it.
-
        no_file_caps    Tells the kernel not to honor file capabilities.  The
                        only way then for a file to be executed with privilege
                        is to be setuid root or executed by root.
index c6dec5f..047d3e4 100644 (file)
@@ -172,14 +172,9 @@ static void default_idle(void)
        local_irq_enable();
 }
 
-void (*pm_idle)(void) = default_idle;
-EXPORT_SYMBOL(pm_idle);
-
 /*
- * The idle thread, has rather strange semantics for calling pm_idle,
- * but this is what x86 does and we need to do the same, so that
- * things like cpuidle get called in the same way.  The only difference
- * is that we always respect 'hlt_counter' to prevent low power idle.
+ * The idle thread.
+ * We always respect 'hlt_counter' to prevent low power idle.
  */
 void cpu_idle(void)
 {
@@ -210,10 +205,10 @@ void cpu_idle(void)
                        } else if (!need_resched()) {
                                stop_critical_timings();
                                if (cpuidle_idle_call())
-                                       pm_idle();
+                                       default_idle();
                                start_critical_timings();
                                /*
-                                * pm_idle functions must always
+                                * default_idle functions must always
                                 * return with IRQs enabled.
                                 */
                                WARN_ON(irqs_disabled());
index 9107691..5ac9e93 100644 (file)
 
 #define DAVINCI_CPUIDLE_MAX_STATES     2
 
-struct davinci_ops {
-       void (*enter) (u32 flags);
-       void (*exit) (u32 flags);
-       u32 flags;
-};
+static DEFINE_PER_CPU(struct cpuidle_device, davinci_cpuidle_device);
+static void __iomem *ddr2_reg_base;
+static bool ddr2_pdown;
+
+static void davinci_save_ddr_power(int enter, bool pdown)
+{
+       u32 val;
+
+       val = __raw_readl(ddr2_reg_base + DDR2_SDRCR_OFFSET);
+
+       if (enter) {
+               if (pdown)
+                       val |= DDR2_SRPD_BIT;
+               else
+                       val &= ~DDR2_SRPD_BIT;
+               val |= DDR2_LPMODEN_BIT;
+       } else {
+               val &= ~(DDR2_SRPD_BIT | DDR2_LPMODEN_BIT);
+       }
+
+       __raw_writel(val, ddr2_reg_base + DDR2_SDRCR_OFFSET);
+}
 
 /* Actual code that puts the SoC in different idle states */
 static int davinci_enter_idle(struct cpuidle_device *dev,
                                struct cpuidle_driver *drv,
                                                int index)
 {
-       struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-       struct davinci_ops *ops = cpuidle_get_statedata(state_usage);
-
-       if (ops && ops->enter)
-               ops->enter(ops->flags);
+       davinci_save_ddr_power(1, ddr2_pdown);
 
        index = cpuidle_wrap_enter(dev, drv, index,
                                arm_cpuidle_simple_enter);
 
-       if (ops && ops->exit)
-               ops->exit(ops->flags);
+       davinci_save_ddr_power(0, ddr2_pdown);
 
        return index;
 }
 
-/* fields in davinci_ops.flags */
-#define DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN        BIT(0)
-
 static struct cpuidle_driver davinci_idle_driver = {
        .name                   = "cpuidle-davinci",
        .owner                  = THIS_MODULE,
@@ -70,45 +79,6 @@ static struct cpuidle_driver davinci_idle_driver = {
        .state_count = DAVINCI_CPUIDLE_MAX_STATES,
 };
 
-static DEFINE_PER_CPU(struct cpuidle_device, davinci_cpuidle_device);
-static void __iomem *ddr2_reg_base;
-
-static void davinci_save_ddr_power(int enter, bool pdown)
-{
-       u32 val;
-
-       val = __raw_readl(ddr2_reg_base + DDR2_SDRCR_OFFSET);
-
-       if (enter) {
-               if (pdown)
-                       val |= DDR2_SRPD_BIT;
-               else
-                       val &= ~DDR2_SRPD_BIT;
-               val |= DDR2_LPMODEN_BIT;
-       } else {
-               val &= ~(DDR2_SRPD_BIT | DDR2_LPMODEN_BIT);
-       }
-
-       __raw_writel(val, ddr2_reg_base + DDR2_SDRCR_OFFSET);
-}
-
-static void davinci_c2state_enter(u32 flags)
-{
-       davinci_save_ddr_power(1, !!(flags & DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN));
-}
-
-static void davinci_c2state_exit(u32 flags)
-{
-       davinci_save_ddr_power(0, !!(flags & DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN));
-}
-
-static struct davinci_ops davinci_states[DAVINCI_CPUIDLE_MAX_STATES] = {
-       [1] = {
-               .enter  = davinci_c2state_enter,
-               .exit   = davinci_c2state_exit,
-       },
-};
-
 static int __init davinci_cpuidle_probe(struct platform_device *pdev)
 {
        int ret;
@@ -124,11 +94,7 @@ static int __init davinci_cpuidle_probe(struct platform_device *pdev)
 
        ddr2_reg_base = pdata->ddr2_ctlr_base;
 
-       if (pdata->ddr2_pdown)
-               davinci_states[1].flags |= DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN;
-       cpuidle_set_statedata(&device->states_usage[1], &davinci_states[1]);
-
-       device->state_count = DAVINCI_CPUIDLE_MAX_STATES;
+       ddr2_pdown = pdata->ddr2_pdown;
 
        ret = cpuidle_register_driver(&davinci_idle_driver);
        if (ret) {
index cb0956b..c7002d4 100644 (file)
@@ -97,14 +97,9 @@ static void default_idle(void)
        local_irq_enable();
 }
 
-void (*pm_idle)(void) = default_idle;
-EXPORT_SYMBOL_GPL(pm_idle);
-
 /*
- * The idle thread, has rather strange semantics for calling pm_idle,
- * but this is what x86 does and we need to do the same, so that
- * things like cpuidle get called in the same way.  The only difference
- * is that we always respect 'hlt_counter' to prevent low power idle.
+ * The idle thread.
+ * We always respect 'hlt_counter' to prevent low power idle.
  */
 void cpu_idle(void)
 {
@@ -122,10 +117,10 @@ void cpu_idle(void)
                        local_irq_disable();
                        if (!need_resched()) {
                                stop_critical_timings();
-                               pm_idle();
+                               default_idle();
                                start_critical_timings();
                                /*
-                                * pm_idle functions should always return
+                                * default_idle functions should always return
                                 * with IRQs enabled.
                                 */
                                WARN_ON(irqs_disabled());
index 3e16ad9..8061426 100644 (file)
@@ -39,12 +39,6 @@ int nr_l1stack_tasks;
 void *l1_stack_base;
 unsigned long l1_stack_len;
 
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void) = NULL;
-EXPORT_SYMBOL(pm_idle);
-
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
@@ -81,7 +75,6 @@ void cpu_idle(void)
 {
        /* endless idle loop with no priority at all */
        while (1) {
-               void (*idle)(void) = pm_idle;
 
 #ifdef CONFIG_HOTPLUG_CPU
                if (cpu_is_offline(smp_processor_id()))
index 7f65be6..104ff4d 100644 (file)
@@ -54,11 +54,6 @@ void enable_hlt(void)
 
 EXPORT_SYMBOL(enable_hlt);
  
-/*
- * The following aren't currently used.
- */
-void (*pm_idle)(void);
-
 extern void default_idle(void);
 
 void (*pm_power_off)(void);
@@ -77,16 +72,12 @@ void cpu_idle (void)
        while (1) {
                rcu_idle_enter();
                while (!need_resched()) {
-                       void (*idle)(void);
                        /*
                         * Mark this as an RCU critical section so that
                         * synchronize_kernel() in the unload path waits
                         * for our completion.
                         */
-                       idle = pm_idle;
-                       if (!idle)
-                               idle = default_idle;
-                       idle();
+                       default_idle();
                }
                rcu_idle_exit();
                schedule_preempt_disabled();
index 31360cb..e34f565 100644 (file)
@@ -57,8 +57,6 @@ void (*ia64_mark_idle)(int);
 
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
-void (*pm_idle) (void);
-EXPORT_SYMBOL(pm_idle);
 void (*pm_power_off) (void);
 EXPORT_SYMBOL(pm_power_off);
 
@@ -301,7 +299,6 @@ cpu_idle (void)
                        if (mark_idle)
                                (*mark_idle)(1);
 
-                       idle = pm_idle;
                        if (!idle)
                                idle = default_idle;
                        (*idle)();
index aaefd9b..2029cc0 100644 (file)
@@ -1051,7 +1051,6 @@ cpu_init (void)
                max_num_phys_stacked = num_phys_stacked;
        }
        platform_cpu_init();
-       pm_idle = default_idle;
 }
 
 void __init
index 765d0f5..bde899e 100644 (file)
@@ -44,36 +44,10 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
        return tsk->thread.lr;
 }
 
-/*
- * Powermanagement idle function, if any..
- */
-static void (*pm_idle)(void) = NULL;
-
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
 /*
- * We use this is we don't have any better
- * idle routine..
- */
-static void default_idle(void)
-{
-       /* M32R_FIXME: Please use "cpu_sleep" mode.  */
-       cpu_relax();
-}
-
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->work.need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle (void)
-{
-       /* M32R_FIXME */
-       cpu_relax();
-}
-
-/*
  * The idle thread. There's no useful work to be
  * done, so just try to conserve power and have a
  * low exit latency (ie sit in a loop waiting for
@@ -84,14 +58,8 @@ void cpu_idle (void)
        /* endless idle loop with no priority at all */
        while (1) {
                rcu_idle_enter();
-               while (!need_resched()) {
-                       void (*idle)(void) = pm_idle;
-
-                       if (!idle)
-                               idle = default_idle;
-
-                       idle();
-               }
+               while (!need_resched())
+                       cpu_relax();
                rcu_idle_exit();
                schedule_preempt_disabled();
        }
@@ -120,21 +88,6 @@ void machine_power_off(void)
        /* M32R_FIXME */
 }
 
-static int __init idle_setup (char *str)
-{
-       if (!strncmp(str, "poll", 4)) {
-               printk("using poll in idle threads.\n");
-               pm_idle = poll_idle;
-       } else if (!strncmp(str, "sleep", 4)) {
-               printk("using sleep in idle threads.\n");
-               pm_idle = default_idle;
-       }
-
-       return 1;
-}
-
-__setup("idle=", idle_setup);
-
 void show_regs(struct pt_regs * regs)
 {
        printk("\n");
index a5b74f7..6ff2dcf 100644 (file)
@@ -41,7 +41,6 @@ void show_regs(struct pt_regs *regs)
                                regs->msr, regs->ear, regs->esr, regs->fsr);
 }
 
-void (*pm_idle)(void);
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
@@ -98,8 +97,6 @@ void cpu_idle(void)
 
        /* endless idle loop with no priority at all */
        while (1) {
-               void (*idle)(void) = pm_idle;
-
                if (!idle)
                        idle = default_idle;
 
index eb09f5a..84f4e97 100644 (file)
 #include "internal.h"
 
 /*
- * power management idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-
-/*
  * return saved PC of a blocked thread.
  */
 unsigned long thread_saved_pc(struct task_struct *tsk)
@@ -113,7 +107,6 @@ void cpu_idle(void)
                        void (*idle)(void);
 
                        smp_rmb();
-                       idle = pm_idle;
                        if (!idle) {
 #if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
                                idle = poll_idle;
index 7d618fe..5e8a3b6 100644 (file)
 
 void (*powersave) (void) = NULL;
 
-static inline void pm_idle(void)
-{
-       barrier();
-}
-
 void cpu_idle(void)
 {
        set_thread_flag(TIF_POLLING_NRFLAG);
index 0c91016..3d5a1b3 100644 (file)
@@ -22,7 +22,7 @@
 #include <asm/smp.h>
 #include <asm/bl_bit.h>
 
-void (*pm_idle)(void);
+static void (*sh_idle)(void);
 
 static int hlt_counter;
 
@@ -103,9 +103,9 @@ void cpu_idle(void)
                        /* Don't trace irqs off for idle */
                        stop_critical_timings();
                        if (cpuidle_idle_call())
-                               pm_idle();
+                               sh_idle();
                        /*
-                        * Sanity check to ensure that pm_idle() returns
+                        * Sanity check to ensure that sh_idle() returns
                         * with IRQs enabled
                         */
                        WARN_ON(irqs_disabled());
@@ -123,13 +123,13 @@ void __init select_idle_routine(void)
        /*
         * If a platform has set its own idle routine, leave it alone.
         */
-       if (pm_idle)
+       if (sh_idle)
                return;
 
        if (hlt_works())
-               pm_idle = default_idle;
+               sh_idle = default_idle;
        else
-               pm_idle = poll_idle;
+               sh_idle = poll_idle;
 }
 
 void stop_this_cpu(void *unused)
index c1e0191..2c7baa4 100644 (file)
@@ -118,6 +118,7 @@ extern unsigned long get_wchan(struct task_struct *);
 extern struct task_struct *last_task_used_math;
 
 #define cpu_relax()    barrier()
+extern void (*sparc_idle)(void);
 
 #endif
 
index 348fa1a..eefda32 100644 (file)
@@ -20,6 +20,7 @@
 #include <asm/uaccess.h>
 #include <asm/auxio.h>
 #include <asm/apc.h>
+#include <asm/processor.h>
 
 /* Debugging
  * 
@@ -158,7 +159,7 @@ static int apc_probe(struct platform_device *op)
 
        /* Assign power management IDLE handler */
        if (!apc_no_idle)
-               pm_idle = apc_swift_idle;       
+               sparc_idle = apc_swift_idle;
 
        printk(KERN_INFO "%s: power management initialized%s\n", 
               APC_DEVNAME, apc_no_idle ? " (CPU idle disabled)" : "");
index 4e17432..708bca4 100644 (file)
@@ -9,6 +9,7 @@
 #include <asm/leon_amba.h>
 #include <asm/cpu_type.h>
 #include <asm/leon.h>
+#include <asm/processor.h>
 
 /* List of Systems that need fixup instructions around power-down instruction */
 unsigned int pmc_leon_fixup_ids[] = {
@@ -69,9 +70,9 @@ static int __init leon_pmc_install(void)
        if (sparc_cpu_model == sparc_leon) {
                /* Assign power management IDLE handler */
                if (pmc_leon_need_fixup())
-                       pm_idle = pmc_leon_idle_fixup;
+                       sparc_idle = pmc_leon_idle_fixup;
                else
-                       pm_idle = pmc_leon_idle;
+                       sparc_idle = pmc_leon_idle;
 
                printk(KERN_INFO "leon: power management initialized\n");
        }
index dcbb62f..8b7297f 100644 (file)
@@ -17,6 +17,7 @@
 #include <asm/oplib.h>
 #include <asm/uaccess.h>
 #include <asm/auxio.h>
+#include <asm/processor.h>
 
 /* Debug
  *
@@ -63,7 +64,7 @@ static int pmc_probe(struct platform_device *op)
 
 #ifndef PMC_NO_IDLE
        /* Assign power management IDLE handler */
-       pm_idle = pmc_swift_idle;
+       sparc_idle = pmc_swift_idle;
 #endif
 
        printk(KERN_INFO "%s: power management initialized\n", PMC_DEVNAME);
index be8e862..62eede1 100644 (file)
@@ -43,8 +43,7 @@
  * Power management idle function 
  * Set in pm platform drivers (apc.c and pmc.c)
  */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
+void (*sparc_idle)(void);
 
 /* 
  * Power-off handler instantiation for pm.h compliance
@@ -75,8 +74,8 @@ void cpu_idle(void)
        /* endless idle loop with no priority at all */
        for (;;) {
                while (!need_resched()) {
-                       if (pm_idle)
-                               (*pm_idle)();
+                       if (sparc_idle)
+                               (*sparc_idle)();
                        else
                                cpu_relax();
                }
index 62bad9f..872d7e2 100644 (file)
@@ -45,11 +45,6 @@ static const char * const processor_modes[] = {
        "UK18", "UK19", "UK1A", "EXTN", "UK1C", "UK1D", "UK1E", "SUSR"
 };
 
-/*
- * The idle thread, has rather strange semantics for calling pm_idle,
- * but this is what x86 does and we need to do the same, so that
- * things like cpuidle get called in the same way.
- */
 void cpu_idle(void)
 {
        /* endless idle loop with no priority at all */
index 4f7c2da..c03309f 100644 (file)
@@ -1922,6 +1922,7 @@ config APM_DO_ENABLE
          this feature.
 
 config APM_CPU_IDLE
+       depends on CPU_IDLE
        bool "Make CPU Idle calls when idle"
        ---help---
          Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.
index bcdff99..2f366d0 100644 (file)
@@ -4,7 +4,8 @@
 #define MWAIT_SUBSTATE_MASK            0xf
 #define MWAIT_CSTATE_MASK              0xf
 #define MWAIT_SUBSTATE_SIZE            4
-#define MWAIT_MAX_NUM_CSTATES          8
+#define MWAIT_HINT2CSTATE(hint)                (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
+#define MWAIT_HINT2SUBSTATE(hint)      ((hint) & MWAIT_CSTATE_MASK)
 
 #define CPUID_MWAIT_LEAF               5
 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
index 888184b..b9e7d27 100644 (file)
@@ -89,7 +89,6 @@ struct cpuinfo_x86 {
        char                    wp_works_ok;    /* It doesn't on 386's */
 
        /* Problems on some 486Dx4's and old 386's: */
-       char                    hlt_works_ok;
        char                    hard_math;
        char                    rfu;
        char                    fdiv_bug;
@@ -165,15 +164,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
 
 extern const struct seq_operations cpuinfo_op;
 
-static inline int hlt_works(int cpu)
-{
-#ifdef CONFIG_X86_32
-       return cpu_data(cpu).hlt_works_ok;
-#else
-       return 1;
-#endif
-}
-
 #define cache_line_size()      (boot_cpu_data.x86_cache_alignment)
 
 extern void cpu_detect(struct cpuinfo_x86 *c);
@@ -725,7 +715,7 @@ extern unsigned long                boot_option_idle_override;
 extern bool                    amd_e400_c1e_detected;
 
 enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
-                        IDLE_POLL, IDLE_FORCE_MWAIT};
+                        IDLE_POLL};
 
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
@@ -998,7 +988,11 @@ extern unsigned long arch_align_stack(unsigned long sp);
 extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
 
 void default_idle(void);
-bool set_pm_idle_to_default(void);
+#ifdef CONFIG_XEN
+bool xen_set_default_idle(void);
+#else
+#define xen_set_default_idle 0
+#endif
 
 void stop_this_cpu(void *dummy);
 
index 433a59f..8d013f5 100644 (file)
 #define DEBUGCTLMSR_BTS_OFF_USR                (1UL << 10)
 #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
 
+#define MSR_IA32_POWER_CTL             0x000001fc
+
 #define MSR_IA32_MC0_CTL               0x00000400
 #define MSR_IA32_MC0_STATUS            0x00000401
 #define MSR_IA32_MC0_ADDR              0x00000402
 #define MSR_IA32_PLATFORM_ID           0x00000017
 #define MSR_IA32_EBL_CR_POWERON                0x0000002a
 #define MSR_EBC_FREQUENCY_ID           0x0000002c
+#define MSR_SMI_COUNT                  0x00000034
 #define MSR_IA32_FEATURE_CONTROL        0x0000003a
 #define MSR_IA32_TSC_ADJUST             0x0000003b
 
index d65464e..9f4bc6a 100644 (file)
 #include <linux/acpi.h>
 #include <linux/syscore_ops.h>
 #include <linux/i8253.h>
+#include <linux/cpuidle.h>
 
 #include <asm/uaccess.h>
 #include <asm/desc.h>
@@ -360,13 +361,35 @@ struct apm_user {
  * idle percentage above which bios idle calls are done
  */
 #ifdef CONFIG_APM_CPU_IDLE
-#warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012
 #define DEFAULT_IDLE_THRESHOLD 95
 #else
 #define DEFAULT_IDLE_THRESHOLD 100
 #endif
 #define DEFAULT_IDLE_PERIOD    (100 / 3)
 
+static int apm_cpu_idle(struct cpuidle_device *dev,
+                       struct cpuidle_driver *drv, int index);
+
+static struct cpuidle_driver apm_idle_driver = {
+       .name = "apm_idle",
+       .owner = THIS_MODULE,
+       .en_core_tk_irqen = 1,
+       .states = {
+               { /* entry 0 is for polling */ },
+               { /* entry 1 is for APM idle */
+                       .name = "APM",
+                       .desc = "APM idle",
+                       .flags = CPUIDLE_FLAG_TIME_VALID,
+                       .exit_latency = 250,    /* WAG */
+                       .target_residency = 500,        /* WAG */
+                       .enter = &apm_cpu_idle
+               },
+       },
+       .state_count = 2,
+};
+
+static struct cpuidle_device apm_cpuidle_device;
+
 /*
  * Local variables
  */
@@ -377,7 +400,6 @@ static struct {
 static int clock_slowed;
 static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD;
 static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD;
-static int set_pm_idle;
 static int suspends_pending;
 static int standbys_pending;
 static int ignore_sys_suspend;
@@ -884,8 +906,6 @@ static void apm_do_busy(void)
 #define IDLE_CALC_LIMIT        (HZ * 100)
 #define IDLE_LEAKY_MAX 16
 
-static void (*original_pm_idle)(void) __read_mostly;
-
 /**
  * apm_cpu_idle                -       cpu idling for APM capable Linux
  *
@@ -894,7 +914,8 @@ static void (*original_pm_idle)(void) __read_mostly;
  * Furthermore it calls the system default idle routine.
  */
 
-static void apm_cpu_idle(void)
+static int apm_cpu_idle(struct cpuidle_device *dev,
+       struct cpuidle_driver *drv, int index)
 {
        static int use_apm_idle; /* = 0 */
        static unsigned int last_jiffies; /* = 0 */
@@ -904,7 +925,6 @@ static void apm_cpu_idle(void)
        unsigned int jiffies_since_last_check = jiffies - last_jiffies;
        unsigned int bucket;
 
-       WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
 recalc:
        if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
                use_apm_idle = 0;
@@ -950,10 +970,7 @@ recalc:
                                break;
                        }
                }
-               if (original_pm_idle)
-                       original_pm_idle();
-               else
-                       default_idle();
+               default_idle();
                local_irq_disable();
                jiffies_since_last_check = jiffies - last_jiffies;
                if (jiffies_since_last_check > idle_period)
@@ -963,7 +980,7 @@ recalc:
        if (apm_idle_done)
                apm_do_busy();
 
-       local_irq_enable();
+       return index;
 }
 
 /**
@@ -2381,9 +2398,9 @@ static int __init apm_init(void)
        if (HZ != 100)
                idle_period = (idle_period * HZ) / 100;
        if (idle_threshold < 100) {
-               original_pm_idle = pm_idle;
-               pm_idle  = apm_cpu_idle;
-               set_pm_idle = 1;
+               if (!cpuidle_register_driver(&apm_idle_driver))
+                       if (cpuidle_register_device(&apm_cpuidle_device))
+                               cpuidle_unregister_driver(&apm_idle_driver);
        }
 
        return 0;
@@ -2393,15 +2410,9 @@ static void __exit apm_exit(void)
 {
        int error;
 
-       if (set_pm_idle) {
-               pm_idle = original_pm_idle;
-               /*
-                * We are about to unload the current idle thread pm callback
-                * (pm_idle), Wait for all processors to update cached/local
-                * copies of pm_idle before proceeding.
-                */
-               kick_all_cpus_sync();
-       }
+       cpuidle_unregister_device(&apm_cpuidle_device);
+       cpuidle_unregister_driver(&apm_idle_driver);
+
        if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
            && (apm_info.connection_version > 0x0100)) {
                error = apm_engage_power_management(APM_DEVICE_ALL, 0);
index 92dfec9..af6455e 100644 (file)
 #include <asm/paravirt.h>
 #include <asm/alternative.h>
 
-static int __init no_halt(char *s)
-{
-       WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n");
-       boot_cpu_data.hlt_works_ok = 0;
-       return 1;
-}
-
-__setup("no-hlt", no_halt);
-
 static int __init no_387(char *s)
 {
        boot_cpu_data.hard_math = 0;
@@ -89,23 +80,6 @@ static void __init check_fpu(void)
                pr_warn("Hmm, FPU with FDIV bug\n");
 }
 
-static void __init check_hlt(void)
-{
-       if (boot_cpu_data.x86 >= 5 || paravirt_enabled())
-               return;
-
-       pr_info("Checking 'hlt' instruction... ");
-       if (!boot_cpu_data.hlt_works_ok) {
-               pr_cont("disabled\n");
-               return;
-       }
-       halt();
-       halt();
-       halt();
-       halt();
-       pr_cont("OK\n");
-}
-
 /*
  * Check whether we are able to run this kernel safely on SMP.
  *
@@ -129,7 +103,6 @@ void __init check_bugs(void)
        print_cpu_info(&boot_cpu_data);
 #endif
        check_config();
-       check_hlt();
        init_utsname()->machine[1] =
                '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
        alternative_instructions();
index 3286a92..e280253 100644 (file)
@@ -28,7 +28,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 {
        seq_printf(m,
                   "fdiv_bug\t: %s\n"
-                  "hlt_bug\t\t: %s\n"
                   "f00f_bug\t: %s\n"
                   "coma_bug\t: %s\n"
                   "fpu\t\t: %s\n"
@@ -36,7 +35,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
                   "cpuid level\t: %d\n"
                   "wp\t\t: %s\n",
                   c->fdiv_bug ? "yes" : "no",
-                  c->hlt_works_ok ? "no" : "yes",
                   c->f00f_bug ? "yes" : "no",
                   c->coma_bug ? "yes" : "no",
                   c->hard_math ? "yes" : "no",
index dcfc1f4..14ae100 100644 (file)
@@ -268,13 +268,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-#ifdef CONFIG_APM_MODULE
-EXPORT_SYMBOL(pm_idle);
-#endif
+static void (*x86_idle)(void);
 
 #ifndef CONFIG_SMP
 static inline void play_dead(void)
@@ -351,7 +345,7 @@ void cpu_idle(void)
                        rcu_idle_enter();
 
                        if (cpuidle_idle_call())
-                               pm_idle();
+                               x86_idle();
 
                        rcu_idle_exit();
                        start_critical_timings();
@@ -394,14 +388,16 @@ void default_idle(void)
 EXPORT_SYMBOL(default_idle);
 #endif
 
-bool set_pm_idle_to_default(void)
+#ifdef CONFIG_XEN
+bool xen_set_default_idle(void)
 {
-       bool ret = !!pm_idle;
+       bool ret = !!x86_idle;
 
-       pm_idle = default_idle;
+       x86_idle = default_idle;
 
        return ret;
 }
+#endif
 void stop_this_cpu(void *dummy)
 {
        local_irq_disable();
@@ -411,29 +407,8 @@ void stop_this_cpu(void *dummy)
        set_cpu_online(smp_processor_id(), false);
        disable_local_APIC();
 
-       for (;;) {
-               if (hlt_works(smp_processor_id()))
-                       halt();
-       }
-}
-
-/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-static void mwait_idle(void)
-{
-       if (!need_resched()) {
-               trace_cpu_idle_rcuidle(1, smp_processor_id());
-               if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
-                       clflush((void *)&current_thread_info()->flags);
-
-               __monitor((void *)&current_thread_info()->flags, 0, 0);
-               smp_mb();
-               if (!need_resched())
-                       __sti_mwait(0, 0);
-               else
-                       local_irq_enable();
-               trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
-       } else
-               local_irq_enable();
+       for (;;)
+               halt();
 }
 
 /*
@@ -450,53 +425,6 @@ static void poll_idle(void)
        trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 }
 
-/*
- * mwait selection logic:
- *
- * It depends on the CPU. For AMD CPUs that support MWAIT this is
- * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
- * then depend on a clock divisor and current Pstate of the core. If
- * all cores of a processor are in halt state (C1) the processor can
- * enter the C1E (C1 enhanced) state. If mwait is used this will never
- * happen.
- *
- * idle=mwait overrides this decision and forces the usage of mwait.
- */
-
-#define MWAIT_INFO                     0x05
-#define MWAIT_ECX_EXTENDED_INFO                0x01
-#define MWAIT_EDX_C1                   0xf0
-
-int mwait_usable(const struct cpuinfo_x86 *c)
-{
-       u32 eax, ebx, ecx, edx;
-
-       /* Use mwait if idle=mwait boot option is given */
-       if (boot_option_idle_override == IDLE_FORCE_MWAIT)
-               return 1;
-
-       /*
-        * Any idle= boot option other than idle=mwait means that we must not
-        * use mwait. Eg: idle=halt or idle=poll or idle=nomwait
-        */
-       if (boot_option_idle_override != IDLE_NO_OVERRIDE)
-               return 0;
-
-       if (c->cpuid_level < MWAIT_INFO)
-               return 0;
-
-       cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx);
-       /* Check, whether EDX has extended info about MWAIT */
-       if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
-               return 1;
-
-       /*
-        * edx enumeratios MONITOR/MWAIT extensions. Check, whether
-        * C1  supports MWAIT
-        */
-       return (edx & MWAIT_EDX_C1);
-}
-
 bool amd_e400_c1e_detected;
 EXPORT_SYMBOL(amd_e400_c1e_detected);
 
@@ -561,31 +489,24 @@ static void amd_e400_idle(void)
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-       if (pm_idle == poll_idle && smp_num_siblings > 1) {
+       if (x86_idle == poll_idle && smp_num_siblings > 1)
                pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
-       }
 #endif
-       if (pm_idle)
+       if (x86_idle)
                return;
 
-       if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
-               /*
-                * One CPU supports mwait => All CPUs supports mwait
-                */
-               pr_info("using mwait in idle threads\n");
-               pm_idle = mwait_idle;
-       } else if (cpu_has_amd_erratum(amd_erratum_400)) {
+       if (cpu_has_amd_erratum(amd_erratum_400)) {
                /* E400: APIC timer interrupt does not wake up CPU from C1e */
                pr_info("using AMD E400 aware idle routine\n");
-               pm_idle = amd_e400_idle;
+               x86_idle = amd_e400_idle;
        } else
-               pm_idle = default_idle;
+               x86_idle = default_idle;
 }
 
 void __init init_amd_e400_c1e_mask(void)
 {
        /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
-       if (pm_idle == amd_e400_idle)
+       if (x86_idle == amd_e400_idle)
                zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
 }
 
@@ -596,11 +517,8 @@ static int __init idle_setup(char *str)
 
        if (!strcmp(str, "poll")) {
                pr_info("using polling idle threads\n");
-               pm_idle = poll_idle;
+               x86_idle = poll_idle;
                boot_option_idle_override = IDLE_POLL;
-       } else if (!strcmp(str, "mwait")) {
-               boot_option_idle_override = IDLE_FORCE_MWAIT;
-               WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n");
        } else if (!strcmp(str, "halt")) {
                /*
                 * When the boot option of idle=halt is added, halt is
@@ -609,7 +527,7 @@ static int __init idle_setup(char *str)
                 * To continue to load the CPU idle driver, don't touch
                 * the boot_option_idle_override.
                 */
-               pm_idle = default_idle;
+               x86_idle = default_idle;
                boot_option_idle_override = IDLE_HALT;
        } else if (!strcmp(str, "nomwait")) {
                /*
index ed0fe38..a6ceaed 100644 (file)
@@ -1369,7 +1369,7 @@ static inline void mwait_play_dead(void)
        void *mwait_ptr;
        struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
 
-       if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)))
+       if (!this_cpu_has(X86_FEATURE_MWAIT))
                return;
        if (!this_cpu_has(X86_FEATURE_CLFLSH))
                return;
index 8971a26..94eac5c 100644 (file)
@@ -556,12 +556,9 @@ void __init xen_arch_setup(void)
               COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
 
        /* Set up idle, making sure it calls safe_halt() pvop */
-#ifdef CONFIG_X86_32
-       boot_cpu_data.hlt_works_ok = 1;
-#endif
        disable_cpuidle();
        disable_cpufreq();
-       WARN_ON(set_pm_idle_to_default());
+       WARN_ON(xen_set_default_idle());
        fiddle_vdso();
 #ifdef CONFIG_NUMA
        numa_off = 1;
index e606e36..fc95308 100644 (file)
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
-#include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/dmi.h>
-#include <linux/moduleparam.h>
-#include <linux/sched.h>       /* need_resched() */
-#include <linux/pm_qos.h>
+#include <linux/sched.h>       /* need_resched() */
 #include <linux/clockchips.h>
 #include <linux/cpuidle.h>
-#include <linux/irqflags.h>
 
 /*
  * Include the apic definitions for x86 to have the APIC timer related defines
 #include <asm/apic.h>
 #endif
 
-#include <asm/io.h>
-#include <asm/uaccess.h>
-
 #include <acpi/acpi_bus.h>
 #include <acpi/processor.h>
-#include <asm/processor.h>
 
 #define PREFIX "ACPI: "
 
 #define ACPI_PROCESSOR_CLASS            "processor"
 #define _COMPONENT              ACPI_PROCESSOR_COMPONENT
 ACPI_MODULE_NAME("processor_idle");
-#define PM_TIMER_TICK_NS               (1000000000ULL/ACPI_PM_TIMER_FREQUENCY)
-#define C2_OVERHEAD                    1       /* 1us */
-#define C3_OVERHEAD                    1       /* 1us */
-#define PM_TIMER_TICKS_TO_US(p)                \
-       (((p) * 1000)/(ACPI_PM_TIMER_FREQUENCY/1000))
 
 static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
 module_param(max_cstate, uint, 0000);
@@ -82,10 +66,11 @@ module_param(latency_factor, uint, 0644);
 
 static DEFINE_PER_CPU(struct cpuidle_device *, acpi_cpuidle_device);
 
+static struct acpi_processor_cx *acpi_cstate[CPUIDLE_STATE_MAX];
+
 static int disabled_by_idle_boot_param(void)
 {
        return boot_option_idle_override == IDLE_POLL ||
-               boot_option_idle_override == IDLE_FORCE_MWAIT ||
                boot_option_idle_override == IDLE_HALT;
 }
 
@@ -737,8 +722,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
                struct cpuidle_driver *drv, int index)
 {
        struct acpi_processor *pr;
-       struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-       struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
+       struct acpi_processor_cx *cx = acpi_cstate[index];
 
        pr = __this_cpu_read(processors);
 
@@ -761,8 +745,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
  */
 static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
 {
-       struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-       struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
+       struct acpi_processor_cx *cx = acpi_cstate[index];
 
        ACPI_FLUSH_CPU_CACHE();
 
@@ -792,8 +775,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
                struct cpuidle_driver *drv, int index)
 {
        struct acpi_processor *pr;
-       struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-       struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
+       struct acpi_processor_cx *cx = acpi_cstate[index];
 
        pr = __this_cpu_read(processors);
 
@@ -851,8 +833,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
                struct cpuidle_driver *drv, int index)
 {
        struct acpi_processor *pr;
-       struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-       struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
+       struct acpi_processor_cx *cx = acpi_cstate[index];
 
        pr = __this_cpu_read(processors);
 
@@ -944,13 +925,13 @@ struct cpuidle_driver acpi_idle_driver = {
  * device i.e. per-cpu data
  *
  * @pr: the ACPI processor
+ * @dev : the cpuidle device
  */
-static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr)
+static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr,
+                                          struct cpuidle_device *dev)
 {
        int i, count = CPUIDLE_DRIVER_STATE_START;
        struct acpi_processor_cx *cx;
-       struct cpuidle_state_usage *state_usage;
-       struct cpuidle_device *dev = per_cpu(acpi_cpuidle_device, pr->id);
 
        if (!pr->flags.power_setup_done)
                return -EINVAL;
@@ -969,7 +950,6 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr)
 
        for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
                cx = &pr->power.states[i];
-               state_usage = &dev->states_usage[count];
 
                if (!cx->valid)
                        continue;
@@ -980,8 +960,7 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr)
                    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
                        continue;
 #endif
-
-               cpuidle_set_statedata(state_usage, cx);
+               acpi_cstate[count] = cx;
 
                count++;
                if (count == CPUIDLE_STATE_MAX)
@@ -1105,7 +1084,7 @@ int acpi_processor_hotplug(struct acpi_processor *pr)
        cpuidle_disable_device(dev);
        acpi_processor_get_power_info(pr);
        if (pr->flags.power) {
-               acpi_processor_setup_cpuidle_cx(pr);
+               acpi_processor_setup_cpuidle_cx(pr, dev);
                ret = cpuidle_enable_device(dev);
        }
        cpuidle_resume_and_unlock();
@@ -1163,8 +1142,8 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr)
                                continue;
                        acpi_processor_get_power_info(_pr);
                        if (_pr->flags.power) {
-                               acpi_processor_setup_cpuidle_cx(_pr);
                                dev = per_cpu(acpi_cpuidle_device, cpu);
+                               acpi_processor_setup_cpuidle_cx(_pr, dev);
                                cpuidle_enable_device(dev);
                        }
                }
@@ -1233,7 +1212,7 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr)
                        return -ENOMEM;
                per_cpu(acpi_cpuidle_device, pr->id) = dev;
 
-               acpi_processor_setup_cpuidle_cx(pr);
+               acpi_processor_setup_cpuidle_cx(pr, dev);
 
                /* Register per-cpu cpuidle_device. Cpuidle driver
                 * must already be registered before registering device
index 2df9414..5d66750 100644 (file)
@@ -74,7 +74,7 @@ static struct cpuidle_driver intel_idle_driver = {
        .en_core_tk_irqen = 1,
 };
 /* intel_idle.max_cstate=0 disables driver */
-static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1;
+static int max_cstate = CPUIDLE_STATE_MAX - 1;
 
 static unsigned int mwait_substates;
 
@@ -90,6 +90,7 @@ struct idle_cpu {
         * Indicate which enable bits to clear here.
         */
        unsigned long auto_demotion_disable_flags;
+       bool disable_promotion_to_c1e;
 };
 
 static const struct idle_cpu *icpu;
@@ -109,162 +110,206 @@ static struct cpuidle_state *cpuidle_state_table;
 #define CPUIDLE_FLAG_TLB_FLUSHED       0x10000
 
 /*
+ * MWAIT takes an 8-bit "hint" in EAX "suggesting"
+ * the C-state (top nibble) and sub-state (bottom nibble)
+ * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
+ *
+ * We store the hint at the top of our "flags" for each state.
+ */
+#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
+#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
+
+/*
  * States are indexed by the cstate number,
  * which is also the index into the MWAIT hint array.
  * Thus C0 is a dummy.
  */
-static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = {
-       { /* MWAIT C0 */ },
-       { /* MWAIT C1 */
+static struct cpuidle_state nehalem_cstates[CPUIDLE_STATE_MAX] = {
+       {
                .name = "C1-NHM",
                .desc = "MWAIT 0x00",
-               .flags = CPUIDLE_FLAG_TIME_VALID,
+               .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
                .exit_latency = 3,
                .target_residency = 6,
                .enter = &intel_idle },
-       { /* MWAIT C2 */
+       {
+               .name = "C1E-NHM",
+               .desc = "MWAIT 0x01",
+               .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+               .exit_latency = 10,
+               .target_residency = 20,
+               .enter = &intel_idle },
+       {
                .name = "C3-NHM",
                .desc = "MWAIT 0x10",
-               .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 20,
                .target_residency = 80,
                .enter = &intel_idle },
-       { /* MWAIT C3 */
+       {
                .name = "C6-NHM",
                .desc = "MWAIT 0x20",
-               .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 200,
                .target_residency = 800,
                .enter = &intel_idle },
+       {
+               .enter = NULL }
 };
 
-static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = {
-       { /* MWAIT C0 */ },
-       { /* MWAIT C1 */
+static struct cpuidle_state snb_cstates[CPUIDLE_STATE_MAX] = {
+       {
                .name = "C1-SNB",
                .desc = "MWAIT 0x00",
-               .flags = CPUIDLE_FLAG_TIME_VALID,
-               .exit_latency = 1,
-               .target_residency = 1,
+               .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+               .exit_latency = 2,
+               .target_residency = 2,
                .enter = &intel_idle },
-       { /* MWAIT C2 */
+       {
+               .name = "C1E-SNB",
+               .desc = "MWAIT 0x01",
+               .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+               .exit_latency = 10,
+               .target_residency = 20,
+               .enter = &intel_idle },
+       {
                .name = "C3-SNB",
                .desc = "MWAIT 0x10",
-               .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 80,
                .target_residency = 211,
                .enter = &intel_idle },
-       { /* MWAIT C3 */
+       {
                .name = "C6-SNB",
                .desc = "MWAIT 0x20",
-               .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 104,
                .target_residency = 345,
                .enter = &intel_idle },
-       { /* MWAIT C4 */
+       {
                .name = "C7-SNB",
                .desc = "MWAIT 0x30",
-               .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 109,
                .target_residency = 345,
                .enter = &intel_idle },
+       {
+               .enter = NULL }
 };
 
-static struct cpuidle_state ivb_cstates[MWAIT_MAX_NUM_CSTATES] = {
-       { /* MWAIT C0 */ },
-       { /* MWAIT C1 */
+static struct cpuidle_state ivb_cstates[CPUIDLE_STATE_MAX] = {
+       {
                .name = "C1-IVB",
                .desc = "MWAIT 0x00",
-               .flags = CPUIDLE_FLAG_TIME_VALID,
+               .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
                .exit_latency = 1,
                .target_residency = 1,
                .enter = &intel_idle },
-       { /* MWAIT C2 */
+       {
+               .name = "C1E-IVB",
+               .desc = "MWAIT 0x01",
+               .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+               .exit_latency = 10,
+               .target_residency = 20,
+               .enter = &intel_idle },
+       {
                .name = "C3-IVB",
                .desc = "MWAIT 0x10",
-               .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 59,
                .target_residency = 156,
                .enter = &intel_idle },
-       { /* MWAIT C3 */
+       {
                .name = "C6-IVB",
                .desc = "MWAIT 0x20",
-               .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 80,
                .target_residency = 300,
                .enter = &intel_idle },
-       { /* MWAIT C4 */
+       {
                .name = "C7-IVB",
                .desc = "MWAIT 0x30",
-               .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 87,
                .target_residency = 300,
                .enter = &intel_idle },
+       {
+               .enter = NULL }
 };
 
-static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = {
-       { /* MWAIT C0 */ },
-       { /* MWAIT C1 */
-               .name = "C1-ATM",
+static struct cpuidle_state hsw_cstates[CPUIDLE_STATE_MAX] = {
+       {
+               .name = "C1-HSW",
                .desc = "MWAIT 0x00",
-               .flags = CPUIDLE_FLAG_TIME_VALID,
-               .exit_latency = 1,
-               .target_residency = 4,
+               .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+               .exit_latency = 2,
+               .target_residency = 2,
+               .enter = &intel_idle },
+       {
+               .name = "C1E-HSW",
+               .desc = "MWAIT 0x01",
+               .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+               .exit_latency = 10,
+               .target_residency = 20,
+               .enter = &intel_idle },
+       {
+               .name = "C3-HSW",
+               .desc = "MWAIT 0x10",
+               .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+               .exit_latency = 33,
+               .target_residency = 100,
+               .enter = &intel_idle },
+       {
+               .name = "C6-HSW",
+               .desc = "MWAIT 0x20",
+               .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+               .exit_latency = 133,
+               .target_residency = 400,
+               .enter = &intel_idle },
+       {
+               .name = "C7s-HSW",
+               .desc = "MWAIT 0x32",
+               .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+               .exit_latency = 166,
+               .target_residency = 500,
+               .enter = &intel_idle },
+       {
+               .enter = NULL }
+};
+
+static struct cpuidle_state atom_cstates[CPUIDLE_STATE_MAX] = {
+       {
+               .name = "C1E-ATM",
+               .desc = "MWAIT 0x00",
+               .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+               .exit_latency = 10,
+               .target_residency = 20,
                .enter = &intel_idle },
-       { /* MWAIT C2 */
+       {
                .name = "C2-ATM",
                .desc = "MWAIT 0x10",
-               .flags = CPUIDLE_FLAG_TIME_VALID,
+               .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID,
                .exit_latency = 20,
                .target_residency = 80,
                .enter = &intel_idle },
-       { /* MWAIT C3 */ },
-       { /* MWAIT C4 */
+       {
                .name = "C4-ATM",
                .desc = "MWAIT 0x30",
-               .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 100,
                .target_residency = 400,
                .enter = &intel_idle },
-       { /* MWAIT C5 */ },
-       { /* MWAIT C6 */
+       {
                .name = "C6-ATM",
                .desc = "MWAIT 0x52",
-               .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+               .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
                .exit_latency = 140,
                .target_residency = 560,
                .enter = &intel_idle },
+       {
+               .enter = NULL }
 };
 
-static long get_driver_data(int cstate)
-{
-       int driver_data;
-       switch (cstate) {
-
-       case 1: /* MWAIT C1 */
-               driver_data = 0x00;
-               break;
-       case 2: /* MWAIT C2 */
-               driver_data = 0x10;
-               break;
-       case 3: /* MWAIT C3 */
-               driver_data = 0x20;
-               break;
-       case 4: /* MWAIT C4 */
-               driver_data = 0x30;
-               break;
-       case 5: /* MWAIT C5 */
-               driver_data = 0x40;
-               break;
-       case 6: /* MWAIT C6 */
-               driver_data = 0x52;
-               break;
-       default:
-               driver_data = 0x00;
-       }
-       return driver_data;
-}
-
 /**
  * intel_idle
  * @dev: cpuidle_device
@@ -278,8 +323,7 @@ static int intel_idle(struct cpuidle_device *dev,
 {
        unsigned long ecx = 1; /* break on interrupt flag */
        struct cpuidle_state *state = &drv->states[index];
-       struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-       unsigned long eax = (unsigned long)cpuidle_get_statedata(state_usage);
+       unsigned long eax = flg2MWAIT(state->flags);
        unsigned int cstate;
        int cpu = smp_processor_id();
 
@@ -362,10 +406,19 @@ static void auto_demotion_disable(void *dummy)
        msr_bits &= ~(icpu->auto_demotion_disable_flags);
        wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
 }
+static void c1e_promotion_disable(void *dummy)
+{
+       unsigned long long msr_bits;
+
+       rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
+       msr_bits &= ~0x2;
+       wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+}
 
 static const struct idle_cpu idle_cpu_nehalem = {
        .state_table = nehalem_cstates,
        .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
+       .disable_promotion_to_c1e = true,
 };
 
 static const struct idle_cpu idle_cpu_atom = {
@@ -379,10 +432,17 @@ static const struct idle_cpu idle_cpu_lincroft = {
 
 static const struct idle_cpu idle_cpu_snb = {
        .state_table = snb_cstates,
+       .disable_promotion_to_c1e = true,
 };
 
 static const struct idle_cpu idle_cpu_ivb = {
        .state_table = ivb_cstates,
+       .disable_promotion_to_c1e = true,
+};
+
+static const struct idle_cpu idle_cpu_hsw = {
+       .state_table = hsw_cstates,
+       .disable_promotion_to_c1e = true,
 };
 
 #define ICPU(model, cpu) \
@@ -402,6 +462,9 @@ static const struct x86_cpu_id intel_idle_ids[] = {
        ICPU(0x2d, idle_cpu_snb),
        ICPU(0x3a, idle_cpu_ivb),
        ICPU(0x3e, idle_cpu_ivb),
+       ICPU(0x3c, idle_cpu_hsw),
+       ICPU(0x3f, idle_cpu_hsw),
+       ICPU(0x45, idle_cpu_hsw),
        {}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
@@ -484,32 +547,31 @@ static int intel_idle_cpuidle_driver_init(void)
 
        drv->state_count = 1;
 
-       for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
-               int num_substates;
+       for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
+               int num_substates, mwait_hint, mwait_cstate, mwait_substate;
 
-               if (cstate > max_cstate) {
+               if (cpuidle_state_table[cstate].enter == NULL)
+                       break;
+
+               if (cstate + 1 > max_cstate) {
                        printk(PREFIX "max_cstate %d reached\n",
                                max_cstate);
                        break;
                }
 
+               mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
+               mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
+               mwait_substate = MWAIT_HINT2SUBSTATE(mwait_hint);
+
                /* does the state exist in CPUID.MWAIT? */
-               num_substates = (mwait_substates >> ((cstate) * 4))
+               num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
                                        & MWAIT_SUBSTATE_MASK;
-               if (num_substates == 0)
-                       continue;
-               /* is the state not enabled? */
-               if (cpuidle_state_table[cstate].enter == NULL) {
-                       /* does the driver not know about the state? */
-                       if (*cpuidle_state_table[cstate].name == '\0')
-                               pr_debug(PREFIX "unaware of model 0x%x"
-                                       " MWAIT %d please"
-                                       " contact lenb@kernel.org\n",
-                               boot_cpu_data.x86_model, cstate);
+
+               /* if sub-state in table is not enumerated by CPUID */
+               if ((mwait_substate + 1) > num_substates)
                        continue;
-               }
 
-               if ((cstate > 2) &&
+               if (((mwait_cstate + 1) > 2) &&
                        !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
                        mark_tsc_unstable("TSC halts in idle"
                                        " states deeper than C2");
@@ -523,6 +585,9 @@ static int intel_idle_cpuidle_driver_init(void)
        if (icpu->auto_demotion_disable_flags)
                on_each_cpu(auto_demotion_disable, NULL, 1);
 
+       if (icpu->disable_promotion_to_c1e)     /* each-cpu is redundant */
+               on_each_cpu(c1e_promotion_disable, NULL, 1);
+
        return 0;
 }
 
@@ -541,25 +606,28 @@ static int intel_idle_cpu_init(int cpu)
 
        dev->state_count = 1;
 
-       for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
-               int num_substates;
+       for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
+               int num_substates, mwait_hint, mwait_cstate, mwait_substate;
+
+               if (cpuidle_state_table[cstate].enter == NULL)
+                       continue;
 
-               if (cstate > max_cstate) {
+               if (cstate + 1 > max_cstate) {
                        printk(PREFIX "max_cstate %d reached\n", max_cstate);
                        break;
                }
 
+               mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
+               mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
+               mwait_substate = MWAIT_HINT2SUBSTATE(mwait_hint);
+
                /* does the state exist in CPUID.MWAIT? */
-               num_substates = (mwait_substates >> ((cstate) * 4))
-                       & MWAIT_SUBSTATE_MASK;
-               if (num_substates == 0)
-                       continue;
-               /* is the state not enabled? */
-               if (cpuidle_state_table[cstate].enter == NULL)
-                       continue;
+               num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
+                                       & MWAIT_SUBSTATE_MASK;
 
-               dev->states_usage[dev->state_count].driver_data =
-                       (void *)get_driver_data(cstate);
+               /* if sub-state in table is not enumerated by CPUID */
+               if ((mwait_substate + 1) > num_substates)
+                       continue;
 
                dev->state_count += 1;
        }
index 24cd103..480c14d 100644 (file)
@@ -32,8 +32,6 @@ struct cpuidle_driver;
  ****************************/
 
 struct cpuidle_state_usage {
-       void            *driver_data;
-
        unsigned long long      disable;
        unsigned long long      usage;
        unsigned long long      time; /* in US */
@@ -62,26 +60,6 @@ struct cpuidle_state {
 
 #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
 
-/**
- * cpuidle_get_statedata - retrieves private driver state data
- * @st_usage: the state usage statistics
- */
-static inline void *cpuidle_get_statedata(struct cpuidle_state_usage *st_usage)
-{
-       return st_usage->driver_data;
-}
-
-/**
- * cpuidle_set_statedata - stores private driver state data
- * @st_usage: the state usage statistics
- * @data: the private data
- */
-static inline void
-cpuidle_set_statedata(struct cpuidle_state_usage *st_usage, void *data)
-{
-       st_usage->driver_data = data;
-}
-
 struct cpuidle_device {
        unsigned int            registered:1;
        unsigned int            enabled:1;
index 03d7bb1..97bcf23 100644 (file)
@@ -31,7 +31,6 @@
 /*
  * Callbacks for platform drivers to implement.
  */
-extern void (*pm_idle)(void);
 extern void (*pm_power_off)(void);
 extern void (*pm_power_off_prepare)(void);
 
index 0d7dc2c..b4ddb74 100644 (file)
@@ -31,8 +31,6 @@ The \fB-S\fP option limits output to a 1-line System Summary for each interval.
 .PP
 The \fB-v\fP option increases verbosity.
 .PP
-The \fB-s\fP option prints the SMI counter, equivalent to "-c 0x34"
-.PP
 The \fB-c MSR#\fP option includes the delta of the specified 32-bit MSR counter.
 .PP
 The \fB-C MSR#\fP option includes the delta of the specified 64-bit MSR counter.
@@ -186,26 +184,24 @@ This is a weighted average, where the weight is %c0.  ie. it is the total number
 un-halted cycles elapsed per time divided by the number of CPUs.
 .SH SMI COUNTING EXAMPLE
 On Intel Nehalem and newer processors, MSR 0x34 is a System Management Mode Interrupt (SMI) counter.
-Using the -m option, you can display how many SMIs have fired since reset, or if there
-are SMIs during the measurement interval, you can display the delta using the -d option.
+This counter is shown by default under the "SMI" column.
 .nf
-[root@x980 ~]# turbostat -m 0x34
-cor CPU    %c0  GHz  TSC   MSR 0x034    %c1    %c3    %c6   %pc3   %pc6
-          1.41 1.82 3.38  0x00000000   8.92  37.82  51.85  17.37   0.55
-  0   0   3.73 2.03 3.38  0x00000055   1.72  48.25  46.31  17.38   0.55
-  0   6   0.14 1.63 3.38  0x00000056   5.30
-  1   2   2.51 1.80 3.38  0x00000056  15.65  29.33  52.52
-  1   8   0.10 1.65 3.38  0x00000056  18.05
-  2   4   1.16 1.68 3.38  0x00000056   5.87  24.47  68.50
-  2  10   0.10 1.63 3.38  0x00000056   6.93
-  8   1   3.84 1.91 3.38  0x00000056   1.36  50.65  44.16
-  8   7   0.08 1.64 3.38  0x00000056   5.12
-  9   3   1.82 1.73 3.38  0x00000056   7.59  24.21  66.38
-  9   9   0.09 1.68 3.38  0x00000056   9.32
- 10   5   1.66 1.65 3.38  0x00000056  15.10  50.00  33.23
- 10  11   1.72 1.65 3.38  0x00000056  15.05
+[root@x980 ~]# turbostat
+cor CPU    %c0  GHz  TSC SMI    %c1    %c3    %c6 CTMP   %pc3   %pc6
+          0.11 1.91 3.38   0   1.84   0.26  97.79   29   0.82  83.87
+  0   0   0.40 1.63 3.38   0  10.27   0.12  89.20   20   0.82  83.88
+  0   6   0.06 1.63 3.38   0  10.61
+  1   2   0.37 2.63 3.38   0   0.02   0.10  99.51   22
+  1   8   0.01 1.62 3.38   0   0.39
+  2   4   0.07 1.62 3.38   0   0.04   0.07  99.82   23
+  2  10   0.02 1.62 3.38   0   0.09
+  8   1   0.23 1.64 3.38   0   0.10   1.07  98.60   24
+  8   7   0.02 1.64 3.38   0   0.31
+  9   3   0.03 1.62 3.38   0   0.03   0.05  99.89   29
+  9   9   0.02 1.62 3.38   0   0.05
+ 10   5   0.07 1.62 3.38   0   0.08   0.12  99.73   27
+ 10  11   0.03 1.62 3.38   0   0.13
 ^C
-[root@x980 ~]# 
 .fi
 .SH NOTES
 
index ce6d460..6f3214e 100644 (file)
@@ -58,6 +58,7 @@ unsigned int extra_msr_offset32;
 unsigned int extra_msr_offset64;
 unsigned int extra_delta_offset32;
 unsigned int extra_delta_offset64;
+int do_smi;
 double bclk;
 unsigned int show_pkg;
 unsigned int show_core;
@@ -99,6 +100,7 @@ struct thread_data {
        unsigned long long extra_delta64;
        unsigned long long extra_msr32;
        unsigned long long extra_delta32;
+       unsigned int smi_count;
        unsigned int cpu_id;
        unsigned int flags;
 #define CPU_IS_FIRST_THREAD_IN_CORE    0x2
@@ -248,6 +250,8 @@ void print_header(void)
        if (has_aperf)
                outp += sprintf(outp, "  GHz");
        outp += sprintf(outp, "  TSC");
+       if (do_smi)
+               outp += sprintf(outp, " SMI");
        if (extra_delta_offset32)
                outp += sprintf(outp, "  count 0x%03X", extra_delta_offset32);
        if (extra_delta_offset64)
@@ -314,6 +318,8 @@ int dump_counters(struct thread_data *t, struct core_data *c,
                        extra_msr_offset32, t->extra_msr32);
                fprintf(stderr, "msr0x%x: %016llX\n",
                        extra_msr_offset64, t->extra_msr64);
+               if (do_smi)
+                       fprintf(stderr, "SMI: %08X\n", t->smi_count);
        }
 
        if (c) {
@@ -352,6 +358,7 @@ int dump_counters(struct thread_data *t, struct core_data *c,
  * RAM_W: %5.2
  * GHz: "GHz" 3 columns %3.2
  * TSC: "TSC" 3 columns %3.2
+ * SMI: "SMI" 4 columns %4d
  * percentage " %pc3" %6.2
  * Perf Status percentage: %5.2
  * "CTMP" 4 columns %4d
@@ -431,6 +438,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
        /* TSC */
        outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float);
 
+       /* SMI */
+       if (do_smi)
+               outp += sprintf(outp, "%4d", t->smi_count);
+
        /* delta */
        if (extra_delta_offset32)
                outp += sprintf(outp, "  %11llu", t->extra_delta32);
@@ -645,6 +656,9 @@ delta_thread(struct thread_data *new, struct thread_data *old,
         */
        old->extra_msr32 = new->extra_msr32;
        old->extra_msr64 = new->extra_msr64;
+
+       if (do_smi)
+               old->smi_count = new->smi_count - old->smi_count;
 }
 
 int delta_cpu(struct thread_data *t, struct core_data *c,
@@ -672,6 +686,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        t->mperf = 0;
        t->c1 = 0;
 
+       t->smi_count = 0;
        t->extra_delta32 = 0;
        t->extra_delta64 = 0;
 
@@ -802,6 +817,11 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                        return -4;
        }
 
+       if (do_smi) {
+               if (get_msr(cpu, MSR_SMI_COUNT, &msr))
+                       return -5;
+               t->smi_count = msr & 0xFFFFFFFF;
+       }
        if (extra_delta_offset32) {
                if (get_msr(cpu, extra_delta_offset32, &msr))
                        return -5;
@@ -908,8 +928,7 @@ void print_verbose_header(void)
 
        get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
 
-       if (verbose)
-               fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
+       fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
 
        ratio = (msr >> 40) & 0xFF;
        fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
@@ -919,13 +938,16 @@ void print_verbose_header(void)
        fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
                ratio, bclk, ratio * bclk);
 
+       get_msr(0, MSR_IA32_POWER_CTL, &msr);
+       fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E: %sabled)\n",
+               msr, msr & 0x2 ? "EN" : "DIS");
+
        if (!do_ivt_turbo_ratio_limit)
                goto print_nhm_turbo_ratio_limits;
 
        get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);
 
-       if (verbose)
-               fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
+       fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
 
        ratio = (msr >> 56) & 0xFF;
        if (ratio)
@@ -1016,8 +1038,7 @@ print_nhm_turbo_ratio_limits:
 
        get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
 
-       if (verbose)
-               fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
+       fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
 
        ratio = (msr >> 56) & 0xFF;
        if (ratio)
@@ -1397,6 +1418,9 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
        case 0x2D:      /* SNB Xeon */
        case 0x3A:      /* IVB */
        case 0x3E:      /* IVB Xeon */
+       case 0x3C:      /* HSW */
+       case 0x3F:      /* HSW */
+       case 0x45:      /* HSW */
                return 1;
        case 0x2E:      /* Nehalem-EX Xeon - Beckton */
        case 0x2F:      /* Westmere-EX Xeon - Eagleton */
@@ -1488,6 +1512,9 @@ void rapl_probe(unsigned int family, unsigned int model)
        switch (model) {
        case 0x2A:
        case 0x3A:
+       case 0x3C:      /* HSW */
+       case 0x3F:      /* HSW */
+       case 0x45:      /* HSW */
                do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
                break;
        case 0x2D:
@@ -1724,6 +1751,9 @@ int is_snb(unsigned int family, unsigned int model)
        case 0x2D:
        case 0x3A:      /* IVB */
        case 0x3E:      /* IVB Xeon */
+       case 0x3C:      /* HSW */
+       case 0x3F:      /* HSW */
+       case 0x45:      /* HSW */
                return 1;
        }
        return 0;
@@ -1883,6 +1913,7 @@ void check_cpuid()
 
        do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
        do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */
+       do_smi = do_nhm_cstates;
        do_snb_cstates = is_snb(family, model);
        bclk = discover_bclk(family, model);
 
@@ -2219,9 +2250,6 @@ void cmdline(int argc, char **argv)
                case 'c':
                        sscanf(optarg, "%x", &extra_delta_offset32);
                        break;
-               case 's':
-                       extra_delta_offset32 = 0x34;    /* SMI counter */
-                       break;
                case 'C':
                        sscanf(optarg, "%x", &extra_delta_offset64);
                        break;
@@ -2248,7 +2276,7 @@ int main(int argc, char **argv)
        cmdline(argc, argv);
 
        if (verbose)
-               fprintf(stderr, "turbostat v3.0 November 23, 2012"
+               fprintf(stderr, "turbostat v3.2 February 11, 2013"
                        " - Len Brown <lenb@kernel.org>\n");
 
        turbostat_init();