TEST mrst idle: update to latest MRST idle code
author Len Brown <lenb@kernel.org>
Fri, 12 Aug 2011 21:42:29 +0000 (22:42 +0100)
committer mgross <mark.gross@intel.com>
Wed, 9 Nov 2011 20:37:12 +0000 (12:37 -0800)
replace:

arch/x86/kernel/platform/mrst/pmu.c
arch/x86/kernel/platform/mrst/pmu.h
drivers/idle/intel_idle.c (also: sync w/ 3.1, retain penwell entry)
mrst_s0i3.c, mrst_s0i3_asm.S

minor associated changes:

arch/x86/include/asm/mrst.h
arch/x86/kernel/smpboot.c

n.b. Actual s0i3 support is EXCLUDED from intel_idle for now

Change-Id: Ibd214229062f19a8814ef2371a05f1239758bfa3
Signed-off-by: Len Brown <len.brown@intel.com>
arch/x86/include/asm/mrst.h
arch/x86/kernel/smpboot.c
arch/x86/platform/mrst/pmu.c
arch/x86/platform/mrst/pmu.h
drivers/idle/intel_idle.c
drivers/idle/mrst_s0i3.c
drivers/idle/mrst_s0i3_asm.S

index 2dc497a..18ba6d2 100644 (file)
@@ -83,19 +83,18 @@ extern int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t state);
 
 #ifdef CONFIG_X86_MRST
 extern int mrst_pmu_s0i3_entry(void);
-extern void mrst_pmu_pending_set(int value);
 extern void mrst_pmu_disable_msi(void);
+extern u32 mrst_pmu_msi_is_disabled(void);
 extern void mrst_pmu_enable_msi(void);
-extern void mrst_pmu_s0i3_prepare(void);
 extern void mrst_reserve_memory(void);
 #else
 static inline void mrst_reserve_memory(void) { }
 #endif /* !CONFIG_X86_MRST */
 
 #include <linux/cpuidle.h>
-extern int mrst_idle(struct cpuidle_device *dev, struct cpuidle_state *state);
+extern int mrst_s0i3(struct cpuidle_device *dev, struct cpuidle_state *state);
 extern void mrst_s0i3_resume(void);
-extern int mrst_pmu_validate_cstates(struct cpuidle_device *dev);
+extern int mrst_pmu_invalid_cstates(void);
 extern const char s0i3_trampoline_data[], s0i3_trampoline_data_end[];
 
 #endif /* _ASM_X86_MRST_H */
index c213b66..11b9630 100644 (file)
@@ -526,7 +526,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
                                                    &delays);
 }
 
-int __cpuinit
+int
 wakeup_secondary_cpu_via_init_delays(int phys_apicid,
        unsigned long start_eip, const struct init_wakeup_delays *delays)
 {
index d5a9fb6..c0ac06d 100644 (file)
 
 #include <linux/cpuidle.h>
 #include <linux/debugfs.h>
+#include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/seq_file.h>
-#include <linux/delay.h>
+#include <linux/sfi.h>
+#include <asm/intel_scu_ipc.h>
 #include "pmu.h"
 
-/* next #warning sequence number: #41 */
+#define IPCMSG_FW_REVISION     0xF4
 
 struct mrst_device {
        u16 pci_dev_num;        /* DEBUG only */
@@ -67,25 +70,34 @@ static struct mrst_device mrst_devs[] = {
 /* 24 */ { 0x4110, 0 },                        /* Lincroft */
 };
 
-static u16 mrst_lss9_pci_ids[] = {0x080a, 0x0814, 0x0815, 0};
+/* n.b. We ignore PCI-id 0x815 in LSS9 b/c Linux has no driver for it */
+static u16 mrst_lss9_pci_ids[] = {0x080a, 0x0814, 0};
 static u16 mrst_lss10_pci_ids[] = {0x0800, 0x0801, 0x0802, 0x0803,
                                        0x0804, 0x0805, 0x080f, 0};
 
+/* handle concurrent SMP invocations of pmu_pci_set_power_state() */
+static spinlock_t mrst_pmu_power_state_lock;
+
 static unsigned int wake_counters[MRST_NUM_LSS];       /* DEBUG only */
 static unsigned int pmu_irq_stats[INT_INVALID + 1];    /* DEBUG only */
 
-static DECLARE_COMPLETION(s0ix_completion);
-
-#warning pri#2 #3 s0i3_pmu_command_pending needs locking?
-static bool s0i3_pmu_command_pending;
-
 static int graphics_is_off;
 static int lss_s0i3_enabled;
-static u32 sdhc2_sss;
+static bool mrst_pmu_s0i3_enable;
+
+/*  debug counters */
+static u32 pmu_wait_ready_calls;
+static u32 pmu_wait_ready_udelays;
+static u32 pmu_wait_ready_udelays_max;
+static u32 pmu_wait_done_calls;
+static u32 pmu_wait_done_udelays;
+static u32 pmu_wait_done_udelays_max;
+static u32 pmu_set_power_state_entry;
+static u32 pmu_set_power_state_send_cmd;
 
 static struct mrst_device *pci_id_2_mrst_dev(u16 pci_dev_num)
 {
-       int index;
+       int index = 0;
 
        if ((pci_dev_num >= 0x0800) && (pci_dev_num <= 0x815))
                index = pci_dev_num - 0x800;
@@ -95,29 +107,15 @@ static struct mrst_device *pci_id_2_mrst_dev(u16 pci_dev_num)
                index = 23;
        else if (pci_dev_num == 0x4110)
                index = 24;
-       else
-               BUG();
 
-       BUG_ON(pci_dev_num != mrst_devs[index].pci_dev_num);
+       if (pci_dev_num != mrst_devs[index].pci_dev_num) {
+               WARN_ONCE(1, FW_BUG "Unknown PCI device 0x%04X\n", pci_dev_num);
+               return 0;
+       }
 
        return &mrst_devs[index];
 }
 
-/*
- * graphics_is_in_d3() reads PMU1.PM_SSS[0] directly to verify graphics D3.
- *
- * In production, the graphics driver should invoke pci_set_power_state(),
- * we capture that in graphics_is_off, and the graphics driver will put
- * itself into D3 via the Punit.
- *
- * When everybody has a production graphics driver, this extra read
- * of the Punit in the idle path can go away.
- */
-static int graphics_is_in_d3(void)
-{
-       return (0xC == (0xC & inl(0x1130)));    /* DEBUG only */
-}
-
 /**
  * mrst_pmu_validate_cstates
  * @dev: cpuidle_device
@@ -131,66 +129,31 @@ static int graphics_is_in_d3(void)
 #define IDLE_STATE4_IS_C6      4
 #define IDLE_STATE5_IS_S0I3    5
 
-int mrst_pmu_validate_cstates(struct cpuidle_device *dev)
+int mrst_pmu_invalid_cstates(void)
 {
        int cpu = smp_processor_id();
 
-       BUG_ON(dev->state_count != 6);  /* depends on intel_idle.c table */
-
        /*
-        * invalidated S0i3 if: PMU is not initialized, or
-        * CPU1 is active, or there is a still-unprocessed PMU command, or
-        * device LSS is insufficient, or the GPU is active,
+        * Demote to C4 if the PMU is busy.
+        * Since LSS changes leave the busy bit clear...
+        * busy means either the PMU is waiting for an ACK-C6 that
+        * isn't coming due to an MWAIT that returned immediately;
+        * or we returned from S0i3 successfully, and the PMU
+        * is not done sending us interrupts.
         */
-       if (!pmu_reg || !cpumask_equal(cpu_online_mask, cpumask_of(cpu)) ||
-           s0i3_pmu_command_pending || !lss_s0i3_enabled ||
-           (!graphics_is_off && !graphics_is_in_d3()))
-
-               dev->states[IDLE_STATE5_IS_S0I3].flags |= CPUIDLE_FLAG_IGNORE;
-       else
-               dev->states[IDLE_STATE5_IS_S0I3].flags &= ~CPUIDLE_FLAG_IGNORE;
+       if (pmu_read_busy_status())
+               return 1 << IDLE_STATE4_IS_C6 | 1 << IDLE_STATE5_IS_S0I3;
 
        /*
-        * If there is a pending PMU command, we cannot enter C6.
+        * Disallow S0i3 if: PMU is not initialized, or CPU1 is active,
+        * or if device LSS is insufficient, or the GPU is active,
+        * or if it has been explicitly disabled.
         */
-       if (s0i3_pmu_command_pending)
-               dev->states[IDLE_STATE4_IS_C6].flags |= CPUIDLE_FLAG_IGNORE;
+       if (!pmu_reg || !cpumask_equal(cpu_online_mask, cpumask_of(cpu)) ||
+           !lss_s0i3_enabled || !graphics_is_off || !mrst_pmu_s0i3_enable)
+               return 1 << IDLE_STATE5_IS_S0I3;
        else
-               dev->states[IDLE_STATE4_IS_C6].flags &= ~CPUIDLE_FLAG_IGNORE;
-
-       return 0;
-}
-
-/*
- * Send a command to the PMU to shut down the south complex
- */
-
-static void s0i3_wait_for_pmu(void)
-{
-       while (pmu_read_sts() & (1 << 8))
-               cpu_relax();
-}
-
-void mrst_pmu_pending_set(int value)
-{
-       s0i3_pmu_command_pending = value;
-}
-
-void mrst_pmu_s0i3_prepare(void)
-{
-       s0i3_wait_for_pmu();
-
-       /* Clear any possible error conditions */
-       pmu_write_ics(0x300);
-
-       /* set wake control to restore current state */
-       pmu_write_wssc(pmu_read_sss());
-
-       /* Put all Langwell sub-systems into D0i2 */
-       pmu_write_ssc(SUB_SYS_ALL_D0I2);
-
-       /* Avoid entering conventional C6 until the PMU command has cleared */
-       s0i3_pmu_command_pending = true;
+               return 0;
 }
 
 /*
@@ -214,42 +177,77 @@ int mrst_pmu_s0i3_entry(void)
 {
        int status;
 
+       /* Clear any possible error conditions */
+       pmu_write_ics(0x300);
+
+       /* set wake control to current D-states */
+       pmu_write_wssc(S0I3_SSS_TARGET);
+
        status = mrst_s0i3_entry(PM_S0I3_COMMAND, &pmu_reg->pm_cmd);
        pmu_update_wake_counters();
        return status;
 }
 
-static int pmu_wait_not_busy(void)
+/* poll for maximum of 5ms for busy bit to clear */
+static int pmu_wait_ready(void)
 {
-       int pmu_busy_retry = 500;
+       int udelays;
+
+       pmu_wait_ready_calls++;
+
+       for (udelays = 0; udelays < 500; ++udelays) {
+               if (udelays > pmu_wait_ready_udelays_max)
+                       pmu_wait_ready_udelays_max = udelays;
 
-       while (--pmu_busy_retry) {
                if (pmu_read_busy_status() == 0)
                        return 0;
 
-               udelay(100);
+               udelay(10);
+               pmu_wait_ready_udelays++;
        }
 
-       pmu_busy_retry = 450;
-       while (--pmu_busy_retry) {
+       /*
+        * if this fires, observe
+        * /sys/kernel/debug/mrst_pmu_wait_ready_calls
+        * /sys/kernel/debug/mrst_pmu_wait_ready_udelays
+        */
+       WARN_ONCE(1, "SCU not ready for 5ms");
+       return -EBUSY;
+}
+/* poll for maximum of 50ms for busy bit to clear */
+static int pmu_wait_done(void)
+{
+       int udelays;
+
+       pmu_wait_done_calls++;
+
+       for (udelays = 0; udelays < 500; ++udelays) {
+               if (udelays > pmu_wait_done_udelays_max)
+                       pmu_wait_done_udelays_max = udelays;
+
                if (pmu_read_busy_status() == 0)
                        return 0;
 
-               mdelay(1);
+               udelay(100);
+               pmu_wait_done_udelays++;
        }
-       WARN(1, "pmu2 stays busy! It looks hung.");
+
+       /*
+        * if this fires, observe
+        * /sys/kernel/debug/mrst_pmu_wait_done_calls
+        * /sys/kernel/debug/mrst_pmu_wait_done_udelays
+        */
+       WARN_ONCE(1, "SCU not done for 50ms");
        return -EBUSY;
 }
 
-void mrst_pmu_disable_msi(void)
+u32 mrst_pmu_msi_is_disabled(void)
 {
-       pmu_wait_not_busy();
-       pmu_msi_disable();
+       return pmu_msi_is_disabled();
 }
 
 void mrst_pmu_enable_msi(void)
 {
-       pmu_wait_not_busy();
        pmu_msi_enable();
 }
 
@@ -280,13 +278,8 @@ static irqreturn_t pmu_irq(int irq, void *dummy)
                pmu_irq_stats[INT_INVALID]++;
        }
 
-       s0i3_pmu_command_pending = false;
-
        pmu_write_ics(pmu_ics.value); /* Clear pending interrupt */
 
-       #warning pri#2 #23 pmu_irq: complete_all(s0ix_completion) always?
-       complete_all(&s0ix_completion);
-
        return IRQ_HANDLED;
 }
 
@@ -297,7 +290,7 @@ static int pci_2_mrst_state(int lss, pci_power_t pci_state)
 {
        switch (pci_state) {
        case PCI_D0:
-               if (SSMSK(D0i1, lss) & S0I1_ACG_SSS_TARGET)
+               if (SSMSK(D0i1, lss) & D0I1_ACG_SSS_TARGET)
                        return D0i1;
                else
                        return D0;
@@ -324,7 +317,7 @@ static int pmu_issue_command(u32 pm_ssc)
        }
 
        /*
-        * enable interrupts in PMU2 so that interrupts are
+        * enable interrupts in PMU so that interrupts are
         * propagated when ioc bit for a particular set
         * command is set
         */
@@ -346,7 +339,7 @@ static int pmu_issue_command(u32 pm_ssc)
 
        /* construct the command to send SET_CFG to particular PMU */
        command.pmu2_params.d_param.cmd = SET_CFG_CMD;
-       command.pmu2_params.d_param.ioc = 1;
+       command.pmu2_params.d_param.ioc = 0;
        command.pmu2_params.d_param.mode_id = 0;
        command.pmu2_params.d_param.sys_state = SYS_STATE_S0I0;
 
@@ -365,7 +358,13 @@ static u16 pmu_min_lss_pci_req(u16 *ids, u16 pci_state)
        int i;
 
        for (i = 0; ids[i]; ++i) {
-               existing_request = pci_id_2_mrst_dev(ids[i])->latest_request;
+               struct mrst_device *mrst_dev;
+
+               mrst_dev = pci_id_2_mrst_dev(ids[i]);
+               if (unlikely(!mrst_dev))
+                       continue;
+
+               existing_request = mrst_dev->latest_request;
                if (existing_request < pci_state)
                        pci_state = existing_request;
        }
@@ -376,7 +375,6 @@ static u16 pmu_min_lss_pci_req(u16 *ids, u16 pci_state)
  * pmu_pci_set_power_state - Callback function is used by all the PCI devices
  *                     for a platform  specific device power on/shutdown.
  */
-#warning pri#2 #37 pdev->d3_delay needs to be cleared, default is 10ms
 
 int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state)
 {
@@ -384,10 +382,15 @@ int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state)
        int status = 0;
        struct mrst_device *mrst_dev;
 
+       pmu_set_power_state_entry++;
+
        BUG_ON(pdev->vendor != PCI_VENDOR_ID_INTEL);
        BUG_ON(pci_state < PCI_D0 || pci_state > PCI_D3cold);
 
        mrst_dev = pci_id_2_mrst_dev(pdev->device);
+       if (unlikely(!mrst_dev))
+               return -ENODEV;
+
        mrst_dev->pci_state_counts[pci_state]++;        /* count invocations */
 
        /* PMU driver calls self as part of PCI initialization, ignore */
@@ -396,6 +399,13 @@ int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state)
 
        BUG_ON(!pmu_reg); /* SW bug if called before initialized */
 
+       spin_lock(&mrst_pmu_power_state_lock);
+
+       if (pdev->d3_delay) {
+               dev_dbg(&pdev->dev, "d3_delay %d, should be 0\n",
+                       pdev->d3_delay);
+               pdev->d3_delay = 0;
+       }
        /*
         * If Lincroft graphics, simply remember state
         */
@@ -405,7 +415,7 @@ int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state)
                        graphics_is_off = 0;
                else
                        graphics_is_off = 1;
-               return 0;
+               goto ret;
        }
 
        if (!mrst_dev->lss)
@@ -416,10 +426,6 @@ int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state)
 
        mrst_dev->latest_request = pci_state;   /* record latest request */
 
-       if (mrst_dev->lss == LSS_SD_HC2)
-               sdhc2_sss = SSMSK(pci_2_mrst_state(LSS_SD_HC2, pci_state),
-                               LSS_SD_HC2);
-
        /*
         * LSS9 and LSS10 contain multiple PCI devices.
         * Use the lowest numbered (highest power) state in the LSS
@@ -429,12 +435,9 @@ int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state)
        else if (mrst_dev->lss == 10)
                pci_state = pmu_min_lss_pci_req(mrst_lss10_pci_ids, pci_state);
 
-       wait_for_completion(&s0ix_completion);
-
-       #warning pri#2 #17 pmu_wait_not_busy() does not prevent SMP race
-       status = pmu_wait_not_busy();
+       status = pmu_wait_ready();
        if (status)
-               return status;
+               goto ret;
 
        old_sss = pmu_read_sss();
        new_sss = old_sss & ~SSMSK(3, mrst_dev->lss);
@@ -444,29 +447,22 @@ int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state)
        if (new_sss == old_sss)
                goto ret;       /* nothing to do */
 
-       INIT_COMPLETION(s0ix_completion);
+       pmu_set_power_state_send_cmd++;
 
        status = pmu_issue_command(new_sss);
 
        if (unlikely(status != 0)) {
-               dev_err(&pdev->dev, "Failed to Issue a PM\
-                command to PMU2\n");
-               complete_all(&s0ix_completion);
-               return status;
+               dev_err(&pdev->dev, "Failed to Issue a PM command\n");
+               goto ret;
        }
 
-       /* lets delay hand over till we confirm
-        * scu has completed operation */
-
-       #warning pri#2 #18 pmu_pci_set_power_state() magic delay & wait
-       mdelay(1);
-
-       if (pmu_wait_not_busy())
+       if (pmu_wait_done())
                goto ret;
 
        lss_s0i3_enabled =
-       (((pmu_read_sss() | sdhc2_sss) & S0I3_SSS_TARGET) == S0I3_SSS_TARGET);
+       ((pmu_read_sss() & S0I3_SSS_TARGET) == S0I3_SSS_TARGET);
 ret:
+       spin_unlock(&mrst_pmu_power_state_lock);
        return status;
 }
 
@@ -478,18 +474,15 @@ static inline const char *d0ix_name(int state)
        return d0ix_names[(int) state];
 }
 
-static int pmu_devices_state_show(struct seq_file *s, void *unused)
+static int debug_mrst_pmu_show(struct seq_file *s, void *unused)
 {
        struct pci_dev *pdev = NULL;
        u32 cur_pmsss;
        int lss;
 
-       if (pmu_wait_not_busy())
-               goto unlock;
-
-       seq_printf(s, "0x%08X D0I1_ACG_SSS_TARGET\n", S0I1_ACG_SSS_TARGET);
+       seq_printf(s, "0x%08X D0I1_ACG_SSS_TARGET\n", D0I1_ACG_SSS_TARGET);
 
-       cur_pmsss = (pmu_read_sss() | sdhc2_sss);
+       cur_pmsss = pmu_read_sss();
 
        seq_printf(s, "0x%08X S0I3_SSS_TARGET\n", S0I3_SSS_TARGET);
 
@@ -509,7 +502,6 @@ static int pmu_devices_state_show(struct seq_file *s, void *unused)
                u16 pmcsr;
                struct mrst_device *mrst_dev;
                int i;
-               unsigned int lssmask;
 
                mrst_dev = pci_id_2_mrst_dev(pdev->device);
 
@@ -518,6 +510,10 @@ static int pmu_devices_state_show(struct seq_file *s, void *unused)
                        pdev->vendor, pdev->device,
                        dev_driver_string(&pdev->dev));
 
+               if (unlikely (!mrst_dev)) {
+                       seq_printf(s, " UNKNOWN\n");
+                       continue;
+               }
 
                if (mrst_dev->lss)
                        seq_printf(s, "LSS %2d %-4s ", mrst_dev->lss,
@@ -540,11 +536,16 @@ static int pmu_devices_state_show(struct seq_file *s, void *unused)
                for (i = 0; i <= PCI_D3cold; ++i)
                        seq_printf(s, "%d ", mrst_dev->pci_state_counts[i]);
 
-               lssmask = SSMSK(D0i3, mrst_dev->lss);
+               if (mrst_dev->lss) {
+                       unsigned int lssmask;
+
+                       lssmask = SSMSK(D0i3, mrst_dev->lss);
 
-               if ((lssmask & S0I3_SSS_TARGET) &&
-                   ((lssmask & cur_pmsss) != lssmask))
-                       seq_printf(s , "[BLOCKS s0i3]");
+                       if ((lssmask & S0I3_SSS_TARGET) &&
+                               ((lssmask & cur_pmsss) !=
+                                       (lssmask & S0I3_SSS_TARGET)))
+                                               seq_printf(s , "[BLOCKS s0i3]");
+               }
 
                seq_printf(s, "\n");
        }
@@ -562,17 +563,35 @@ static int pmu_devices_state_show(struct seq_file *s, void *unused)
                pmu_irq_stats[INT_CMD_ERR], pmu_irq_stats[INT_WAKE_RX],
                pmu_irq_stats[INT_SS_ERROR], pmu_irq_stats[INT_S0IX_MISS],
                pmu_irq_stats[INT_NO_ACKC6], pmu_irq_stats[INT_INVALID]);
-unlock:
+
+       seq_printf(s, "mrst_pmu_wait_ready_calls          %8d\n",
+                       pmu_wait_ready_calls);
+       seq_printf(s, "mrst_pmu_wait_ready_udelays        %8d\n",
+                       pmu_wait_ready_udelays);
+       seq_printf(s, "mrst_pmu_wait_ready_udelays_max    %8d\n",
+                       pmu_wait_ready_udelays_max);
+       seq_printf(s, "mrst_pmu_wait_done_calls           %8d\n",
+                       pmu_wait_done_calls);
+       seq_printf(s, "mrst_pmu_wait_done_udelays         %8d\n",
+                       pmu_wait_done_udelays);
+       seq_printf(s, "mrst_pmu_wait_done_udelays_max     %8d\n",
+                       pmu_wait_done_udelays_max);
+       seq_printf(s, "mrst_pmu_set_power_state_entry     %8d\n",
+                       pmu_set_power_state_entry);
+       seq_printf(s, "mrst_pmu_set_power_state_send_cmd  %8d\n",
+                       pmu_set_power_state_send_cmd);
+       seq_printf(s, "SCU busy: %d\n", pmu_read_busy_status());
+
        return 0;
 }
 
-static int devices_state_open(struct inode *inode, struct file *file)
+static int debug_mrst_pmu_open(struct inode *inode, struct file *file)
 {
-       return single_open(file, pmu_devices_state_show, NULL);
+       return single_open(file, debug_mrst_pmu_show, NULL);
 }
 
 static const struct file_operations devices_state_operations = {
-       .open           = devices_state_open,
+       .open           = debug_mrst_pmu_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
@@ -594,6 +613,12 @@ static void pmu_scu_firmware_debug(void)
                int pos;
 
                mrst_dev = pci_id_2_mrst_dev(pdev->device);
+               if (unlikely(!mrst_dev)) {
+                       printk(KERN_ERR FW_BUG "pmu: Unknown "
+                               "PCI device 0x%04X\n", pdev->device);
+                       continue;
+               }
+
                if (mrst_dev->lss == 0)
                        continue;        /* no LSS in our table */
 
@@ -631,9 +656,6 @@ static int __devinit pmu_probe(struct pci_dev *pdev,
        int ret;
        struct mrst_pmu_reg *pmu;
 
-       /* our completion shouldn't start armed*/
-       complete_all(&s0ix_completion);
-
        /* Init the device */
        ret = pci_enable_device(pdev);
        if (ret) {
@@ -650,7 +672,7 @@ static int __devinit pmu_probe(struct pci_dev *pdev,
        /* Map the memory of PMU reg base */
        pmu = pci_iomap(pdev, 0, 0);
        if (!pmu) {
-               dev_err(&pdev->dev, "Unable to map the PMU2 address space\n");
+               dev_err(&pdev->dev, "Unable to map the PMU address space\n");
                ret = -ENOMEM;
                goto out_err2;
        }
@@ -672,10 +694,13 @@ static int __devinit pmu_probe(struct pci_dev *pdev,
 
        pmu_write_wkc(S0I3_WAKE_SOURCES);       /* Enable S0i3 wakeup sources */
 
-       s0i3_wait_for_pmu();
-       pmu_write_ssc(S0I1_ACG_SSS_TARGET);     /* Enable Auto-Clock_Gating */
+       pmu_wait_ready();
+
+       pmu_write_ssc(D0I1_ACG_SSS_TARGET);     /* Enable Auto-Clock_Gating */
        pmu_write_cmd(0x201);
 
+       spin_lock_init(&mrst_pmu_power_state_lock);
+
        /* Enable the hardware interrupt */
        pmu_irq_enable();
        return 0;
@@ -735,4 +760,58 @@ static void __exit mid_pci_cleanup(void)
 {
        pci_unregister_driver(&driver);
 }
+
+static int ia_major;
+static int ia_minor;
+
+static int pmu_sfi_parse_oem(struct sfi_table_header *table)
+{
+       struct sfi_table_simple *sb;
+
+       sb = (struct sfi_table_simple *)table;
+       ia_major = (sb->pentry[1] >> 0) & 0xFFFF;
+       ia_minor = (sb->pentry[1] >> 16) & 0xFFFF;
+       printk(KERN_INFO "mrst_pmu: IA FW version v%x.%x\n",
+               ia_major, ia_minor);
+
+       return 0;
+}
+
+static int __init scu_fw_check(void)
+{
+       int ret;
+       u32 fw_version;
+
+       if (!pmu_reg)
+               return 0;       /* this driver didn't probe-out */
+
+       sfi_table_parse("OEMB", NULL, NULL, pmu_sfi_parse_oem);
+
+       if (ia_major < 0x6005 || ia_minor < 0x1525) {
+               WARN(1, "mrst_pmu: IA FW version too old\n");
+               return -1;
+       }
+
+       ret = intel_scu_ipc_command(IPCMSG_FW_REVISION, 0, NULL, 0,
+                                       &fw_version, 1);
+
+       if (ret) {
+               WARN(1, "mrst_pmu: IPC FW version? %d\n", ret);
+       } else {
+               int scu_major = (fw_version >> 8) & 0xFF;
+               int scu_minor = (fw_version >> 0) & 0xFF;
+
+               printk(KERN_INFO "mrst_pmu: firmware v%x\n", fw_version);
+
+               if ((scu_major >= 0xC0) && (scu_minor >= 0x49)) {
+                       printk(KERN_INFO "mrst_pmu: enabling S0i3\n");
+                       mrst_pmu_s0i3_enable = true;
+               } else {
+                       WARN(1, "mrst_pmu: S0i3 disabled, old firmware %X.%X",
+                                       scu_major, scu_minor);
+               }
+       }
+       return 0;
+}
+late_initcall(scu_fw_check);
 module_exit(mid_pci_cleanup);
index 0efebf0..bfbfe64 100644 (file)
@@ -27,9 +27,8 @@
 #define        PCI_VENDOR_CAP_LOG_ID_MASK      0x7F
 #define PCI_VENDOR_CAP_LOG_SS_MASK     0x80
 
-#define SUB_SYS_ALL_D0I2       0xAAAAAAAA
-#define SUB_SYS_ALL_D0I1       0x55555555
-#define S0I3_WAKE_SOURCES      0x0000FFFF
+#define SUB_SYS_ALL_D0I1       0x01155555
+#define S0I3_WAKE_SOURCES      0x00001FFF
 
 #define PM_S0I3_COMMAND                                        \
        ((0 << 31) |    /* Reserved */                  \
@@ -42,6 +41,7 @@
        (0 << 8) |      /* Do not interrupt */          \
        (1 << 0))       /* Set configuration */
 
+#define        LSS_DMI         0
 #define        LSS_SD_HC0      1
 #define        LSS_SD_HC1      2
 #define        LSS_NAND        3
 #define        LSS_DISPLAY     6
 #define        LSS_USB_HC      7
 #define        LSS_USB_OTG     8
-
 #define        LSS_AUDIO       9
 #define        LSS_AUDIO_LPE   9
 #define        LSS_AUDIO_SSP   9
-
 #define        LSS_I2C0        10
 #define        LSS_I2C1        10
 #define        LSS_I2C2        10
 #define        LSS_SPI1        10
 #define        LSS_SPI2        10
 #define        LSS_GPIO        10
-
-#define        LSS_SD_HC2      14
-
-#define MRST_NUM_LSS   16
+#define        LSS_SRAM        11      /* used by SCU, do not touch */
+#define        LSS_SD_HC2      12
+/* LSS hardware bits 15,14,13 are hardwired to 0, thus unusable */
+#define MRST_NUM_LSS   13
 
 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
 
 #define        D0i3    3
 
 #define S0I3_SSS_TARGET        (               \
+       SSMSK(D0i1, LSS_DMI) |          \
        SSMSK(D0i3, LSS_SD_HC0) |       \
        SSMSK(D0i3, LSS_SD_HC1) |       \
+       SSMSK(D0i3, LSS_NAND) |         \
        SSMSK(D0i3, LSS_SD_HC2) |       \
+       SSMSK(D0i3, LSS_IMAGING) |      \
+       SSMSK(D0i3, LSS_SECURITY) |     \
+       SSMSK(D0i3, LSS_DISPLAY) |      \
        SSMSK(D0i3, LSS_USB_HC) |       \
        SSMSK(D0i3, LSS_USB_OTG) |      \
-       SSMSK(D0i3, LSS_AUDIO))
+       SSMSK(D0i3, LSS_AUDIO) |        \
+       SSMSK(D0i1, LSS_I2C0))
 
 /*
  * D0i1 on Langwell is Autonomous Clock Gating (ACG).
  * Enable ACG on every LSS except camera and audio
  */
-#define S0I1_ACG_SSS_TARGET     \
+#define D0I1_ACG_SSS_TARGET     \
        (SUB_SYS_ALL_D0I1 & ~SSMSK(D0i1, LSS_IMAGING) & ~SSMSK(D0i1, LSS_AUDIO))
 
 enum cm_mode {
@@ -146,10 +150,12 @@ static inline void pmu_write_cmd(u32 arg) { writel(arg, &pmu_reg->pm_cmd); }
 static inline void pmu_write_ics(u32 arg) { writel(arg, &pmu_reg->pm_ics); }
 static inline void pmu_write_wkc(u32 arg) { writel(arg, &pmu_reg->pm_wkc[0]); }
 static inline void pmu_write_ssc(u32 arg) { writel(arg, &pmu_reg->pm_ssc[0]); }
-static inline void pmu_write_wssc(u32 arg) { writel(arg, &pmu_reg->pm_wssc[0]); }
+static inline void pmu_write_wssc(u32 arg)
+                                       { writel(arg, &pmu_reg->pm_wssc[0]); }
 
 static inline void pmu_msi_enable(void) { writel(0, &pmu_reg->pm_msi_disable); }
-static inline void pmu_msi_disable(void) { writel(1, &pmu_reg->pm_msi_disable); }
+static inline u32 pmu_msi_is_disabled(void)
+                               { return readl(&pmu_reg->pm_msi_disable); }
 
 union pmu_pm_ics {
        struct {
@@ -220,5 +226,9 @@ union pmu_pm_set_cfg_cmd_t {
        u32 pmu_pm_set_cfg_cmd_value;
 };
 
+#ifdef FUTURE_PATCH
 extern int mrst_s0i3_entry(u32 regval, u32 *regaddr);
+#else
+static inline int mrst_s0i3_entry(u32 regval, u32 *regaddr) { return -1; }
+#endif
 #endif
index 81bd9a3..d99f315 100644 (file)
@@ -83,15 +83,8 @@ static unsigned int lapic_timer_reliable_states = (1 << 1);   /* Default to only
 
 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
 static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state);
-static int intel_mid_idle(struct cpuidle_device *dev, struct cpuidle_state *state);
 
 static struct cpuidle_state *cpuidle_state_table;
-static int (*cpuidle_device_prepare)(struct cpuidle_device *dev);
-
-/*
- * Indicates that this is not a proper MWAIT state
- */
-#define CPUIDLE_FLAG_INTEL_FAKE                0x10000
 
 /*
  * Hardware C-state auto-demotion may not always be optimal.
@@ -214,65 +207,13 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = {
                .enter = &intel_idle },
 };
 
-#ifdef CONFIG_X86_INTEL_MID
-static struct cpuidle_state mrst_cstates[MWAIT_MAX_NUM_CSTATES] = {
-       { /* MWAIT C0 */ },
-       { /* MWAIT C1 */
-               .name = "ATM-C1",
-               .desc = "MWAIT 0x00",
-               .driver_data = (void *) 0x00,
-               .flags = CPUIDLE_FLAG_TIME_VALID,
-               .exit_latency = 1,
-               .target_residency = 4,
-               .enter = &intel_idle },
-       { /* MWAIT C2 */
-               .name = "ATM-C2",
-               .desc = "MWAIT 0x10",
-               .driver_data = (void *) 0x10,
-               .flags = CPUIDLE_FLAG_TIME_VALID,
-               .exit_latency = 20,
-               .target_residency = 80,
-               .enter = &intel_idle },
-       { /* MWAIT C3 */ },
-       { /* MWAIT C4 */
-               .name = "ATM-C4",
-               .desc = "MWAIT 0x30",
-               .driver_data = (void *) 0x30,
-               .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
-               .exit_latency = 100,
-               .target_residency = 400,
-               .enter = &intel_idle },
-       { /* MWAIT C5 */ },
-       { /* MWAIT C6 */
-               .name = "ATM-C6",
-               .desc = "MWAIT 0x52",
-               .driver_data = (void *) 0x52,
-               .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
-               .exit_latency = 140,
-               .target_residency = 560,
-               .enter = &intel_mid_idle, },
-       { /* MRST S0i3 */
-               .name = "MRST-S0i3",
-               .desc = "MRST S0i3",
-               .driver_data = (void *) -1UL, /* Special */
-               .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED |
-                       CPUIDLE_FLAG_INTEL_FAKE,
-               .exit_latency = 300, /* XXX */
-               .target_residency = 1200, /* XXX */
-               .enter = &intel_mid_idle },
-};
-#warning pri#3 #24 tune mrst_cstates parameters
-#else
-#define mrst_cstates atom_cstates
-#endif
-
 /**
  * intel_idle
  * @dev: cpuidle_device
  * @state: cpuidle state
  *
  */
-int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
+static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
 {
        unsigned long ecx = 1; /* break on interrupt flag */
        unsigned long eax = (unsigned long)cpuidle_get_statedata(state);
@@ -298,6 +239,9 @@ int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
        kt_before = ktime_get_real();
 
        stop_critical_timings();
+#ifndef MODULE
+       trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu);
+#endif
        if (!need_resched()) {
 
                __monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -319,69 +263,6 @@ int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
        return usec_delta;
 }
 
-/**
- * intel_mid_idle      -       Idle a MID device
- * @dev: cpuidle_device
- * @state: cpuidle state
- *
- * This enters S0i3, C6 or C4 depending on what is currently permitted.
- * C1-C4 are handled via the normal intel_idle entry.
- */
-extern void do_s0i3(void);
-
-int intel_mid_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
-{
-       unsigned long ecx = 1; /* break on interrupt flag */
-       unsigned long eax = (unsigned long)cpuidle_get_statedata(state);
-       ktime_t kt_before, kt_after;
-       s64 usec_delta;
-       int cpu = smp_processor_id();
-
-       local_irq_disable();
-
-       /*
-        * leave_mm() to avoid costly and often unnecessary wakeups
-        * for flushing the user TLB's associated with the active mm.
-        */
-#ifdef CPUIDLE_FLAG_TLB_FLUSHED         
-       if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
-               leave_mm(cpu);
-#endif /* FIXME */
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
-
-       kt_before = ktime_get_real();
-
-       stop_critical_timings();
-
-       if (!need_resched()) {
-#ifdef CONFIG_X86_MRST
-               if (eax == -1UL) {
-                       ;//do_s0i3();
-               } else
-#endif         
-               {
-                       /* Conventional MWAIT */
-
-                       __monitor((void *)&current_thread_info()->flags, 0, 0);
-                       smp_mb();
-                       if (!need_resched())
-                               __mwait(eax, ecx);
-               }
-       }
-
-       start_critical_timings();
-
-       kt_after = ktime_get_real();
-       usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
-
-       local_irq_enable();
-
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
-
-       return usec_delta;
-}
-
-
 static void __setup_broadcast_timer(void *arg)
 {
        unsigned long reason = (unsigned long)arg;
@@ -420,6 +301,36 @@ static void auto_demotion_disable(void *dummy)
        wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
 }
 
+static int (*intel_idle_platform_prepare)(void);
+
+/*
+ * check for invalid c-states at run-time
+ */
+static int intel_idle_prepare(struct cpuidle_device *dev)
+{
+       int c, invalid_cstates;
+
+       if (*intel_idle_platform_prepare == 0)
+               return 0;
+
+       invalid_cstates = (intel_idle_platform_prepare)();
+
+       for (c = 0; c <= dev->state_count; c++) {
+               int ignored = dev->states[c].flags & CPUIDLE_FLAG_IGNORE;
+               int invalid = (c > max_cstate) || ((1 << c) & invalid_cstates);
+
+               if (invalid) {
+                       if (!ignored)
+                               dev->states[c].flags |= CPUIDLE_FLAG_IGNORE;
+               } else { /* valid */
+                       if (ignored)
+                               dev->states[c].flags &= ~CPUIDLE_FLAG_IGNORE;
+               }
+       }
+
+       return 0;
+}
+
 /*
  * intel_idle_probe()
  */
@@ -471,13 +382,14 @@ static int intel_idle_probe(void)
                cpuidle_state_table = atom_cstates;
                break;
 
+
        case 0x26:      /* 38 - Lincroft Atom Processor */
                cpuidle_state_table = mrst_cstates;
 #ifdef CONFIG_X86_MRST
-               cpuidle_device_prepare = mrst_pmu_validate_cstates;
+               intel_idle_platform_prepare = &mrst_pmu_invalid_cstates;
 #endif
        case 0x27:      /* 39 - Penwell Atom Processor */
-               cpuidle_state_table = mrst_cstates;
+               cpuidle_state_table = atom_cstates;
                auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE;
                break;
 
@@ -543,6 +455,8 @@ static int intel_idle_cpuidle_devices_init(void)
                dev->state_count = 1;
 
                for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
+                       int num_substates;
+
                        if (cstate > max_cstate) {
                                printk(PREFIX "max_cstate %d reached\n",
                                        max_cstate);
@@ -550,16 +464,10 @@ static int intel_idle_cpuidle_devices_init(void)
                        }
 
                        /* does the state exist in CPUID.MWAIT? */
-                       if (!(cpuidle_state_table[cstate].flags &
-                             CPUIDLE_FLAG_INTEL_FAKE)) {
-                               int num_substates;
-
-                               num_substates = (mwait_substates >> (cstate*4))
-                                       & MWAIT_SUBSTATE_MASK;
-                               if (num_substates == 0)
-                                       continue;
-                       }
-
+                       num_substates = (mwait_substates >> ((cstate) * 4))
+                                               & MWAIT_SUBSTATE_MASK;
+                       if (num_substates == 0)
+                               continue;
                        /* is the state not enabled? */
                        if (cpuidle_state_table[cstate].enter == NULL) {
                                /* does the driver not know about the state? */
@@ -583,7 +491,8 @@ static int intel_idle_cpuidle_devices_init(void)
                }
 
                dev->cpu = i;
-               dev->prepare = cpuidle_device_prepare;
+               dev->prepare = &intel_idle_prepare;
+
                if (cpuidle_register_device(dev)) {
                        pr_debug(PREFIX "cpuidle_register_device %d failed!\n",
                                 i);
@@ -642,7 +551,7 @@ static void __exit intel_idle_exit(void)
 module_init(intel_idle_init);
 module_exit(intel_idle_exit);
 
-module_param(max_cstate, int, 0444);
+module_param(max_cstate, int, 0644);
 
 MODULE_AUTHOR("Len Brown <len.brown@intel.com>");
 MODULE_DESCRIPTION("Cpuidle driver for Intel Hardware v" INTEL_IDLE_VERSION);
index f3cd85f..3df063d 100644 (file)
@@ -22,6 +22,8 @@
 #include <linux/kernel.h>
 #include <linux/cpuidle.h>
 #include <linux/clockchips.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 #include <linux/hrtimer.h>     /* ktime_get_real() */
 #include <linux/pci.h>
 #include <linux/cpu.h>
@@ -42,6 +44,68 @@ static u64 *wakeup_ptr;
 static phys_addr_t s0i3_trampoline_phys;
 static void *s0i3_trampoline_base;
 
+static u64 mrst_s0i3_entry_count;
+static u64 mrst_s0i3_exit_count;
+static u64 mrst_s0i3_fail_count;
+
+static ktime_t kt_s0i3_enter;
+static u64 s0i3_ts_end(ktime_t kt_start,
+       u64 *us_delta_minp, u64 *us_delta_maxp, u64 *us_delta_totalp);
+
+static u64 s0i3_entry_us_min;  // debug
+static u64 s0i3_entry_us_max;  // debug
+static u64 s0i3_entry_us_total;        // debug
+
+static u64 s0i3_poll_msi_disabled_us_min;      // debug
+static u64 s0i3_poll_msi_disabled_us_max;      // debug
+static u64 s0i3_poll_msi_disabled_us_total;    // debug
+
+static u32 s0i3_poll_msi_disabled_calls;
+static u32 s0i3_poll_msi_disabled_cnt;
+static u32 s0i3_poll_msi_disabled_max;
+
+static ktime_t s0i3_exit_end_kt;       // debug
+
+static ktime_t s0i3_exit_start_kt; // debug
+static ktime_t s0i3_exit_restore_msrs_kt;      // debug
+static ktime_t s0i3_exit_restore_processor_state_kt;   // debug
+static ktime_t s0i3_exit_restore_lapic_kt;     // debug
+static ktime_t s0i3_exit_poke_kt;      // debug
+
+
+static u64 s0i3_us_min;
+static u64 s0i3_us_max;
+static u64 s0i3_us_total;
+
+/*
+ * s0i3_ts_end()
+ * simplify maintaining min, max, average us timestamps
+ *
+ * call at end of timestamp range
+ * take end timestamp, calculate us_delta
+ * update min, max, total
+ * later: average = total/count
+ * return: us_delta
+ */
+static u64 s0i3_ts_end(ktime_t kt_start,
+       u64 *us_delta_minp, u64 *us_delta_maxp, u64 *us_delta_totalp)
+{
+       u64 us_delta;
+       ktime_t kt_now = ktime_get_real();
+
+       us_delta = ktime_to_us(ktime_sub(kt_now, kt_start));
+
+       if (us_delta > *us_delta_maxp)
+               *us_delta_maxp = us_delta;
+
+       if (us_delta < *us_delta_minp || *us_delta_minp == 0)
+               *us_delta_minp = us_delta;
+
+       *us_delta_totalp += us_delta;
+
+       return us_delta;
+}
+
 /*
  * List of MSRs to be saved/restored, *other* than what is handled by
  * * save_processor_state/restore_processor_state.  * This is
@@ -78,13 +142,6 @@ static void s0i3_save_msrs(void)
                s0i3_msr_data[i].q = native_read_msr(s0i3_msr_list[i]);
 }
 
-static void s0i3_adjust_msrs(void)
-{
-       native_write_msr(MSR_NHM_SNB_PKG_CST_CFG_CTL,
-                        s0i3_msr_data[0].l & ~(1 << 25),
-                        s0i3_msr_data[0].h);
-}
-
 static void s0i3_restore_msrs(void)
 {
        int i;
@@ -156,8 +213,8 @@ static void s0i3_poke_other_cpu(void)
 {
        const struct init_wakeup_delays delays = {
                .assert_init    = 0,
-               .icr_accept     = 30,
-               .cpu_accept     = 20,
+               .icr_accept     = 0,
+               .cpu_accept     = 0,
        };
 
        wakeup_secondary_cpu_via_init_delays(1, s0i3_trampoline_phys, &delays);
@@ -168,51 +225,103 @@ static inline void s0i3_update_wake_pointer(void)
        *wakeup_ptr = virt_to_phys(mrst_s0i3_resume);
 }
 
-noinline void do_s0i3(void)
+static int s0i3_sfi_parse_wake(struct sfi_table_header *table)
 {
+       struct sfi_table_simple *sb;
+       struct sfi_wake_table_entry *pentry;
+       int num;
+
+       sb = (struct sfi_table_simple *)table;
+       pentry = (struct sfi_wake_table_entry *)sb->pentry;
+       num = SFI_GET_NUM_ENTRIES(sb, struct sfi_wake_table_entry);
+
+       if (num < 1)            /* num == 1? */
+               return -EINVAL;
+
+       wakeup_ptr = ioremap_cache(pentry->phys_addr, 8);
+
+       printk(KERN_DEBUG "s0i3: wakeup pointer at 0x%llx mapped to %p\n",
+              pentry->phys_addr, wakeup_ptr);
+
+       return wakeup_ptr ? 0 : -ENOMEM;
+}
+
+/* n.b. match intel_idle.c */
+#define CPUIDLE_FLAG_TLB_FLUSHED        0x10000
+
+/**
+ * mrst_s0i3
+ * @dev: cpuidle_device
+ * @state: cpuidle state
+ *
+ * Enter S0i3.
+ */
+int mrst_s0i3(struct cpuidle_device *dev, struct cpuidle_state *state)
+{
+       s64 us_delta;
+       int cpu = smp_processor_id();
+
+       local_irq_disable();
+
+       /*
+        * leave_mm() to avoid costly and often unnecessary wakeups
+        * for flushing the user TLB's associated with the active mm.
+        */
+       if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
+               leave_mm(cpu);
+
+       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+
+       kt_s0i3_enter = ktime_get_real();
+
+       stop_critical_timings();
+       trace_power_start(POWER_CSTATE, 7, 0);
+
+       mrst_s0i3_entry_count++;
        s0i3_update_wake_pointer();
-       mrst_pmu_disable_msi(); /* disable MSIs before save LAPIC */
        s0i3_save_lapic();
        s0i3_save_msrs();
        save_processor_state();
-       s0i3_adjust_msrs();
-       mrst_pmu_s0i3_prepare();
        if (mrst_pmu_s0i3_entry()) {
+
+               s0i3_exit_start_kt = ktime_get_real(); // debug
+
                s0i3_restore_msrs();
+               s0i3_exit_restore_msrs_kt = ktime_get_real(); // debug
+
                restore_processor_state();
+               s0i3_exit_restore_processor_state_kt = ktime_get_real(); // debug
+
                s0i3_restore_lapic();
+               s0i3_exit_restore_lapic_kt = ktime_get_real(); // debug
 
-               /* The PMU command executed correctly, so no longer pending */
-               mrst_pmu_pending_set(false);
+               mrst_s0i3_exit_count++;
 
+               s0i3_poke_other_cpu();   /* 12 uS */
+               s0i3_exit_poke_kt = ktime_get_real();
 
-               s0i3_poke_other_cpu();
+               mrst_pmu_enable_msi();
+
+               /* exit latency */
+               s0i3_exit_end_kt = ktime_get_real();
+
+               /* S0i3 Residency */
+               us_delta = s0i3_ts_end(kt_s0i3_enter, &s0i3_us_min,
+                       &s0i3_us_max, &s0i3_us_total);
        } else {
+               mrst_s0i3_fail_count++;
                /* save_processor_state() did execute kernel_fpu_begin() */
                kernel_fpu_end();
+               us_delta = 0;    /* tell cpuidle not to add any S0i3-time */
        }
-       mrst_pmu_enable_msi();
-}
-
-static int s0i3_sfi_parse_wake(struct sfi_table_header *table)
-{
-       struct sfi_table_simple *sb;
-       struct sfi_wake_table_entry *pentry;
-       int num;
 
-       sb = (struct sfi_table_simple *)table;
-       pentry = (struct sfi_wake_table_entry *)sb->pentry;
-       num = SFI_GET_NUM_ENTRIES(sb, struct sfi_wake_table_entry);
+       start_critical_timings();
 
-       if (num < 1)            /* num == 1? */
-               return -EINVAL;
+       local_irq_enable();
 
-       wakeup_ptr = ioremap_cache(pentry->phys_addr, 8);
+       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
 
-       printk(KERN_DEBUG "s0i3: wakeup pointer at 0x%llx mapped to %p\n",
-              pentry->phys_addr, wakeup_ptr);
-
-       return wakeup_ptr ? 0 : -ENOMEM;
+       return us_delta;
 }
 
 /*
@@ -227,9 +336,6 @@ void __init mrst_reserve_memory(void)
        phys_addr_t mem;
        size_t size;
 
-       if (boot_cpu_data.x86 != 6 || boot_cpu_data.x86_model != 0x26)
-               return;
-
        size = s0i3_trampoline_data_end - s0i3_trampoline_data;
        size = ALIGN(size, PAGE_SIZE);
 
@@ -242,14 +348,137 @@ void __init mrst_reserve_memory(void)
        s0i3_trampoline_base = __va(mem);
        memblock_x86_reserve_range(mem, mem + size, "S0I3");
 }
+#ifdef CONFIG_DEBUG_FS
+static u64 *s0i3_us_addr;
+
+/*
+ * After we send an S0i3-enter command to the SCU,
+ * we poll the SCU's MSI-DISABLED flag to be sure
+ * the SCU is ready to receive our MWAIT status.
+ *
+ * Without this check, it is possible for the SCU to process our MWAIT
+ * (on ARC IRQ level2) before our S0i3 command (on ARC IRQ level1),
+ * which would cause the SCU to wait for 1ms for our (lost) MWAIT results
+ * and bail out with an E501.
+ */
+
+/*
+ * Since WBINVD precedes checking for MSI-DISABLED,
+ * we typically see MSI-DISABLED on the 1st read.
+ * If we do not see MSI-DISABLED in 200 reads, we will never see it.
+ * In that case the SCU has dropped our command, the SCU busy bit is stuck set,
+ * and the system will likely die due to inability to send LSS commands
+ * to the SCU to un-clock-gate devices.
+ */
+#define MAX_MSI_DISABLED_POLLS  200
+
+asmlinkage void mrst_s0i3_wait_for_msi_disabled(void)
+{
+       int reads;
+       static ktime_t kt_before; // debug
+
+       s0i3_ts_end(kt_s0i3_enter, &s0i3_entry_us_min,
+                       &s0i3_entry_us_max, &s0i3_entry_us_total); // debug
+
+       kt_before = ktime_get_real();   // debug
+
+       s0i3_poll_msi_disabled_calls++; // todo: redundant with entry_count?
+       for (reads = 1;  reads <= MAX_MSI_DISABLED_POLLS; ++reads) {
+               s0i3_poll_msi_disabled_cnt++;
+
+               if (mrst_pmu_msi_is_disabled()) {
+                       s0i3_ts_end(kt_before, &s0i3_poll_msi_disabled_us_min,
+                               &s0i3_poll_msi_disabled_us_max, &s0i3_poll_msi_disabled_us_total); // debug
+                       return;
+               }
+
+               if (reads > s0i3_poll_msi_disabled_max)
+                       s0i3_poll_msi_disabled_max = reads;
+       }
+       printk(KERN_EMERG FW_BUG "SCU dropped S0i3 command\n");
+}
+
+static int mrst_s0i3_debugfs_show(struct seq_file *s, void *unused)
+{
+       u64 us_avg;
+
+       seq_printf(s, "entry_count\t%8lld\n", mrst_s0i3_entry_count);
+       if (mrst_s0i3_entry_count) {
+               us_avg = s0i3_entry_us_total;
+               do_div(us_avg, mrst_s0i3_entry_count);
+       } else
+               us_avg = 0;
+       seq_printf(s, "entry_us_max %2lld\n", s0i3_entry_us_max);
+       seq_printf(s, "entry_us_avg %2lld\n", us_avg);
+       seq_printf(s, "entry_us_min %2lld\n", s0i3_entry_us_min);
+
+       if (s0i3_poll_msi_disabled_calls) {
+               us_avg = s0i3_poll_msi_disabled_cnt / s0i3_poll_msi_disabled_calls;
+       } else
+               us_avg = 0;
+
+       seq_printf(s, "poll_msi_disabled_calls  %8d\n", s0i3_poll_msi_disabled_calls);
+       seq_printf(s, "poll_msi_disabled_avg    %8lld\n", us_avg);
+       seq_printf(s, "poll_msi_disabled_max    %8d\n", s0i3_poll_msi_disabled_max);
+
+       if (mrst_s0i3_entry_count) {
+               us_avg = s0i3_poll_msi_disabled_us_total;
+               do_div(us_avg, mrst_s0i3_entry_count);
+       } else
+               us_avg = 0;
+       seq_printf(s, "poll_msi_disabled_us_max %2lld\n", s0i3_poll_msi_disabled_us_max);
+       seq_printf(s, "poll_msi_disabled_us_avg %2lld\n", us_avg);
+       seq_printf(s, "poll_msi_disabled_us_min %2lld\n", s0i3_poll_msi_disabled_us_min);
+
+       seq_printf(s, "exit_count\t%8lld\n", mrst_s0i3_exit_count);
+
+       seq_printf(s, "exit_us %3lld\n",
+               ktime_to_us(ktime_sub(s0i3_exit_end_kt, s0i3_exit_start_kt)));
+
+       seq_printf(s, " exit_restore_msrs %3lld\n",
+               ktime_to_us(ktime_sub(s0i3_exit_restore_msrs_kt, s0i3_exit_start_kt)));
+       seq_printf(s, " exit_processor_restore_state %3lld\n",
+               ktime_to_us(ktime_sub(s0i3_exit_restore_processor_state_kt, s0i3_exit_restore_msrs_kt)));
+
+       seq_printf(s, " exit_restore_lapic %3lld\n",
+               ktime_to_us(ktime_sub(s0i3_exit_restore_lapic_kt, s0i3_exit_restore_processor_state_kt)));
+       seq_printf(s, " exit_poke_cpu %3lld\n",
+               ktime_to_us(ktime_sub(s0i3_exit_poke_kt, s0i3_exit_restore_lapic_kt)));
+       seq_printf(s, " exit_enable_msi %3lld\n",
+               ktime_to_us(ktime_sub(s0i3_exit_end_kt, s0i3_exit_poke_kt)));
+
+       if (mrst_s0i3_exit_count) {
+               us_avg = s0i3_us_total;
+               do_div(us_avg, mrst_s0i3_exit_count);
+       } else
+               us_avg = 0;
+       seq_printf(s, "Residency us_max %6lld\n", s0i3_us_max);
+       seq_printf(s, "Residency us_avg %6lld\n", us_avg);
+       seq_printf(s, "Residency us_min %6lld\n", s0i3_us_min);
+
+
+       seq_printf(s, "fail_count\t%8lld\n", mrst_s0i3_fail_count);
+
+       return 0;
+}
+
+static int mrst_s0i3_debugfs_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, mrst_s0i3_debugfs_show, NULL);
+}
+
+static const struct file_operations s0i3_debugfs_fops = {
+        .open           = mrst_s0i3_debugfs_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = single_release,
+};
+#endif
 
 static int __init s0i3_prepare(void)
 {
        int err;
 
-       if (boot_cpu_data.x86 != 6 || boot_cpu_data.x86_model != 0x26)
-               return -EOPNOTSUPP;
-
        wakeup_ptr = NULL;
        err = sfi_table_parse(SFI_SIG_WAKE, NULL, NULL, s0i3_sfi_parse_wake);
        if (err)
@@ -259,6 +488,16 @@ static int __init s0i3_prepare(void)
        memcpy(s0i3_trampoline_base, s0i3_trampoline_data,
               s0i3_trampoline_data_end - s0i3_trampoline_data);
 
+#ifdef CONFIG_DEBUG_FS
+       /* /sys/kernel/debug/mrst_s0i3_residency */
+       s0i3_us_addr = ioremap_nocache((resource_size_t)0xffffeef0, sizeof(u64));
+       debugfs_create_u64("mrst_s0i3_residency", S_IFREG | S_IWUSR | S_IRUGO,
+               NULL, (u64 *) s0i3_us_addr);
+
+       /* /sys/kernel/debug/mrst_s0i3 */
+       debugfs_create_file("mrst_s0i3", S_IFREG | S_IRUGO,
+               NULL, NULL, &s0i3_debugfs_fops);
+#endif
        return 0;
 }
 
index 9c174bf..1023c74 100644 (file)
@@ -51,6 +51,8 @@ ENTRY(mrst_s0i3_entry)
 
        pushl   %gs
        pushl   %fs
+       pushl   %es
+       pushl   %ds
 
        movl    %cr3, %eax
        pushl   %eax
@@ -83,10 +85,9 @@ ENTRY(mrst_s0i3_entry)
 
        leal    (-__PAGE_OFFSET)(%esp), %eax
        movl    %eax, mrst_s0i3_resume_stack
-       lock
-       incl    mrst_s0i3_entry_ctr
 
-       wbinvd
+       movl    %esi, (%edi)            /* Set the PM_CMD register for S0i3 */
+       call    mrst_s0i3_wait_for_msi_disabled
 
        movl    %esp, %eax              /* As good as anything... */
        xorl    %edx, %edx
@@ -95,15 +96,11 @@ ENTRY(mrst_s0i3_entry)
 
        movl    $MRST_C6_HINTS_EAX, %eax
        movl    $MRST_C6_HINTS_ECX, %ecx
-       movl    %esi, (%edi)            /* Set the PM_CMD register for S0i3 */
        mwait
 
        /* If MWAIT wakes us up, assume something happened... */
        movl    %ebp, %esp
 
-       lock
-       incl    mrst_s0i3_fail_ctr
-
        xorl    %eax, %eax              /* Not really S0i3 */
        popl    %ebx
        popl    %ebp
@@ -118,9 +115,6 @@ ENDPROC(mrst_s0i3_entry)
 ENTRY(mrst_s0i3_resume)
        cli
 
-       lock
-       incl    pa(mrst_s0i3_exit_ctr)
-
        movl    pa(mrst_s0i3_resume_stack), %esp
        popfl
        lgdtl   2(%esp)                 /* Physical GDT pointer */
@@ -129,7 +123,6 @@ ENTRY(mrst_s0i3_resume)
        movl    $(__KERNEL_DS), %eax
        movl    %eax, %ss
        movl    %eax, %ds
-       movl    %eax, %es
 
        popl    %eax
        popl    %edx
@@ -166,6 +159,8 @@ ENTRY(mrst_s0i3_resume)
        andb    $~0x02, (GDT_ENTRY_TSS*8+5)(%ebx)
        ltr     %ax                     /* Set the TSS */
 
+       popl    %ds
+       popl    %es
        popl    %fs
        popl    %gs
 
@@ -195,6 +190,12 @@ ENDPROC(mrst_s0i3_resume)
 ENTRY(s0i3_trampoline_data)
 r_base = .
        cli
+       movw    %cs, %ax
+       movw    %ax, %ss
+       movw    %ax, %ds
+       movw    %ax, %es
+       movw    %ax, %fs
+       movw    %ax, %gs
 
        /* Disable autodemote */
        movl    $MSR_NHM_SNB_PKG_CST_CFG_CTL, %ecx
@@ -227,12 +228,3 @@ ENTRY(s0i3_trampoline_data_end)
 mrst_s0i3_resume_stack:
        .space  4
 END(mrst_s0i3_resume_stack)
-mrst_s0i3_entry_ctr:
-       .space  4
-END(mrst_s0i3_entry_ctr)
-mrst_s0i3_exit_ctr:
-       .space  4
-END(mrst_s0i3_exit_ctr)
-mrst_s0i3_fail_ctr:
-       .space  4
-END(mrst_s0i3_fail_ctr)