Merge branch 'timers/posix-cpu-timers-for-tglx' of

author Thomas Gleixner <tglx@linutronix.de>
Thu, 4 Jul 2013 21:11:22 +0000 (23:11 +0200)
committer Thomas Gleixner <tglx@linutronix.de>
Thu, 4 Jul 2013 21:11:22 +0000 (23:11 +0200)
diff --combined arch/arm/Kconfig

index 53d3a35,136f263..b02e6bb
--- 1/arch/arm/Kconfig
--- 2/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@@ -14,7 -14,6 +14,7 @@@ config AR
         select GENERIC_IRQ_PROBE
         select GENERIC_IRQ_SHOW
         select GENERIC_PCI_IOMAP
+ +      select GENERIC_SCHED_CLOCK
         select GENERIC_SMP_IDLE_THREAD
         select GENERIC_IDLE_POLL_SETUP
         select GENERIC_STRNCPY_FROM_USER
@@@ -1088,6 -1087,20 +1088,20 @@@ if !MM
   source "arch/arm/Kconfig-nommu"
   endif
   
+ config PJ4B_ERRATA_4742
+       bool "PJ4B Errata 4742: IDLE Wake Up Commands can Cause the CPU Core to Cease Operation"
+       depends on CPU_PJ4B && MACH_ARMADA_370
+       default y
+       help
+         When coming out of either a Wait for Interrupt (WFI) or a Wait for
+         Event (WFE) IDLE states, a specific timing sensitivity exists between
+         the retiring WFI/WFE instructions and the newly issued subsequent
+         instructions.  This sensitivity can result in a CPU hang scenario.
+         Workaround:
+         The software must insert either a Data Synchronization Barrier (DSB)
+         or Data Memory Barrier (DMB) command immediately after the WFI/WFE
+         instruction
+ 
   config ARM_ERRATA_326103
         bool "ARM errata: FSR write bit incorrect on a SWP to read-only memory"
         depends on CPU_V6
@@@ -1190,6 -1203,16 +1204,16 @@@ config PL310_ERRATA_58836
            is not correctly implemented in PL310 as clean lines are not
            invalidated as a result of these operations.
   
+ config ARM_ERRATA_643719
+       bool "ARM errata: LoUIS bit field in CLIDR register is incorrect"
+       depends on CPU_V7 && SMP
+       help
+         This option enables the workaround for the 643719 Cortex-A9 (prior to
+         r1p0) erratum. On affected cores the LoUIS bit field of the CLIDR
+         register returns zero when it should return one. The workaround
+         corrects this value, ensuring cache maintenance operations which use
+         it behave as intended and avoiding data corruption.
+ 
   config ARM_ERRATA_720789
         bool "ARM errata: TLBIASIDIS and TLBIMVAIS operations can broadcast a faulty ASID"
         depends on CPU_V7
@@@ -2007,7 -2030,7 +2031,7 @@@ config XIP_PHYS_ADD
   
   config KEXEC
         bool "Kexec system call (EXPERIMENTAL)"
-       depends on (!SMP || HOTPLUG_CPU)
+       depends on (!SMP || PM_SLEEP_SMP)
         help
           kexec is a system call that implements the ability to shutdown your
           current kernel, and to start another kernel.  It is like a reboot
diff --combined arch/x86/kernel/kvmclock.c

index 0db81ab,3dd37eb..1f354f4
--- 1/arch/x86/kernel/kvmclock.c
--- 2/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@@ -48,9 -48,10 +48,9 @@@ static struct pvclock_wall_clock wall_c
    * have elapsed since the hypervisor wrote the data. So we try to account for
    * that with system time
    */
- -static unsigned long kvm_get_wallclock(void)
+ +static void kvm_get_wallclock(struct timespec *now)
   {
         struct pvclock_vcpu_time_info *vcpu_time;
- -      struct timespec ts;
         int low, high;
         int cpu;
   
@@@ -63,12 -64,14 +63,12 @@@
         cpu = smp_processor_id();
   
         vcpu_time = &hv_clock[cpu].pvti;
- -      pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+ +      pvclock_read_wallclock(&wall_clock, vcpu_time, now);
   
         preempt_enable();
- -
- -      return ts.tv_sec;
   }
   
- -static int kvm_set_wallclock(unsigned long now)
+ +static int kvm_set_wallclock(const struct timespec *now)
   {
         return -1;
   }
@@@ -239,6 -242,7 +239,7 @@@ void __init kvmclock_init(void
         if (!mem)
                 return;
         hv_clock = __va(mem);
+       memset(hv_clock, 0, size);
   
         if (kvm_register_clock("boot clock")) {
                 hv_clock = NULL;
diff --combined arch/x86/platform/efi/efi.c

index dd3b825,d2fbced..90f6ed1
--- 1/arch/x86/platform/efi/efi.c
--- 2/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@@ -42,7 -42,6 +42,6 @@@
   #include <linux/io.h>
   #include <linux/reboot.h>
   #include <linux/bcd.h>
- #include <linux/ucs2_string.h>
   
   #include <asm/setup.h>
   #include <asm/efi.h>
@@@ -54,12 -53,12 +53,12 @@@
   
   #define EFI_DEBUG     1
   
- /*
-  * There's some additional metadata associated with each
-  * variable. Intel's reference implementation is 60 bytes - bump that
-  * to account for potential alignment constraints
-  */
- #define VAR_METADATA_SIZE 64
+ #define EFI_MIN_RESERVE 5120
+ 
+ #define EFI_DUMMY_GUID \
+       EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9)
+ 
+ static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 };
   
   struct efi __read_mostly efi = {
         .mps        = EFI_INVALID_TABLE_ADDR,
@@@ -79,13 -78,6 +78,6 @@@ struct efi_memory_map memmap
   static struct efi efi_phys __initdata;
   static efi_system_table_t efi_systab __initdata;
   
- static u64 efi_var_store_size;
- static u64 efi_var_remaining_size;
- static u64 efi_var_max_var_size;
- static u64 boot_used_size;
- static u64 boot_var_size;
- static u64 active_size;
- 
   unsigned long x86_efi_facility;
   
   /*
@@@ -188,53 -180,8 +180,8 @@@ static efi_status_t virt_efi_get_next_v
                                                efi_char16_t *name,
                                                efi_guid_t *vendor)
   {
-       efi_status_t status;
-       static bool finished = false;
-       static u64 var_size;
- 
-       status = efi_call_virt3(get_next_variable,
-                               name_size, name, vendor);
- 
-       if (status == EFI_NOT_FOUND) {
-               finished = true;
-               if (var_size < boot_used_size) {
-                       boot_var_size = boot_used_size - var_size;
-                       active_size += boot_var_size;
-               } else {
-                       printk(KERN_WARNING FW_BUG  "efi: Inconsistent initial sizes\n");
-               }
-       }
- 
-       if (boot_used_size && !finished) {
-               unsigned long size;
-               u32 attr;
-               efi_status_t s;
-               void *tmp;
- 
-               s = virt_efi_get_variable(name, vendor, &attr, &size, NULL);
- 
-               if (s != EFI_BUFFER_TOO_SMALL || !size)
-                       return status;
- 
-               tmp = kmalloc(size, GFP_ATOMIC);
- 
-               if (!tmp)
-                       return status;
- 
-               s = virt_efi_get_variable(name, vendor, &attr, &size, tmp);
- 
-               if (s == EFI_SUCCESS && (attr & EFI_VARIABLE_NON_VOLATILE)) {
-                       var_size += size;
-                       var_size += ucs2_strsize(name, 1024);
-                       active_size += size;
-                       active_size += VAR_METADATA_SIZE;
-                       active_size += ucs2_strsize(name, 1024);
-               }
- 
-               kfree(tmp);
-       }
- 
-       return status;
+       return efi_call_virt3(get_next_variable,
+                             name_size, name, vendor);
   }
   
   static efi_status_t virt_efi_set_variable(efi_char16_t *name,
@@@ -243,34 -190,9 +190,9 @@@
                                           unsigned long data_size,
                                           void *data)
   {
-       efi_status_t status;
-       u32 orig_attr = 0;
-       unsigned long orig_size = 0;
- 
-       status = virt_efi_get_variable(name, vendor, &orig_attr, &orig_size,
-                                      NULL);
- 
-       if (status != EFI_BUFFER_TOO_SMALL)
-               orig_size = 0;
- 
-       status = efi_call_virt5(set_variable,
-                               name, vendor, attr,
-                               data_size, data);
- 
-       if (status == EFI_SUCCESS) {
-               if (orig_size) {
-                       active_size -= orig_size;
-                       active_size -= ucs2_strsize(name, 1024);
-                       active_size -= VAR_METADATA_SIZE;
-               }
-               if (data_size) {
-                       active_size += data_size;
-                       active_size += ucs2_strsize(name, 1024);
-                       active_size += VAR_METADATA_SIZE;
-               }
-       }
- 
-       return status;
+       return efi_call_virt5(set_variable,
+                             name, vendor, attr,
+                             data_size, data);
   }
   
   static efi_status_t virt_efi_query_variable_info(u32 attr,
@@@ -352,9 -274,8 +274,9 @@@ static efi_status_t __init phys_efi_get
         return status;
   }
   
- -int efi_set_rtc_mmss(unsigned long nowtime)
+ +int efi_set_rtc_mmss(const struct timespec *now)
   {
+ +      unsigned long nowtime = now->tv_sec;
         efi_status_t    status;
         efi_time_t      eft;
         efi_time_cap_t  cap;
@@@ -389,7 -310,7 +311,7 @@@
         return 0;
   }
   
- -unsigned long efi_get_time(void)
+ +void efi_get_time(struct timespec *now)
   {
         efi_status_t status;
         efi_time_t eft;
@@@ -399,9 -320,8 +321,9 @@@
         if (status != EFI_SUCCESS)
                 pr_err("Oops: efitime: can't read time!\n");
   
- -      return mktime(eft.year, eft.month, eft.day, eft.hour,
- -                    eft.minute, eft.second);
+ +      now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour,
+ +                           eft.minute, eft.second);
+ +      now->tv_nsec = 0;
   }
   
   /*
@@@ -788,9 -708,6 +710,6 @@@ void __init efi_init(void
         char vendor[100] = "unknown";
         int i = 0;
         void *tmp;
-       struct setup_data *data;
-       struct efi_var_bootdata *efi_var_data;
-       u64 pa_data;
   
   #ifdef CONFIG_X86_32
         if (boot_params.efi_info.efi_systab_hi ||
@@@ -808,22 -725,6 +727,6 @@@
         if (efi_systab_init(efi_phys.systab))
                 return;
   
-       pa_data = boot_params.hdr.setup_data;
-       while (pa_data) {
-               data = early_ioremap(pa_data, sizeof(*efi_var_data));
-               if (data->type == SETUP_EFI_VARS) {
-                       efi_var_data = (struct efi_var_bootdata *)data;
- 
-                       efi_var_store_size = efi_var_data->store_size;
-                       efi_var_remaining_size = efi_var_data->remaining_size;
-                       efi_var_max_var_size = efi_var_data->max_var_size;
-               }
-               pa_data = data->next;
-               early_iounmap(data, sizeof(*efi_var_data));
-       }
- 
-       boot_used_size = efi_var_store_size - efi_var_remaining_size;
- 
         set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
   
         /*
@@@ -1087,6 -988,13 +990,13 @@@ void __init efi_enter_virtual_mode(void
                 runtime_code_page_mkexec();
   
         kfree(new_memmap);
+ 
+       /* clean DUMMY object */
+       efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
+                        EFI_VARIABLE_NON_VOLATILE |
+                        EFI_VARIABLE_BOOTSERVICE_ACCESS |
+                        EFI_VARIABLE_RUNTIME_ACCESS,
+                        0, NULL);
   }
   
   /*
@@@ -1138,33 -1046,70 +1048,70 @@@ efi_status_t efi_query_variable_store(u
         efi_status_t status;
         u64 storage_size, remaining_size, max_size;
   
+       if (!(attributes & EFI_VARIABLE_NON_VOLATILE))
+               return 0;
+ 
         status = efi.query_variable_info(attributes, &storage_size,
                                          &remaining_size, &max_size);
         if (status != EFI_SUCCESS)
                 return status;
   
-       if (!max_size && remaining_size > size)
-               printk_once(KERN_ERR FW_BUG "Broken EFI implementation"
-                           " is returning MaxVariableSize=0\n");
         /*
          * Some firmware implementations refuse to boot if there's insufficient
          * space in the variable store. We account for that by refusing the
          * write if permitting it would reduce the available space to under
-        * 50%. However, some firmware won't reclaim variable space until
-        * after the used (not merely the actively used) space drops below
-        * a threshold. We can approximate that case with the value calculated
-        * above. If both the firmware and our calculations indicate that the
-        * available space would drop below 50%, refuse the write.
+        * 5KB. This figure was provided by Samsung, so should be safe.
          */
+       if ((remaining_size - size < EFI_MIN_RESERVE) &&
+               !efi_no_storage_paranoia) {
+ 
+               /*
+                * Triggering garbage collection may require that the firmware
+                * generate a real EFI_OUT_OF_RESOURCES error. We can force
+                * that by attempting to use more space than is available.
+                */
+               unsigned long dummy_size = remaining_size + 1024;
+               void *dummy = kzalloc(dummy_size, GFP_ATOMIC);
+ 
+               if (!dummy)
+                       return EFI_OUT_OF_RESOURCES;
+ 
+               status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
+                                         EFI_VARIABLE_NON_VOLATILE |
+                                         EFI_VARIABLE_BOOTSERVICE_ACCESS |
+                                         EFI_VARIABLE_RUNTIME_ACCESS,
+                                         dummy_size, dummy);
+ 
+               if (status == EFI_SUCCESS) {
+                       /*
+                        * This should have failed, so if it didn't make sure
+                        * that we delete it...
+                        */
+                       efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
+                                        EFI_VARIABLE_NON_VOLATILE |
+                                        EFI_VARIABLE_BOOTSERVICE_ACCESS |
+                                        EFI_VARIABLE_RUNTIME_ACCESS,
+                                        0, dummy);
+               }
+ 
+               kfree(dummy);
   
-       if (!storage_size || size > remaining_size ||
-           (max_size && size > max_size))
-               return EFI_OUT_OF_RESOURCES;
+               /*
+                * The runtime code may now have triggered a garbage collection
+                * run, so check the variable info again
+                */
+               status = efi.query_variable_info(attributes, &storage_size,
+                                                &remaining_size, &max_size);
   
-       if (!efi_no_storage_paranoia &&
-           ((active_size + size + VAR_METADATA_SIZE > storage_size / 2) &&
-            (remaining_size - size < storage_size / 2)))
-               return EFI_OUT_OF_RESOURCES;
+               if (status != EFI_SUCCESS)
+                       return status;
+ 
+               /*
+                * There still isn't enough room, so return an error
+                */
+               if (remaining_size - size < EFI_MIN_RESERVE)
+                       return EFI_OUT_OF_RESOURCES;
+       }
   
         return EFI_SUCCESS;
   }
diff --combined init/Kconfig

index 1a3f933,2d9b831..68174a5
--- 1/init/Kconfig
--- 2/init/Kconfig
+++ b/init/Kconfig
@@@ -431,6 -431,7 +431,7 @@@ choic
   config TREE_RCU
         bool "Tree-based hierarchical RCU"
         depends on !PREEMPT && SMP
+       select IRQ_WORK
         help
           This option selects the RCU implementation that is
           designed for very large SMP system with hundreds or
@@@ -757,9 -758,6 +758,9 @@@ config LOG_BUF_SHIF
   config HAVE_UNSTABLE_SCHED_CLOCK
         bool
   
+ +config GENERIC_SCHED_CLOCK
+ +      bool
+ +
   #
   # For architectures that want to enable the support for NUMA-affine scheduler
   # balancing logic:
diff --combined kernel/time/tick-broadcast.c

index 4430fa6,20d6fba..6d3f916
--- 1/kernel/time/tick-broadcast.c
--- 2/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@@ -19,7 -19,6 +19,7 @@@
   #include <linux/profile.h>
   #include <linux/sched.h>
   #include <linux/smp.h>
+ +#include <linux/module.h>
   
   #include "tick-internal.h"
   
@@@ -30,7 -29,6 +30,7 @@@
   
   static struct tick_device tick_broadcast_device;
   static cpumask_var_t tick_broadcast_mask;
+ +static cpumask_var_t tick_broadcast_on;
   static cpumask_var_t tmpmask;
   static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
   static int tick_broadcast_force;
@@@ -66,34 -64,17 +66,34 @@@ static void tick_broadcast_start_period
   /*
    * Check, if the device can be utilized as broadcast device:
    */
- -int tick_check_broadcast_device(struct clock_event_device *dev)
+ +static bool tick_check_broadcast_device(struct clock_event_device *curdev,
+ +                                      struct clock_event_device *newdev)
+ +{
+ +      if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
+ +          (newdev->features & CLOCK_EVT_FEAT_C3STOP))
+ +              return false;
+ +
+ +      if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
+ +          !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
+ +              return false;
+ +
+ +      return !curdev || newdev->rating > curdev->rating;
+ +}
+ +
+ +/*
+ + * Conditionally install/replace broadcast device
+ + */
+ +void tick_install_broadcast_device(struct clock_event_device *dev)
   {
         struct clock_event_device *cur = tick_broadcast_device.evtdev;
   
- -      if ((dev->features & CLOCK_EVT_FEAT_DUMMY) ||
- -          (tick_broadcast_device.evtdev &&
- -           tick_broadcast_device.evtdev->rating >= dev->rating) ||
- -           (dev->features & CLOCK_EVT_FEAT_C3STOP))
- -              return 0;
+ +      if (!tick_check_broadcast_device(cur, dev))
+ +              return;
   
- -      clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
+ +      if (!try_module_get(dev->owner))
+ +              return;
+ +
+ +      clockevents_exchange_device(cur, dev);
         if (cur)
                 cur->event_handler = clockevents_handle_noop;
         tick_broadcast_device.evtdev = dev;
@@@ -109,6 -90,7 +109,6 @@@
          */
         if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
                 tick_clock_notify();
- -      return 1;
   }
   
   /*
@@@ -141,9 -123,8 +141,9 @@@ static void tick_device_setup_broadcast
    */
   int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
   {
+ +      struct clock_event_device *bc = tick_broadcast_device.evtdev;
         unsigned long flags;
- -      int ret = 0;
+ +      int ret;
   
         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
   
@@@ -157,59 -138,20 +157,59 @@@
                 dev->event_handler = tick_handle_periodic;
                 tick_device_setup_broadcast_func(dev);
                 cpumask_set_cpu(cpu, tick_broadcast_mask);
- -              tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
+ +              tick_broadcast_start_periodic(bc);
                 ret = 1;
         } else {
                 /*
- -               * When the new device is not affected by the stop
- -               * feature and the cpu is marked in the broadcast mask
- -               * then clear the broadcast bit.
+ +               * Clear the broadcast bit for this cpu if the
+ +               * device is not power state affected.
                  */
- -              if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
- -                      int cpu = smp_processor_id();
+ +              if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
                         cpumask_clear_cpu(cpu, tick_broadcast_mask);
- -                      tick_broadcast_clear_oneshot(cpu);
- -              } else {
+ +              else
                         tick_device_setup_broadcast_func(dev);
+ +
+ +              /*
+ +               * Clear the broadcast bit if the CPU is not in
+ +               * periodic broadcast on state.
+ +               */
+ +              if (!cpumask_test_cpu(cpu, tick_broadcast_on))
+ +                      cpumask_clear_cpu(cpu, tick_broadcast_mask);
+ +
+ +              switch (tick_broadcast_device.mode) {
+ +              case TICKDEV_MODE_ONESHOT:
+ +                      /*
+ +                       * If the system is in oneshot mode we can
+ +                       * unconditionally clear the oneshot mask bit,
+ +                       * because the CPU is running and therefore
+ +                       * not in an idle state which causes the power
+ +                       * state affected device to stop. Let the
+ +                       * caller initialize the device.
+ +                       */
+ +                      tick_broadcast_clear_oneshot(cpu);
+ +                      ret = 0;
+ +                      break;
+ +
+ +              case TICKDEV_MODE_PERIODIC:
+ +                      /*
+ +                       * If the system is in periodic mode, check
+ +                       * whether the broadcast device can be
+ +                       * switched off now.
+ +                       */
+ +                      if (cpumask_empty(tick_broadcast_mask) && bc)
+ +                              clockevents_shutdown(bc);
+ +                      /*
+ +                       * If we kept the cpu in the broadcast mask,
+ +                       * tell the caller to leave the per cpu device
+ +                       * in shutdown state. The periodic interrupt
+ +                       * is delivered by the broadcast device.
+ +                       */
+ +                      ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
+ +                      break;
+ +              default:
+ +                      /* Nothing to do */
+ +                      ret = 0;
+ +                      break;
                 }
         }
         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
@@@ -339,7 -281,6 +339,7 @@@ static void tick_do_broadcast_on_off(un
         switch (*reason) {
         case CLOCK_EVT_NOTIFY_BROADCAST_ON:
         case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
+ +              cpumask_set_cpu(cpu, tick_broadcast_on);
                 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
                         if (tick_broadcast_device.mode ==
                             TICKDEV_MODE_PERIODIC)
@@@ -349,12 -290,8 +349,12 @@@
                         tick_broadcast_force = 1;
                 break;
         case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
- -              if (!tick_broadcast_force &&
- -                  cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
+ +              if (tick_broadcast_force)
+ +                      break;
+ +              cpumask_clear_cpu(cpu, tick_broadcast_on);
+ +              if (!tick_device_is_functional(dev))
+ +                      break;
+ +              if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
                         if (tick_broadcast_device.mode ==
                             TICKDEV_MODE_PERIODIC)
                                 tick_setup_periodic(dev, 0);
@@@ -412,7 -349,6 +412,7 @@@ void tick_shutdown_broadcast(unsigned i
   
         bc = tick_broadcast_device.evtdev;
         cpumask_clear_cpu(cpu, tick_broadcast_mask);
+ +      cpumask_clear_cpu(cpu, tick_broadcast_on);
   
         if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
                 if (bc && cpumask_empty(tick_broadcast_mask))
@@@ -539,15 -475,7 +539,15 @@@ void tick_check_oneshot_broadcast(int c
         if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
                 struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
   
- -              clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
+ +              /*
+ +               * We might be in the middle of switching over from
+ +               * periodic to oneshot. If the CPU has not yet
+ +               * switched over, leave the device alone.
+ +               */
+ +              if (td->mode == TICKDEV_MODE_ONESHOT) {
+ +                      clockevents_set_mode(td->evtdev,
+ +                                           CLOCK_EVT_MODE_ONESHOT);
+ +              }
         }
   }
   
@@@ -583,18 -511,17 +583,24 @@@ again
                 }
         }
   
+       /*
+        * Remove the current cpu from the pending mask. The event is
+        * delivered immediately in tick_do_broadcast() !
+        */
+       cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);
+ 
         /* Take care of enforced broadcast requests */
         cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
         cpumask_clear(tick_broadcast_force_mask);
   
         /*
+ +       * Sanity check. Catch the case where we try to broadcast to
+ +       * offline cpus.
+ +       */
+ +      if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
+ +              cpumask_and(tmpmask, tmpmask, cpu_online_mask);
+ +
+ +      /*
          * Wakeup the cpus which have an expired event.
          */
         tick_do_broadcast(tmpmask);
@@@ -654,8 -581,8 +660,8 @@@ void tick_broadcast_oneshot_control(uns
   
         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
         if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
-               WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
                 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
+                       WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
                         clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
                         /*
                          * We only reprogram the broadcast timer if we
@@@ -672,8 -599,6 +678,6 @@@
         } else {
                 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
                         clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
-                       if (dev->next_event.tv64 == KTIME_MAX)
-                               goto out;
                         /*
                          * The cpu which was handling the broadcast
                          * timer marked this cpu in the broadcast
@@@ -688,6 -613,11 +692,11 @@@
                                 goto out;
   
                         /*
+                        * Bail out if there is no next event.
+                        */
+                       if (dev->next_event.tv64 == KTIME_MAX)
+                               goto out;
+                       /*
                          * If the pending bit is not set, then we are
                          * either the CPU handling the broadcast
                          * interrupt or we got woken by something else.
@@@ -771,10 -701,6 +780,6 @@@ void tick_broadcast_setup_oneshot(struc
   
                 bc->event_handler = tick_handle_oneshot_broadcast;
   
-               /* Take the do_timer update */
-               if (!tick_nohz_full_cpu(cpu))
-                       tick_do_timer_cpu = cpu;
- 
                 /*
                  * We must be careful here. There might be other CPUs
                  * waiting for periodic broadcast. We need to set the
@@@ -835,12 -761,10 +840,12 @@@ void tick_shutdown_broadcast_oneshot(un
         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
   
         /*
- -       * Clear the broadcast mask flag for the dead cpu, but do not
- -       * stop the broadcast device!
+ +       * Clear the broadcast masks for the dead cpu, but do not stop
+ +       * the broadcast device!
          */
         cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
+ +      cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
+ +      cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
   
         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
   }
@@@ -868,7 -792,6 +873,7 @@@ bool tick_broadcast_oneshot_available(v
   void __init tick_broadcast_init(void)
   {
         zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
+ +      zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
         zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
   #ifdef CONFIG_TICK_ONESHOT
         zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
diff --combined kernel/time/timekeeping.c

index 846d0a1,baeeb5c..48b9fff
--- 1/kernel/time/timekeeping.c
--- 2/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@@ -25,11 -25,6 +25,11 @@@
   
   #include "tick-internal.h"
   #include "ntp_internal.h"
+ +#include "timekeeping_internal.h"
+ +
+ +#define TK_CLEAR_NTP          (1 << 0)
+ +#define TK_MIRROR             (1 << 1)
+ +#define TK_CLOCK_WAS_SET      (1 << 2)
   
   static struct timekeeper timekeeper;
   static DEFINE_RAW_SPINLOCK(timekeeper_lock);
@@@ -205,9 -200,9 +205,9 @@@ static inline s64 timekeeping_get_ns_ra
   
   static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
   
- -static void update_pvclock_gtod(struct timekeeper *tk)
+ +static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
   {
- -      raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk);
+ +      raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk);
   }
   
   /**
@@@ -221,7 -216,7 +221,7 @@@ int pvclock_gtod_register_notifier(stru
   
         raw_spin_lock_irqsave(&timekeeper_lock, flags);
         ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
- -      update_pvclock_gtod(tk);
+ +      update_pvclock_gtod(tk, true);
         raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
   
         return ret;
@@@ -246,16 -241,16 +246,16 @@@ int pvclock_gtod_unregister_notifier(st
   EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
   
   /* must hold timekeeper_lock */
- -static void timekeeping_update(struct timekeeper *tk, bool clearntp, bool mirror)
+ +static void timekeeping_update(struct timekeeper *tk, unsigned int action)
   {
- -      if (clearntp) {
+ +      if (action & TK_CLEAR_NTP) {
                 tk->ntp_error = 0;
                 ntp_clear();
         }
         update_vsyscall(tk);
- -      update_pvclock_gtod(tk);
+ +      update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
   
- -      if (mirror)
+ +      if (action & TK_MIRROR)
                 memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
   }
   
@@@ -513,7 -508,7 +513,7 @@@ int do_settimeofday(const struct timesp
   
         tk_set_xtime(tk, tv);
   
- -      timekeeping_update(tk, true, true);
+ +      timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
   
         write_seqcount_end(&timekeeper_seq);
         raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@@ -557,7 -552,7 +557,7 @@@ int timekeeping_inject_offset(struct ti
         tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
   
   error: /* even if we error out, we forwarded the time, so call update */
- -      timekeeping_update(tk, true, true);
+ +      timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
   
         write_seqcount_end(&timekeeper_seq);
         raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@@ -632,22 -627,13 +632,22 @@@ static int change_clocksource(void *dat
         write_seqcount_begin(&timekeeper_seq);
   
         timekeeping_forward_now(tk);
- -      if (!new->enable || new->enable(new) == 0) {
- -              old = tk->clock;
- -              tk_setup_internals(tk, new);
- -              if (old->disable)
- -                      old->disable(old);
+ +      /*
+ +       * If the cs is in module, get a module reference. Succeeds
+ +       * for built-in code (owner == NULL) as well.
+ +       */
+ +      if (try_module_get(new->owner)) {
+ +              if (!new->enable || new->enable(new) == 0) {
+ +                      old = tk->clock;
+ +                      tk_setup_internals(tk, new);
+ +                      if (old->disable)
+ +                              old->disable(old);
+ +                      module_put(old->owner);
+ +              } else {
+ +                      module_put(new->owner);
+ +              }
         }
- -      timekeeping_update(tk, true, true);
+ +      timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
   
         write_seqcount_end(&timekeeper_seq);
         raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@@ -662,15 -648,14 +662,15 @@@
    * This function is called from clocksource.c after a new, better clock
    * source has been registered. The caller holds the clocksource_mutex.
    */
- -void timekeeping_notify(struct clocksource *clock)
+ +int timekeeping_notify(struct clocksource *clock)
   {
         struct timekeeper *tk = &timekeeper;
   
         if (tk->clock == clock)
- -              return;
+ +              return 0;
         stop_machine(change_clocksource, clock, NULL);
         tick_clock_notify();
+ +      return tk->clock == clock ? 0 : -1;
   }
   
   /**
@@@ -856,7 -841,6 +856,7 @@@ static void __timekeeping_inject_sleept
         tk_xtime_add(tk, delta);
         tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta));
         tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta));
+ +      tk_debug_account_sleep_time(delta);
   }
   
   /**
@@@ -888,7 -872,7 +888,7 @@@ void timekeeping_inject_sleeptime(struc
   
         __timekeeping_inject_sleeptime(tk, delta);
   
- -      timekeeping_update(tk, true, true);
+ +      timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
   
         write_seqcount_end(&timekeeper_seq);
         raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@@ -970,7 -954,7 +970,7 @@@ static void timekeeping_resume(void
         tk->cycle_last = clock->cycle_last = cycle_now;
         tk->ntp_error = 0;
         timekeeping_suspended = 0;
- -      timekeeping_update(tk, false, true);
+ +      timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
         write_seqcount_end(&timekeeper_seq);
         raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
   
@@@ -991,6 -975,14 +991,14 @@@ static int timekeeping_suspend(void
   
         read_persistent_clock(&timekeeping_suspend_time);
   
+       /*
+        * On some systems the persistent_clock cannot be detected at
+        * timekeeping_init by its return value, so if we see a valid
+        * value returned, update the persistent_clock_exist flag.
+        */
+       if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
+               persistent_clock_exist = true;
+ 
         raw_spin_lock_irqsave(&timekeeper_lock, flags);
         write_seqcount_begin(&timekeeper_seq);
         timekeeping_forward_now(tk);
@@@ -1244,10 -1236,9 +1252,10 @@@ out_adjust
    * It also calls into the NTP code to handle leapsecond processing.
    *
    */
- -static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
+ +static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
   {
         u64 nsecps = (u64)NSEC_PER_SEC << tk->shift;
+ +      unsigned int action = 0;
   
         while (tk->xtime_nsec >= nsecps) {
                 int leap;
@@@ -1270,10 -1261,8 +1278,10 @@@
                         __timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
   
                         clock_was_set_delayed();
+ +                      action = TK_CLOCK_WAS_SET;
                 }
         }
+ +      return action;
   }
   
   /**
@@@ -1358,7 -1347,6 +1366,7 @@@ static void update_wall_time(void
         struct timekeeper *tk = &shadow_timekeeper;
         cycle_t offset;
         int shift = 0, maxshift;
+ +      unsigned int action;
         unsigned long flags;
   
         raw_spin_lock_irqsave(&timekeeper_lock, flags);
@@@ -1411,7 -1399,7 +1419,7 @@@
          * Finally, make sure that after the rounding
          * xtime_nsec isn't larger than NSEC_PER_SEC
          */
- -      accumulate_nsecs_to_secs(tk);
+ +      action = accumulate_nsecs_to_secs(tk);
   
         write_seqcount_begin(&timekeeper_seq);
         /* Update clock->cycle_last with the new value */
@@@ -1427,7 -1415,7 +1435,7 @@@
          * updating.
          */
         memcpy(real_tk, tk, sizeof(*tk));
- -      timekeeping_update(real_tk, false, false);
+ +      timekeeping_update(real_tk, action);
         write_seqcount_end(&timekeeper_seq);
   out:
         raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@@ -1689,7 -1677,6 +1697,7 @@@ int do_adjtimex(struct timex *txc
   
         if (tai != orig_tai) {
                 __timekeeping_set_tai_offset(tk, tai);
+ +              update_pvclock_gtod(tk, true);
                 clock_was_set_delayed();
         }
         write_seqcount_end(&timekeeper_seq);
author	Thomas Gleixner <tglx@linutronix.de>
	Thu, 4 Jul 2013 21:11:22 +0000 (23:11 +0200)
committer	Thomas Gleixner <tglx@linutronix.de>
	Thu, 4 Jul 2013 21:11:22 +0000 (23:11 +0200)
		1	2
arch/arm/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/kvmclock.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/platform/efi/efi.c	patch \|	diff1 \|	diff2 \|	blob \| history
init/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/time/tick-broadcast.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/time/timekeeping.c	patch \|	diff1 \|	diff2 \|	blob \| history