Merge tag 'virtio-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[platform/adaptation/renesas_rcar/renesas_kernel.git] / kernel / time / tick-broadcast.c
1 /*
2  * linux/kernel/time/tick-broadcast.c
3  *
4  * This file contains functions which emulate a local clock-event
5  * device via a broadcast event source.
6  *
7  * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
8  * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
9  * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
10  *
11  * This code is licenced under the GPL version 2. For details see
12  * kernel-base/COPYING.
13  */
14 #include <linux/cpu.h>
15 #include <linux/err.h>
16 #include <linux/hrtimer.h>
17 #include <linux/interrupt.h>
18 #include <linux/percpu.h>
19 #include <linux/profile.h>
20 #include <linux/sched.h>
21 #include <linux/smp.h>
22 #include <linux/module.h>
23
24 #include "tick-internal.h"
25
/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

/* The broadcast tick device. evtdev stays NULL until a device is installed. */
static struct tick_device tick_broadcast_device;
/* CPUs whose tick is currently delivered by the broadcast device */
static cpumask_var_t tick_broadcast_mask;
/* CPUs which are in the "periodic broadcast on" state */
static cpumask_var_t tick_broadcast_on;
/* Scratch mask, used by the broadcast code in this file */
static cpumask_var_t tmpmask;
/* Protects the broadcast device state and the cpumasks above */
static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
/*
 * Set once CLOCK_EVT_NOTIFY_BROADCAST_FORCE was requested. While set,
 * CLOCK_EVT_NOTIFY_BROADCAST_OFF requests are ignored.
 */
static int tick_broadcast_force;

/*
 * tick_broadcast_clear_oneshot() only does real work when oneshot
 * support is built in; otherwise it is an empty stub.
 */
#ifdef CONFIG_TICK_ONESHOT
static void tick_broadcast_clear_oneshot(int cpu);
#else
static inline void tick_broadcast_clear_oneshot(int cpu) { }
#endif
43
44 /*
45  * Debugging: see timer_list.c
46  */
47 struct tick_device *tick_get_broadcast_device(void)
48 {
49         return &tick_broadcast_device;
50 }
51
52 struct cpumask *tick_get_broadcast_mask(void)
53 {
54         return tick_broadcast_mask;
55 }
56
/*
 * Put the broadcast device into periodic mode. A NULL device is
 * silently ignored so callers do not have to check.
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (!bc)
		return;

	tick_setup_periodic(bc, 1);
}
65
66 /*
67  * Check, if the device can be utilized as broadcast device:
68  */
69 static bool tick_check_broadcast_device(struct clock_event_device *curdev,
70                                         struct clock_event_device *newdev)
71 {
72         if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
73             (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
74             (newdev->features & CLOCK_EVT_FEAT_C3STOP))
75                 return false;
76
77         if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
78             !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
79                 return false;
80
81         return !curdev || newdev->rating > curdev->rating;
82 }
83
84 /*
85  * Conditionally install/replace broadcast device
86  */
87 void tick_install_broadcast_device(struct clock_event_device *dev)
88 {
89         struct clock_event_device *cur = tick_broadcast_device.evtdev;
90
91         if (!tick_check_broadcast_device(cur, dev))
92                 return;
93
94         if (!try_module_get(dev->owner))
95                 return;
96
97         clockevents_exchange_device(cur, dev);
98         if (cur)
99                 cur->event_handler = clockevents_handle_noop;
100         tick_broadcast_device.evtdev = dev;
101         if (!cpumask_empty(tick_broadcast_mask))
102                 tick_broadcast_start_periodic(dev);
103         /*
104          * Inform all cpus about this. We might be in a situation
105          * where we did not switch to oneshot mode because the per cpu
106          * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
107          * of a oneshot capable broadcast device. Without that
108          * notification the systems stays stuck in periodic mode
109          * forever.
110          */
111         if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
112                 tick_clock_notify();
113 }
114
115 /*
116  * Check, if the device is the broadcast device
117  */
118 int tick_is_broadcast_device(struct clock_event_device *dev)
119 {
120         return (dev && tick_broadcast_device.evtdev == dev);
121 }
122
/*
 * Fallback broadcast function for devices which depend on broadcast
 * while no real broadcast function is available. It cannot deliver
 * anything, so it only reports the failure (once). @mask is unused.
 */
static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}
127
128 static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
129 {
130         if (!dev->broadcast)
131                 dev->broadcast = tick_broadcast;
132         if (!dev->broadcast) {
133                 pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
134                              dev->name);
135                 dev->broadcast = err_broadcast;
136         }
137 }
138
139 /*
140  * Check, if the device is disfunctional and a place holder, which
141  * needs to be handled by the broadcast device.
142  */
143 int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
144 {
145         struct clock_event_device *bc = tick_broadcast_device.evtdev;
146         unsigned long flags;
147         int ret;
148
149         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
150
151         /*
152          * Devices might be registered with both periodic and oneshot
153          * mode disabled. This signals, that the device needs to be
154          * operated from the broadcast device and is a placeholder for
155          * the cpu local device.
156          */
157         if (!tick_device_is_functional(dev)) {
158                 dev->event_handler = tick_handle_periodic;
159                 tick_device_setup_broadcast_func(dev);
160                 cpumask_set_cpu(cpu, tick_broadcast_mask);
161                 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
162                         tick_broadcast_start_periodic(bc);
163                 else
164                         tick_broadcast_setup_oneshot(bc);
165                 ret = 1;
166         } else {
167                 /*
168                  * Clear the broadcast bit for this cpu if the
169                  * device is not power state affected.
170                  */
171                 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
172                         cpumask_clear_cpu(cpu, tick_broadcast_mask);
173                 else
174                         tick_device_setup_broadcast_func(dev);
175
176                 /*
177                  * Clear the broadcast bit if the CPU is not in
178                  * periodic broadcast on state.
179                  */
180                 if (!cpumask_test_cpu(cpu, tick_broadcast_on))
181                         cpumask_clear_cpu(cpu, tick_broadcast_mask);
182
183                 switch (tick_broadcast_device.mode) {
184                 case TICKDEV_MODE_ONESHOT:
185                         /*
186                          * If the system is in oneshot mode we can
187                          * unconditionally clear the oneshot mask bit,
188                          * because the CPU is running and therefore
189                          * not in an idle state which causes the power
190                          * state affected device to stop. Let the
191                          * caller initialize the device.
192                          */
193                         tick_broadcast_clear_oneshot(cpu);
194                         ret = 0;
195                         break;
196
197                 case TICKDEV_MODE_PERIODIC:
198                         /*
199                          * If the system is in periodic mode, check
200                          * whether the broadcast device can be
201                          * switched off now.
202                          */
203                         if (cpumask_empty(tick_broadcast_mask) && bc)
204                                 clockevents_shutdown(bc);
205                         /*
206                          * If we kept the cpu in the broadcast mask,
207                          * tell the caller to leave the per cpu device
208                          * in shutdown state. The periodic interrupt
209                          * is delivered by the broadcast device.
210                          */
211                         ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
212                         break;
213                 default:
214                         /* Nothing to do */
215                         ret = 0;
216                         break;
217                 }
218         }
219         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
220         return ret;
221 }
222
223 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
224 int tick_receive_broadcast(void)
225 {
226         struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
227         struct clock_event_device *evt = td->evtdev;
228
229         if (!evt)
230                 return -ENODEV;
231
232         if (!evt->event_handler)
233                 return -EINVAL;
234
235         evt->event_handler(evt);
236         return 0;
237 }
238 #endif
239
240 /*
241  * Broadcast the event to the cpus, which are set in the mask (mangled).
242  */
243 static void tick_do_broadcast(struct cpumask *mask)
244 {
245         int cpu = smp_processor_id();
246         struct tick_device *td;
247
248         /*
249          * Check, if the current cpu is in the mask
250          */
251         if (cpumask_test_cpu(cpu, mask)) {
252                 cpumask_clear_cpu(cpu, mask);
253                 td = &per_cpu(tick_cpu_device, cpu);
254                 td->evtdev->event_handler(td->evtdev);
255         }
256
257         if (!cpumask_empty(mask)) {
258                 /*
259                  * It might be necessary to actually check whether the devices
260                  * have different broadcast functions. For now, just use the
261                  * one of the first device. This works as long as we have this
262                  * misfeature only on x86 (lapic)
263                  */
264                 td = &per_cpu(tick_cpu_device, cpumask_first(mask));
265                 td->evtdev->broadcast(mask);
266         }
267 }
268
269 /*
270  * Periodic broadcast:
271  * - invoke the broadcast handlers
272  */
273 static void tick_do_periodic_broadcast(void)
274 {
275         raw_spin_lock(&tick_broadcast_lock);
276
277         cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
278         tick_do_broadcast(tmpmask);
279
280         raw_spin_unlock(&tick_broadcast_lock);
281 }
282
283 /*
284  * Event handler for periodic broadcast ticks
285  */
286 static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
287 {
288         ktime_t next;
289
290         tick_do_periodic_broadcast();
291
292         /*
293          * The device is in periodic mode. No reprogramming necessary:
294          */
295         if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
296                 return;
297
298         /*
299          * Setup the next period for devices, which do not have
300          * periodic mode. We read dev->next_event first and add to it
301          * when the event already expired. clockevents_program_event()
302          * sets dev->next_event only when the event is really
303          * programmed to the device.
304          */
305         for (next = dev->next_event; ;) {
306                 next = ktime_add(next, tick_period);
307
308                 if (!clockevents_program_event(dev, next, false))
309                         return;
310                 tick_do_periodic_broadcast();
311         }
312 }
313
314 /*
315  * Powerstate information: The system enters/leaves a state, where
316  * affected devices might stop
317  */
318 static void tick_do_broadcast_on_off(unsigned long *reason)
319 {
320         struct clock_event_device *bc, *dev;
321         struct tick_device *td;
322         unsigned long flags;
323         int cpu, bc_stopped;
324
325         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
326
327         cpu = smp_processor_id();
328         td = &per_cpu(tick_cpu_device, cpu);
329         dev = td->evtdev;
330         bc = tick_broadcast_device.evtdev;
331
332         /*
333          * Is the device not affected by the powerstate ?
334          */
335         if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
336                 goto out;
337
338         if (!tick_device_is_functional(dev))
339                 goto out;
340
341         bc_stopped = cpumask_empty(tick_broadcast_mask);
342
343         switch (*reason) {
344         case CLOCK_EVT_NOTIFY_BROADCAST_ON:
345         case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
346                 cpumask_set_cpu(cpu, tick_broadcast_on);
347                 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
348                         if (tick_broadcast_device.mode ==
349                             TICKDEV_MODE_PERIODIC)
350                                 clockevents_shutdown(dev);
351                 }
352                 if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
353                         tick_broadcast_force = 1;
354                 break;
355         case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
356                 if (tick_broadcast_force)
357                         break;
358                 cpumask_clear_cpu(cpu, tick_broadcast_on);
359                 if (!tick_device_is_functional(dev))
360                         break;
361                 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
362                         if (tick_broadcast_device.mode ==
363                             TICKDEV_MODE_PERIODIC)
364                                 tick_setup_periodic(dev, 0);
365                 }
366                 break;
367         }
368
369         if (cpumask_empty(tick_broadcast_mask)) {
370                 if (!bc_stopped)
371                         clockevents_shutdown(bc);
372         } else if (bc_stopped) {
373                 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
374                         tick_broadcast_start_periodic(bc);
375                 else
376                         tick_broadcast_setup_oneshot(bc);
377         }
378 out:
379         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
380 }
381
382 /*
383  * Powerstate information: The system enters/leaves a state, where
384  * affected devices might stop.
385  */
386 void tick_broadcast_on_off(unsigned long reason, int *oncpu)
387 {
388         if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
389                 printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
390                        "offline CPU #%d\n", *oncpu);
391         else
392                 tick_do_broadcast_on_off(&reason);
393 }
394
395 /*
396  * Set the periodic handler depending on broadcast on/off
397  */
398 void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
399 {
400         if (!broadcast)
401                 dev->event_handler = tick_handle_periodic;
402         else
403                 dev->event_handler = tick_handle_periodic_broadcast;
404 }
405
406 /*
407  * Remove a CPU from broadcasting
408  */
409 void tick_shutdown_broadcast(unsigned int *cpup)
410 {
411         struct clock_event_device *bc;
412         unsigned long flags;
413         unsigned int cpu = *cpup;
414
415         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
416
417         bc = tick_broadcast_device.evtdev;
418         cpumask_clear_cpu(cpu, tick_broadcast_mask);
419         cpumask_clear_cpu(cpu, tick_broadcast_on);
420
421         if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
422                 if (bc && cpumask_empty(tick_broadcast_mask))
423                         clockevents_shutdown(bc);
424         }
425
426         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
427 }
428
429 void tick_suspend_broadcast(void)
430 {
431         struct clock_event_device *bc;
432         unsigned long flags;
433
434         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
435
436         bc = tick_broadcast_device.evtdev;
437         if (bc)
438                 clockevents_shutdown(bc);
439
440         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
441 }
442
443 int tick_resume_broadcast(void)
444 {
445         struct clock_event_device *bc;
446         unsigned long flags;
447         int broadcast = 0;
448
449         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
450
451         bc = tick_broadcast_device.evtdev;
452
453         if (bc) {
454                 clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
455
456                 switch (tick_broadcast_device.mode) {
457                 case TICKDEV_MODE_PERIODIC:
458                         if (!cpumask_empty(tick_broadcast_mask))
459                                 tick_broadcast_start_periodic(bc);
460                         broadcast = cpumask_test_cpu(smp_processor_id(),
461                                                      tick_broadcast_mask);
462                         break;
463                 case TICKDEV_MODE_ONESHOT:
464                         if (!cpumask_empty(tick_broadcast_mask))
465                                 broadcast = tick_resume_broadcast_oneshot(bc);
466                         break;
467                 }
468         }
469         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
470
471         return broadcast;
472 }
473
474
475 #ifdef CONFIG_TICK_ONESHOT
476
/* CPUs whose tick is currently delivered by the oneshot broadcast */
static cpumask_var_t tick_broadcast_oneshot_mask;
/* CPUs for which the broadcast handler found an expired event */
static cpumask_var_t tick_broadcast_pending_mask;
/* CPUs which get the next broadcast regardless of their expiry time */
static cpumask_var_t tick_broadcast_force_mask;
480
481 /*
482  * Exposed for debugging: see timer_list.c
483  */
484 struct cpumask *tick_get_broadcast_oneshot_mask(void)
485 {
486         return tick_broadcast_oneshot_mask;
487 }
488
489 /*
490  * Called before going idle with interrupts disabled. Checks whether a
491  * broadcast event from the other core is about to happen. We detected
492  * that in tick_broadcast_oneshot_control(). The callsite can use this
493  * to avoid a deep idle transition as we are about to get the
494  * broadcast IPI right away.
495  */
496 int tick_check_broadcast_expired(void)
497 {
498         return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
499 }
500
501 /*
502  * Set broadcast interrupt affinity
503  */
504 static void tick_broadcast_set_affinity(struct clock_event_device *bc,
505                                         const struct cpumask *cpumask)
506 {
507         if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
508                 return;
509
510         if (cpumask_equal(bc->cpumask, cpumask))
511                 return;
512
513         bc->cpumask = cpumask;
514         irq_set_affinity(bc->irq, bc->cpumask);
515 }
516
517 static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
518                                     ktime_t expires, int force)
519 {
520         int ret;
521
522         if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
523                 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
524
525         ret = clockevents_program_event(bc, expires, force);
526         if (!ret)
527                 tick_broadcast_set_affinity(bc, cpumask_of(cpu));
528         return ret;
529 }
530
531 int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
532 {
533         clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
534         return 0;
535 }
536
537 /*
538  * Called from irq_enter() when idle was interrupted to reenable the
539  * per cpu device.
540  */
541 void tick_check_oneshot_broadcast(int cpu)
542 {
543         if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
544                 struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
545
546                 /*
547                  * We might be in the middle of switching over from
548                  * periodic to oneshot. If the CPU has not yet
549                  * switched over, leave the device alone.
550                  */
551                 if (td->mode == TICKDEV_MODE_ONESHOT) {
552                         clockevents_set_mode(td->evtdev,
553                                              CLOCK_EVT_MODE_ONESHOT);
554                 }
555         }
556 }
557
/*
 * Handle oneshot mode broadcasting
 *
 * Event handler which tick_broadcast_setup_oneshot() installs on the
 * broadcast device. It collects every cpu in the oneshot mask whose
 * per cpu next_event has expired, adds the cpus from the force mask,
 * broadcasts to all of them and rearms the broadcast device for the
 * earliest event which has not expired yet.
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td;
	ktime_t now, next_event;
	int cpu, next_cpu = 0;

	raw_spin_lock(&tick_broadcast_lock);
again:
	/* Every pass starts with "nothing programmed" and an empty mask */
	dev->next_event.tv64 = KTIME_MAX;
	next_event.tv64 = KTIME_MAX;
	cpumask_clear(tmpmask);
	now = ktime_get();
	/* Find all expired events */
	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev->next_event.tv64 <= now.tv64) {
			cpumask_set_cpu(cpu, tmpmask);
			/*
			 * Mark the remote cpu in the pending mask, so
			 * it can avoid reprogramming the cpu local
			 * timer in tick_broadcast_oneshot_control().
			 */
			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
		} else if (td->evtdev->next_event.tv64 < next_event.tv64) {
			/* Track the earliest pending event and its cpu */
			next_event.tv64 = td->evtdev->next_event.tv64;
			next_cpu = cpu;
		}
	}

	/*
	 * Remove the current cpu from the pending mask. The event is
	 * delivered immediately in tick_do_broadcast() !
	 */
	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

	/* Take care of enforced broadcast requests */
	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
	cpumask_clear(tick_broadcast_force_mask);

	/*
	 * Sanity check. Catch the case where we try to broadcast to
	 * offline cpus.
	 */
	if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
		cpumask_and(tmpmask, tmpmask, cpu_online_mask);

	/*
	 * Wakeup the cpus which have an expired event.
	 */
	tick_do_broadcast(tmpmask);

	/*
	 * Two reasons for reprogram:
	 *
	 * - The global event did not expire any CPU local
	 * events. This happens in dyntick mode, as the maximum PIT
	 * delta is quite small.
	 *
	 * - There are pending events on sleeping CPUs which were not
	 * in the event mask
	 */
	if (next_event.tv64 != KTIME_MAX) {
		/*
		 * Rearm the broadcast device. If event expired,
		 * repeat the above
		 */
		if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
			goto again;
	}
	raw_spin_unlock(&tick_broadcast_lock);
}
632
/*
 * Powerstate information: The system enters/leaves a state, where
 * affected devices might stop
 *
 * @reason: CLOCK_EVT_NOTIFY_BROADCAST_ENTER when the current cpu is
 *	    about to enter such a state; any other value is handled as
 *	    leaving it.
 *
 * Does nothing when broadcast runs in periodic mode or when the cpu
 * local device is not affected by CLOCK_EVT_FEAT_C3STOP.
 */
void tick_broadcast_oneshot_control(unsigned long reason)
{
	struct clock_event_device *bc, *dev;
	struct tick_device *td;
	unsigned long flags;
	ktime_t now;
	int cpu;

	/*
	 * Periodic mode does not care about the enter/exit of power
	 * states
	 */
	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
		return;

	/*
	 * We are called with preemption disabled from the depth of the
	 * idle code, so we can't be moved away.
	 */
	cpu = smp_processor_id();
	td = &per_cpu(tick_cpu_device, cpu);
	dev = td->evtdev;

	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
		return;

	/* NOTE(review): bc is dereferenced below without a NULL check */
	bc = tick_broadcast_device.evtdev;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
			clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
			/*
			 * We only reprogram the broadcast timer if we
			 * did not mark ourself in the force mask and
			 * if the cpu local event is earlier than the
			 * broadcast event. If the current CPU is in
			 * the force mask, then we are going to be
			 * woken by the IPI right away.
			 */
			if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
			    dev->next_event.tv64 < bc->next_event.tv64)
				tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
		}
	} else {
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
			clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
			/*
			 * The cpu which was handling the broadcast
			 * timer marked this cpu in the broadcast
			 * pending mask and fired the broadcast
			 * IPI. So we are going to handle the expired
			 * event anyway via the broadcast IPI
			 * handler. No need to reprogram the timer
			 * with an already expired event.
			 */
			if (cpumask_test_and_clear_cpu(cpu,
				       tick_broadcast_pending_mask))
				goto out;

			/*
			 * Bail out if there is no next event.
			 */
			if (dev->next_event.tv64 == KTIME_MAX)
				goto out;
			/*
			 * If the pending bit is not set, then we are
			 * either the CPU handling the broadcast
			 * interrupt or we got woken by something else.
			 *
			 * We are no longer in the broadcast mask, so
			 * if the cpu local expiry time is already
			 * reached, we would reprogram the cpu local
			 * timer with an already expired event.
			 *
			 * This can lead to a ping-pong when we return
			 * to idle and therefore rearm the broadcast
			 * timer before the cpu local timer was able
			 * to fire. This happens because the forced
			 * reprogramming makes sure that the event
			 * will happen in the future and depending on
			 * the min_delta setting this might be far
			 * enough out that the ping-pong starts.
			 *
			 * If the cpu local next_event has expired
			 * then we know that the broadcast timer
			 * next_event has expired as well and
			 * broadcast is about to be handled. So we
			 * avoid reprogramming and enforce that the
			 * broadcast handler, which did not run yet,
			 * will invoke the cpu local handler.
			 *
			 * We cannot call the handler directly from
			 * here, because we might be in a NOHZ phase
			 * and we did not go through the irq_enter()
			 * nohz fixups.
			 */
			now = ktime_get();
			if (dev->next_event.tv64 <= now.tv64) {
				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
				goto out;
			}
			/*
			 * We got woken by something else. Reprogram
			 * the cpu local timer device.
			 */
			tick_program_event(dev->next_event, 1);
		}
	}
out:
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
750
751 /*
752  * Reset the one shot broadcast for a cpu
753  *
754  * Called with tick_broadcast_lock held
755  */
756 static void tick_broadcast_clear_oneshot(int cpu)
757 {
758         cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
759 }
760
761 static void tick_broadcast_init_next_event(struct cpumask *mask,
762                                            ktime_t expires)
763 {
764         struct tick_device *td;
765         int cpu;
766
767         for_each_cpu(cpu, mask) {
768                 td = &per_cpu(tick_cpu_device, cpu);
769                 if (td->evtdev)
770                         td->evtdev->next_event = expires;
771         }
772 }
773
774 /**
775  * tick_broadcast_setup_oneshot - setup the broadcast device
776  */
777 void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
778 {
779         int cpu = smp_processor_id();
780
781         /* Set it up only once ! */
782         if (bc->event_handler != tick_handle_oneshot_broadcast) {
783                 int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
784
785                 bc->event_handler = tick_handle_oneshot_broadcast;
786
787                 /*
788                  * We must be careful here. There might be other CPUs
789                  * waiting for periodic broadcast. We need to set the
790                  * oneshot_mask bits for those and program the
791                  * broadcast device to fire.
792                  */
793                 cpumask_copy(tmpmask, tick_broadcast_mask);
794                 cpumask_clear_cpu(cpu, tmpmask);
795                 cpumask_or(tick_broadcast_oneshot_mask,
796                            tick_broadcast_oneshot_mask, tmpmask);
797
798                 if (was_periodic && !cpumask_empty(tmpmask)) {
799                         clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
800                         tick_broadcast_init_next_event(tmpmask,
801                                                        tick_next_period);
802                         tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
803                 } else
804                         bc->next_event.tv64 = KTIME_MAX;
805         } else {
806                 /*
807                  * The first cpu which switches to oneshot mode sets
808                  * the bit for all other cpus which are in the general
809                  * (periodic) broadcast mask. So the bit is set and
810                  * would prevent the first broadcast enter after this
811                  * to program the bc device.
812                  */
813                 tick_broadcast_clear_oneshot(cpu);
814         }
815 }
816
817 /*
818  * Select oneshot operating mode for the broadcast device
819  */
820 void tick_broadcast_switch_to_oneshot(void)
821 {
822         struct clock_event_device *bc;
823         unsigned long flags;
824
825         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
826
827         tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
828         bc = tick_broadcast_device.evtdev;
829         if (bc)
830                 tick_broadcast_setup_oneshot(bc);
831
832         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
833 }
834
835
836 /*
837  * Remove a dead CPU from broadcasting
838  */
839 void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
840 {
841         unsigned long flags;
842         unsigned int cpu = *cpup;
843
844         raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
845
846         /*
847          * Clear the broadcast masks for the dead cpu, but do not stop
848          * the broadcast device!
849          */
850         cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
851         cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
852         cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
853
854         raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
855 }
856
857 /*
858  * Check, whether the broadcast device is in one shot mode
859  */
860 int tick_broadcast_oneshot_active(void)
861 {
862         return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
863 }
864
865 /*
866  * Check whether the broadcast device supports oneshot.
867  */
868 bool tick_broadcast_oneshot_available(void)
869 {
870         struct clock_event_device *bc = tick_broadcast_device.evtdev;
871
872         return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
873 }
874
875 #endif
876
/*
 * Allocate the zeroed cpumasks which are used by the broadcast code.
 * The oneshot related masks only exist with CONFIG_TICK_ONESHOT.
 *
 * NOTE(review): the return values of zalloc_cpumask_var() are not
 * checked here - confirm that an allocation failure cannot happen
 * at this point or is acceptable.
 */
void __init tick_broadcast_init(void)
{
	zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
	zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
	zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}