timer: Replace timer base by a cpu index
authorThomas Gleixner <tglx@linutronix.de>
Tue, 26 May 2015 22:50:29 +0000 (22:50 +0000)
committerThomas Gleixner <tglx@linutronix.de>
Fri, 19 Jun 2015 13:18:27 +0000 (15:18 +0200)
Instead of storing a pointer to the per cpu tvec_base we can simply
cache a CPU index in the timer_list and use that to get hold of the
correct per cpu tvec_base. This is only used in lock_timer_base() and
the slightly larger code is peanuts versus the spinlock operation and
the d-cache foot print of the timer wheel.

Aside of that this allows to get rid of following nuisances:

 - boot_tvec_base

   That statically allocated 4k bss data is just kept around so the
   timer has a home when it gets statically initialized. It serves no
   other purpose.

   With the CPU index we assign the timer to CPU0 at static
   initialization time and therefore can avoid the whole boot_tvec_base
   dance.  That also simplifies the init code, which just can use the
   per cpu base.

   Before:
     text    data     bss     dec     hex filename
    17491    9201    4160   30852    7884 ../build/kernel/time/timer.o
   After:
     text    data     bss     dec     hex filename
    17440    9193       0   26633    6809 ../build/kernel/time/timer.o

 - Overloading the base pointer with various flags

   The CPU index has enough space to hold the flags (deferrable,
   irqsafe) so we can get rid of the extra masking and bit fiddling
   with the base pointer.

As a benefit we reduce the size of struct timer_list on 64 bit
machines by 4 to 8 bytes, a size reduction of up to 15% per struct
timer_list, which is a real win as we have tons of them embedded in
other structs.

This also changes the newly added deferrable printout of the timer
start trace point to capture and print all timer->flags, which allows
us to decode the target CPU of the timer as well.

We might have used bitfields for this, but that would change the
static initializers and the init function for no value to accommodate
big endian bitfields.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Joonwoo Park <joonwoop@codeaurora.org>
Cc: Wenbo Wang <wenbo.wang@memblaze.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Badhri Jagan Sridharan <Badhri@google.com>
Link: http://lkml.kernel.org/r/20150526224511.950084301@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
include/linux/timer.h
include/trace/events/timer.h
kernel/time/timer.c

index 064ee24d3f382d504573ffc0de233311136077f1..4a0d52bc2073655cdb5daca05cc7b4a453a3b1d7 100644 (file)
@@ -14,27 +14,23 @@ struct timer_list {
         * All fields that change during normal runtime grouped to the
         * same cacheline
         */
-       struct hlist_node entry;
-       unsigned long expires;
-       struct tvec_base *base;
-
-       void (*function)(unsigned long);
-       unsigned long data;
-
-       int slack;
+       struct hlist_node       entry;
+       unsigned long           expires;
+       void                    (*function)(unsigned long);
+       unsigned long           data;
+       u32                     flags;
+       int                     slack;
 
 #ifdef CONFIG_TIMER_STATS
-       int start_pid;
-       void *start_site;
-       char start_comm[16];
+       int                     start_pid;
+       void                    *start_site;
+       char                    start_comm[16];
 #endif
 #ifdef CONFIG_LOCKDEP
-       struct lockdep_map lockdep_map;
+       struct lockdep_map      lockdep_map;
 #endif
 };
 
-extern struct tvec_base boot_tvec_bases;
-
 #ifdef CONFIG_LOCKDEP
 /*
  * NB: because we have to copy the lockdep_map, setting the lockdep_map key
@@ -49,9 +45,6 @@ extern struct tvec_base boot_tvec_bases;
 #endif
 
 /*
- * Note that all tvec_bases are at least 4 byte aligned and lower two bits
- * of base in timer_list is guaranteed to be zero. Use them for flags.
- *
  * A deferrable timer will work normally when the system is busy, but
  * will not cause a CPU to come out of idle just to service it; instead,
  * the timer will be serviced when the CPU eventually wakes up with a
@@ -65,17 +58,18 @@ extern struct tvec_base boot_tvec_bases;
  * workqueue locking issues. It's not meant for executing random crap
  * with interrupts disabled. Abuse is monitored!
  */
-#define TIMER_DEFERRABLE               0x1LU
-#define TIMER_IRQSAFE                  0x2LU
-
-#define TIMER_FLAG_MASK                        0x3LU
+#define TIMER_CPUMASK          0x0007FFFF
+#define TIMER_MIGRATING                0x00080000
+#define TIMER_BASEMASK         (TIMER_CPUMASK | TIMER_MIGRATING)
+#define TIMER_DEFERRABLE       0x00100000
+#define TIMER_IRQSAFE          0x00200000
 
 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \
                .entry = { .next = TIMER_ENTRY_STATIC },        \
                .function = (_function),                        \
                .expires = (_expires),                          \
                .data = (_data),                                \
-               .base = (void *)((unsigned long)&boot_tvec_bases + (_flags)), \
+               .flags = (_flags),                              \
                .slack = -1,                                    \
                __TIMER_LOCKDEP_MAP_INITIALIZER(                \
                        __FILE__ ":" __stringify(__LINE__))     \
index d7abef1fe6e010d5250d0994b1c65edd70b8b3ec..073b9ac245ba0315f31a51f5df9f21bcdf2e9115 100644 (file)
@@ -45,16 +45,16 @@ TRACE_EVENT(timer_start,
 
        TP_PROTO(struct timer_list *timer,
                unsigned long expires,
-               unsigned int deferrable),
+               unsigned int flags),
 
-       TP_ARGS(timer, expires, deferrable),
+       TP_ARGS(timer, expires, flags),
 
        TP_STRUCT__entry(
                __field( void *,        timer           )
                __field( void *,        function        )
                __field( unsigned long, expires         )
                __field( unsigned long, now             )
-               __field( unsigned int,  deferrable      )
+               __field( unsigned int,  flags           )
        ),
 
        TP_fast_assign(
@@ -62,13 +62,12 @@ TRACE_EVENT(timer_start,
                __entry->function       = timer->function;
                __entry->expires        = expires;
                __entry->now            = jiffies;
-               __entry->deferrable     = deferrable;
+               __entry->flags          = flags;
        ),
 
-       TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] defer=%c",
+       TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld] flags=0x%08x",
                  __entry->timer, __entry->function, __entry->expires,
-                 (long)__entry->expires - __entry->now,
-                 __entry->deferrable > 0 ? 'y':'n')
+                 (long)__entry->expires - __entry->now, __entry->flags)
 );
 
 /**
index 3a5e0c840884b8928586a22ea32b4d933f8a758e..1540af9f62eb3c3323212f59a85fd1cef552f09b 100644 (file)
@@ -92,43 +92,8 @@ struct tvec_base {
        struct tvec tv5;
 } ____cacheline_aligned;
 
-/*
- * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've
- * made NULL special, hint: lock_timer_base()) and we cannot get a compile time
- * pointer to per-cpu entries because we don't know where we'll map the section,
- * even for the boot cpu.
- *
- * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the
- * rest of them.
- */
-struct tvec_base boot_tvec_bases;
-EXPORT_SYMBOL(boot_tvec_bases);
-
-static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
-
-/* Functions below help us manage 'deferrable' flag */
-static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
-{
-       return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE);
-}
-
-static inline unsigned int tbase_get_irqsafe(struct tvec_base *base)
-{
-       return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE);
-}
-
-static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
-{
-       return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK));
-}
-
-static inline void
-timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
-{
-       unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK;
 
-       timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags);
-}
+static DEFINE_PER_CPU(struct tvec_base, tvec_bases);
 
 static unsigned long round_jiffies_common(unsigned long j, int cpu,
                bool force_up)
@@ -403,7 +368,7 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
        /*
         * Update base->active_timers and base->next_timer
         */
-       if (!tbase_get_deferrable(timer->base)) {
+       if (!(timer->flags & TIMER_DEFERRABLE)) {
                if (!base->active_timers++ ||
                    time_before(timer->expires, base->next_timer))
                        base->next_timer = timer->expires;
@@ -422,7 +387,7 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
         * require special care against races with idle_cpu(), lets deal
         * with that later.
         */
-       if (!tbase_get_deferrable(timer->base) || tick_nohz_full_cpu(base->cpu))
+       if (!(timer->flags & TIMER_DEFERRABLE) || tick_nohz_full_cpu(base->cpu))
                wake_up_nohz_cpu(base->cpu);
 }
 
@@ -443,7 +408,7 @@ static void timer_stats_account_timer(struct timer_list *timer)
 
        if (likely(!timer->start_site))
                return;
-       if (unlikely(tbase_get_deferrable(timer->base)))
+       if (unlikely(timer->flags & TIMER_DEFERRABLE))
                flag |= TIMER_STATS_FLAG_DEFERRABLE;
 
        timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
@@ -636,7 +601,7 @@ static inline void
 debug_activate(struct timer_list *timer, unsigned long expires)
 {
        debug_timer_activate(timer);
-       trace_timer_start(timer, expires, tbase_get_deferrable(timer->base));
+       trace_timer_start(timer, expires, timer->flags);
 }
 
 static inline void debug_deactivate(struct timer_list *timer)
@@ -653,10 +618,8 @@ static inline void debug_assert_init(struct timer_list *timer)
 static void do_init_timer(struct timer_list *timer, unsigned int flags,
                          const char *name, struct lock_class_key *key)
 {
-       struct tvec_base *base = raw_cpu_read(tvec_bases);
-
        timer->entry.pprev = NULL;
-       timer->base = (void *)((unsigned long)base | flags);
+       timer->flags = flags | raw_smp_processor_id();
        timer->slack = -1;
 #ifdef CONFIG_TIMER_STATS
        timer->start_site = NULL;
@@ -701,7 +664,7 @@ static inline void
 detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
 {
        detach_timer(timer, true);
-       if (!tbase_get_deferrable(timer->base))
+       if (!(timer->flags & TIMER_DEFERRABLE))
                base->active_timers--;
        base->all_timers--;
 }
@@ -713,7 +676,7 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
                return 0;
 
        detach_timer(timer, clear_pending);
-       if (!tbase_get_deferrable(timer->base)) {
+       if (!(timer->flags & TIMER_DEFERRABLE)) {
                base->active_timers--;
                if (timer->expires == base->next_timer)
                        base->next_timer = base->timer_jiffies;
@@ -732,24 +695,22 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
  * So __run_timers/migrate_timers can safely modify all timers which could
  * be found on ->tvX lists.
  *
- * When the timer's base is locked, and the timer removed from list, it is
- * possible to set timer->base = NULL and drop the lock: the timer remains
- * locked.
+ * When the timer's base is locked and removed from the list, the
+ * TIMER_MIGRATING flag is set, FIXME
  */
 static struct tvec_base *lock_timer_base(struct timer_list *timer,
                                        unsigned long *flags)
        __acquires(timer->base->lock)
 {
-       struct tvec_base *base;
-
        for (;;) {
-               struct tvec_base *prelock_base = timer->base;
-               base = tbase_get_base(prelock_base);
-               if (likely(base != NULL)) {
+               u32 tf = timer->flags;
+               struct tvec_base *base;
+
+               if (!(tf & TIMER_MIGRATING)) {
+                       base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK);
                        spin_lock_irqsave(&base->lock, *flags);
-                       if (likely(prelock_base == timer->base))
+                       if (timer->flags == tf)
                                return base;
-                       /* The timer has migrated to another CPU */
                        spin_unlock_irqrestore(&base->lock, *flags);
                }
                cpu_relax();
@@ -776,7 +737,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
        debug_activate(timer, expires);
 
        cpu = get_nohz_timer_target(pinned);
-       new_base = per_cpu(tvec_bases, cpu);
+       new_base = per_cpu_ptr(&tvec_bases, cpu);
 
        if (base != new_base) {
                /*
@@ -788,11 +749,12 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
                 */
                if (likely(base->running_timer != timer)) {
                        /* See the comment in lock_timer_base() */
-                       timer_set_base(timer, NULL);
+                       timer->flags |= TIMER_MIGRATING;
+
                        spin_unlock(&base->lock);
                        base = new_base;
                        spin_lock(&base->lock);
-                       timer_set_base(timer, base);
+                       timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
                }
        }
 
@@ -954,13 +916,13 @@ EXPORT_SYMBOL(add_timer);
  */
 void add_timer_on(struct timer_list *timer, int cpu)
 {
-       struct tvec_base *base = per_cpu(tvec_bases, cpu);
+       struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
        unsigned long flags;
 
        timer_stats_timer_set_start_info(timer);
        BUG_ON(timer_pending(timer) || !timer->function);
        spin_lock_irqsave(&base->lock, flags);
-       timer_set_base(timer, base);
+       timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
        debug_activate(timer, timer->expires);
        internal_add_timer(base, timer);
        spin_unlock_irqrestore(&base->lock, flags);
@@ -1025,8 +987,6 @@ int try_to_del_timer_sync(struct timer_list *timer)
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
 #ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct tvec_base, __tvec_bases);
-
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1081,7 +1041,7 @@ int del_timer_sync(struct timer_list *timer)
         * don't use it in hardirq context, because it
         * could lead to deadlock.
         */
-       WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base));
+       WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
        for (;;) {
                int ret = try_to_del_timer_sync(timer);
                if (ret >= 0)
@@ -1106,7 +1066,6 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
         * don't have to detach them individually.
         */
        hlist_for_each_entry_safe(timer, tmp, &tv_list, entry) {
-               BUG_ON(tbase_get_base(timer->base) != base);
                /* No accounting, while moving them */
                __internal_add_timer(base, timer);
        }
@@ -1202,7 +1161,7 @@ static inline void __run_timers(struct tvec_base *base)
                        timer = hlist_entry(head->first, struct timer_list, entry);
                        fn = timer->function;
                        data = timer->data;
-                       irqsafe = tbase_get_irqsafe(timer->base);
+                       irqsafe = timer->flags & TIMER_IRQSAFE;
 
                        timer_stats_account_timer(timer);
 
@@ -1242,7 +1201,7 @@ static unsigned long __next_timer_interrupt(struct tvec_base *base)
        index = slot = timer_jiffies & TVR_MASK;
        do {
                hlist_for_each_entry(nte, base->tv1.vec + slot, entry) {
-                       if (tbase_get_deferrable(nte->base))
+                       if (nte->flags & TIMER_DEFERRABLE)
                                continue;
 
                        found = 1;
@@ -1273,7 +1232,7 @@ cascade:
                index = slot = timer_jiffies & TVN_MASK;
                do {
                        hlist_for_each_entry(nte, varp->vec + slot, entry) {
-                               if (tbase_get_deferrable(nte->base))
+                               if (nte->flags & TIMER_DEFERRABLE)
                                        continue;
 
                                found = 1;
@@ -1343,7 +1302,7 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
  */
 u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 {
-       struct tvec_base *base = __this_cpu_read(tvec_bases);
+       struct tvec_base *base = this_cpu_ptr(&tvec_bases);
        u64 expires = KTIME_MAX;
        unsigned long nextevt;
 
@@ -1395,7 +1354,7 @@ void update_process_times(int user_tick)
  */
 static void run_timer_softirq(struct softirq_action *h)
 {
-       struct tvec_base *base = __this_cpu_read(tvec_bases);
+       struct tvec_base *base = this_cpu_ptr(&tvec_bases);
 
        if (time_after_eq(jiffies, base->timer_jiffies))
                __run_timers(base);
@@ -1534,12 +1493,13 @@ EXPORT_SYMBOL(schedule_timeout_uninterruptible);
 static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head)
 {
        struct timer_list *timer;
+       int cpu = new_base->cpu;
 
        while (!hlist_empty(head)) {
                timer = hlist_entry(head->first, struct timer_list, entry);
                /* We ignore the accounting on the dying cpu */
                detach_timer(timer, false);
-               timer_set_base(timer, new_base);
+               timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
                internal_add_timer(new_base, timer);
        }
 }
@@ -1551,8 +1511,8 @@ static void migrate_timers(int cpu)
        int i;
 
        BUG_ON(cpu_online(cpu));
-       old_base = per_cpu(tvec_bases, cpu);
-       new_base = get_cpu_var(tvec_bases);
+       old_base = per_cpu_ptr(&tvec_bases, cpu);
+       new_base = this_cpu_ptr(&tvec_bases);
        /*
         * The caller is globally serialized and nobody else
         * takes two locks at once, deadlock is not possible.
@@ -1576,7 +1536,6 @@ static void migrate_timers(int cpu)
 
        spin_unlock(&old_base->lock);
        spin_unlock_irq(&new_base->lock);
-       put_cpu_var(tvec_bases);
 }
 
 static int timer_cpu_notify(struct notifier_block *self,
@@ -1602,12 +1561,11 @@ static inline void timer_register_cpu_notifier(void)
 static inline void timer_register_cpu_notifier(void) { }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static void __init init_timer_cpu(struct tvec_base *base, int cpu)
+static void __init init_timer_cpu(int cpu)
 {
-       BUG_ON(base != tbase_get_base(base));
+       struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
 
        base->cpu = cpu;
-       per_cpu(tvec_bases, cpu) = base;
        spin_lock_init(&base->lock);
 
        base->timer_jiffies = jiffies;
@@ -1616,27 +1574,14 @@ static void __init init_timer_cpu(struct tvec_base *base, int cpu)
 
 static void __init init_timer_cpus(void)
 {
-       struct tvec_base *base;
-       int local_cpu = smp_processor_id();
        int cpu;
 
-       for_each_possible_cpu(cpu) {
-               if (cpu == local_cpu)
-                       base = &boot_tvec_bases;
-#ifdef CONFIG_SMP
-               else
-                       base = per_cpu_ptr(&__tvec_bases, cpu);
-#endif
-
-               init_timer_cpu(base, cpu);
-       }
+       for_each_possible_cpu(cpu)
+               init_timer_cpu(cpu);
 }
 
 void __init init_timers(void)
 {
-       /* ensure there are enough low bits for flags in timer->base pointer */
-       BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
-
        init_timer_cpus();
        init_timer_stats();
        timer_register_cpu_notifier();