Merge branch 'for-joerg/batched-unmap' of git://git.kernel.org/pub/scm/linux/kernel...
[platform/kernel/linux-rpi.git] / kernel / softirq.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *      linux/kernel/softirq.c
4  *
5  *      Copyright (C) 1992 Linus Torvalds
6  *
7  *      Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
8  */
9
10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
12 #include <linux/export.h>
13 #include <linux/kernel_stat.h>
14 #include <linux/interrupt.h>
15 #include <linux/init.h>
16 #include <linux/mm.h>
17 #include <linux/notifier.h>
18 #include <linux/percpu.h>
19 #include <linux/cpu.h>
20 #include <linux/freezer.h>
21 #include <linux/kthread.h>
22 #include <linux/rcupdate.h>
23 #include <linux/ftrace.h>
24 #include <linux/smp.h>
25 #include <linux/smpboot.h>
26 #include <linux/tick.h>
27 #include <linux/irq.h>
28
29 #define CREATE_TRACE_POINTS
30 #include <trace/events/irq.h>
31
/*
   - No shared variables; all data is CPU local.
   - If a softirq needs serialization, let it serialize itself
     with its own spinlocks.
   - Even if a softirq is serialized, only the local CPU is marked for
     execution. Hence, we get a sort of weak CPU binding, though it is
     still not clear whether this results in better locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized with respect to themselves.
 */
49
50 #ifndef __ARCH_IRQ_STAT
51 DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
52 EXPORT_PER_CPU_SYMBOL(irq_stat);
53 #endif
54
55 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
56
57 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
58
59 const char * const softirq_to_name[NR_SOFTIRQS] = {
60         "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
61         "TASKLET", "SCHED", "HRTIMER", "RCU"
62 };
63
64 /*
65  * we cannot loop indefinitely here to avoid userspace starvation,
66  * but we also don't want to introduce a worst case 1/HZ latency
67  * to the pending events, so lets the scheduler to balance
68  * the softirq load for us.
69  */
70 static void wakeup_softirqd(void)
71 {
72         /* Interrupts are disabled: no need to stop preemption */
73         struct task_struct *tsk = __this_cpu_read(ksoftirqd);
74
75         if (tsk && tsk->state != TASK_RUNNING)
76                 wake_up_process(tsk);
77 }
78
79 /*
80  * If ksoftirqd is scheduled, we do not want to process pending softirqs
81  * right now. Let ksoftirqd handle this at its own rate, to get fairness,
82  * unless we're doing some of the synchronous softirqs.
83  */
84 #define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ))
85 static bool ksoftirqd_running(unsigned long pending)
86 {
87         struct task_struct *tsk = __this_cpu_read(ksoftirqd);
88
89         if (pending & SOFTIRQ_NOW_MASK)
90                 return false;
91         return tsk && (tsk->state == TASK_RUNNING) &&
92                 !__kthread_should_park(tsk);
93 }
94
95 /*
96  * preempt_count and SOFTIRQ_OFFSET usage:
97  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
98  *   softirq processing.
99  * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
100  *   on local_bh_disable or local_bh_enable.
101  * This lets us distinguish between whether we are currently processing
102  * softirq and whether we just have bh disabled.
103  */
104
/*
 * Variant for softirq.c-internal use, where hardirqs are legitimately
 * disabled.  Adds @cnt to preempt_count and reports the transition to the
 * irq-flags and preempt tracers; @ip is the address handed to
 * trace_softirqs_off().
 */
#ifdef CONFIG_TRACE_IRQFLAGS
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
{
	unsigned long irq_flags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(irq_flags);
	/*
	 * The preempt tracer hooks into preempt_count_add and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * So bump preempt_count by hand here and call trace_preempt_off()
	 * by hand further down.
	 */
	__preempt_count_add(cnt);
	/* Did the increment above just turn softirqs off? */
	if (softirq_count() == (cnt & SOFTIRQ_MASK))
		trace_softirqs_off(ip);
	raw_local_irq_restore(irq_flags);

	/* Only report preempt-off when the count equals exactly @cnt. */
	if (preempt_count() != cnt)
		return;

#ifdef CONFIG_DEBUG_PREEMPT
	current->preempt_disable_ip = get_lock_parent_ip();
#endif
	trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
}
EXPORT_SYMBOL(__local_bh_disable_ip);
#endif /* CONFIG_TRACE_IRQFLAGS */
141
142 static void __local_bh_enable(unsigned int cnt)
143 {
144         lockdep_assert_irqs_disabled();
145
146         if (preempt_count() == cnt)
147                 trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
148
149         if (softirq_count() == (cnt & SOFTIRQ_MASK))
150                 trace_softirqs_on(_RET_IP_);
151
152         __preempt_count_sub(cnt);
153 }
154
155 /*
156  * Special-case - softirqs can safely be enabled by __do_softirq(),
157  * without processing still-pending softirqs:
158  */
159 void _local_bh_enable(void)
160 {
161         WARN_ON_ONCE(in_irq());
162         __local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
163 }
164 EXPORT_SYMBOL(_local_bh_enable);
165
166 void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
167 {
168         WARN_ON_ONCE(in_irq());
169         lockdep_assert_irqs_enabled();
170 #ifdef CONFIG_TRACE_IRQFLAGS
171         local_irq_disable();
172 #endif
173         /*
174          * Are softirqs going to be turned on now:
175          */
176         if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
177                 trace_softirqs_on(ip);
178         /*
179          * Keep preemption disabled until we are done with
180          * softirq processing:
181          */
182         preempt_count_sub(cnt - 1);
183
184         if (unlikely(!in_interrupt() && local_softirq_pending())) {
185                 /*
186                  * Run softirq if any pending. And do it in its own stack
187                  * as we may be calling this deep in a task call stack already.
188                  */
189                 do_softirq();
190         }
191
192         preempt_count_dec();
193 #ifdef CONFIG_TRACE_IRQFLAGS
194         local_irq_enable();
195 #endif
196         preempt_check_resched();
197 }
198 EXPORT_SYMBOL(__local_bh_enable_ip);
199
/*
 * Softirq processing is restarted at most MAX_SOFTIRQ_RESTART times, and
 * the loop also stops once need_resched() is set or 2 ms have elapsed.
 * MAX_SOFTIRQ_TIME is a good upper bound in most cases, but in some
 * situations, such as stop_machine(), jiffies may stop incrementing, so
 * the MAX_SOFTIRQ_RESTART limit is needed as well to guarantee that the
 * function eventually returns.
 *
 * These limits were established by experimentation.  The two things being
 * balanced are latency and fairness: softirqs should be handled as soon
 * as possible, but they must not be able to lock up the box.
 */
#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
#define MAX_SOFTIRQ_RESTART 10
215
216 #ifdef CONFIG_TRACE_IRQFLAGS
217 /*
218  * When we run softirqs from irq_exit() and thus on the hardirq stack we need
219  * to keep the lockdep irq context tracking as tight as possible in order to
220  * not miss-qualify lock contexts and miss possible deadlocks.
221  */
222
223 static inline bool lockdep_softirq_start(void)
224 {
225         bool in_hardirq = false;
226
227         if (trace_hardirq_context(current)) {
228                 in_hardirq = true;
229                 trace_hardirq_exit();
230         }
231
232         lockdep_softirq_enter();
233
234         return in_hardirq;
235 }
236
237 static inline void lockdep_softirq_end(bool in_hardirq)
238 {
239         lockdep_softirq_exit();
240
241         if (in_hardirq)
242                 trace_hardirq_enter();
243 }
244 #else
245 static inline bool lockdep_softirq_start(void) { return false; }
246 static inline void lockdep_softirq_end(bool in_hardirq) { }
247 #endif
248
249 asmlinkage __visible void __softirq_entry __do_softirq(void)
250 {
251         unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
252         unsigned long old_flags = current->flags;
253         int max_restart = MAX_SOFTIRQ_RESTART;
254         struct softirq_action *h;
255         bool in_hardirq;
256         __u32 pending;
257         int softirq_bit;
258
259         /*
260          * Mask out PF_MEMALLOC as the current task context is borrowed for the
261          * softirq. A softirq handled, such as network RX, might set PF_MEMALLOC
262          * again if the socket is related to swapping.
263          */
264         current->flags &= ~PF_MEMALLOC;
265
266         pending = local_softirq_pending();
267         account_irq_enter_time(current);
268
269         __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
270         in_hardirq = lockdep_softirq_start();
271
272 restart:
273         /* Reset the pending bitmask before enabling irqs */
274         set_softirq_pending(0);
275
276         local_irq_enable();
277
278         h = softirq_vec;
279
280         while ((softirq_bit = ffs(pending))) {
281                 unsigned int vec_nr;
282                 int prev_count;
283
284                 h += softirq_bit - 1;
285
286                 vec_nr = h - softirq_vec;
287                 prev_count = preempt_count();
288
289                 kstat_incr_softirqs_this_cpu(vec_nr);
290
291                 trace_softirq_entry(vec_nr);
292                 h->action(h);
293                 trace_softirq_exit(vec_nr);
294                 if (unlikely(prev_count != preempt_count())) {
295                         pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
296                                vec_nr, softirq_to_name[vec_nr], h->action,
297                                prev_count, preempt_count());
298                         preempt_count_set(prev_count);
299                 }
300                 h++;
301                 pending >>= softirq_bit;
302         }
303
304         if (__this_cpu_read(ksoftirqd) == current)
305                 rcu_softirq_qs();
306         local_irq_disable();
307
308         pending = local_softirq_pending();
309         if (pending) {
310                 if (time_before(jiffies, end) && !need_resched() &&
311                     --max_restart)
312                         goto restart;
313
314                 wakeup_softirqd();
315         }
316
317         lockdep_softirq_end(in_hardirq);
318         account_irq_exit_time(current);
319         __local_bh_enable(SOFTIRQ_OFFSET);
320         WARN_ON_ONCE(in_interrupt());
321         current_restore_flags(old_flags, PF_MEMALLOC);
322 }
323
324 asmlinkage __visible void do_softirq(void)
325 {
326         __u32 pending;
327         unsigned long flags;
328
329         if (in_interrupt())
330                 return;
331
332         local_irq_save(flags);
333
334         pending = local_softirq_pending();
335
336         if (pending && !ksoftirqd_running(pending))
337                 do_softirq_own_stack();
338
339         local_irq_restore(flags);
340 }
341
342 /*
343  * Enter an interrupt context.
344  */
345 void irq_enter(void)
346 {
347         rcu_irq_enter();
348         if (is_idle_task(current) && !in_interrupt()) {
349                 /*
350                  * Prevent raise_softirq from needlessly waking up ksoftirqd
351                  * here, as softirq will be serviced on return from interrupt.
352                  */
353                 local_bh_disable();
354                 tick_irq_enter();
355                 _local_bh_enable();
356         }
357
358         __irq_enter();
359 }
360
361 static inline void invoke_softirq(void)
362 {
363         if (ksoftirqd_running(local_softirq_pending()))
364                 return;
365
366         if (!force_irqthreads) {
367 #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
368                 /*
369                  * We can safely execute softirq on the current stack if
370                  * it is the irq stack, because it should be near empty
371                  * at this stage.
372                  */
373                 __do_softirq();
374 #else
375                 /*
376                  * Otherwise, irq_exit() is called on the task stack that can
377                  * be potentially deep already. So call softirq in its own stack
378                  * to prevent from any overrun.
379                  */
380                 do_softirq_own_stack();
381 #endif
382         } else {
383                 wakeup_softirqd();
384         }
385 }
386
/*
 * With CONFIG_NO_HZ_COMMON, call tick_nohz_irq_exit() when leaving the
 * outermost hardirq on a CPU that is idle with no reschedule pending, or
 * on a nohz-full CPU.  Compiles to nothing otherwise.
 */
static inline void tick_irq_exit(void)
{
#ifdef CONFIG_NO_HZ_COMMON
	int cpu = smp_processor_id();

	/* Make sure that timer wheel updates are propagated */
	if (!((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)))
		return;

	if (!in_irq())
		tick_nohz_irq_exit();
#endif
}
399
400 /*
401  * Exit an interrupt context. Process softirqs if needed and possible:
402  */
403 void irq_exit(void)
404 {
405 #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
406         local_irq_disable();
407 #else
408         lockdep_assert_irqs_disabled();
409 #endif
410         account_irq_exit_time(current);
411         preempt_count_sub(HARDIRQ_OFFSET);
412         if (!in_interrupt() && local_softirq_pending())
413                 invoke_softirq();
414
415         tick_irq_exit();
416         rcu_irq_exit();
417         trace_hardirq_exit(); /* must be last! */
418 }
419
/*
 * Mark softirq @nr pending on this CPU.
 *
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * In an interrupt or softirq (this also covers softirq-disabled
	 * code) there is nothing more to do: the softirq runs once we
	 * return from the irq or softirq.
	 */
	if (in_interrupt())
		return;

	/* Otherwise wake ksoftirqd so that the softirq is scheduled soon. */
	wakeup_softirqd();
}
439
/*
 * Mark softirq @nr pending on this CPU.  Saves and disables interrupts
 * around raise_softirq_irqoff(), then restores them.
 */
void raise_softirq(unsigned int nr)
{
	unsigned long irq_flags;

	local_irq_save(irq_flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(irq_flags);
}
448
/*
 * Emit the softirq-raise tracepoint and set bit @nr in the pending mask.
 * Unlike raise_softirq_irqoff(), this never wakes ksoftirqd.
 */
void __raise_softirq_irqoff(unsigned int nr)
{
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}
454
455 void open_softirq(int nr, void (*action)(struct softirq_action *))
456 {
457         softirq_vec[nr].action = action;
458 }
459
460 /*
461  * Tasklets
462  */
463 struct tasklet_head {
464         struct tasklet_struct *head;
465         struct tasklet_struct **tail;
466 };
467
468 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
469 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
470
471 static void __tasklet_schedule_common(struct tasklet_struct *t,
472                                       struct tasklet_head __percpu *headp,
473                                       unsigned int softirq_nr)
474 {
475         struct tasklet_head *head;
476         unsigned long flags;
477
478         local_irq_save(flags);
479         head = this_cpu_ptr(headp);
480         t->next = NULL;
481         *head->tail = t;
482         head->tail = &(t->next);
483         raise_softirq_irqoff(softirq_nr);
484         local_irq_restore(flags);
485 }
486
487 void __tasklet_schedule(struct tasklet_struct *t)
488 {
489         __tasklet_schedule_common(t, &tasklet_vec,
490                                   TASKLET_SOFTIRQ);
491 }
492 EXPORT_SYMBOL(__tasklet_schedule);
493
494 void __tasklet_hi_schedule(struct tasklet_struct *t)
495 {
496         __tasklet_schedule_common(t, &tasklet_hi_vec,
497                                   HI_SOFTIRQ);
498 }
499 EXPORT_SYMBOL(__tasklet_hi_schedule);
500
501 static void tasklet_action_common(struct softirq_action *a,
502                                   struct tasklet_head *tl_head,
503                                   unsigned int softirq_nr)
504 {
505         struct tasklet_struct *list;
506
507         local_irq_disable();
508         list = tl_head->head;
509         tl_head->head = NULL;
510         tl_head->tail = &tl_head->head;
511         local_irq_enable();
512
513         while (list) {
514                 struct tasklet_struct *t = list;
515
516                 list = list->next;
517
518                 if (tasklet_trylock(t)) {
519                         if (!atomic_read(&t->count)) {
520                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED,
521                                                         &t->state))
522                                         BUG();
523                                 t->func(t->data);
524                                 tasklet_unlock(t);
525                                 continue;
526                         }
527                         tasklet_unlock(t);
528                 }
529
530                 local_irq_disable();
531                 t->next = NULL;
532                 *tl_head->tail = t;
533                 tl_head->tail = &t->next;
534                 __raise_softirq_irqoff(softirq_nr);
535                 local_irq_enable();
536         }
537 }
538
539 static __latent_entropy void tasklet_action(struct softirq_action *a)
540 {
541         tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
542 }
543
544 static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
545 {
546         tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
547 }
548
549 void tasklet_init(struct tasklet_struct *t,
550                   void (*func)(unsigned long), unsigned long data)
551 {
552         t->next = NULL;
553         t->state = 0;
554         atomic_set(&t->count, 0);
555         t->func = func;
556         t->data = data;
557 }
558 EXPORT_SYMBOL(tasklet_init);
559
560 void tasklet_kill(struct tasklet_struct *t)
561 {
562         if (in_interrupt())
563                 pr_notice("Attempt to kill tasklet from interrupt\n");
564
565         while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
566                 do {
567                         yield();
568                 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
569         }
570         tasklet_unlock_wait(t);
571         clear_bit(TASKLET_STATE_SCHED, &t->state);
572 }
573 EXPORT_SYMBOL(tasklet_kill);
574
575 void __init softirq_init(void)
576 {
577         int cpu;
578
579         for_each_possible_cpu(cpu) {
580                 per_cpu(tasklet_vec, cpu).tail =
581                         &per_cpu(tasklet_vec, cpu).head;
582                 per_cpu(tasklet_hi_vec, cpu).tail =
583                         &per_cpu(tasklet_hi_vec, cpu).head;
584         }
585
586         open_softirq(TASKLET_SOFTIRQ, tasklet_action);
587         open_softirq(HI_SOFTIRQ, tasklet_hi_action);
588 }
589
/*
 * thread_should_run callback for ksoftirqd: non-zero while softirqs are
 * pending on the local CPU.  @cpu is not used.
 */
static int ksoftirqd_should_run(unsigned int cpu)
{
	return local_softirq_pending();
}
594
/*
 * thread_fn callback for ksoftirqd: if softirqs are pending, process them
 * and then give the scheduler a chance to run something else.
 * @cpu is not used.
 */
static void run_ksoftirqd(unsigned int cpu)
{
	local_irq_disable();

	if (!local_softirq_pending()) {
		local_irq_enable();
		return;
	}

	/*
	 * We are not deep in the task stack here, so softirqs can safely
	 * run inline on it.
	 */
	__do_softirq();
	local_irq_enable();
	cond_resched();
}
610
611 #ifdef CONFIG_HOTPLUG_CPU
612 /*
613  * tasklet_kill_immediate is called to remove a tasklet which can already be
614  * scheduled for execution on @cpu.
615  *
616  * Unlike tasklet_kill, this function removes the tasklet
617  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
618  *
619  * When this function is called, @cpu must be in the CPU_DEAD state.
620  */
621 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
622 {
623         struct tasklet_struct **i;
624
625         BUG_ON(cpu_online(cpu));
626         BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
627
628         if (!test_bit(TASKLET_STATE_SCHED, &t->state))
629                 return;
630
631         /* CPU is dead, so no lock needed. */
632         for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
633                 if (*i == t) {
634                         *i = t->next;
635                         /* If this was the tail element, move the tail ptr */
636                         if (*i == NULL)
637                                 per_cpu(tasklet_vec, cpu).tail = i;
638                         return;
639                 }
640         }
641         BUG();
642 }
643
644 static int takeover_tasklets(unsigned int cpu)
645 {
646         /* CPU is dead, so no lock needed. */
647         local_irq_disable();
648
649         /* Find end, append list for that CPU. */
650         if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
651                 *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
652                 __this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
653                 per_cpu(tasklet_vec, cpu).head = NULL;
654                 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
655         }
656         raise_softirq_irqoff(TASKLET_SOFTIRQ);
657
658         if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
659                 *__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
660                 __this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
661                 per_cpu(tasklet_hi_vec, cpu).head = NULL;
662                 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
663         }
664         raise_softirq_irqoff(HI_SOFTIRQ);
665
666         local_irq_enable();
667         return 0;
668 }
669 #else
670 #define takeover_tasklets       NULL
671 #endif /* CONFIG_HOTPLUG_CPU */
672
673 static struct smp_hotplug_thread softirq_threads = {
674         .store                  = &ksoftirqd,
675         .thread_should_run      = ksoftirqd_should_run,
676         .thread_fn              = run_ksoftirqd,
677         .thread_comm            = "ksoftirqd/%u",
678 };
679
680 static __init int spawn_ksoftirqd(void)
681 {
682         cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
683                                   takeover_tasklets);
684         BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
685
686         return 0;
687 }
688 early_initcall(spawn_ksoftirqd);
689
690 /*
691  * [ These __weak aliases are kept in a separate compilation unit, so that
692  *   GCC does not inline them incorrectly. ]
693  */
694
695 int __init __weak early_irq_init(void)
696 {
697         return 0;
698 }
699
700 int __init __weak arch_probe_nr_irqs(void)
701 {
702         return NR_IRQS_LEGACY;
703 }
704
705 int __init __weak arch_early_irq_init(void)
706 {
707         return 0;
708 }
709
710 unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
711 {
712         return from;
713 }