// SPDX-License-Identifier: GPL-2.0
/*
 * OS Noise Tracer: computes the OS Noise suffered by a running thread.
 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
 *
 * Based on "hwlat_detector" tracer by:
 *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
 *   With feedback from Clark Williams <williams@redhat.com>
 *
 * And also based on the rtsl tracer presented on:
 *  DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
 *  scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
 *  (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020.
 *
 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
 */

#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/sched/clock.h>
#include <uapi/linux/sched/types.h>
#include <linux/sched.h>
#include "trace.h"

#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/trace/irq_vectors.h>
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#endif /* CONFIG_X86_LOCAL_APIC */

#include <trace/events/irq.h>
#include <trace/events/sched.h>

#define CREATE_TRACE_POINTS
#include <trace/events/osnoise.h>

/*
 * Default values.
 */
#define BANNER                  "osnoise: "
#define DEFAULT_SAMPLE_PERIOD   1000000                 /* 1s */
#define DEFAULT_SAMPLE_RUNTIME  1000000                 /* 1s */

#define DEFAULT_TIMERLAT_PERIOD 1000                    /* 1ms */
#define DEFAULT_TIMERLAT_PRIO   95                      /* FIFO 95 */

/*
 * osnoise/options entries.
 */
enum osnoise_options_index {
        OSN_DEFAULTS = 0,
        OSN_WORKLOAD,
        OSN_PANIC_ON_STOP,
        OSN_PREEMPT_DISABLE,
        OSN_IRQ_DISABLE,
        OSN_MAX
};

static const char * const osnoise_options_str[OSN_MAX] = {
                                                        "DEFAULTS",
                                                        "OSNOISE_WORKLOAD",
                                                        "PANIC_ON_STOP",
                                                        "OSNOISE_PREEMPT_DISABLE",
                                                        "OSNOISE_IRQ_DISABLE" };

#define OSN_DEFAULT_OPTIONS             0x2
static unsigned long osnoise_options    = OSN_DEFAULT_OPTIONS;

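/*
 * For illustration: osnoise_options is a plain bitmask indexed by
 * enum osnoise_options_index, so OSN_DEFAULT_OPTIONS == 0x2 means that
 * only bit 1 (OSN_WORKLOAD) is set by default. A check then looks like:
 *
 *	if (test_bit(OSN_WORKLOAD, &osnoise_options))
 *		// use the in-kernel sampling workload
 */
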
/*
 * trace_array of the enabled osnoise/timerlat instances.
 */
struct osnoise_instance {
        struct list_head        list;
        struct trace_array      *tr;
};

static struct list_head osnoise_instances;

static bool osnoise_has_registered_instances(void)
{
        return !!list_first_or_null_rcu(&osnoise_instances,
                                        struct osnoise_instance,
                                        list);
}

/*
 * osnoise_instance_registered - check if a tr is already registered
 */
static int osnoise_instance_registered(struct trace_array *tr)
{
        struct osnoise_instance *inst;
        int found = 0;

        rcu_read_lock();
        list_for_each_entry_rcu(inst, &osnoise_instances, list) {
                if (inst->tr == tr)
                        found = 1;
        }
        rcu_read_unlock();

        return found;
}

/*
 * osnoise_register_instance - register a new trace instance
 *
 * Register a trace_array *tr in the list of instances running
 * osnoise/timerlat tracers.
 */
static int osnoise_register_instance(struct trace_array *tr)
{
        struct osnoise_instance *inst;

        /*
         * register/unregister serialization is provided by trace's
         * trace_types_lock.
         */
        lockdep_assert_held(&trace_types_lock);

        inst = kmalloc(sizeof(*inst), GFP_KERNEL);
        if (!inst)
                return -ENOMEM;

        INIT_LIST_HEAD_RCU(&inst->list);
        inst->tr = tr;
        list_add_tail_rcu(&inst->list, &osnoise_instances);

        return 0;
}

/*
 * osnoise_unregister_instance - unregister a registered trace instance
 *
 * Remove the trace_array *tr from the list of instances running
 * osnoise/timerlat tracers.
 */
static void osnoise_unregister_instance(struct trace_array *tr)
{
        struct osnoise_instance *inst;
        int found = 0;

        /*
         * register/unregister serialization is provided by trace's
         * trace_types_lock.
         */
        list_for_each_entry_rcu(inst, &osnoise_instances, list,
                                lockdep_is_held(&trace_types_lock)) {
                if (inst->tr == tr) {
                        list_del_rcu(&inst->list);
                        found = 1;
                        break;
                }
        }

        if (!found)
                return;

        kvfree_rcu_mightsleep(inst);
}

/*
 * NMI runtime info.
 */
struct osn_nmi {
        u64     count;
        u64     delta_start;
};

/*
 * IRQ runtime info.
 */
struct osn_irq {
        u64     count;
        u64     arrival_time;
        u64     delta_start;
};

#define IRQ_CONTEXT     0
#define THREAD_CONTEXT  1
/*
 * softirq runtime info.
 */
struct osn_softirq {
        u64     count;
        u64     arrival_time;
        u64     delta_start;
};

/*
 * thread runtime info.
 */
struct osn_thread {
        u64     count;
        u64     arrival_time;
        u64     delta_start;
};

/*
 * Runtime information: this structure saves the runtime information used by
 * one sampling thread.
 */
struct osnoise_variables {
        struct task_struct      *kthread;
        bool                    sampling;
        pid_t                   pid;
        struct osn_nmi          nmi;
        struct osn_irq          irq;
        struct osn_softirq      softirq;
        struct osn_thread       thread;
        local_t                 int_counter;
};

/*
 * Per-cpu runtime information.
 */
static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);

/*
 * this_cpu_osn_var - Return the per-cpu osnoise_variables of the current CPU
 */
static inline struct osnoise_variables *this_cpu_osn_var(void)
{
        return this_cpu_ptr(&per_cpu_osnoise_var);
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Runtime information for the timer mode.
 */
struct timerlat_variables {
        struct task_struct      *kthread;
        struct hrtimer          timer;
        u64                     rel_period;
        u64                     abs_period;
        bool                    tracing_thread;
        u64                     count;
};

static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);

/*
 * this_cpu_tmr_var - Return the per-cpu timerlat_variables of the current CPU
 */
static inline struct timerlat_variables *this_cpu_tmr_var(void)
{
        return this_cpu_ptr(&per_cpu_timerlat_var);
}

/*
 * tlat_var_reset - Reset the values of the per-cpu timerlat_variables
 */
static inline void tlat_var_reset(void)
{
        struct timerlat_variables *tlat_var;
        int cpu;
        /*
         * So far, all the values are initialized as 0, so
         * zeroing the structure is perfect.
         */
        for_each_cpu(cpu, cpu_online_mask) {
                tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
                memset(tlat_var, 0, sizeof(*tlat_var));
        }
}
#else /* CONFIG_TIMERLAT_TRACER */
#define tlat_var_reset()        do {} while (0)
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * osn_var_reset - Reset the values of the per-cpu osnoise_variables
 */
static inline void osn_var_reset(void)
{
        struct osnoise_variables *osn_var;
        int cpu;

        /*
         * So far, all the values are initialized as 0, so
         * zeroing the structure is perfect.
         */
        for_each_cpu(cpu, cpu_online_mask) {
                osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
                memset(osn_var, 0, sizeof(*osn_var));
        }
}

/*
 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables
 */
static inline void osn_var_reset_all(void)
{
        osn_var_reset();
        tlat_var_reset();
}

/*
 * Tells NMIs to call back to the osnoise tracer to record timestamps.
 */
bool trace_osnoise_callback_enabled;

/*
 * osnoise sample structure definition. Used to store the statistics of a
 * sample run.
 */
struct osnoise_sample {
        u64                     runtime;        /* runtime */
        u64                     noise;          /* noise */
        u64                     max_sample;     /* max single noise sample */
        int                     hw_count;       /* # HW (incl. hypervisor) interference */
        int                     nmi_count;      /* # NMIs during this sample */
        int                     irq_count;      /* # IRQs during this sample */
        int                     softirq_count;  /* # softirqs during this sample */
        int                     thread_count;   /* # threads during this sample */
};

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat sample structure definition. Used to store the statistics of
 * a sample run.
 */
struct timerlat_sample {
        u64                     timer_latency;  /* timer_latency */
        unsigned int            seqnum;         /* unique sequence */
        int                     context;        /* timer context */
};
#endif

/*
 * Protect the interface.
 */
static struct mutex interface_lock;

/*
 * Tracer data.
 */
static struct osnoise_data {
        u64     sample_period;          /* total sampling period */
        u64     sample_runtime;         /* active sampling portion of period */
        u64     stop_tracing;           /* stop trace in the internal operation (loop/irq) */
        u64     stop_tracing_total;     /* stop trace in the final operation (report/thread) */
#ifdef CONFIG_TIMERLAT_TRACER
        u64     timerlat_period;        /* timerlat period */
        u64     print_stack;            /* print IRQ stack if total > */
        int     timerlat_tracer;        /* timerlat tracer */
#endif
        bool    tainted;                /* inform users and developers about a problem */
} osnoise_data = {
        .sample_period                  = DEFAULT_SAMPLE_PERIOD,
        .sample_runtime                 = DEFAULT_SAMPLE_RUNTIME,
        .stop_tracing                   = 0,
        .stop_tracing_total             = 0,
#ifdef CONFIG_TIMERLAT_TRACER
        .print_stack                    = 0,
        .timerlat_period                = DEFAULT_TIMERLAT_PERIOD,
        .timerlat_tracer                = 0,
#endif
};

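/*
 * For illustration (values are in usecs): with the defaults above,
 * sample_period == sample_runtime == 1000000, so the workload samples
 * for the whole 1 s period with no sleep in between. Setting, e.g.,
 * sample_runtime = 500000 would make each period half sampling and
 * half sleeping (see osnoise_sleep() below).
 */
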
#ifdef CONFIG_TIMERLAT_TRACER
static inline bool timerlat_enabled(void)
{
        return osnoise_data.timerlat_tracer;
}

static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
        struct timerlat_variables *tlat_var = this_cpu_tmr_var();
        /*
         * If timerlat is enabled, but the timer IRQ handler did not
         * run yet to set tracing_thread, do not trace.
         */
        if (!tlat_var->tracing_thread) {
                osn_var->softirq.arrival_time = 0;
                osn_var->softirq.delta_start = 0;
                return 0;
        }
        return 1;
}

static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
        struct timerlat_variables *tlat_var = this_cpu_tmr_var();
        /*
         * If timerlat is enabled, but the timer IRQ handler did not
         * run yet to set tracing_thread, do not trace.
         */
        if (!tlat_var->tracing_thread) {
                osn_var->thread.delta_start = 0;
                osn_var->thread.arrival_time = 0;
                return 0;
        }
        return 1;
}
#else /* CONFIG_TIMERLAT_TRACER */
static inline bool timerlat_enabled(void)
{
        return false;
}

static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
        return 1;
}
static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
        return 1;
}
#endif

#ifdef CONFIG_PREEMPT_RT
/*
 * Print the osnoise header info.
 */
static void print_osnoise_headers(struct seq_file *s)
{
        if (osnoise_data.tainted)
                seq_puts(s, "# osnoise is tainted!\n");

        seq_puts(s, "#                                _-------=> irqs-off\n");
        seq_puts(s, "#                               / _------=> need-resched\n");
        seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
        seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
        seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
        seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
        seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");

        seq_puts(s, "#                              |||||| /          ");
        seq_puts(s, "                                     MAX\n");

        seq_puts(s, "#                              ||||| /                         ");
        seq_puts(s, "                    SINGLE      Interference counters:\n");

        seq_puts(s, "#                              |||||||               RUNTIME   ");
        seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");

        seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    IN US    ");
        seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");

        seq_puts(s, "#              | |         |   |||||||      |           |      ");
        seq_puts(s, "       |    |            |      |      |      |      |      |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_osnoise_headers(struct seq_file *s)
{
        if (osnoise_data.tainted)
                seq_puts(s, "# osnoise is tainted!\n");

        seq_puts(s, "#                                _-----=> irqs-off\n");
        seq_puts(s, "#                               / _----=> need-resched\n");
        seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
        seq_puts(s, "#                              || / _--=> preempt-depth\n");
        seq_puts(s, "#                              ||| / _-=> migrate-disable     ");
        seq_puts(s, "                    MAX\n");
        seq_puts(s, "#                              |||| /     delay               ");
        seq_puts(s, "                    SINGLE      Interference counters:\n");

        seq_puts(s, "#                              |||||               RUNTIME   ");
        seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");

        seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP    IN US    ");
        seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");

        seq_puts(s, "#              | |         |   |||||      |           |      ");
        seq_puts(s, "       |    |            |      |      |      |      |      |\n");
}
#endif /* CONFIG_PREEMPT_RT */

/*
 * osnoise_taint - report an osnoise error.
 */
#define osnoise_taint(msg) ({                                                   \
        struct osnoise_instance *inst;                                          \
        struct trace_buffer *buffer;                                            \
                                                                                \
        rcu_read_lock();                                                        \
        list_for_each_entry_rcu(inst, &osnoise_instances, list) {               \
                buffer = inst->tr->array_buffer.buffer;                         \
                trace_array_printk_buf(buffer, _THIS_IP_, msg);                 \
        }                                                                       \
        rcu_read_unlock();                                                      \
        osnoise_data.tainted = true;                                            \
})

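/*
 * For illustration: osnoise_taint() is used from the measurement paths
 * to flag accounting inconsistencies, e.g.:
 *
 *	if (duration < 0)
 *		osnoise_taint("Negative duration!\n");
 *
 * It prints the message to every registered instance's buffer and sets
 * osnoise_data.tainted, which makes the headers print a warning.
 */
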
/*
 * Record an osnoise_sample into the tracer buffer.
 */
static void
__trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
{
        struct trace_event_call *call = &event_osnoise;
        struct ring_buffer_event *event;
        struct osnoise_entry *entry;

        event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
                                          tracing_gen_ctx());
        if (!event)
                return;
        entry   = ring_buffer_event_data(event);
        entry->runtime          = sample->runtime;
        entry->noise            = sample->noise;
        entry->max_sample       = sample->max_sample;
        entry->hw_count         = sample->hw_count;
        entry->nmi_count        = sample->nmi_count;
        entry->irq_count        = sample->irq_count;
        entry->softirq_count    = sample->softirq_count;
        entry->thread_count     = sample->thread_count;

        if (!call_filter_check_discard(call, entry, buffer, event))
                trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * Record an osnoise_sample on all osnoise instances.
 */
static void trace_osnoise_sample(struct osnoise_sample *sample)
{
        struct osnoise_instance *inst;
        struct trace_buffer *buffer;

        rcu_read_lock();
        list_for_each_entry_rcu(inst, &osnoise_instances, list) {
                buffer = inst->tr->array_buffer.buffer;
                __trace_osnoise_sample(sample, buffer);
        }
        rcu_read_unlock();
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Print the timerlat header info.
 */
#ifdef CONFIG_PREEMPT_RT
static void print_timerlat_headers(struct seq_file *s)
{
        seq_puts(s, "#                                _-------=> irqs-off\n");
        seq_puts(s, "#                               / _------=> need-resched\n");
        seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
        seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
        seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
        seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
        seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
        seq_puts(s, "#                              |||||| /\n");
        seq_puts(s, "#                              |||||||             ACTIVATION\n");
        seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    ID     ");
        seq_puts(s, "       CONTEXT                LATENCY\n");
        seq_puts(s, "#              | |         |   |||||||      |         |      ");
        seq_puts(s, "            |                       |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_timerlat_headers(struct seq_file *s)
{
        seq_puts(s, "#                                _-----=> irqs-off\n");
        seq_puts(s, "#                               / _----=> need-resched\n");
        seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
        seq_puts(s, "#                              || / _--=> preempt-depth\n");
        seq_puts(s, "#                              ||| / _-=> migrate-disable\n");
        seq_puts(s, "#                              |||| /     delay\n");
        seq_puts(s, "#                              |||||            ACTIVATION\n");
        seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP   ID      ");
        seq_puts(s, "      CONTEXT                 LATENCY\n");
        seq_puts(s, "#              | |         |   |||||      |         |      ");
        seq_puts(s, "            |                       |\n");
}
#endif /* CONFIG_PREEMPT_RT */

static void
__trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
{
        struct trace_event_call *call = &event_osnoise;
        struct ring_buffer_event *event;
        struct timerlat_entry *entry;

        event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
                                          tracing_gen_ctx());
        if (!event)
                return;
        entry   = ring_buffer_event_data(event);
        entry->seqnum                   = sample->seqnum;
        entry->context                  = sample->context;
        entry->timer_latency            = sample->timer_latency;

        if (!call_filter_check_discard(call, entry, buffer, event))
                trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * Record a timerlat_sample on all osnoise instances.
 */
static void trace_timerlat_sample(struct timerlat_sample *sample)
{
        struct osnoise_instance *inst;
        struct trace_buffer *buffer;

        rcu_read_lock();
        list_for_each_entry_rcu(inst, &osnoise_instances, list) {
                buffer = inst->tr->array_buffer.buffer;
                __trace_timerlat_sample(sample, buffer);
        }
        rcu_read_unlock();
}

#ifdef CONFIG_STACKTRACE

#define MAX_CALLS       256

/*
 * Stack trace will take place only at IRQ level, so there is no need
 * to control nesting here.
 */
struct trace_stack {
        int             stack_size;
        int             nr_entries;
        unsigned long   calls[MAX_CALLS];
};

static DEFINE_PER_CPU(struct trace_stack, trace_stack);

/*
 * timerlat_save_stack - save a stack trace without printing
 *
 * Save the current stack trace without printing. The
 * stack will be printed later, after the end of the measurement.
 */
static void timerlat_save_stack(int skip)
{
        unsigned int size, nr_entries;
        struct trace_stack *fstack;

        fstack = this_cpu_ptr(&trace_stack);

        size = ARRAY_SIZE(fstack->calls);

        nr_entries = stack_trace_save(fstack->calls, size, skip);

        fstack->stack_size = nr_entries * sizeof(unsigned long);
        fstack->nr_entries = nr_entries;
}

static void
__timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size)
{
        struct trace_event_call *call = &event_osnoise;
        struct ring_buffer_event *event;
        struct stack_entry *entry;

        event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
                                          tracing_gen_ctx());
        if (!event)
                return;

        entry = ring_buffer_event_data(event);

        memcpy(&entry->caller, fstack->calls, size);
        entry->size = fstack->nr_entries;

        if (!call_filter_check_discard(call, entry, buffer, event))
                trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * timerlat_dump_stack - dump a stack trace previously saved
 */
static void timerlat_dump_stack(u64 latency)
{
        struct osnoise_instance *inst;
        struct trace_buffer *buffer;
        struct trace_stack *fstack;
        unsigned int size;

        /*
         * trace only if latency > print_stack config, if enabled.
         */
        if (!osnoise_data.print_stack || osnoise_data.print_stack > latency)
                return;

        preempt_disable_notrace();
        fstack = this_cpu_ptr(&trace_stack);
        size = fstack->stack_size;

        rcu_read_lock();
        list_for_each_entry_rcu(inst, &osnoise_instances, list) {
                buffer = inst->tr->array_buffer.buffer;
                __timerlat_dump_stack(buffer, fstack, size);
        }
        rcu_read_unlock();
        preempt_enable_notrace();
}
#else /* CONFIG_STACKTRACE */
#define timerlat_dump_stack(latency) do {} while (0)
#define timerlat_save_stack(a) do {} while (0)
#endif /* CONFIG_STACKTRACE */
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * Macros to encapsulate the time capturing infrastructure.
 */
#define time_get()      trace_clock_local()
#define time_to_us(x)   div_u64(x, 1000)
#define time_sub(a, b)  ((a) - (b))

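/*
 * For illustration: all internal time accounting is kept in nanoseconds
 * from trace_clock_local() and only converted for reporting, e.g.:
 *
 *	u64 start = time_get();
 *	... noisy section ...
 *	u64 delta_ns = time_sub(time_get(), start);
 *	u64 delta_us = time_to_us(delta_ns);
 */
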
/*
 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
 *
 * If an IRQ is preempted by an NMI, its delta_start is pushed forward
 * to discount the NMI interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
        if (osn_var->irq.delta_start)
                osn_var->irq.delta_start += duration;
}

#ifndef CONFIG_PREEMPT_RT
/*
 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
 *
 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
 * forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
        if (osn_var->softirq.delta_start)
                osn_var->softirq.delta_start += duration;
}
#else /* CONFIG_PREEMPT_RT */
#define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
#endif

/*
 * cond_move_thread_delta_start - Forward the delta_start of a running thread
 *
 * If a noisy thread is preempted by a softirq, IRQ or NMI, its delta_start
 * is pushed forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
        if (osn_var->thread.delta_start)
                osn_var->thread.delta_start += duration;
}

/*
 * get_int_safe_duration - Get the duration of a window
 *
 * The irq, softirq and thread variables need to have their duration without
 * the interference from higher priority interrupts. Instead of keeping a
 * variable to discount the interrupt interference from these variables, the
 * starting time of these variables are pushed forward with the interrupt's
 * duration. In this way, a single variable is used to:
 *
 *   - Know if a given window is being measured.
 *   - Account its duration.
 *   - Discount the interference.
 *
 * To avoid getting inconsistent values, e.g.,:
 *
 *      now = time_get()
 *              --->    interrupt!
 *                      delta_start -= int duration;
 *              <---
 *      duration = now - delta_start;
 *
 *      result: negative duration if the variable duration before the
 *      interrupt was smaller than the interrupt execution.
 *
 * A counter of interrupts is used. If the counter increased, try
 * to capture an interference safe duration.
 */
static inline s64
get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
{
        u64 int_counter, now;
        s64 duration;

        do {
                int_counter = local_read(&osn_var->int_counter);
                /* synchronize with interrupts */
                barrier();

                now = time_get();
                duration = (now - *delta_start);

                /* synchronize with interrupts */
                barrier();
        } while (int_counter != local_read(&osn_var->int_counter));

        /*
         * This is evidence of a race condition that caused
         * a value to be "discounted" too much.
         */
        if (duration < 0)
                osnoise_taint("Negative duration!\n");

        *delta_start = 0;

        return duration;
}

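/*
 * For illustration: suppose a thread noise window starts at t = 100 us
 * and an IRQ runs from t = 150 us to t = 160 us. On IRQ exit,
 * cond_move_thread_delta_start() pushes thread.delta_start from 100 to
 * 110 us, so a later get_int_safe_duration() at t = 200 us reports
 * 200 - 110 = 90 us of thread noise: the 10 us of IRQ time is already
 * discounted and is accounted to the IRQ instead.
 */
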
/*
 * set_int_safe_time - Save the current time on *time, aware of interference
 *
 * Get the time, taking into consideration a possible interference from
 * higher priority interrupts.
 *
 * See get_int_safe_duration() for an explanation.
 */
static u64
set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
{
        u64 int_counter;

        do {
                int_counter = local_read(&osn_var->int_counter);
                /* synchronize with interrupts */
                barrier();

                *time = time_get();

                /* synchronize with interrupts */
                barrier();
        } while (int_counter != local_read(&osn_var->int_counter));

        return int_counter;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * copy_int_safe_time - Copy *src into *dst, aware of interference
 */
static u64
copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
{
        u64 int_counter;

        do {
                int_counter = local_read(&osn_var->int_counter);
                /* synchronize with interrupts */
                barrier();

                *dst = *src;

                /* synchronize with interrupts */
                barrier();
        } while (int_counter != local_read(&osn_var->int_counter));

        return int_counter;
}
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * trace_osnoise_callback - NMI entry/exit callback
 *
 * This function is called at the NMI entry and exit code. The bool enter
 * distinguishes between either case. This function is used to note an NMI
 * occurrence, compute the noise caused by the NMI, and to remove the noise
 * it is potentially causing on other interference variables.
 */
void trace_osnoise_callback(bool enter)
{
        struct osnoise_variables *osn_var = this_cpu_osn_var();
        u64 duration;

        if (!osn_var->sampling)
                return;

        /*
         * Currently trace_clock_local() calls sched_clock() and the
         * generic version is not NMI safe.
         */
        if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
                if (enter) {
                        osn_var->nmi.delta_start = time_get();
                        local_inc(&osn_var->int_counter);
                } else {
                        duration = time_get() - osn_var->nmi.delta_start;

                        trace_nmi_noise(osn_var->nmi.delta_start, duration);

                        cond_move_irq_delta_start(osn_var, duration);
                        cond_move_softirq_delta_start(osn_var, duration);
                        cond_move_thread_delta_start(osn_var, duration);
                }
        }

        if (enter)
                osn_var->nmi.count++;
}

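/*
 * For illustration: the int_counter retry idiom used above is what makes
 * the *_int_safe_* helpers safe without disabling interrupts:
 *
 *	do {
 *		snapshot = local_read(&osn_var->int_counter);
 *		barrier();
 *		... read or write the time values ...
 *		barrier();
 *	} while (snapshot != local_read(&osn_var->int_counter));
 *
 * Every NMI, IRQ, softirq and thread entry increments int_counter, so a
 * changed counter means an interrupt fired mid-operation and the
 * operation is retried.
 */
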
/*
 * osnoise_trace_irq_entry - Note the starting of an IRQ
 *
 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
 * it is safe to use a single variable (osn_var->irq) to save the statistics.
 * The arrival_time is used to report... the arrival time. The delta_start
 * is used to compute the duration at the IRQ exit handler. See
 * cond_move_irq_delta_start().
 */
void osnoise_trace_irq_entry(int id)
{
        struct osnoise_variables *osn_var = this_cpu_osn_var();

        if (!osn_var->sampling)
                return;
        /*
         * This value will be used in the report, but not to compute
         * the execution time, so it is safe to get it unsafe.
         */
        osn_var->irq.arrival_time = time_get();
        set_int_safe_time(osn_var, &osn_var->irq.delta_start);
        osn_var->irq.count++;

        local_inc(&osn_var->int_counter);
}

/*
 * osnoise_trace_irq_exit - Note the end of an IRQ, save data and trace
 *
 * Computes the duration of the IRQ noise and traces it. It also discounts
 * the interference from other sources of noise that could currently be
 * accounted.
 */
void osnoise_trace_irq_exit(int id, const char *desc)
{
        struct osnoise_variables *osn_var = this_cpu_osn_var();
        s64 duration;

        if (!osn_var->sampling)
                return;

        duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
        trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
        osn_var->irq.arrival_time = 0;
        cond_move_softirq_delta_start(osn_var, duration);
        cond_move_thread_delta_start(osn_var, duration);
}

/*
 * trace_irqentry_callback - Callback to the irq:irq_handler_entry trace event
 *
 * Used to note the starting of an IRQ occurrence.
 */
static void trace_irqentry_callback(void *data, int irq,
                                    struct irqaction *action)
{
        osnoise_trace_irq_entry(irq);
}

/*
 * trace_irqexit_callback - Callback to the irq:irq_handler_exit trace event
 *
 * Used to note the end of an IRQ occurrence.
 */
static void trace_irqexit_callback(void *data, int irq,
                                   struct irqaction *action, int ret)
{
        osnoise_trace_irq_exit(irq, action->name);
}

/*
 * arch specific register function.
 */
int __weak osnoise_arch_register(void)
{
        return 0;
}

/*
 * arch specific unregister function.
 */
void __weak osnoise_arch_unregister(void)
{
        return;
}

/*
 * hook_irq_events - Hook IRQ handling events
 *
 * This function hooks the IRQ related callbacks to the respective trace
 * events.
 */
static int hook_irq_events(void)
{
        int ret;

        ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
        if (ret)
                goto out_err;

        ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
        if (ret)
                goto out_unregister_entry;

        ret = osnoise_arch_register();
        if (ret)
                goto out_irq_exit;

        return 0;

out_irq_exit:
        unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
out_unregister_entry:
        unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
out_err:
        return -EINVAL;
}

/*
 * unhook_irq_events - Unhook IRQ handling events
 *
 * This function unhooks the IRQ related callbacks from the respective trace
 * events.
 */
static void unhook_irq_events(void)
{
        osnoise_arch_unregister();
        unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
        unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
}

#ifndef CONFIG_PREEMPT_RT
/*
 * trace_softirq_entry_callback - Note the starting of a softirq
 *
 * Save the starting time of a softirq. As softirqs are non-preemptive to
 * other softirqs, it is safe to use a single variable (osn_var->softirq)
 * to save the statistics. The arrival_time is used to report... the
 * arrival time. The delta_start is used to compute the duration at the
 * softirq exit handler. See cond_move_softirq_delta_start().
 */
static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
{
        struct osnoise_variables *osn_var = this_cpu_osn_var();

        if (!osn_var->sampling)
                return;
        /*
         * This value will be used in the report, but not to compute
         * the execution time, so it is safe to get it unsafe.
         */
        osn_var->softirq.arrival_time = time_get();
        set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
        osn_var->softirq.count++;

        local_inc(&osn_var->int_counter);
}

/*
 * trace_softirq_exit_callback - Note the end of a softirq
 *
 * Computes the duration of the softirq noise and traces it. It also
 * discounts the interference from other sources of noise that could
 * currently be accounted.
 */
static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
{
        struct osnoise_variables *osn_var = this_cpu_osn_var();
        s64 duration;

        if (!osn_var->sampling)
                return;

        if (unlikely(timerlat_enabled()))
                if (!timerlat_softirq_exit(osn_var))
                        return;

        duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
        trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
        cond_move_thread_delta_start(osn_var, duration);
        osn_var->softirq.arrival_time = 0;
}

/*
 * hook_softirq_events - Hook softirq handling events
 *
 * This function hooks the softirq related callbacks to the respective trace
 * events.
 */
static int hook_softirq_events(void)
{
        int ret;

        ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
        if (ret)
                goto out_err;

        ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
        if (ret)
                goto out_unreg_entry;

        return 0;

out_unreg_entry:
        unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
out_err:
        return -EINVAL;
}

/*
 * unhook_softirq_events - Unhook softirq handling events
 *
 * This function unhooks the softirq related callbacks from the respective
 * trace events.
 */
static void unhook_softirq_events(void)
{
        unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
        unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
}
#else /* CONFIG_PREEMPT_RT */
/*
 * softirqs are threads on PREEMPT_RT, so they are handled as threads.
 */
static int hook_softirq_events(void)
{
        return 0;
}
static void unhook_softirq_events(void)
{
}
#endif

/*
 * thread_entry - Record the starting of a thread noise window
 *
 * It saves the context switch time for a noisy thread, and increments
 * the interference counters.
 */
static void
thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
{
        if (!osn_var->sampling)
                return;
        /*
         * The arrival time will be used in the report, but not to compute
         * the execution time, so it is safe to get it unsafe.
         */
        osn_var->thread.arrival_time = time_get();

        set_int_safe_time(osn_var, &osn_var->thread.delta_start);

        osn_var->thread.count++;
        local_inc(&osn_var->int_counter);
}

/*
 * thread_exit - Report the end of a thread noise window
 *
 * It computes the total noise from a thread, tracing if needed.
 */
static void
thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
{
        s64 duration;

        if (!osn_var->sampling)
                return;

        if (unlikely(timerlat_enabled()))
                if (!timerlat_thread_exit(osn_var))
                        return;

        duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);

        trace_thread_noise(t, osn_var->thread.arrival_time, duration);

        osn_var->thread.arrival_time = 0;
}

/*
 * trace_sched_switch_callback - sched:sched_switch trace event handler
 *
 * This function is hooked to the sched:sched_switch trace event, and it is
 * used to record the beginning and to report the end of a thread noise window.
 */
static void
trace_sched_switch_callback(void *data, bool preempt,
                            struct task_struct *p,
                            struct task_struct *n,
                            unsigned int prev_state)
{
        struct osnoise_variables *osn_var = this_cpu_osn_var();
        int workload = test_bit(OSN_WORKLOAD, &osnoise_options);

        if ((p->pid != osn_var->pid) || !workload)
                thread_exit(osn_var, p);

        if ((n->pid != osn_var->pid) || !workload)
                thread_entry(osn_var, n);
}

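/*
 * For illustration: with the default OSNOISE_WORKLOAD option set, the
 * sampling kthread itself (osn_var->pid) is not accounted as noise, so
 * the callback above effectively does:
 *
 *	prev == workload, next == other  -> thread_entry(next)
 *	prev == other, next == workload  -> thread_exit(prev)
 *
 * With OSNOISE_WORKLOAD cleared (user-space workload), every switch is
 * recorded, including the measured task's own.
 */
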
/*
 * hook_thread_events - Hook the instrumentation for thread noise
 *
 * Hook the osnoise tracer callbacks to handle the noise from other
 * threads on the necessary kernel events.
 */
static int hook_thread_events(void)
{
        int ret;

        ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
        if (ret)
                return -EINVAL;

        return 0;
}

/*
 * unhook_thread_events - Unhook the instrumentation for thread noise
 *
 * Unhook the osnoise tracer callbacks that handle the noise from other
 * threads on the necessary kernel events.
 */
static void unhook_thread_events(void)
{
        unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
}

/*
 * save_osn_sample_stats - Save the osnoise_sample statistics
 *
 * Save the osnoise_sample statistics before the sampling phase. These
 * values will be used later to compute the diff between the statistics
 * before and after the osnoise sampling.
 */
static void
save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
        s->nmi_count = osn_var->nmi.count;
        s->irq_count = osn_var->irq.count;
        s->softirq_count = osn_var->softirq.count;
        s->thread_count = osn_var->thread.count;
}

/*
 * diff_osn_sample_stats - Compute the osnoise_sample statistics
 *
 * After a sample period, compute the difference on the osnoise_sample
 * statistics. The struct osnoise_sample *s contains the statistics saved via
 * save_osn_sample_stats() before the osnoise sampling.
 */
static void
diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
        s->nmi_count = osn_var->nmi.count - s->nmi_count;
        s->irq_count = osn_var->irq.count - s->irq_count;
        s->softirq_count = osn_var->softirq.count - s->softirq_count;
        s->thread_count = osn_var->thread.count - s->thread_count;
}

/*
 * osnoise_stop_tracing - Stop tracing and the tracer.
 */
static __always_inline void osnoise_stop_tracing(void)
{
        struct osnoise_instance *inst;
        struct trace_array *tr;

        rcu_read_lock();
        list_for_each_entry_rcu(inst, &osnoise_instances, list) {
                tr = inst->tr;
                trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
                                "stop tracing hit on cpu %d\n", smp_processor_id());

                if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
                        panic("tracer hit stop condition on CPU %d\n", smp_processor_id());

                tracer_tracing_off(tr);
        }
        rcu_read_unlock();
}

/*
 * notify_new_max_latency - Notify a new max latency via the fsnotify interface.
 */
static void notify_new_max_latency(u64 latency)
{
        struct osnoise_instance *inst;
        struct trace_array *tr;

        rcu_read_lock();
        list_for_each_entry_rcu(inst, &osnoise_instances, list) {
                tr = inst->tr;
                if (tracer_tracing_is_on(tr) && tr->max_latency < latency) {
                        tr->max_latency = latency;
                        latency_fsnotify(tr);
                }
        }
        rcu_read_unlock();
}

/*
 * run_osnoise - Sample the time and look for osnoise
 *
 * Used to capture the time, looking for potential osnoise latency repeatedly.
 * Different from hwlat_detector, it is called with preemption and interrupts
 * enabled. This allows irqs, softirqs and threads to run, interfering on the
 * osnoise sampling thread, as they would do with a regular thread.
 */
static int run_osnoise(void)
{
        bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options);
        struct osnoise_variables *osn_var = this_cpu_osn_var();
        u64 start, sample, last_sample;
        u64 last_int_count, int_count;
        s64 noise = 0, max_noise = 0;
        s64 total, last_total = 0;
        struct osnoise_sample s;
        bool disable_preemption;
        unsigned int threshold;
        u64 runtime, stop_in;
        u64 sum_noise = 0;
        int hw_count = 0;
        int ret = -1;

        /*
         * Disabling preemption is only required if IRQs are enabled,
         * and the option is set on.
         */
        disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options);

        /*
         * Considers the current thread as the workload.
         */
        osn_var->pid = current->pid;

        /*
         * Save the current stats for the diff.
         */
        save_osn_sample_stats(osn_var, &s);

        /*
         * If threshold is 0, use the default value of 5 us.
         */
        threshold = tracing_thresh ? : 5000;

        /*
         * Apply PREEMPT and IRQ disabled options.
         */
        if (disable_irq)
                local_irq_disable();

        if (disable_preemption)
                preempt_disable();

        /*
         * Make sure NMIs see sampling first.
         */
        osn_var->sampling = true;
        barrier();

        /*
         * Transform the *_us config to nanoseconds to avoid the
         * division on the main loop.
         */
        runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
        stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;

        /*
         * Start timestamp.
         */
        start = time_get();

        /*
         * "previous" loop.
         */
        last_int_count = set_int_safe_time(osn_var, &last_sample);

        do {
                /*
                 * Get sample!
                 */
                int_count = set_int_safe_time(osn_var, &sample);

                noise = time_sub(sample, last_sample);

                /*
                 * This shouldn't happen.
                 */
                if (noise < 0) {
                        osnoise_taint("negative noise!");
                        goto out;
                }

                /*
                 * Sample runtime.
                 */
                total = time_sub(sample, start);

                /*
                 * Check for possible overflows.
                 */
                if (total < last_total) {
                        osnoise_taint("total overflow!");
                        break;
                }

                last_total = total;

                if (noise >= threshold) {
                        int interference = int_count - last_int_count;

                        if (noise > max_noise)
                                max_noise = noise;

                        if (!interference)
                                hw_count++;

                        sum_noise += noise;

                        trace_sample_threshold(last_sample, noise, interference);

                        if (osnoise_data.stop_tracing)
                                if (noise > stop_in)
                                        osnoise_stop_tracing();
                }

                /*
                 * In some cases, notably when running on a nohz_full CPU with
                 * a stopped tick, PREEMPT_RCU has no way to account for QSs.
                 * This will eventually cause unwarranted noise as PREEMPT_RCU
                 * will force preemption as the means of ending the current
                 * grace period. We avoid this problem by calling
                 * rcu_momentary_dyntick_idle(), which performs a zero duration
                 * EQS allowing PREEMPT_RCU to end the current grace period.
                 * This call shouldn't be wrapped inside an RCU critical
                 * section.
                 *
                 * Note that in non PREEMPT_RCU kernels QSs are handled through
                 * cond_resched().
                 */
                if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
                        if (!disable_irq)
                                local_irq_disable();

                        rcu_momentary_dyntick_idle();

                        if (!disable_irq)
                                local_irq_enable();
                }

                /*
                 * For the non-preemptive kernel config: let threads run, if
                 * they so wish, unless set not to do so.
                 */
                if (!disable_irq && !disable_preemption)
                        cond_resched();

                last_sample = sample;
                last_int_count = int_count;

        } while (total < runtime && !kthread_should_stop());

        /*
         * Finish the above in the view for interrupts.
         */
        barrier();

        osn_var->sampling = false;

        /*
         * Make sure sampling data is no longer updated.
         */
        barrier();

        /*
         * Return to the preemptive state.
         */
        if (disable_preemption)
                preempt_enable();

        if (disable_irq)
                local_irq_enable();

        /*
         * Save noise info.
         */
        s.noise = time_to_us(sum_noise);
        s.runtime = time_to_us(total);
        s.max_sample = time_to_us(max_noise);
        s.hw_count = hw_count;

        /* Save interference stats info. */
        diff_osn_sample_stats(osn_var, &s);

        trace_osnoise_sample(&s);

        notify_new_max_latency(max_noise);

        if (osnoise_data.stop_tracing_total)
                if (s.noise > osnoise_data.stop_tracing_total)
                        osnoise_stop_tracing();

        return 0;
out:
        return ret;
}

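/*
 * For illustration: the sampling loop above implements, in essence:
 *
 *	last = time_get();
 *	while (elapsed < runtime) {
 *		now = time_get();
 *		noise = now - last;        // gap since the previous read
 *		if (noise >= threshold)
 *			account(noise);    // max, sum, hw_count, ...
 *		last = now;
 *	}
 *
 * A gap above the threshold with no change in int_counter means the
 * interference left no kernel footprint, so it is counted as hardware
 * (or hypervisor) noise via hw_count.
 */
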
static struct cpumask osnoise_cpumask;
static struct cpumask save_cpumask;

/*
 * osnoise_sleep - sleep until the next period
 */
static void osnoise_sleep(void)
{
        u64 interval;
        ktime_t wake_time;

        mutex_lock(&interface_lock);
        interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
        mutex_unlock(&interface_lock);

        /*
         * Differently from hwlat_detector, the osnoise tracer can run
         * without a pause because preemption is on.
         */
        if (!interval) {
                /* Let synchronize_rcu_tasks() make progress. */
                cond_resched_tasks_rcu_qs();
                return;
        }

        wake_time = ktime_add_us(ktime_get(), interval);
        __set_current_state(TASK_INTERRUPTIBLE);

        while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) {
                if (kthread_should_stop())
                        break;
        }
}

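/*
 * For illustration: with sample_period = 1000000 and sample_runtime =
 * 500000 (both in usecs), interval = 500000, so each 1 s period is
 * 0.5 s of sampling followed by a 0.5 s absolute-time sleep. With the
 * defaults, interval is 0 and the thread only yields for RCU.
 */
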
1548 /*
1549  * osnoise_main - The osnoise detection kernel thread
1550  *
1551  * Calls run_osnoise() function to measure the osnoise for the configured runtime,
1552  * every period.
1553  */
1554 static int osnoise_main(void *data)
1555 {
1556
1557         while (!kthread_should_stop()) {
1558                 run_osnoise();
1559                 osnoise_sleep();
1560         }
1561
1562         return 0;
1563 }
1564
1565 #ifdef CONFIG_TIMERLAT_TRACER
1566 /*
1567  * timerlat_irq - hrtimer handler for timerlat.
1568  */
1569 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
1570 {
1571         struct osnoise_variables *osn_var = this_cpu_osn_var();
1572         struct timerlat_variables *tlat;
1573         struct timerlat_sample s;
1574         u64 now;
1575         u64 diff;
1576
1577         /*
1578          * I am not sure if the timer was armed for this CPU. So, get
1579          * the timerlat struct from the timer itself, not from this
1580          * CPU.
1581          */
1582         tlat = container_of(timer, struct timerlat_variables, timer);
1583
1584         now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1585
1586         /*
1587          * Enable the osnoise: events for thread an softirq.
1588          */
1589         tlat->tracing_thread = true;
1590
1591         osn_var->thread.arrival_time = time_get();
1592
1593         /*
1594          * A hardirq is running: the timer IRQ. It is for sure preempting
1595          * a thread, and potentially preempting a softirq.
1596          *
1597          * At this point, it is not interesting to know the duration of the
1598          * preempted thread (and maybe softirq), but how much time they will
1599          * delay the beginning of the execution of the timer thread.
1600          *
1601          * To get the correct (net) delay added by the softirq, its delta_start
1602          * is set as the IRQ one. In this way, at the return of the IRQ, the delta
1603          * start of the softirq will be zeroed, accounting then only the time
1604          * after that.
1605          *
1606          * The thread follows the same principle. However, if a softirq is
1607          * running, the thread needs to receive the softirq delta_start. The
1608          * reason is that the softirq will be the last to be unfolded,
1609          * resetting the thread delay to zero.
1610          *
1611          * PREEMPT_RT is a special case, though. As softirqs run as threads
1612          * on RT, moving the thread delta_start is enough.
1613          */
1614         if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
1615                 copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
1616                                    &osn_var->softirq.delta_start);
1617
1618                 copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
1619                                     &osn_var->irq.delta_start);
1620         } else {
1621                 copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
1622                                     &osn_var->irq.delta_start);
1623         }
1624
1625         /*
1626          * Compute the difference between the current time and the expected time.
1627          */
1628         diff = now - tlat->abs_period;
1629
1630         tlat->count++;
1631         s.seqnum = tlat->count;
1632         s.timer_latency = diff;
1633         s.context = IRQ_CONTEXT;
1634
1635         trace_timerlat_sample(&s);
1636
1637         if (osnoise_data.stop_tracing) {
1638                 if (time_to_us(diff) >= osnoise_data.stop_tracing) {
1639
1640                         /*
1641                          * At this point, if stop_tracing is set and <= print_stack,
1642                          * print_stack is also set, and the stack would normally be
1643                          * printed in the thread handler; but tracing stops here.
1644                          * Thus, print the stack trace now, as it is helpful to find
1645                          * the root cause of an IRQ latency.
1646                          */
1647                         if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
1648                                 timerlat_save_stack(0);
1649                                 timerlat_dump_stack(time_to_us(diff));
1650                         }
1651
1652                         osnoise_stop_tracing();
1653                         notify_new_max_latency(diff);
1654
1655                         wake_up_process(tlat->kthread);
1656
1657                         return HRTIMER_NORESTART;
1658                 }
1659         }
1660
1661         wake_up_process(tlat->kthread);
1662
1663         if (osnoise_data.print_stack)
1664                 timerlat_save_stack(0);
1665
1666         return HRTIMER_NORESTART;
1667 }
1668
1669 /*
1670  * wait_next_period - Wait for the next period for timerlat
1671  */
1672 static int wait_next_period(struct timerlat_variables *tlat)
1673 {
1674         ktime_t next_abs_period, now;
1675         u64 rel_period = osnoise_data.timerlat_period * 1000;
1676
1677         now = hrtimer_cb_get_time(&tlat->timer);
1678         next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1679
1680         /*
1681          * Save the next abs_period.
1682          */
1683         tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1684
1685         /*
1686          * If the new abs_period is in the past, skip the activation.
1687          */
1688         while (ktime_compare(now, next_abs_period) > 0) {
1689                 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1690                 tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1691         }
1692
1693         set_current_state(TASK_INTERRUPTIBLE);
1694
1695         hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
1696         schedule();
1697         return 1;
1698 }
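
/*
 * A worked example of the catch-up loop above (a sketch): with a
 * 1000 us timerlat_period, if the thread is delayed by 2500 us, the
 * while loop advances abs_period by two extra periods so that the
 * timer is armed in the future, skipping the missed activations.
 */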
1699
1700 /*
1701  * timerlat_main - The timerlat measurement kernel thread
1702  */
1703 static int timerlat_main(void *data)
1704 {
1705         struct osnoise_variables *osn_var = this_cpu_osn_var();
1706         struct timerlat_variables *tlat = this_cpu_tmr_var();
1707         struct timerlat_sample s;
1708         struct sched_param sp;
1709         u64 now, diff;
1710
1711         /*
1712          * Make the thread RT, as that is how cyclictest is usually used.
1713          */
1714         sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
1715         sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1716
1717         tlat->count = 0;
1718         tlat->tracing_thread = false;
1719
1720         hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
1721         tlat->timer.function = timerlat_irq;
1722         tlat->kthread = current;
1723         osn_var->pid = current->pid;
1724         /*
1725          * Annotate the arrival time.
1726          */
1727         tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
1728
1729         wait_next_period(tlat);
1730
1731         osn_var->sampling = 1;
1732
1733         while (!kthread_should_stop()) {
1734                 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1735                 diff = now - tlat->abs_period;
1736
1737                 s.seqnum = tlat->count;
1738                 s.timer_latency = diff;
1739                 s.context = THREAD_CONTEXT;
1740
1741                 trace_timerlat_sample(&s);
1742
1743                 notify_new_max_latency(diff);
1744
1745                 timerlat_dump_stack(time_to_us(diff));
1746
1747                 tlat->tracing_thread = false;
1748                 if (osnoise_data.stop_tracing_total)
1749                         if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
1750                                 osnoise_stop_tracing();
1751
1752                 wait_next_period(tlat);
1753         }
1754
1755         hrtimer_cancel(&tlat->timer);
1756         return 0;
1757 }
1758 #else /* CONFIG_TIMERLAT_TRACER */
1759 static int timerlat_main(void *data)
1760 {
1761         return 0;
1762 }
1763 #endif /* CONFIG_TIMERLAT_TRACER */
1764
1765 /*
1766  * stop_kthread - stop a workload thread
1767  */
1768 static void stop_kthread(unsigned int cpu)
1769 {
1770         struct task_struct *kthread;
1771
1772         kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
1773         if (kthread) {
1774                 kthread_stop(kthread);
1775                 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
1776         } else {
1777                 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1778                         per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
1779                         barrier();
1780                         return;
1781                 }
1782         }
1783 }
1784
1785 /*
1786  * stop_per_cpu_kthreads - Stop per-cpu threads
1787  *
1788  * Stop the osnoise sampling threads. Use this on unload and at system
1789  * shutdown.
1790  */
1791 static void stop_per_cpu_kthreads(void)
1792 {
1793         int cpu;
1794
1795         cpus_read_lock();
1796
1797         for_each_online_cpu(cpu)
1798                 stop_kthread(cpu);
1799
1800         cpus_read_unlock();
1801 }
1802
1803 /*
1804  * start_kthread - Start a workload thread
1805  */
1806 static int start_kthread(unsigned int cpu)
1807 {
1808         struct task_struct *kthread;
1809         void *main = osnoise_main;
1810         char comm[24];
1811
1812         if (timerlat_enabled()) {
1813                 snprintf(comm, 24, "timerlat/%d", cpu);
1814                 main = timerlat_main;
1815         } else {
1816                 /* if no workload, just return */
1817                 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1818                         per_cpu(per_cpu_osnoise_var, cpu).sampling = true;
1819                         barrier();
1820                         return 0;
1821                 }
1822
1823                 snprintf(comm, 24, "osnoise/%d", cpu);
1824         }
1825
1826         kthread = kthread_run_on_cpu(main, NULL, cpu, comm);
1827
1828         if (IS_ERR(kthread)) {
1829                 pr_err(BANNER "could not start sampling thread\n");
1830                 stop_per_cpu_kthreads();
1831                 return -ENOMEM;
1832         }
1833
1834         per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
1835
1836         return 0;
1837 }
1838
1839 /*
1840  * start_per_cpu_kthreads - Kick off the per-cpu osnoise sampling kthreads
1841  *
1842  * This starts the kernel threads that will look for osnoise on the
1843  * allowed cpus.
1844  */
1845 static int start_per_cpu_kthreads(void)
1846 {
1847         struct cpumask *current_mask = &save_cpumask;
1848         int retval = 0;
1849         int cpu;
1850
1851         cpus_read_lock();
1852         /*
1853          * Run only on online CPUs in which osnoise is allowed to run.
1854          */
1855         cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
1856
1857         for_each_possible_cpu(cpu)
1858                 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
1859
1860         for_each_cpu(cpu, current_mask) {
1861                 retval = start_kthread(cpu);
1862                 if (retval) {
1863                         cpus_read_unlock();
1864                         stop_per_cpu_kthreads();
1865                         return retval;
1866                 }
1867         }
1868
1869         cpus_read_unlock();
1870
1871         return retval;
1872 }
1873
1874 #ifdef CONFIG_HOTPLUG_CPU
1875 static void osnoise_hotplug_workfn(struct work_struct *dummy)
1876 {
1877         unsigned int cpu = smp_processor_id();
1878
1879         mutex_lock(&trace_types_lock);
1880
1881         if (!osnoise_has_registered_instances())
1882                 goto out_unlock_trace;
1883
1884         mutex_lock(&interface_lock);
1885         cpus_read_lock();
1886
1887         if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
1888                 goto out_unlock;
1889
1890         start_kthread(cpu);
1891
1892 out_unlock:
1893         cpus_read_unlock();
1894         mutex_unlock(&interface_lock);
1895 out_unlock_trace:
1896         mutex_unlock(&trace_types_lock);
1897 }
1898
1899 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);
1900
1901 /*
1902  * osnoise_cpu_init - CPU hotplug online callback function
1903  */
1904 static int osnoise_cpu_init(unsigned int cpu)
1905 {
1906         schedule_work_on(cpu, &osnoise_hotplug_work);
1907         return 0;
1908 }
1909
1910 /*
1911  * osnoise_cpu_die - CPU hotplug offline callback function
1912  */
1913 static int osnoise_cpu_die(unsigned int cpu)
1914 {
1915         stop_kthread(cpu);
1916         return 0;
1917 }
1918
1919 static void osnoise_init_hotplug_support(void)
1920 {
1921         int ret;
1922
1923         ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
1924                                 osnoise_cpu_init, osnoise_cpu_die);
1925         if (ret < 0)
1926                 pr_warn(BANNER "Error initializing cpu hotplug support\n");
1927
1928         return;
1929 }
1930 #else /* CONFIG_HOTPLUG_CPU */
1931 static void osnoise_init_hotplug_support(void)
1932 {
1933         return;
1934 }
1935 #endif /* CONFIG_HOTPLUG_CPU */
1936
1937 /*
1938  * seq file functions for the osnoise/options file.
1939  */
1940 static void *s_options_start(struct seq_file *s, loff_t *pos)
1941 {
1942         int option = *pos;
1943
1944         mutex_lock(&interface_lock);
1945
1946         if (option >= OSN_MAX)
1947                 return NULL;
1948
1949         return pos;
1950 }
1951
1952 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos)
1953 {
1954         int option = ++(*pos);
1955
1956         if (option >= OSN_MAX)
1957                 return NULL;
1958
1959         return pos;
1960 }
1961
1962 static int s_options_show(struct seq_file *s, void *v)
1963 {
1964         loff_t *pos = v;
1965         int option = *pos;
1966
1967         if (option == OSN_DEFAULTS) {
1968                 if (osnoise_options == OSN_DEFAULT_OPTIONS)
1969                         seq_printf(s, "%s", osnoise_options_str[option]);
1970                 else
1971                         seq_printf(s, "NO_%s", osnoise_options_str[option]);
1972                 goto out;
1973         }
1974
1975         if (test_bit(option, &osnoise_options))
1976                 seq_printf(s, "%s", osnoise_options_str[option]);
1977         else
1978                 seq_printf(s, "NO_%s", osnoise_options_str[option]);
1979
1980 out:
1981         if (option != OSN_MAX)
1982                 seq_puts(s, " ");
1983
1984         return 0;
1985 }
1986
1987 static void s_options_stop(struct seq_file *s, void *v)
1988 {
1989         seq_puts(s, "\n");
1990         mutex_unlock(&interface_lock);
1991 }
1992
1993 static const struct seq_operations osnoise_options_seq_ops = {
1994         .start          = s_options_start,
1995         .next           = s_options_next,
1996         .show           = s_options_show,
1997         .stop           = s_options_stop
1998 };
1999
2000 static int osnoise_options_open(struct inode *inode, struct file *file)
2001 {
2002         return seq_open(file, &osnoise_options_seq_ops);
2003 }
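
/*
 * Reading "options" prints every option on one line, prefixed with
 * "NO_" when disabled. For instance, with the default options (only
 * the workload enabled), the output would be something like:
 *
 *   DEFAULTS OSNOISE_WORKLOAD NO_PANIC_ON_STOP NO_OSNOISE_PREEMPT_DISABLE NO_OSNOISE_IRQ_DISABLE
 */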
2004
2005 /**
2006  * osnoise_options_write - Write function for "options" entry
2007  * @filp: The active open file structure
2008  * @ubuf: The user buffer that contains the value to write
2009  * @cnt: The maximum number of bytes to write to "file"
2010  * @ppos: The current position in @filp
2011  *
2012  * Writing the option name enables the option; writing the "NO_"
2013  * prefix in front of the option name disables it.
2014  *
2015  * Writing "DEFAULTS" resets the option values to the default ones.
2016  */
2017 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
2018                                      size_t cnt, loff_t *ppos)
2019 {
2020         int running, option, enable, retval;
2021         char buf[256], *option_str;
2022
2023         if (cnt >= 256)
2024                 return -EINVAL;
2025
2026         if (copy_from_user(buf, ubuf, cnt))
2027                 return -EFAULT;
2028
2029         buf[cnt] = 0;
2030
2031         if (strncmp(buf, "NO_", 3)) {
2032                 option_str = strstrip(buf);
2033                 enable = true;
2034         } else {
2035                 option_str = strstrip(&buf[3]);
2036                 enable = false;
2037         }
2038
2039         option = match_string(osnoise_options_str, OSN_MAX, option_str);
2040         if (option < 0)
2041                 return -EINVAL;
2042
2043         /*
2044          * trace_types_lock is taken to avoid concurrency on start/stop.
2045          */
2046         mutex_lock(&trace_types_lock);
2047         running = osnoise_has_registered_instances();
2048         if (running)
2049                 stop_per_cpu_kthreads();
2050
2051         mutex_lock(&interface_lock);
2052         /*
2053          * Avoid CPU hotplug operations that might read the options.
2054          */
2055         cpus_read_lock();
2056
2057         retval = cnt;
2058
2059         if (enable) {
2060                 if (option == OSN_DEFAULTS)
2061                         osnoise_options = OSN_DEFAULT_OPTIONS;
2062                 else
2063                         set_bit(option, &osnoise_options);
2064         } else {
2065                 if (option == OSN_DEFAULTS)
2066                         retval = -EINVAL;
2067                 else
2068                         clear_bit(option, &osnoise_options);
2069         }
2070
2071         cpus_read_unlock();
2072         mutex_unlock(&interface_lock);
2073
2074         if (running)
2075                 start_per_cpu_kthreads();
2076         mutex_unlock(&trace_types_lock);
2077
2078         return retval;
2079 }
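
/*
 * A usage sketch for the "options" entry, assuming the usual tracefs
 * mount point:
 *
 *   # cd /sys/kernel/tracing
 *   # echo NO_OSNOISE_WORKLOAD > osnoise/options  # disable the workload
 *   # echo OSNOISE_WORKLOAD > osnoise/options     # enable it again
 *   # echo DEFAULTS > osnoise/options             # restore the defaults
 */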
2080
2081 /*
2082  * osnoise_cpus_read - Read function for reading the "cpus" file
2083  * @filp: The active open file structure
2084  * @ubuf: The userspace provided buffer to read value into
2085  * @count: The maximum number of bytes to read
2086  * @ppos: The current "file" position
2087  *
2088  * Prints the "cpus" output into the user-provided buffer.
2089  */
2090 static ssize_t
2091 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
2092                   loff_t *ppos)
2093 {
2094         char *mask_str;
2095         int len;
2096
2097         mutex_lock(&interface_lock);
2098
2099         len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
2100         mask_str = kmalloc(len, GFP_KERNEL);
2101         if (!mask_str) {
2102                 count = -ENOMEM;
2103                 goto out_unlock;
2104         }
2105
2106         len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
2107         if (len >= count) {
2108                 count = -EINVAL;
2109                 goto out_free;
2110         }
2111
2112         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
2113
2114 out_free:
2115         kfree(mask_str);
2116 out_unlock:
2117         mutex_unlock(&interface_lock);
2118
2119         return count;
2120 }
2121
2122 /*
2123  * osnoise_cpus_write - Write function for "cpus" entry
2124  * @filp: The active open file structure
2125  * @ubuf: The user buffer that contains the value to write
2126  * @count: The maximum number of bytes to write to "file"
2127  * @ppos: The current position in @filp
2128  *
2129  * This function provides a write implementation for the "cpus"
2130  * interface to the osnoise tracer. By default, the mask lists all CPUs,
2131  * allowing osnoise threads to run on any online CPU of the system.
2132  * Writing via this interface restricts the execution of osnoise to the
2133  * given set of CPUs. Why not use "tracing_cpumask"?
2134  * Because the user might be interested in tracing what is running on
2135  * other CPUs. For instance, one might run osnoise in one HT CPU
2136  * while observing what is running on the sibling HT CPU.
2137  */
2138 static ssize_t
2139 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
2140                    loff_t *ppos)
2141 {
2142         cpumask_var_t osnoise_cpumask_new;
2143         int running, err;
2144         char buf[256];
2145
2146         if (count >= 256)
2147                 return -EINVAL;
2148
2149         if (copy_from_user(buf, ubuf, count))
2150                 return -EFAULT;
2151
2152         if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
2153                 return -ENOMEM;
2154
2155         err = cpulist_parse(buf, osnoise_cpumask_new);
2156         if (err)
2157                 goto err_free;
2158
2159         /*
2160          * trace_types_lock is taken to avoid concurrency on start/stop.
2161          */
2162         mutex_lock(&trace_types_lock);
2163         running = osnoise_has_registered_instances();
2164         if (running)
2165                 stop_per_cpu_kthreads();
2166
2167         mutex_lock(&interface_lock);
2168         /*
2169          * osnoise_cpumask is read by CPU hotplug operations.
2170          */
2171         cpus_read_lock();
2172
2173         cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
2174
2175         cpus_read_unlock();
2176         mutex_unlock(&interface_lock);
2177
2178         if (running)
2179                 start_per_cpu_kthreads();
2180         mutex_unlock(&trace_types_lock);
2181
2182         free_cpumask_var(osnoise_cpumask_new);
2183         return count;
2184
2185 err_free:
2186         free_cpumask_var(osnoise_cpumask_new);
2187
2188         return err;
2189 }
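
/*
 * A usage sketch for the "cpus" entry; the list format is the one
 * accepted by cpulist_parse() above:
 *
 *   # echo 0-3,8 > /sys/kernel/tracing/osnoise/cpus
 *   # cat /sys/kernel/tracing/osnoise/cpus
 *   0-3,8
 */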
2190
2191 /*
2192  * osnoise/runtime_us: cannot be greater than the period.
2193  */
2194 static struct trace_min_max_param osnoise_runtime = {
2195         .lock   = &interface_lock,
2196         .val    = &osnoise_data.sample_runtime,
2197         .max    = &osnoise_data.sample_period,
2198         .min    = NULL,
2199 };
2200
2201 /*
2202  * osnoise/period_us: cannot be smaller than the runtime.
2203  */
2204 static struct trace_min_max_param osnoise_period = {
2205         .lock   = &interface_lock,
2206         .val    = &osnoise_data.sample_period,
2207         .max    = NULL,
2208         .min    = &osnoise_data.sample_runtime,
2209 };
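
/*
 * For example (a sketch of the two limits above): with the defaults,
 * period_us = runtime_us = 1000000, so the workload never pauses.
 * Writing a runtime larger than the period is rejected; to get a 50%
 * duty cycle:
 *
 *   # echo 500000 > /sys/kernel/tracing/osnoise/runtime_us
 */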
2210
2211 /*
2212  * osnoise/stop_tracing_us: no limit.
2213  */
2214 static struct trace_min_max_param osnoise_stop_tracing_in = {
2215         .lock   = &interface_lock,
2216         .val    = &osnoise_data.stop_tracing,
2217         .max    = NULL,
2218         .min    = NULL,
2219 };
2220
2221 /*
2222  * osnoise/stop_tracing_total_us: no limit.
2223  */
2224 static struct trace_min_max_param osnoise_stop_tracing_total = {
2225         .lock   = &interface_lock,
2226         .val    = &osnoise_data.stop_tracing_total,
2227         .max    = NULL,
2228         .min    = NULL,
2229 };
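
/*
 * A usage sketch for the stop-tracing thresholds above, in
 * microseconds; a value of zero disables the check (see the tests in
 * run_osnoise() and timerlat_irq()):
 *
 *   # echo 100 > /sys/kernel/tracing/osnoise/stop_tracing_us
 *   # echo 500 > /sys/kernel/tracing/osnoise/stop_tracing_total_us
 */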
2230
2231 #ifdef CONFIG_TIMERLAT_TRACER
2232 /*
2233  * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
2234  * latency is higher than val.
2235  */
2236 static struct trace_min_max_param osnoise_print_stack = {
2237         .lock   = &interface_lock,
2238         .val    = &osnoise_data.print_stack,
2239         .max    = NULL,
2240         .min    = NULL,
2241 };
2242
2243 /*
2244  * osnoise/timerlat_period: min 100 us, max 1 s
2245  */
2246 static u64 timerlat_min_period = 100;
2247 static u64 timerlat_max_period = 1000000;
2248 static struct trace_min_max_param timerlat_period = {
2249         .lock   = &interface_lock,
2250         .val    = &osnoise_data.timerlat_period,
2251         .max    = &timerlat_max_period,
2252         .min    = &timerlat_min_period,
2253 };
2254 #endif
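
/*
 * A usage sketch for timerlat_period above; the value is in
 * microseconds and is clamped to [100, 1000000]:
 *
 *   # echo 500 > /sys/kernel/tracing/osnoise/timerlat_period_us
 */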
2255
2256 static const struct file_operations cpus_fops = {
2257         .open           = tracing_open_generic,
2258         .read           = osnoise_cpus_read,
2259         .write          = osnoise_cpus_write,
2260         .llseek         = generic_file_llseek,
2261 };
2262
2263 static const struct file_operations osnoise_options_fops = {
2264         .open           = osnoise_options_open,
2265         .read           = seq_read,
2266         .llseek         = seq_lseek,
2267         .release        = seq_release,
2268         .write          = osnoise_options_write
2269 };
2270
2271 #ifdef CONFIG_TIMERLAT_TRACER
2272 #ifdef CONFIG_STACKTRACE
2273 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2274 {
2275         struct dentry *tmp;
2276
2277         tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
2278                                   &osnoise_print_stack, &trace_min_max_fops);
2279         if (!tmp)
2280                 return -ENOMEM;
2281
2282         return 0;
2283 }
2284 #else /* CONFIG_STACKTRACE */
2285 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2286 {
2287         return 0;
2288 }
2289 #endif /* CONFIG_STACKTRACE */
2290
2291 /*
2292  * init_timerlat_tracefs - A function to initialize the timerlat interface files
2293  */
2294 static int init_timerlat_tracefs(struct dentry *top_dir)
2295 {
2296         struct dentry *tmp;
2297
2298         tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
2299                                   &timerlat_period, &trace_min_max_fops);
2300         if (!tmp)
2301                 return -ENOMEM;
2302
2303         return init_timerlat_stack_tracefs(top_dir);
2304 }
2305 #else /* CONFIG_TIMERLAT_TRACER */
2306 static int init_timerlat_tracefs(struct dentry *top_dir)
2307 {
2308         return 0;
2309 }
2310 #endif /* CONFIG_TIMERLAT_TRACER */
2311
2312 /*
2313  * init_tracefs - A function to initialize the tracefs interface files
2314  *
2315  * This function creates entries in tracefs for "osnoise" and "timerlat".
2316  * It creates these directories in the tracing directory, and within that
2317  * directory the user can change and view the configs.
2318  */
2319 static int init_tracefs(void)
2320 {
2321         struct dentry *top_dir;
2322         struct dentry *tmp;
2323         int ret;
2324
2325         ret = tracing_init_dentry();
2326         if (ret)
2327                 return -ENOMEM;
2328
2329         top_dir = tracefs_create_dir("osnoise", NULL);
2330         if (!top_dir)
2331                 return 0;
2332
2333         tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir,
2334                                   &osnoise_period, &trace_min_max_fops);
2335         if (!tmp)
2336                 goto err;
2337
2338         tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir,
2339                                   &osnoise_runtime, &trace_min_max_fops);
2340         if (!tmp)
2341                 goto err;
2342
2343         tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir,
2344                                   &osnoise_stop_tracing_in, &trace_min_max_fops);
2345         if (!tmp)
2346                 goto err;
2347
2348         tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir,
2349                                   &osnoise_stop_tracing_total, &trace_min_max_fops);
2350         if (!tmp)
2351                 goto err;
2352
2353         tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops);
2354         if (!tmp)
2355                 goto err;
2356
2357         tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL,
2358                                 &osnoise_options_fops);
2359         if (!tmp)
2360                 goto err;
2361
2362         ret = init_timerlat_tracefs(top_dir);
2363         if (ret)
2364                 goto err;
2365
2366         return 0;
2367
2368 err:
2369         tracefs_remove(top_dir);
2370         return -ENOMEM;
2371 }
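
/*
 * A summary of the resulting layout (the timerlat entries depend on
 * CONFIG_TIMERLAT_TRACER and CONFIG_STACKTRACE):
 *
 *   /sys/kernel/tracing/osnoise/
 *     period_us, runtime_us, stop_tracing_us, stop_tracing_total_us,
 *     cpus, options, timerlat_period_us, print_stack
 */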
2372
2373 static int osnoise_hook_events(void)
2374 {
2375         int retval;
2376
2377         /*
2378          * Trace is already hooked; we are re-enabling from
2379          * a stop_tracing_*.
2380          */
2381         if (trace_osnoise_callback_enabled)
2382                 return 0;
2383
2384         retval = hook_irq_events();
2385         if (retval)
2386                 return -EINVAL;
2387
2388         retval = hook_softirq_events();
2389         if (retval)
2390                 goto out_unhook_irq;
2391
2392         retval = hook_thread_events();
2393         /*
2394          * All fine!
2395          */
2396         if (!retval)
2397                 return 0;
2398
2399         unhook_softirq_events();
2400 out_unhook_irq:
2401         unhook_irq_events();
2402         return -EINVAL;
2403 }
2404
2405 static void osnoise_unhook_events(void)
2406 {
2407         unhook_thread_events();
2408         unhook_softirq_events();
2409         unhook_irq_events();
2410 }
2411
2412 /*
2413  * osnoise_workload_start - start the workload and hook to events
2414  */
2415 static int osnoise_workload_start(void)
2416 {
2417         int retval;
2418
2419         /*
2420          * Instances need to be registered after calling workload
2421          * start. Hence, if there is already an instance, the
2422          * workload was already started. Otherwise, this
2423          * code is on the way to register the first instance,
2424          * and the workload will start.
2425          */
2426         if (osnoise_has_registered_instances())
2427                 return 0;
2428
2429         osn_var_reset_all();
2430
2431         retval = osnoise_hook_events();
2432         if (retval)
2433                 return retval;
2434
2435         /*
2436          * Make sure that ftrace_nmi_enter/exit() see reset values
2437          * before enabling trace_osnoise_callback_enabled.
2438          */
2439         barrier();
2440         trace_osnoise_callback_enabled = true;
2441
2442         retval = start_per_cpu_kthreads();
2443         if (retval) {
2444                 trace_osnoise_callback_enabled = false;
2445                 /*
2446                  * Make sure that ftrace_nmi_enter/exit() see
2447                  * trace_osnoise_callback_enabled as false before continuing.
2448                  */
2449                 barrier();
2450
2451                 osnoise_unhook_events();
2452                 return retval;
2453         }
2454
2455         return 0;
2456 }
2457
2458 /*
2459  * osnoise_workload_stop - stop the workload and unhook the events
2460  */
2461 static void osnoise_workload_stop(void)
2462 {
2463         /*
2464          * Instances need to be unregistered before calling
2465          * stop. Hence, if there is a registered instance, more
2466          * than one instance is running, and the workload will not
2467          * yet stop. Otherwise, this code is on the way to disable
2468          * the last instance, and the workload can stop.
2469          */
2470         if (osnoise_has_registered_instances())
2471                 return;
2472
2473         /*
2474          * If callbacks were already disabled in a previous stop
2475          * call, there is no need to disable them again.
2476          *
2477          * For instance, this happens when tracing is stopped via:
2478          * echo 0 > tracing_on
2479          * echo nop > current_tracer.
2480          */
2481         if (!trace_osnoise_callback_enabled)
2482                 return;
2483
2484         trace_osnoise_callback_enabled = false;
2485         /*
2486          * Make sure that ftrace_nmi_enter/exit() see
2487          * trace_osnoise_callback_enabled as false before continuing.
2488          */
2489         barrier();
2490
2491         stop_per_cpu_kthreads();
2492
2493         osnoise_unhook_events();
2494 }
2495
2496 static void osnoise_tracer_start(struct trace_array *tr)
2497 {
2498         int retval;
2499
2500         /*
2501          * If the instance is already registered, there is no need to
2502          * register it again.
2503          */
2504         if (osnoise_instance_registered(tr))
2505                 return;
2506
2507         retval = osnoise_workload_start();
2508         if (retval)
2509                 pr_err(BANNER "Error starting osnoise tracer\n");
2510
2511         osnoise_register_instance(tr);
2512 }
2513
2514 static void osnoise_tracer_stop(struct trace_array *tr)
2515 {
2516         osnoise_unregister_instance(tr);
2517         osnoise_workload_stop();
2518 }
2519
2520 static int osnoise_tracer_init(struct trace_array *tr)
2521 {
2522         /*
2523          * Only allow osnoise tracer if timerlat tracer is not running
2524          * already.
2525          */
2526         if (timerlat_enabled())
2527                 return -EBUSY;
2528
2529         tr->max_latency = 0;
2530
2531         osnoise_tracer_start(tr);
2532         return 0;
2533 }
2534
2535 static void osnoise_tracer_reset(struct trace_array *tr)
2536 {
2537         osnoise_tracer_stop(tr);
2538 }
2539
2540 static struct tracer osnoise_tracer __read_mostly = {
2541         .name           = "osnoise",
2542         .init           = osnoise_tracer_init,
2543         .reset          = osnoise_tracer_reset,
2544         .start          = osnoise_tracer_start,
2545         .stop           = osnoise_tracer_stop,
2546         .print_header   = print_osnoise_headers,
2547         .allow_instances = true,
2548 };
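
/*
 * With the tracer registered, a minimal osnoise session looks like
 * (a sketch, assuming the usual tracefs mount point):
 *
 *   # cd /sys/kernel/tracing
 *   # echo osnoise > current_tracer
 *   # cat trace
 */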
2549
2550 #ifdef CONFIG_TIMERLAT_TRACER
2551 static void timerlat_tracer_start(struct trace_array *tr)
2552 {
2553         int retval;
2554
2555         /*
2556          * If the instance is already registered, there is no need to
2557          * register it again.
2558          */
2559         if (osnoise_instance_registered(tr))
2560                 return;
2561
2562         retval = osnoise_workload_start();
2563         if (retval)
2564                 pr_err(BANNER "Error starting timerlat tracer\n");
2565
2566         osnoise_register_instance(tr);
2567
2568         return;
2569 }
2570
2571 static void timerlat_tracer_stop(struct trace_array *tr)
2572 {
2573         int cpu;
2574
2575         osnoise_unregister_instance(tr);
2576
2577         /*
2578          * Instruct the threads to stop only if this is the last instance.
2579          */
2580         if (!osnoise_has_registered_instances()) {
2581                 for_each_online_cpu(cpu)
2582                         per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
2583         }
2584
2585         osnoise_workload_stop();
2586 }
2587
2588 static int timerlat_tracer_init(struct trace_array *tr)
2589 {
2590         /*
2591          * Only allow timerlat tracer if osnoise tracer is not running already.
2592          */
2593         if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer)
2594                 return -EBUSY;
2595
2596         /*
2597          * If this is the first instance, set timerlat_tracer to block
2598          * osnoise tracer start.
2599          */
2600         if (!osnoise_has_registered_instances())
2601                 osnoise_data.timerlat_tracer = 1;
2602
2603         tr->max_latency = 0;
2604         timerlat_tracer_start(tr);
2605
2606         return 0;
2607 }
2608
2609 static void timerlat_tracer_reset(struct trace_array *tr)
2610 {
2611         timerlat_tracer_stop(tr);
2612
2613         /*
2614          * If this is the last instance, reset timerlat_tracer allowing
2615          * osnoise to be started.
2616          */
2617         if (!osnoise_has_registered_instances())
2618                 osnoise_data.timerlat_tracer = 0;
2619 }
2620
2621 static struct tracer timerlat_tracer __read_mostly = {
2622         .name           = "timerlat",
2623         .init           = timerlat_tracer_init,
2624         .reset          = timerlat_tracer_reset,
2625         .start          = timerlat_tracer_start,
2626         .stop           = timerlat_tracer_stop,
2627         .print_header   = print_timerlat_headers,
2628         .allow_instances = true,
2629 };
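
/*
 * Likewise, a minimal timerlat session (a sketch):
 *
 *   # echo timerlat > /sys/kernel/tracing/current_tracer
 *   # cat /sys/kernel/tracing/trace
 */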
2630
2631 __init static int init_timerlat_tracer(void)
2632 {
2633         return register_tracer(&timerlat_tracer);
2634 }
2635 #else /* CONFIG_TIMERLAT_TRACER */
2636 __init static int init_timerlat_tracer(void)
2637 {
2638         return 0;
2639 }
2640 #endif /* CONFIG_TIMERLAT_TRACER */
2641
2642 __init static int init_osnoise_tracer(void)
2643 {
2644         int ret;
2645
2646         mutex_init(&interface_lock);
2647
2648         cpumask_copy(&osnoise_cpumask, cpu_all_mask);
2649
2650         ret = register_tracer(&osnoise_tracer);
2651         if (ret) {
2652                 pr_err(BANNER "Error registering osnoise!\n");
2653                 return ret;
2654         }
2655
2656         ret = init_timerlat_tracer();
2657         if (ret) {
2658                 pr_err(BANNER "Error registering timerlat!\n");
2659                 return ret;
2660         }
2661
2662         osnoise_init_hotplug_support();
2663
2664         INIT_LIST_HEAD_RCU(&osnoise_instances);
2665
2666         init_tracefs();
2667
2668         return 0;
2669 }
2670 late_initcall(init_osnoise_tracer);