kernel/trace/trace_osnoise.c
// SPDX-License-Identifier: GPL-2.0
/*
 * OS Noise Tracer: computes the OS Noise suffered by a running thread.
 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
 *
 * Based on "hwlat_detector" tracer by:
 *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
 *   With feedback from Clark Williams <williams@redhat.com>
 *
 * And also based on the rtsl tracer presented on:
 *  DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
 *  scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
 *  (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum für Informatik, 2020.
 *
 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
 */

#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/sched/clock.h>
#include <uapi/linux/sched/types.h>
#include <linux/sched.h>
#include "trace.h"

#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/trace/irq_vectors.h>
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#endif /* CONFIG_X86_LOCAL_APIC */

#include <trace/events/irq.h>
#include <trace/events/sched.h>

#define CREATE_TRACE_POINTS
#include <trace/events/osnoise.h>

/*
 * Default values.
 */
#define BANNER                  "osnoise: "
#define DEFAULT_SAMPLE_PERIOD   1000000                 /* 1s */
#define DEFAULT_SAMPLE_RUNTIME  1000000                 /* 1s */

#define DEFAULT_TIMERLAT_PERIOD 1000                    /* 1ms */
#define DEFAULT_TIMERLAT_PRIO   95                      /* FIFO 95 */

/*
 * osnoise/options entries.
 */
enum osnoise_options_index {
        OSN_DEFAULTS = 0,
        OSN_WORKLOAD,
        OSN_PANIC_ON_STOP,
        OSN_PREEMPT_DISABLE,
        OSN_IRQ_DISABLE,
        OSN_MAX
};

static const char * const osnoise_options_str[OSN_MAX] = {
                                                        "DEFAULTS",
                                                        "OSNOISE_WORKLOAD",
                                                        "PANIC_ON_STOP",
                                                        "OSNOISE_PREEMPT_DISABLE",
                                                        "OSNOISE_IRQ_DISABLE" };

#define OSN_DEFAULT_OPTIONS             0x2
static unsigned long osnoise_options    = OSN_DEFAULT_OPTIONS;
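
/*
 * Note (derived from the enum above): OSN_DEFAULT_OPTIONS == 0x2 ==
 * BIT(OSN_WORKLOAD), so only the OSNOISE_WORKLOAD option is set by
 * default; PANIC_ON_STOP, OSNOISE_PREEMPT_DISABLE and
 * OSNOISE_IRQ_DISABLE all start cleared.
 */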

/*
 * trace_array of the enabled osnoise/timerlat instances.
 */
struct osnoise_instance {
        struct list_head        list;
        struct trace_array      *tr;
};

static struct list_head osnoise_instances;

static bool osnoise_has_registered_instances(void)
{
        return !!list_first_or_null_rcu(&osnoise_instances,
                                        struct osnoise_instance,
                                        list);
}

/*
 * osnoise_instance_registered - check if a tr is already registered
 */
static int osnoise_instance_registered(struct trace_array *tr)
{
        struct osnoise_instance *inst;
        int found = 0;

        rcu_read_lock();
        list_for_each_entry_rcu(inst, &osnoise_instances, list) {
                if (inst->tr == tr)
                        found = 1;
        }
        rcu_read_unlock();

        return found;
}

/*
 * osnoise_register_instance - register a new trace instance
 *
 * Register a trace_array *tr in the list of instances running
 * osnoise/timerlat tracers.
 */
static int osnoise_register_instance(struct trace_array *tr)
{
        struct osnoise_instance *inst;

        /*
         * register/unregister serialization is provided by trace's
         * trace_types_lock.
         */
        lockdep_assert_held(&trace_types_lock);

        inst = kmalloc(sizeof(*inst), GFP_KERNEL);
        if (!inst)
                return -ENOMEM;

        INIT_LIST_HEAD_RCU(&inst->list);
        inst->tr = tr;
        list_add_tail_rcu(&inst->list, &osnoise_instances);

        return 0;
}

/*
 * osnoise_unregister_instance - unregister a registered trace instance
 *
 * Remove the trace_array *tr from the list of instances running
 * osnoise/timerlat tracers.
 */
static void osnoise_unregister_instance(struct trace_array *tr)
{
        struct osnoise_instance *inst;
        int found = 0;

        /*
         * register/unregister serialization is provided by trace's
         * trace_types_lock.
         */
        list_for_each_entry_rcu(inst, &osnoise_instances, list,
                                lockdep_is_held(&trace_types_lock)) {
                if (inst->tr == tr) {
                        list_del_rcu(&inst->list);
                        found = 1;
                        break;
                }
        }

        if (!found)
                return;

        kvfree_rcu_mightsleep(inst);
}

/*
 * NMI runtime info.
 */
struct osn_nmi {
        u64     count;
        u64     delta_start;
};

/*
 * IRQ runtime info.
 */
struct osn_irq {
        u64     count;
        u64     arrival_time;
        u64     delta_start;
};

#define IRQ_CONTEXT     0
#define THREAD_CONTEXT  1
/*
 * softirq runtime info.
 */
struct osn_softirq {
        u64     count;
        u64     arrival_time;
        u64     delta_start;
};

/*
 * thread runtime info.
 */
struct osn_thread {
        u64     count;
        u64     arrival_time;
        u64     delta_start;
};

/*
 * Runtime information: this structure saves the runtime information used by
 * one sampling thread.
 */
struct osnoise_variables {
        struct task_struct      *kthread;
        bool                    sampling;
        pid_t                   pid;
        struct osn_nmi          nmi;
        struct osn_irq          irq;
        struct osn_softirq      softirq;
        struct osn_thread       thread;
        local_t                 int_counter;
};

/*
 * Per-cpu runtime information.
 */
static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);

/*
 * this_cpu_osn_var - Return the per-cpu osnoise_variables of the current CPU
 */
static inline struct osnoise_variables *this_cpu_osn_var(void)
{
        return this_cpu_ptr(&per_cpu_osnoise_var);
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Runtime information for the timer mode.
 */
struct timerlat_variables {
        struct task_struct      *kthread;
        struct hrtimer          timer;
        u64                     rel_period;
        u64                     abs_period;
        bool                    tracing_thread;
        u64                     count;
};

static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);

/*
 * this_cpu_tmr_var - Return the per-cpu timerlat_variables of the current CPU
 */
static inline struct timerlat_variables *this_cpu_tmr_var(void)
{
        return this_cpu_ptr(&per_cpu_timerlat_var);
}

/*
 * tlat_var_reset - Reset the values of the per-cpu timerlat_variables
 */
static inline void tlat_var_reset(void)
{
        struct timerlat_variables *tlat_var;
        int cpu;
        /*
         * All the values are initialized as 0, so zeroing
         * the structure is sufficient.
         */
        for_each_cpu(cpu, cpu_online_mask) {
                tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
                memset(tlat_var, 0, sizeof(*tlat_var));
        }
}
#else /* CONFIG_TIMERLAT_TRACER */
#define tlat_var_reset()        do {} while (0)
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * osn_var_reset - Reset the values of the per-cpu osnoise_variables
 */
static inline void osn_var_reset(void)
{
        struct osnoise_variables *osn_var;
        int cpu;

        /*
         * All the values are initialized as 0, so zeroing
         * the structure is sufficient.
         */
        for_each_cpu(cpu, cpu_online_mask) {
                osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
                memset(osn_var, 0, sizeof(*osn_var));
        }
}

/*
 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables
 */
static inline void osn_var_reset_all(void)
{
        osn_var_reset();
        tlat_var_reset();
}

/*
 * Tells NMIs to call back to the osnoise tracer to record timestamps.
 */
bool trace_osnoise_callback_enabled;

/*
 * osnoise sample structure definition. Used to store the statistics of a
 * sample run.
 */
struct osnoise_sample {
        u64                     runtime;        /* runtime */
        u64                     noise;          /* noise */
        u64                     max_sample;     /* max single noise sample */
        int                     hw_count;       /* # HW (incl. hypervisor) interference */
        int                     nmi_count;      /* # NMIs during this sample */
        int                     irq_count;      /* # IRQs during this sample */
        int                     softirq_count;  /* # softirqs during this sample */
        int                     thread_count;   /* # threads during this sample */
};
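
/*
 * For illustration (mapping derived from print_osnoise_headers() below):
 * runtime and noise feed the RUNTIME IN US and NOISE IN US columns (and
 * the "% OF CPU AVAILABLE" ratio computed from them), max_sample feeds
 * the MAX SINGLE NOISE IN US column, and the *_count fields feed the
 * HW NMI IRQ SIRQ THREAD interference counters.
 */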

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat sample structure definition. Used to store the statistics of
 * a sample run.
 */
struct timerlat_sample {
        u64                     timer_latency;  /* timer_latency */
        unsigned int            seqnum;         /* unique sequence */
        int                     context;        /* timer context */
};
#endif

/*
 * Protect the interface.
 */
static struct mutex interface_lock;

/*
 * Tracer data.
 */
static struct osnoise_data {
        u64     sample_period;          /* total sampling period */
        u64     sample_runtime;         /* active sampling portion of period */
        u64     stop_tracing;           /* stop trace in the internal operation (loop/irq) */
        u64     stop_tracing_total;     /* stop trace in the final operation (report/thread) */
#ifdef CONFIG_TIMERLAT_TRACER
        u64     timerlat_period;        /* timerlat period */
        u64     print_stack;            /* print IRQ stack if total > print_stack */
        int     timerlat_tracer;        /* timerlat tracer */
#endif
        bool    tainted;                /* inform users and developers about a problem */
} osnoise_data = {
        .sample_period                  = DEFAULT_SAMPLE_PERIOD,
        .sample_runtime                 = DEFAULT_SAMPLE_RUNTIME,
        .stop_tracing                   = 0,
        .stop_tracing_total             = 0,
#ifdef CONFIG_TIMERLAT_TRACER
        .print_stack                    = 0,
        .timerlat_period                = DEFAULT_TIMERLAT_PERIOD,
        .timerlat_tracer                = 0,
#endif
};

#ifdef CONFIG_TIMERLAT_TRACER
static inline bool timerlat_enabled(void)
{
        return osnoise_data.timerlat_tracer;
}

static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
        struct timerlat_variables *tlat_var = this_cpu_tmr_var();
        /*
         * If timerlat is enabled, but the timer IRQ handler has not
         * run yet to set tracing_thread, do not trace.
         */
        if (!tlat_var->tracing_thread) {
                osn_var->softirq.arrival_time = 0;
                osn_var->softirq.delta_start = 0;
                return 0;
        }
        return 1;
}

static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
        struct timerlat_variables *tlat_var = this_cpu_tmr_var();
        /*
         * If timerlat is enabled, but the timer IRQ handler has not
         * run yet to set tracing_thread, do not trace.
         */
        if (!tlat_var->tracing_thread) {
                osn_var->thread.delta_start = 0;
                osn_var->thread.arrival_time = 0;
                return 0;
        }
        return 1;
}
#else /* CONFIG_TIMERLAT_TRACER */
static inline bool timerlat_enabled(void)
{
        return false;
}

static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
        return 1;
}
static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
        return 1;
}
#endif

#ifdef CONFIG_PREEMPT_RT
/*
 * Print the osnoise header info.
 */
static void print_osnoise_headers(struct seq_file *s)
{
        if (osnoise_data.tainted)
                seq_puts(s, "# osnoise is tainted!\n");

        seq_puts(s, "#                                _-------=> irqs-off\n");
        seq_puts(s, "#                               / _------=> need-resched\n");
        seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
        seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
        seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
        seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
        seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");

        seq_puts(s, "#                              |||||| /          ");
        seq_puts(s, "                                     MAX\n");

        seq_puts(s, "#                              ||||| /                         ");
        seq_puts(s, "                    SINGLE      Interference counters:\n");

        seq_puts(s, "#                              |||||||               RUNTIME   ");
        seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");

        seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    IN US    ");
        seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");

        seq_puts(s, "#              | |         |   |||||||      |           |      ");
        seq_puts(s, "       |    |            |      |      |      |      |      |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_osnoise_headers(struct seq_file *s)
{
        if (osnoise_data.tainted)
                seq_puts(s, "# osnoise is tainted!\n");

        seq_puts(s, "#                                _-----=> irqs-off\n");
        seq_puts(s, "#                               / _----=> need-resched\n");
        seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
        seq_puts(s, "#                              || / _--=> preempt-depth\n");
        seq_puts(s, "#                              ||| / _-=> migrate-disable     ");
        seq_puts(s, "                    MAX\n");
        seq_puts(s, "#                              |||| /     delay               ");
        seq_puts(s, "                    SINGLE      Interference counters:\n");

        seq_puts(s, "#                              |||||               RUNTIME   ");
        seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");

        seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP    IN US    ");
        seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");

        seq_puts(s, "#              | |         |   |||||      |           |      ");
        seq_puts(s, "       |    |            |      |      |      |      |      |\n");
}
#endif /* CONFIG_PREEMPT_RT */

/*
 * osnoise_taint - report an osnoise error.
 */
#define osnoise_taint(msg) ({                                                   \
        struct osnoise_instance *inst;                                          \
        struct trace_buffer *buffer;                                            \
                                                                                \
        rcu_read_lock();                                                        \
        list_for_each_entry_rcu(inst, &osnoise_instances, list) {               \
                buffer = inst->tr->array_buffer.buffer;                         \
                trace_array_printk_buf(buffer, _THIS_IP_, msg);                 \
        }                                                                       \
        rcu_read_unlock();                                                      \
        osnoise_data.tainted = true;                                            \
})

/*
 * Record an osnoise_sample into the tracer buffer.
 */
static void
__trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
{
        struct trace_event_call *call = &event_osnoise;
        struct ring_buffer_event *event;
        struct osnoise_entry *entry;

        event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
                                          tracing_gen_ctx());
        if (!event)
                return;
        entry   = ring_buffer_event_data(event);
        entry->runtime          = sample->runtime;
        entry->noise            = sample->noise;
        entry->max_sample       = sample->max_sample;
        entry->hw_count         = sample->hw_count;
        entry->nmi_count        = sample->nmi_count;
        entry->irq_count        = sample->irq_count;
        entry->softirq_count    = sample->softirq_count;
        entry->thread_count     = sample->thread_count;

        if (!call_filter_check_discard(call, entry, buffer, event))
                trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * Record an osnoise_sample on all osnoise instances.
 */
static void trace_osnoise_sample(struct osnoise_sample *sample)
{
        struct osnoise_instance *inst;
        struct trace_buffer *buffer;

        rcu_read_lock();
        list_for_each_entry_rcu(inst, &osnoise_instances, list) {
                buffer = inst->tr->array_buffer.buffer;
                __trace_osnoise_sample(sample, buffer);
        }
        rcu_read_unlock();
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Print the timerlat header info.
 */
#ifdef CONFIG_PREEMPT_RT
static void print_timerlat_headers(struct seq_file *s)
{
        seq_puts(s, "#                                _-------=> irqs-off\n");
        seq_puts(s, "#                               / _------=> need-resched\n");
        seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
        seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
        seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
        seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
        seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
        seq_puts(s, "#                              |||||| /\n");
        seq_puts(s, "#                              |||||||             ACTIVATION\n");
        seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    ID     ");
        seq_puts(s, "       CONTEXT                LATENCY\n");
        seq_puts(s, "#              | |         |   |||||||      |         |      ");
        seq_puts(s, "            |                       |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_timerlat_headers(struct seq_file *s)
{
        seq_puts(s, "#                                _-----=> irqs-off\n");
        seq_puts(s, "#                               / _----=> need-resched\n");
        seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
        seq_puts(s, "#                              || / _--=> preempt-depth\n");
        seq_puts(s, "#                              ||| / _-=> migrate-disable\n");
        seq_puts(s, "#                              |||| /     delay\n");
        seq_puts(s, "#                              |||||            ACTIVATION\n");
        seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP   ID      ");
        seq_puts(s, "      CONTEXT                 LATENCY\n");
        seq_puts(s, "#              | |         |   |||||      |         |      ");
        seq_puts(s, "            |                       |\n");
}
#endif /* CONFIG_PREEMPT_RT */

static void
__trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
{
        struct trace_event_call *call = &event_osnoise;
        struct ring_buffer_event *event;
        struct timerlat_entry *entry;

        event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
                                          tracing_gen_ctx());
        if (!event)
                return;
        entry   = ring_buffer_event_data(event);
        entry->seqnum                   = sample->seqnum;
        entry->context                  = sample->context;
        entry->timer_latency            = sample->timer_latency;

        if (!call_filter_check_discard(call, entry, buffer, event))
                trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * Record a timerlat_sample on all osnoise instances.
 */
static void trace_timerlat_sample(struct timerlat_sample *sample)
{
        struct osnoise_instance *inst;
        struct trace_buffer *buffer;

        rcu_read_lock();
        list_for_each_entry_rcu(inst, &osnoise_instances, list) {
                buffer = inst->tr->array_buffer.buffer;
                __trace_timerlat_sample(sample, buffer);
        }
        rcu_read_unlock();
}

#ifdef CONFIG_STACKTRACE

#define MAX_CALLS       256

/*
 * Stack trace will take place only at IRQ level, so, no need
 * to control nesting here.
 */
struct trace_stack {
        int             stack_size;
        int             nr_entries;
        unsigned long   calls[MAX_CALLS];
};

static DEFINE_PER_CPU(struct trace_stack, trace_stack);

/*
 * timerlat_save_stack - save a stack trace without printing
 *
 * Save the current stack trace without printing. The
 * stack will be printed later, after the end of the measurement.
 */
static void timerlat_save_stack(int skip)
{
        unsigned int size, nr_entries;
        struct trace_stack *fstack;

        fstack = this_cpu_ptr(&trace_stack);

        size = ARRAY_SIZE(fstack->calls);

        nr_entries = stack_trace_save(fstack->calls, size, skip);

        fstack->stack_size = nr_entries * sizeof(unsigned long);
        fstack->nr_entries = nr_entries;
}

static void
__timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size)
{
        struct trace_event_call *call = &event_osnoise;
        struct ring_buffer_event *event;
        struct stack_entry *entry;

        event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
                                          tracing_gen_ctx());
        if (!event)
                return;

        entry = ring_buffer_event_data(event);

        memcpy(&entry->caller, fstack->calls, size);
        entry->size = fstack->nr_entries;

        if (!call_filter_check_discard(call, entry, buffer, event))
                trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * timerlat_dump_stack - dump a stack trace previously saved
 */
static void timerlat_dump_stack(u64 latency)
{
        struct osnoise_instance *inst;
        struct trace_buffer *buffer;
        struct trace_stack *fstack;
        unsigned int size;

        /*
         * Trace only if latency > the print_stack config, if enabled.
         */
        if (!osnoise_data.print_stack || osnoise_data.print_stack > latency)
                return;

        preempt_disable_notrace();
        fstack = this_cpu_ptr(&trace_stack);
        size = fstack->stack_size;

        rcu_read_lock();
        list_for_each_entry_rcu(inst, &osnoise_instances, list) {
                buffer = inst->tr->array_buffer.buffer;
                __timerlat_dump_stack(buffer, fstack, size);
        }
        rcu_read_unlock();
        preempt_enable_notrace();
}
#else /* CONFIG_STACKTRACE */
#define timerlat_dump_stack(latency)    do {} while (0)
#define timerlat_save_stack(a)          do {} while (0)
#endif /* CONFIG_STACKTRACE */
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * Macros to encapsulate the time capturing infrastructure.
 */
#define time_get()      trace_clock_local()
#define time_to_us(x)   div_u64(x, 1000)
#define time_sub(a, b)  ((a) - (b))
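
/*
 * For example: time_get() is trace_clock_local(), which returns
 * nanoseconds, so time_to_us() is a ns -> us conversion:
 * time_to_us(7500) == 7. Differences computed with time_sub() are
 * therefore in ns until explicitly converted.
 */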

/*
 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
 *
 * If an IRQ is preempted by an NMI, its delta_start is pushed forward
 * to discount the NMI interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
        if (osn_var->irq.delta_start)
                osn_var->irq.delta_start += duration;
}

#ifndef CONFIG_PREEMPT_RT
/*
 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
 *
 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
 * forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
        if (osn_var->softirq.delta_start)
                osn_var->softirq.delta_start += duration;
}
#else /* CONFIG_PREEMPT_RT */
#define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
#endif

/*
 * cond_move_thread_delta_start - Forward the delta_start of a running thread
 *
 * If a noisy thread is preempted by a softirq, IRQ or NMI, its delta_start
 * is pushed forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
        if (osn_var->thread.delta_start)
                osn_var->thread.delta_start += duration;
}

/*
 * get_int_safe_duration - Get the duration of a window
 *
 * The irq, softirq and thread variables need to have their duration without
 * the interference from higher priority interrupts. Instead of keeping a
 * variable to discount the interrupt interference from these variables, the
 * starting time of these variables are pushed forward with the interrupt's
 * duration. In this way, a single variable is used to:
 *
 *   - Know if a given window is being measured.
 *   - Account its duration.
 *   - Discount the interference.
 *
 * To avoid getting inconsistent values, e.g.,:
 *
 *      now = time_get()
 *              --->    interrupt!
 *                      delta_start -= int duration;
 *              <---
 *      duration = now - delta_start;
 *
 *      result: negative duration if the window duration before the
 *      interrupt was smaller than the interrupt execution.
 *
 * A counter of interrupts is used. If the counter increased, try
 * to capture an interference safe duration.
 */
static inline s64
get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
{
        u64 int_counter, now;
        s64 duration;

        do {
                int_counter = local_read(&osn_var->int_counter);
                /* synchronize with interrupts */
                barrier();

                now = time_get();
                duration = (now - *delta_start);

                /* synchronize with interrupts */
                barrier();
        } while (int_counter != local_read(&osn_var->int_counter));

        /*
         * This is evidence of a race condition that causes
         * a value to be "discounted" too much.
         */
        if (duration < 0)
                osnoise_taint("Negative duration!\n");

        *delta_start = 0;

        return duration;
}
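
/*
 * A worked example of the push-forward scheme (illustrative numbers):
 * a thread window opens at t = 100us, so thread.delta_start = 100us.
 * An IRQ then runs for 30us; its exit calls
 * cond_move_thread_delta_start(), moving thread.delta_start to 130us.
 * When the window closes at t = 200us, get_int_safe_duration() returns
 * 200 - 130 = 70us: the 100us gross window minus the 30us of IRQ
 * interference, tracked with a single variable.
 */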

/*
 * set_int_safe_time - Save the current time on *time, aware of interference
 *
 * Get the time, taking into consideration a possible interference from
 * higher priority interrupts.
 *
 * See get_int_safe_duration() for an explanation.
 */
static u64
set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
{
        u64 int_counter;

        do {
                int_counter = local_read(&osn_var->int_counter);
                /* synchronize with interrupts */
                barrier();

                *time = time_get();

                /* synchronize with interrupts */
                barrier();
        } while (int_counter != local_read(&osn_var->int_counter));

        return int_counter;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * copy_int_safe_time - Copy *src into *dst aware of interference
 */
static u64
copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
{
        u64 int_counter;

        do {
                int_counter = local_read(&osn_var->int_counter);
                /* synchronize with interrupts */
                barrier();

                *dst = *src;

                /* synchronize with interrupts */
                barrier();
        } while (int_counter != local_read(&osn_var->int_counter));

        return int_counter;
}
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * trace_osnoise_callback - NMI entry/exit callback
 *
 * This function is called at the NMI entry and exit code. The bool enter
 * distinguishes between either case. This function is used to note an NMI
 * occurrence, compute the noise caused by the NMI, and to remove the noise
 * it is potentially causing on other interference variables.
 */
void trace_osnoise_callback(bool enter)
{
        struct osnoise_variables *osn_var = this_cpu_osn_var();
        u64 duration;

        if (!osn_var->sampling)
                return;

        /*
         * Currently trace_clock_local() calls sched_clock() and the
         * generic version is not NMI safe.
         */
        if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
                if (enter) {
                        osn_var->nmi.delta_start = time_get();
                        local_inc(&osn_var->int_counter);
                } else {
                        duration = time_get() - osn_var->nmi.delta_start;

                        trace_nmi_noise(osn_var->nmi.delta_start, duration);

                        cond_move_irq_delta_start(osn_var, duration);
                        cond_move_softirq_delta_start(osn_var, duration);
                        cond_move_thread_delta_start(osn_var, duration);
                }
        }

        if (enter)
                osn_var->nmi.count++;
}

/*
 * osnoise_trace_irq_entry - Note the starting of an IRQ
 *
 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
 * it is safe to use a single variable (osn_var->irq) to save the statistics.
 * The arrival_time is used to report... the arrival time. The delta_start
 * is used to compute the duration at the IRQ exit handler. See
 * cond_move_irq_delta_start().
 */
void osnoise_trace_irq_entry(int id)
{
        struct osnoise_variables *osn_var = this_cpu_osn_var();

        if (!osn_var->sampling)
                return;
        /*
         * This value will be used in the report, but not to compute
         * the execution time, so it is safe to get it unsafe.
         */
        osn_var->irq.arrival_time = time_get();
        set_int_safe_time(osn_var, &osn_var->irq.delta_start);
        osn_var->irq.count++;

        local_inc(&osn_var->int_counter);
}

/*
 * osnoise_trace_irq_exit - Note the end of an IRQ, save data and trace
 *
 * Compute the duration of the IRQ noise and trace it. Also discount the
 * interference from other sources of noise that could currently be
 * accounted.
 */
void osnoise_trace_irq_exit(int id, const char *desc)
{
        struct osnoise_variables *osn_var = this_cpu_osn_var();
        s64 duration;

        if (!osn_var->sampling)
                return;

        duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
        trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
        osn_var->irq.arrival_time = 0;
        cond_move_softirq_delta_start(osn_var, duration);
        cond_move_thread_delta_start(osn_var, duration);
}

/*
 * trace_irqentry_callback - Callback to the irq:irq_handler_entry trace event
 *
 * Used to note the starting of an IRQ occurrence.
 */
static void trace_irqentry_callback(void *data, int irq,
                                    struct irqaction *action)
{
        osnoise_trace_irq_entry(irq);
}

/*
 * trace_irqexit_callback - Callback to the irq:irq_handler_exit trace event
 *
 * Used to note the end of an IRQ occurrence.
 */
static void trace_irqexit_callback(void *data, int irq,
                                   struct irqaction *action, int ret)
{
        osnoise_trace_irq_exit(irq, action->name);
}

/*
 * arch specific register function.
 */
int __weak osnoise_arch_register(void)
{
        return 0;
}

/*
 * arch specific unregister function.
 */
void __weak osnoise_arch_unregister(void)
{
        return;
}

/*
 * hook_irq_events - Hook IRQ handling events
 *
 * This function hooks the IRQ related callbacks to the respective trace
 * events.
 */
static int hook_irq_events(void)
{
        int ret;

        ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
        if (ret)
                goto out_err;

        ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
        if (ret)
                goto out_unregister_entry;

        ret = osnoise_arch_register();
        if (ret)
                goto out_irq_exit;

        return 0;

out_irq_exit:
        unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
out_unregister_entry:
        unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
out_err:
        return -EINVAL;
}

/*
 * unhook_irq_events - Unhook IRQ handling events
 *
 * This function unhooks the IRQ related callbacks from the respective trace
 * events.
 */
static void unhook_irq_events(void)
{
        osnoise_arch_unregister();
        unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
        unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
}

#ifndef CONFIG_PREEMPT_RT
/*
 * trace_softirq_entry_callback - Note the starting of a softirq
 *
 * Save the starting time of a softirq. As softirqs are non-preemptive to
 * other softirqs, it is safe to use a single variable (osn_var->softirq)
 * to save the statistics. The arrival_time is used to report... the
 * arrival time. The delta_start is used to compute the duration at the
 * softirq exit handler. See cond_move_softirq_delta_start().
 */
static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
{
        struct osnoise_variables *osn_var = this_cpu_osn_var();

        if (!osn_var->sampling)
                return;
        /*
         * This value will be used in the report, but not to compute
         * the execution time, so it is safe to get it unsafe.
         */
        osn_var->softirq.arrival_time = time_get();
        set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
        osn_var->softirq.count++;

        local_inc(&osn_var->int_counter);
}

/*
 * trace_softirq_exit_callback - Note the end of a softirq
 *
 * Compute the duration of the softirq noise and trace it. Also discount the
 * interference from other sources of noise that could currently be
 * accounted.
 */
static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
{
        struct osnoise_variables *osn_var = this_cpu_osn_var();
        s64 duration;

        if (!osn_var->sampling)
                return;

        if (unlikely(timerlat_enabled()))
                if (!timerlat_softirq_exit(osn_var))
                        return;

        duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
        trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
        cond_move_thread_delta_start(osn_var, duration);
        osn_var->softirq.arrival_time = 0;
}

/*
 * hook_softirq_events - Hook softirq handling events
 *
 * This function hooks the softirq related callbacks to the respective trace
 * events.
 */
static int hook_softirq_events(void)
{
        int ret;

        ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
        if (ret)
                goto out_err;

        ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
        if (ret)
                goto out_unreg_entry;

        return 0;

out_unreg_entry:
        unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
out_err:
        return -EINVAL;
}

/*
 * unhook_softirq_events - Unhook softirq handling events
 *
 * This function unhooks the softirq related callbacks from the respective
 * trace events.
 */
static void unhook_softirq_events(void)
{
        unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
        unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
}
#else /* CONFIG_PREEMPT_RT */
/*
 * softirqs run as threads on PREEMPT_RT.
 */
static int hook_softirq_events(void)
{
        return 0;
}
static void unhook_softirq_events(void)
{
}
#endif

/*
 * thread_entry - Record the starting of a thread noise window
 *
 * It saves the context switch time for a noisy thread, and increments
 * the interference counters.
 */
static void
thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
{
        if (!osn_var->sampling)
                return;
        /*
         * The arrival time will be used in the report, but not to compute
         * the execution time, so it is safe to get it unsafe.
         */
        osn_var->thread.arrival_time = time_get();

        set_int_safe_time(osn_var, &osn_var->thread.delta_start);

        osn_var->thread.count++;
        local_inc(&osn_var->int_counter);
}

/*
 * thread_exit - Report the end of a thread noise window
 *
 * It computes the total noise from a thread, tracing if needed.
 */
static void
thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
{
        s64 duration;

        if (!osn_var->sampling)
                return;

        if (unlikely(timerlat_enabled()))
                if (!timerlat_thread_exit(osn_var))
                        return;

        duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);

        trace_thread_noise(t, osn_var->thread.arrival_time, duration);

        osn_var->thread.arrival_time = 0;
}

/*
 * trace_sched_switch_callback - sched:sched_switch trace event handler
 *
 * This function is hooked to the sched:sched_switch trace event, and it is
 * used to record the beginning and to report the end of a thread noise window.
 */
static void
trace_sched_switch_callback(void *data, bool preempt,
                            struct task_struct *p,
                            struct task_struct *n,
                            unsigned int prev_state)
{
        struct osnoise_variables *osn_var = this_cpu_osn_var();
        int workload = test_bit(OSN_WORKLOAD, &osnoise_options);

        if ((p->pid != osn_var->pid) || !workload)
                thread_exit(osn_var, p);

        if ((n->pid != osn_var->pid) || !workload)
                thread_entry(osn_var, n);
}
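
/*
 * In other words: any task other than the one registered in
 * osn_var->pid is accounted as thread noise. When the OSNOISE_WORKLOAD
 * option is cleared, the pid check is bypassed and every context
 * switch is accounted.
 */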

/*
 * hook_thread_events - Hook the instrumentation for thread noise
 *
 * Hook the osnoise tracer callbacks to handle the noise from other
 * threads on the necessary kernel events.
 */
static int hook_thread_events(void)
{
        int ret;

        ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
        if (ret)
                return -EINVAL;

        return 0;
}

/*
 * unhook_thread_events - Unhook the instrumentation for thread noise
 *
 * Unhook the osnoise tracer callbacks that handle the noise from other
 * threads on the necessary kernel events.
 */
static void unhook_thread_events(void)
{
        unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
}

/*
 * save_osn_sample_stats - Save the osnoise_sample statistics
 *
 * Save the osnoise_sample statistics before the sampling phase. These
 * values will be used later to compute the diff between the statistics
 * before and after the osnoise sampling.
 */
static void
save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
        s->nmi_count = osn_var->nmi.count;
        s->irq_count = osn_var->irq.count;
        s->softirq_count = osn_var->softirq.count;
        s->thread_count = osn_var->thread.count;
}

/*
 * diff_osn_sample_stats - Compute the osnoise_sample statistics
 *
 * After a sample period, compute the difference on the osnoise_sample
 * statistics. The struct osnoise_sample *s contains the statistics saved via
 * save_osn_sample_stats() before the osnoise sampling.
 */
static void
diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
        s->nmi_count = osn_var->nmi.count - s->nmi_count;
        s->irq_count = osn_var->irq.count - s->irq_count;
        s->softirq_count = osn_var->softirq.count - s->softirq_count;
        s->thread_count = osn_var->thread.count - s->thread_count;
}

/*
 * osnoise_stop_tracing - Stop tracing and the tracer.
 */
static __always_inline void osnoise_stop_tracing(void)
{
        struct osnoise_instance *inst;
        struct trace_array *tr;

        rcu_read_lock();
        list_for_each_entry_rcu(inst, &osnoise_instances, list) {
                tr = inst->tr;
                trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
                                "stop tracing hit on cpu %d\n", smp_processor_id());

                if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
                        panic("tracer hit stop condition on CPU %d\n", smp_processor_id());

                tracer_tracing_off(tr);
        }
        rcu_read_unlock();
}

/*
 * notify_new_max_latency - Notify a new max latency via fsnotify interface.
 */
static void notify_new_max_latency(u64 latency)
{
        struct osnoise_instance *inst;
        struct trace_array *tr;

        rcu_read_lock();
        list_for_each_entry_rcu(inst, &osnoise_instances, list) {
                tr = inst->tr;
                if (tracer_tracing_is_on(tr) && tr->max_latency < latency) {
                        tr->max_latency = latency;
                        latency_fsnotify(tr);
                }
        }
        rcu_read_unlock();
}

/*
 * run_osnoise - Sample the time and look for osnoise
 *
 * Used to capture the time, looking for potential osnoise latency repeatedly.
 * Different from hwlat_detector, it is called with preemption and interrupts
 * enabled. This allows irqs, softirqs and threads to run, interfering with
 * the osnoise sampling thread, as they would do with a regular thread.
 */
static int run_osnoise(void)
{
        bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options);
        struct osnoise_variables *osn_var = this_cpu_osn_var();
        u64 start, sample, last_sample;
        u64 last_int_count, int_count;
        s64 noise = 0, max_noise = 0;
        s64 total, last_total = 0;
        struct osnoise_sample s;
        bool disable_preemption;
        unsigned int threshold;
        u64 runtime, stop_in;
        u64 sum_noise = 0;
        int hw_count = 0;
        int ret = -1;

        /*
         * Disabling preemption is only required if IRQs are enabled,
         * and the option is set on.
         */
        disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options);

        /*
         * Consider the current thread as the workload.
         */
        osn_var->pid = current->pid;

        /*
         * Save the current stats for the diff.
         */
        save_osn_sample_stats(osn_var, &s);

        /*
         * If threshold is 0, use the default value of 5 us.
         */
        threshold = tracing_thresh ? : 5000;

        /*
         * Apply PREEMPT and IRQ disabled options.
         */
        if (disable_irq)
                local_irq_disable();

        if (disable_preemption)
                preempt_disable();

        /*
         * Make sure NMIs see sampling first.
         */
        osn_var->sampling = true;
        barrier();

        /*
         * Transform the *_us config to nanoseconds to avoid the
         * division on the main loop.
         */
        runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
        stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;

        /*
         * Start timestamp.
         */
        start = time_get();

        /*
         * "previous" loop.
         */
        last_int_count = set_int_safe_time(osn_var, &last_sample);

        do {
                /*
                 * Get sample!
                 */
                int_count = set_int_safe_time(osn_var, &sample);

                noise = time_sub(sample, last_sample);

                /*
                 * This shouldn't happen.
                 */
                if (noise < 0) {
                        osnoise_taint("negative noise!");
                        goto out;
                }

                /*
                 * Sample runtime.
                 */
                total = time_sub(sample, start);

                /*
                 * Check for possible overflows.
                 */
                if (total < last_total) {
                        osnoise_taint("total overflow!");
                        break;
                }

                last_total = total;

                if (noise >= threshold) {
                        int interference = int_count - last_int_count;

                        if (noise > max_noise)
                                max_noise = noise;

                        if (!interference)
                                hw_count++;

                        sum_noise += noise;

                        trace_sample_threshold(last_sample, noise, interference);

                        if (osnoise_data.stop_tracing)
                                if (noise > stop_in)
                                        osnoise_stop_tracing();
                }

                /*
                 * In some cases, notably when running on a nohz_full CPU with
                 * a stopped tick, PREEMPT_RCU has no way to account for QSs.
                 * This will eventually cause unwarranted noise as PREEMPT_RCU
                 * will force preemption as the means of ending the current
                 * grace period. We avoid this problem by calling
                 * rcu_momentary_dyntick_idle(), which performs a zero duration
                 * EQS allowing PREEMPT_RCU to end the current grace period.
                 * This call shouldn't be wrapped inside an RCU critical
                 * section.
                 *
                 * Note that in non PREEMPT_RCU kernels QSs are handled through
                 * cond_resched().
                 */
                if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
                        if (!disable_irq)
                                local_irq_disable();

                        rcu_momentary_dyntick_idle();

                        if (!disable_irq)
                                local_irq_enable();
                }

                /*
                 * For the non-preemptive kernel config: let threads run,
                 * if they so wish, unless set not to do so.
                 */
                if (!disable_irq && !disable_preemption)
                        cond_resched();

                last_sample = sample;
                last_int_count = int_count;

        } while (total < runtime && !kthread_should_stop());

        /*
         * Finish the above from the point of view of interrupts.
         */
        barrier();

        osn_var->sampling = false;

        /*
         * Make sure sampling data is no longer updated.
         */
        barrier();

        /*
         * Return to the preemptive state.
         */
        if (disable_preemption)
                preempt_enable();

        if (disable_irq)
                local_irq_enable();

        /*
         * Save noise info.
         */
        s.noise = time_to_us(sum_noise);
        s.runtime = time_to_us(total);
        s.max_sample = time_to_us(max_noise);
        s.hw_count = hw_count;

        /* Save interference stats info */
        diff_osn_sample_stats(osn_var, &s);

        trace_osnoise_sample(&s);

        notify_new_max_latency(max_noise);

        if (osnoise_data.stop_tracing_total)
                if (s.noise > osnoise_data.stop_tracing_total)
                        osnoise_stop_tracing();

        return 0;
out:
        return ret;
}

static struct cpumask osnoise_cpumask;
static struct cpumask save_cpumask;

/*
 * osnoise_sleep - sleep until the next period
 */
static void osnoise_sleep(void)
{
        u64 interval;
        ktime_t wake_time;

        mutex_lock(&interface_lock);
        interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
        mutex_unlock(&interface_lock);

        /*
         * Unlike hwlat_detector, the osnoise tracer can run without a
         * pause because preemption is on.
         */
        if (!interval) {
                /* Let synchronize_rcu_tasks() make progress */
                cond_resched_tasks_rcu_qs();
                return;
        }

        wake_time = ktime_add_us(ktime_get(), interval);
        __set_current_state(TASK_INTERRUPTIBLE);

        while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) {
                if (kthread_should_stop())
                        break;
        }
}
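
/*
 * A sketch of the resulting duty cycle: with the defaults
 * (sample_runtime == sample_period == 1s), interval is 0, so
 * osnoise_sleep() only yields via cond_resched_tasks_rcu_qs() and the
 * next sample starts right away. With, e.g., sample_runtime = 250000us
 * and sample_period = 1000000us, the thread samples for 0.25s and
 * sleeps for the remaining 0.75s of each period.
 */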

/*
 * osnoise_main - The osnoise detection kernel thread
 *
 * Calls run_osnoise() function to measure the osnoise for the configured runtime,
 * every period.
 */
static int osnoise_main(void *data)
{
        while (!kthread_should_stop()) {
                run_osnoise();
                osnoise_sleep();
        }

        return 0;
}
1564
1565 #ifdef CONFIG_TIMERLAT_TRACER
1566 /*
1567  * timerlat_irq - hrtimer handler for timerlat.
1568  */
1569 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
1570 {
1571         struct osnoise_variables *osn_var = this_cpu_osn_var();
1572         struct timerlat_variables *tlat;
1573         struct timerlat_sample s;
1574         u64 now;
1575         u64 diff;
1576
1577         /*
1578          * I am not sure if the timer was armed for this CPU. So, get
1579          * the timerlat struct from the timer itself, not from this
1580          * CPU.
1581          */
1582         tlat = container_of(timer, struct timerlat_variables, timer);
1583
1584         now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1585
1586         /*
1587          * Enable the osnoise: events for thread an softirq.
1588          */
1589         tlat->tracing_thread = true;
1590
1591         osn_var->thread.arrival_time = time_get();
1592
1593         /*
1594          * A hardirq is running: the timer IRQ. It is for sure preempting
1595          * a thread, and potentially preempting a softirq.
1596          *
1597          * At this point, it is not interesting to know the duration of the
1598          * preempted thread (and maybe softirq), but how much time they will
1599          * delay the beginning of the execution of the timer thread.
1600          *
1601          * To get the correct (net) delay added by the softirq, its delta_start
1602          * is set as the IRQ one. In this way, at the return of the IRQ, the delta
1603          * start of the softirq will be zeroed, accounting then only the time
1604          * after that.
1605          *
1606          * The thread follows the same principle. However, if a softirq is
1607          * running, the thread needs to receive the softirq delta_start. The
1608          * reason is that the softirq will be the last to be unfolded,
1609          * resetting the thread delay to zero.
1610          *
1611          * PREEMPT_RT is a special case, though. As softirqs run as threads
1612          * on RT, moving the thread delta_start is enough.
1613          */
1614         if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
1615                 copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
1616                                    &osn_var->softirq.delta_start);
1617
1618                 copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
1619                                     &osn_var->irq.delta_start);
1620         } else {
1621                 copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
1622                                     &osn_var->irq.delta_start);
1623         }
1624
1625         /*
1626          * Compute the difference between the current time and the expected time.
1627          */
1628         diff = now - tlat->abs_period;
1629
1630         tlat->count++;
1631         s.seqnum = tlat->count;
1632         s.timer_latency = diff;
1633         s.context = IRQ_CONTEXT;
1634
1635         trace_timerlat_sample(&s);
1636
1637         if (osnoise_data.stop_tracing) {
1638                 if (time_to_us(diff) >= osnoise_data.stop_tracing) {
1639
1640                         /*
1641                          * At this point, tracing is stopping from IRQ context, so
1642                          * the thread handler, which would print the stack trace
1643                          * when stop_tracing <= print_stack, will not run. Thus,
1644                          * print the stack trace here, as it is helpful to define
1645                          * the root cause of an IRQ latency.
1646                          */
1647                         if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
1648                                 timerlat_save_stack(0);
1649                                 timerlat_dump_stack(time_to_us(diff));
1650                         }
1651
1652                         osnoise_stop_tracing();
1653                         notify_new_max_latency(diff);
1654
1655                         return HRTIMER_NORESTART;
1656                 }
1657         }
1658
1659         wake_up_process(tlat->kthread);
1660
1661         if (osnoise_data.print_stack)
1662                 timerlat_save_stack(0);
1663
1664         return HRTIMER_NORESTART;
1665 }
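
/*
 * The handler above emits the IRQ context sample (the latency of the
 * timer IRQ itself) and then wakes the per-CPU timerlat thread, which
 * emits a second sample for the same activation in THREAD_CONTEXT.
 * An illustrative (not verbatim, values made up) pair of trace lines
 * for a given seqnum:
 *
 *   #1     context    irq timer_latency       932 ns
 *   #1     context thread timer_latency     11700 ns
 */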
1666
1667 /*
1668  * wait_next_period - Wait for the next period for timerlat
1669  */
1670 static int wait_next_period(struct timerlat_variables *tlat)
1671 {
1672         ktime_t next_abs_period, now;
1673         u64 rel_period = osnoise_data.timerlat_period * 1000;
1674
1675         now = hrtimer_cb_get_time(&tlat->timer);
1676         next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1677
1678         /*
1679          * Save the next abs_period.
1680          */
1681         tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1682
1683         /*
1684          * If the new abs_period is in the past, skip the activation.
1685          */
1686         while (ktime_compare(now, next_abs_period) > 0) {
1687                 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1688                 tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1689         }
1690
1691         set_current_state(TASK_INTERRUPTIBLE);
1692
1693         hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
1694         schedule();
1695         return 1;
1696 }
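
/*
 * A worked example of the catch-up loop above: with timerlat_period_us
 * set to 1000, rel_period is 1000000 ns. If the thread was blocked for
 * several periods, the first computed next_abs_period may still be in
 * the past; the loop then advances abs_period one period at a time
 * until it points into the future, so the timer is always armed ahead
 * of now and the missed activations are skipped rather than fired
 * back to back.
 */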
1697
1698 /*
1699  * timerlat_main - Timerlat main
1700  */
1701 static int timerlat_main(void *data)
1702 {
1703         struct osnoise_variables *osn_var = this_cpu_osn_var();
1704         struct timerlat_variables *tlat = this_cpu_tmr_var();
1705         struct timerlat_sample s;
1706         struct sched_param sp;
1707         u64 now, diff;
1708
1709         /*
1710          * Make the thread RT, as that is how cyclictest is usually used.
1711          */
1712         sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
1713         sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1714
1715         tlat->count = 0;
1716         tlat->tracing_thread = false;
1717
1718         hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
1719         tlat->timer.function = timerlat_irq;
1720         tlat->kthread = current;
1721         osn_var->pid = current->pid;
1722         /*
1723          * Annotate the arrival time.
1724          */
1725         tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
1726
1727         wait_next_period(tlat);
1728
1729         osn_var->sampling = 1;
1730
1731         while (!kthread_should_stop()) {
1732                 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1733                 diff = now - tlat->abs_period;
1734
1735                 s.seqnum = tlat->count;
1736                 s.timer_latency = diff;
1737                 s.context = THREAD_CONTEXT;
1738
1739                 trace_timerlat_sample(&s);
1740
1741                 notify_new_max_latency(diff);
1742
1743                 timerlat_dump_stack(time_to_us(diff));
1744
1745                 tlat->tracing_thread = false;
1746                 if (osnoise_data.stop_tracing_total)
1747                         if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
1748                                 osnoise_stop_tracing();
1749
1750                 wait_next_period(tlat);
1751         }
1752
1753         hrtimer_cancel(&tlat->timer);
1754         return 0;
1755 }
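
/*
 * Note that the thread-context latency above is measured against the
 * same tlat->abs_period used by timerlat_irq(), so it accumulates the
 * IRQ latency plus the time needed to schedule this SCHED_FIFO thread
 * in. The thread sample of a given seqnum is therefore never smaller
 * than the IRQ sample of the same activation.
 */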
1756 #else /* CONFIG_TIMERLAT_TRACER */
1757 static int timerlat_main(void *data)
1758 {
1759         return 0;
1760 }
1761 #endif /* CONFIG_TIMERLAT_TRACER */
1762
1763 /*
1764  * stop_kthread - stop a workload thread
1765  */
1766 static void stop_kthread(unsigned int cpu)
1767 {
1768         struct task_struct *kthread;
1769
1770         kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
1771         if (kthread) {
1772                 kthread_stop(kthread);
1773                 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
1774         } else {
1775                 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1776                         per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
1777                         barrier();
1778                         return;
1779                 }
1780         }
1781 }
1782
1783 /*
1784  * stop_per_cpu_kthreads - Stop per-cpu threads
1785  *
1786  * Stop the osnoise sampling threads. Use this on unload and at system
1787  * shutdown.
1788  */
1789 static void stop_per_cpu_kthreads(void)
1790 {
1791         int cpu;
1792
1793         cpus_read_lock();
1794
1795         for_each_online_cpu(cpu)
1796                 stop_kthread(cpu);
1797
1798         cpus_read_unlock();
1799 }
1800
1801 /*
1802  * start_kthread - Start a workload thread
1803  */
1804 static int start_kthread(unsigned int cpu)
1805 {
1806         struct task_struct *kthread;
1807         int (*main)(void *data) = osnoise_main;
1808         char comm[24];
1809
1810         if (timerlat_enabled()) {
1811                 snprintf(comm, 24, "timerlat/%d", cpu);
1812                 main = timerlat_main;
1813         } else {
1814                 /* if no workload, just return */
1815                 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1816                         per_cpu(per_cpu_osnoise_var, cpu).sampling = true;
1817                         barrier();
1818                         return 0;
1819                 }
1820
1821                 snprintf(comm, 24, "osnoise/%d", cpu);
1822         }
1823
1824         kthread = kthread_run_on_cpu(main, NULL, cpu, comm);
1825
1826         if (IS_ERR(kthread)) {
1827                 pr_err(BANNER "could not start sampling thread\n");
1828                 stop_per_cpu_kthreads();
1829                 return -ENOMEM;
1830         }
1831
1832         per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
1833
1834         return 0;
1835 }
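
/*
 * A note on the no-workload branch above: with OSNOISE_WORKLOAD
 * cleared in osnoise/options, no kthread is created; only the per-CPU
 * sampling flag is raised, so the hooked IRQ, softirq, and thread
 * events account noise against whatever workload the user dispatches
 * on that CPU.
 */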
1836
1837 /*
1838  * start_per_cpu_kthreads - Kick off the per-cpu osnoise sampling kthreads
1839  *
1840  * This starts the kernel threads that will look for osnoise on the
1841  * allowed CPUs.
1842  */
1843 static int start_per_cpu_kthreads(void)
1844 {
1845         struct cpumask *current_mask = &save_cpumask;
1846         int retval = 0;
1847         int cpu;
1848
1849         cpus_read_lock();
1850         /*
1851          * Run only on online CPUs in which osnoise is allowed to run.
1852          */
1853         cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
1854
1855         for_each_possible_cpu(cpu)
1856                 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
1857
1858         for_each_cpu(cpu, current_mask) {
1859                 retval = start_kthread(cpu);
1860                 if (retval) {
1861                         cpus_read_unlock();
1862                         stop_per_cpu_kthreads();
1863                         return retval;
1864                 }
1865         }
1866
1867         cpus_read_unlock();
1868
1869         return retval;
1870 }
1871
1872 #ifdef CONFIG_HOTPLUG_CPU
1873 static void osnoise_hotplug_workfn(struct work_struct *dummy)
1874 {
1875         unsigned int cpu = smp_processor_id();
1876
1877         mutex_lock(&trace_types_lock);
1878
1879         if (!osnoise_has_registered_instances())
1880                 goto out_unlock_trace;
1881
1882         mutex_lock(&interface_lock);
1883         cpus_read_lock();
1884
1885         if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
1886                 goto out_unlock;
1887
1888         start_kthread(cpu);
1889
1890 out_unlock:
1891         cpus_read_unlock();
1892         mutex_unlock(&interface_lock);
1893 out_unlock_trace:
1894         mutex_unlock(&trace_types_lock);
1895 }
1896
1897 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);
1898
1899 /*
1900  * osnoise_cpu_init - CPU hotplug online callback function
1901  */
1902 static int osnoise_cpu_init(unsigned int cpu)
1903 {
1904         schedule_work_on(cpu, &osnoise_hotplug_work);
1905         return 0;
1906 }
1907
1908 /*
1909  * osnoise_cpu_die - CPU hotplug offline callback function
1910  */
1911 static int osnoise_cpu_die(unsigned int cpu)
1912 {
1913         stop_kthread(cpu);
1914         return 0;
1915 }
1916
1917 static void osnoise_init_hotplug_support(void)
1918 {
1919         int ret;
1920
1921         ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
1922                                 osnoise_cpu_init, osnoise_cpu_die);
1923         if (ret < 0)
1924                 pr_warn(BANNER "Failed to initialize CPU hotplug support\n");
1927 }
1928 #else /* CONFIG_HOTPLUG_CPU */
1929 static void osnoise_init_hotplug_support(void)
1930 {
1931         return;
1932 }
1933 #endif /* CONFIG_HOTPLUG_CPU */
1934
1935 /*
1936  * seq file functions for the osnoise/options file.
1937  */
1938 static void *s_options_start(struct seq_file *s, loff_t *pos)
1939 {
1940         int option = *pos;
1941
1942         mutex_lock(&interface_lock);
1943
1944         if (option >= OSN_MAX)
1945                 return NULL;
1946
1947         return pos;
1948 }
1949
1950 static void *s_options_next(struct seq_file *s, void *v, loff_t *pos)
1951 {
1952         int option = ++(*pos);
1953
1954         if (option >= OSN_MAX)
1955                 return NULL;
1956
1957         return pos;
1958 }
1959
1960 static int s_options_show(struct seq_file *s, void *v)
1961 {
1962         loff_t *pos = v;
1963         int option = *pos;
1964
1965         if (option == OSN_DEFAULTS) {
1966                 if (osnoise_options == OSN_DEFAULT_OPTIONS)
1967                         seq_printf(s, "%s", osnoise_options_str[option]);
1968                 else
1969                         seq_printf(s, "NO_%s", osnoise_options_str[option]);
1970                 goto out;
1971         }
1972
1973         if (test_bit(option, &osnoise_options))
1974                 seq_printf(s, "%s", osnoise_options_str[option]);
1975         else
1976                 seq_printf(s, "NO_%s", osnoise_options_str[option]);
1977
1978 out:
1979         if (option != OSN_MAX)
1980                 seq_puts(s, " ");
1981
1982         return 0;
1983 }
1984
1985 static void s_options_stop(struct seq_file *s, void *v)
1986 {
1987         seq_puts(s, "\n");
1988         mutex_unlock(&interface_lock);
1989 }
1990
1991 static const struct seq_operations osnoise_options_seq_ops = {
1992         .start          = s_options_start,
1993         .next           = s_options_next,
1994         .show           = s_options_show,
1995         .stop           = s_options_stop
1996 };
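
/*
 * A derived example of the resulting output: with the default options
 * (only OSNOISE_WORKLOAD set), reading osnoise/options prints the
 * single line (wrapped here):
 *
 *   DEFAULTS OSNOISE_WORKLOAD NO_PANIC_ON_STOP
 *   NO_OSNOISE_PREEMPT_DISABLE NO_OSNOISE_IRQ_DISABLE
 */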
1997
1998 static int osnoise_options_open(struct inode *inode, struct file *file)
1999 {
2000         return seq_open(file, &osnoise_options_seq_ops);
2001 }
2002
2003 /**
2004  * osnoise_options_write - Write function for "options" entry
2005  * @filp: The active open file structure
2006  * @ubuf: The user buffer that contains the value to write
2007  * @cnt: The maximum number of bytes to write to "file"
2008  * @ppos: The current position in @file
2009  *
2010  * Writing the option name sets the option; writing the "NO_"
2011  * prefix in front of the option name disables it.
2012  *
2013  * Writing "DEFAULTS" resets the option values to the default ones.
2014  */
2015 static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
2016                                      size_t cnt, loff_t *ppos)
2017 {
2018         int running, option, enable, retval;
2019         char buf[256], *option_str;
2020
2021         if (cnt >= 256)
2022                 return -EINVAL;
2023
2024         if (copy_from_user(buf, ubuf, cnt))
2025                 return -EFAULT;
2026
2027         buf[cnt] = 0;
2028
2029         if (strncmp(buf, "NO_", 3)) {
2030                 option_str = strstrip(buf);
2031                 enable = true;
2032         } else {
2033                 option_str = strstrip(&buf[3]);
2034                 enable = false;
2035         }
2036
2037         option = match_string(osnoise_options_str, OSN_MAX, option_str);
2038         if (option < 0)
2039                 return -EINVAL;
2040
2041         /*
2042          * trace_types_lock is taken to avoid concurrency on start/stop.
2043          */
2044         mutex_lock(&trace_types_lock);
2045         running = osnoise_has_registered_instances();
2046         if (running)
2047                 stop_per_cpu_kthreads();
2048
2049         mutex_lock(&interface_lock);
2050         /*
2051          * Avoid concurrent CPU hotplug operations that might read the options.
2052          */
2053         cpus_read_lock();
2054
2055         retval = cnt;
2056
2057         if (enable) {
2058                 if (option == OSN_DEFAULTS)
2059                         osnoise_options = OSN_DEFAULT_OPTIONS;
2060                 else
2061                         set_bit(option, &osnoise_options);
2062         } else {
2063                 if (option == OSN_DEFAULTS)
2064                         retval = -EINVAL;
2065                 else
2066                         clear_bit(option, &osnoise_options);
2067         }
2068
2069         cpus_read_unlock();
2070         mutex_unlock(&interface_lock);
2071
2072         if (running)
2073                 start_per_cpu_kthreads();
2074         mutex_unlock(&trace_types_lock);
2075
2076         return retval;
2077 }
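
/*
 * A usage sketch for the write handler above, assuming the usual
 * /sys/kernel/tracing mount point:
 *
 *   echo OSNOISE_PREEMPT_DISABLE > osnoise/options    # set an option
 *   echo NO_OSNOISE_PREEMPT_DISABLE > osnoise/options # clear it
 *   echo DEFAULTS > osnoise/options                   # restore defaults
 *
 * Writing "NO_DEFAULTS" is rejected with -EINVAL: the defaults can
 * only be set, not negated.
 */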
2078
2079 /*
2080  * osnoise_cpus_read - Read function for the "cpus" entry
2081  * @filp: The active open file structure
2082  * @ubuf: The userspace provided buffer to read value into
2083  * @count: The maximum number of bytes to read
2084  * @ppos: The current "file" position
2085  *
2086  * Prints the "cpus" output into the user-provided buffer.
2087  */
2088 static ssize_t
2089 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
2090                   loff_t *ppos)
2091 {
2092         char *mask_str;
2093         int len;
2094
2095         mutex_lock(&interface_lock);
2096
2097         len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
2098         mask_str = kmalloc(len, GFP_KERNEL);
2099         if (!mask_str) {
2100                 count = -ENOMEM;
2101                 goto out_unlock;
2102         }
2103
2104         len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
2105         if (len >= count) {
2106                 count = -EINVAL;
2107                 goto out_free;
2108         }
2109
2110         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
2111
2112 out_free:
2113         kfree(mask_str);
2114 out_unlock:
2115         mutex_unlock(&interface_lock);
2116
2117         return count;
2118 }
2119
2120 /*
2121  * osnoise_cpus_write - Write function for "cpus" entry
2122  * @filp: The active open file structure
2123  * @ubuf: The user buffer that contains the value to write
2124  * @cnt: The maximum number of bytes to write to "file"
2125  * @ppos: The current position in @file
2126  *
2127  * This function provides a write implementation for the "cpus"
2128  * interface to the osnoise tracer. By default, it lists all CPUs,
2129  * allowing osnoise threads to run on any online CPU of the system.
2130  * Writing a set of CPUs via this interface restricts the execution
2131  * of osnoise to those CPUs. Why not use "tracing_cpumask"?
2132  * Because the user might be interested in tracing what is running on
2133  * other CPUs. For instance, one might run osnoise in one HT CPU
2134  * while observing what is running on the sibling HT CPU.
2135  */
2136 static ssize_t
2137 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
2138                    loff_t *ppos)
2139 {
2140         cpumask_var_t osnoise_cpumask_new;
2141         int running, err;
2142         char buf[256];
2143
2144         if (count >= 256)
2145                 return -EINVAL;
2146
2147         if (copy_from_user(buf, ubuf, count))
2148                 return -EFAULT;
2149
        /* NUL-terminate the buffer, as cpulist_parse() expects a string. */
        buf[count] = '\0';

2150         if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
2151                 return -ENOMEM;
2152
2153         err = cpulist_parse(buf, osnoise_cpumask_new);
2154         if (err)
2155                 goto err_free;
2156
2157         /*
2158          * trace_types_lock is taken to avoid concurrency on start/stop.
2159          */
2160         mutex_lock(&trace_types_lock);
2161         running = osnoise_has_registered_instances();
2162         if (running)
2163                 stop_per_cpu_kthreads();
2164
2165         mutex_lock(&interface_lock);
2166         /*
2167          * osnoise_cpumask is read by CPU hotplug operations.
2168          */
2169         cpus_read_lock();
2170
2171         cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
2172
2173         cpus_read_unlock();
2174         mutex_unlock(&interface_lock);
2175
2176         if (running)
2177                 start_per_cpu_kthreads();
2178         mutex_unlock(&trace_types_lock);
2179
2180         free_cpumask_var(osnoise_cpumask_new);
2181         return count;
2182
2183 err_free:
2184         free_cpumask_var(osnoise_cpumask_new);
2185
2186         return err;
2187 }
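
/*
 * A usage sketch for the "cpus" interface above: the buffer is parsed
 * by cpulist_parse(), so the usual list syntax applies, e.g.:
 *
 *   echo 0-3,8 > /sys/kernel/tracing/osnoise/cpus
 *
 * restricts the osnoise/timerlat threads to CPUs 0-3 and 8, further
 * intersected with the online mask when the threads are started.
 */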
2188
2189 /*
2190  * osnoise/runtime_us: cannot be greater than the period.
2191  */
2192 static struct trace_min_max_param osnoise_runtime = {
2193         .lock   = &interface_lock,
2194         .val    = &osnoise_data.sample_runtime,
2195         .max    = &osnoise_data.sample_period,
2196         .min    = NULL,
2197 };
2198
2199 /*
2200  * osnoise/period_us: cannot be smaller than the runtime.
2201  */
2202 static struct trace_min_max_param osnoise_period = {
2203         .lock   = &interface_lock,
2204         .val    = &osnoise_data.sample_period,
2205         .max    = NULL,
2206         .min    = &osnoise_data.sample_runtime,
2207 };
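
/*
 * The two parameters above are deliberately cross-linked: the
 * runtime's .max points at the period, and the period's .min points
 * at the runtime. Writes through trace_min_max_fops thus preserve the
 * invariant sample_runtime <= sample_period, which osnoise_sleep()
 * relies on when computing the sleep interval.
 */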
2208
2209 /*
2210  * osnoise/stop_tracing_us: no limit.
2211  */
2212 static struct trace_min_max_param osnoise_stop_tracing_in = {
2213         .lock   = &interface_lock,
2214         .val    = &osnoise_data.stop_tracing,
2215         .max    = NULL,
2216         .min    = NULL,
2217 };
2218
2219 /*
2220  * osnoise/stop_tracing_total_us: no limit.
2221  */
2222 static struct trace_min_max_param osnoise_stop_tracing_total = {
2223         .lock   = &interface_lock,
2224         .val    = &osnoise_data.stop_tracing_total,
2225         .max    = NULL,
2226         .min    = NULL,
2227 };
2228
2229 #ifdef CONFIG_TIMERLAT_TRACER
2230 /*
2231  * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
2232  * latency is higher than val.
2233  */
2234 static struct trace_min_max_param osnoise_print_stack = {
2235         .lock   = &interface_lock,
2236         .val    = &osnoise_data.print_stack,
2237         .max    = NULL,
2238         .min    = NULL,
2239 };
2240
2241 /*
2242  * osnoise/timerlat_period: min 100 us, max 1 s
2243  */
2244 static u64 timerlat_min_period = 100;
2245 static u64 timerlat_max_period = 1000000;
2246 static struct trace_min_max_param timerlat_period = {
2247         .lock   = &interface_lock,
2248         .val    = &osnoise_data.timerlat_period,
2249         .max    = &timerlat_max_period,
2250         .min    = &timerlat_min_period,
2251 };
2252 #endif
2253
2254 static const struct file_operations cpus_fops = {
2255         .open           = tracing_open_generic,
2256         .read           = osnoise_cpus_read,
2257         .write          = osnoise_cpus_write,
2258         .llseek         = generic_file_llseek,
2259 };
2260
2261 static const struct file_operations osnoise_options_fops = {
2262         .open           = osnoise_options_open,
2263         .read           = seq_read,
2264         .llseek         = seq_lseek,
2265         .release        = seq_release,
2266         .write          = osnoise_options_write
2267 };
2268
2269 #ifdef CONFIG_TIMERLAT_TRACER
2270 #ifdef CONFIG_STACKTRACE
2271 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2272 {
2273         struct dentry *tmp;
2274
2275         tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
2276                                   &osnoise_print_stack, &trace_min_max_fops);
2277         if (!tmp)
2278                 return -ENOMEM;
2279
2280         return 0;
2281 }
2282 #else /* CONFIG_STACKTRACE */
2283 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2284 {
2285         return 0;
2286 }
2287 #endif /* CONFIG_STACKTRACE */
2288
2289 /*
2290  * init_timerlat_tracefs - A function to initialize the timerlat interface files
2291  */
2292 static int init_timerlat_tracefs(struct dentry *top_dir)
2293 {
2294         struct dentry *tmp;
2295
2296         tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
2297                                   &timerlat_period, &trace_min_max_fops);
2298         if (!tmp)
2299                 return -ENOMEM;
2300
2301         return init_timerlat_stack_tracefs(top_dir);
2302 }
2303 #else /* CONFIG_TIMERLAT_TRACER */
2304 static int init_timerlat_tracefs(struct dentry *top_dir)
2305 {
2306         return 0;
2307 }
2308 #endif /* CONFIG_TIMERLAT_TRACER */
2309
2310 /*
2311  * init_tracefs - A function to initialize the tracefs interface files
2312  *
2313  * This function creates entries in tracefs for "osnoise" and "timerlat".
2314  * It creates these directories in the tracing directory, and within that
2315  * directory the user can change and view the configs.
2316  */
2317 static int init_tracefs(void)
2318 {
2319         struct dentry *top_dir;
2320         struct dentry *tmp;
2321         int ret;
2322
2323         ret = tracing_init_dentry();
2324         if (ret)
2325                 return -ENOMEM;
2326
2327         top_dir = tracefs_create_dir("osnoise", NULL);
2328         if (!top_dir)
2329                 return 0;
2330
2331         tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir,
2332                                   &osnoise_period, &trace_min_max_fops);
2333         if (!tmp)
2334                 goto err;
2335
2336         tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir,
2337                                   &osnoise_runtime, &trace_min_max_fops);
2338         if (!tmp)
2339                 goto err;
2340
2341         tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir,
2342                                   &osnoise_stop_tracing_in, &trace_min_max_fops);
2343         if (!tmp)
2344                 goto err;
2345
2346         tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir,
2347                                   &osnoise_stop_tracing_total, &trace_min_max_fops);
2348         if (!tmp)
2349                 goto err;
2350
2351         tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops);
2352         if (!tmp)
2353                 goto err;
2354
2355         tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL,
2356                                 &osnoise_options_fops);
2357         if (!tmp)
2358                 goto err;
2359
2360         ret = init_timerlat_tracefs(top_dir);
2361         if (ret)
2362                 goto err;
2363
2364         return 0;
2365
2366 err:
2367         tracefs_remove(top_dir);
2368         return -ENOMEM;
2369 }
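
/*
 * For reference, the files created above under the tracing directory:
 *
 *   osnoise/period_us
 *   osnoise/runtime_us
 *   osnoise/stop_tracing_us
 *   osnoise/stop_tracing_total_us
 *   osnoise/cpus
 *   osnoise/options
 *   osnoise/timerlat_period_us   (CONFIG_TIMERLAT_TRACER=y)
 *   osnoise/print_stack          (plus CONFIG_STACKTRACE=y)
 */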
2370
2371 static int osnoise_hook_events(void)
2372 {
2373         int retval;
2374
2375         /*
2376          * Trace is already hooked, we are re-enabling from
2377          * a stop_tracing_*.
2378          */
2379         if (trace_osnoise_callback_enabled)
2380                 return 0;
2381
2382         retval = hook_irq_events();
2383         if (retval)
2384                 return -EINVAL;
2385
2386         retval = hook_softirq_events();
2387         if (retval)
2388                 goto out_unhook_irq;
2389
2390         retval = hook_thread_events();
2391         /*
2392          * All fine!
2393          */
2394         if (!retval)
2395                 return 0;
2396
2397         unhook_softirq_events();
2398 out_unhook_irq:
2399         unhook_irq_events();
2400         return -EINVAL;
2401 }
2402
2403 static void osnoise_unhook_events(void)
2404 {
2405         unhook_thread_events();
2406         unhook_softirq_events();
2407         unhook_irq_events();
2408 }
2409
2410 /*
2411  * osnoise_workload_start - start the workload and hook to events
2412  */
2413 static int osnoise_workload_start(void)
2414 {
2415         int retval;
2416
2417         /*
2418          * Instances need to be registered after calling workload
2419          * start. Hence, if there is already an instance, the
2420          * workload was already registered. Otherwise, this
2421          * code is on the way to register the first instance,
2422          * and the workload will start.
2423          */
2424         if (osnoise_has_registered_instances())
2425                 return 0;
2426
2427         osn_var_reset_all();
2428
2429         retval = osnoise_hook_events();
2430         if (retval)
2431                 return retval;
2432
2433         /*
2434          * Make sure that ftrace_nmi_enter/exit() see reset values
2435          * before enabling trace_osnoise_callback_enabled.
2436          */
2437         barrier();
2438         trace_osnoise_callback_enabled = true;
2439
2440         retval = start_per_cpu_kthreads();
2441         if (retval) {
2442                 trace_osnoise_callback_enabled = false;
2443                 /*
2444                  * Make sure that ftrace_nmi_enter/exit() see
2445                  * trace_osnoise_callback_enabled as false before continuing.
2446                  */
2447                 barrier();
2448
2449                 osnoise_unhook_events();
2450                 return retval;
2451         }
2452
2453         return 0;
2454 }
2455
2456 /*
2457  * osnoise_workload_stop - stop the workload and unhook the events
2458  */
2459 static void osnoise_workload_stop(void)
2460 {
2461         /*
2462          * Instances need to be unregistered before calling
2463          * stop. Hence, if there is still a registered instance, another
2464          * instance is still running, and the workload must not stop
2465          * yet. Otherwise, this code is on the way to disable
2466          * the last instance, and the workload can stop.
2467          */
2468         if (osnoise_has_registered_instances())
2469                 return;
2470
2471         /*
2472          * If callbacks were already disabled in a previous stop
2473          * call, there is no need to disable them again.
2474          *
2475          * For instance, this happens when tracing is stopped via:
2476          * echo 0 > tracing_on
2477          * echo nop > current_tracer.
2478          */
2479         if (!trace_osnoise_callback_enabled)
2480                 return;
2481
2482         trace_osnoise_callback_enabled = false;
2483         /*
2484          * Make sure that ftrace_nmi_enter/exit() see
2485          * trace_osnoise_callback_enabled as false before continuing.
2486          */
2487         barrier();
2488
2489         stop_per_cpu_kthreads();
2490
2491         osnoise_unhook_events();
2492 }
2493
2494 static void osnoise_tracer_start(struct trace_array *tr)
2495 {
2496         int retval;
2497
2498         /*
2499          * If the instance is already registered, there is no need to
2500          * register it again.
2501          */
2502         if (osnoise_instance_registered(tr))
2503                 return;
2504
2505         retval = osnoise_workload_start();
2506         if (retval)
2507                 pr_err(BANNER "Error starting osnoise tracer\n");
2508
2509         osnoise_register_instance(tr);
2510 }
2511
2512 static void osnoise_tracer_stop(struct trace_array *tr)
2513 {
2514         osnoise_unregister_instance(tr);
2515         osnoise_workload_stop();
2516 }
2517
2518 static int osnoise_tracer_init(struct trace_array *tr)
2519 {
2520         /*
2521          * Only allow osnoise tracer if timerlat tracer is not running
2522          * already.
2523          */
2524         if (timerlat_enabled())
2525                 return -EBUSY;
2526
2527         tr->max_latency = 0;
2528
2529         osnoise_tracer_start(tr);
2530         return 0;
2531 }
2532
2533 static void osnoise_tracer_reset(struct trace_array *tr)
2534 {
2535         osnoise_tracer_stop(tr);
2536 }
2537
2538 static struct tracer osnoise_tracer __read_mostly = {
2539         .name           = "osnoise",
2540         .init           = osnoise_tracer_init,
2541         .reset          = osnoise_tracer_reset,
2542         .start          = osnoise_tracer_start,
2543         .stop           = osnoise_tracer_stop,
2544         .print_header   = print_osnoise_headers,
2545         .allow_instances = true,
2546 };
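
/*
 * A usage sketch: the tracer registered above is enabled through the
 * regular tracing interface, e.g.:
 *
 *   cd /sys/kernel/tracing
 *   echo osnoise > current_tracer
 *   cat trace
 *
 * Since .allow_instances is set, the same works from within
 * instances/<name>/, and the workload keeps running for as long as
 * at least one instance has the tracer enabled.
 */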
2547
2548 #ifdef CONFIG_TIMERLAT_TRACER
2549 static void timerlat_tracer_start(struct trace_array *tr)
2550 {
2551         int retval;
2552
2553         /*
2554          * If the instance is already registered, there is no need to
2555          * register it again.
2556          */
2557         if (osnoise_instance_registered(tr))
2558                 return;
2559
2560         retval = osnoise_workload_start();
2561         if (retval)
2562                 pr_err(BANNER "Error starting timerlat tracer\n");
2563
2564         osnoise_register_instance(tr);
2567 }
2568
2569 static void timerlat_tracer_stop(struct trace_array *tr)
2570 {
2571         int cpu;
2572
2573         osnoise_unregister_instance(tr);
2574
2575         /*
2576          * Instruct the threads to stop only if this is the last instance.
2577          */
2578         if (!osnoise_has_registered_instances()) {
2579                 for_each_online_cpu(cpu)
2580                         per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
2581         }
2582
2583         osnoise_workload_stop();
2584 }
2585
2586 static int timerlat_tracer_init(struct trace_array *tr)
2587 {
2588         /*
2589          * Only allow timerlat tracer if osnoise tracer is not running already.
2590          */
2591         if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer)
2592                 return -EBUSY;
2593
2594         /*
2595          * If this is the first instance, set timerlat_tracer to block
2596          * osnoise tracer start.
2597          */
2598         if (!osnoise_has_registered_instances())
2599                 osnoise_data.timerlat_tracer = 1;
2600
2601         tr->max_latency = 0;
2602         timerlat_tracer_start(tr);
2603
2604         return 0;
2605 }
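
/*
 * The timerlat_tracer flag set above blocks osnoise:
 * osnoise_tracer_init() refuses to start (via timerlat_enabled())
 * while it is set, and the -EBUSY check at the top of this function
 * refuses timerlat while plain osnoise instances are registered. The
 * two tracers share the per-CPU workload machinery and cannot run
 * simultaneously.
 */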
2606
2607 static void timerlat_tracer_reset(struct trace_array *tr)
2608 {
2609         timerlat_tracer_stop(tr);
2610
2611         /*
2612          * If this is the last instance, reset timerlat_tracer allowing
2613          * osnoise to be started.
2614          */
2615         if (!osnoise_has_registered_instances())
2616                 osnoise_data.timerlat_tracer = 0;
2617 }
2618
2619 static struct tracer timerlat_tracer __read_mostly = {
2620         .name           = "timerlat",
2621         .init           = timerlat_tracer_init,
2622         .reset          = timerlat_tracer_reset,
2623         .start          = timerlat_tracer_start,
2624         .stop           = timerlat_tracer_stop,
2625         .print_header   = print_timerlat_headers,
2626         .allow_instances = true,
2627 };
2628
2629 __init static int init_timerlat_tracer(void)
2630 {
2631         return register_tracer(&timerlat_tracer);
2632 }
2633 #else /* CONFIG_TIMERLAT_TRACER */
2634 __init static int init_timerlat_tracer(void)
2635 {
2636         return 0;
2637 }
2638 #endif /* CONFIG_TIMERLAT_TRACER */
2639
2640 __init static int init_osnoise_tracer(void)
2641 {
2642         int ret;
2643
2644         mutex_init(&interface_lock);
2645
2646         cpumask_copy(&osnoise_cpumask, cpu_all_mask);
2647
2648         ret = register_tracer(&osnoise_tracer);
2649         if (ret) {
2650                 pr_err(BANNER "Error registering osnoise!\n");
2651                 return ret;
2652         }
2653
2654         ret = init_timerlat_tracer();
2655         if (ret) {
2656                 pr_err(BANNER "Error registering timerlat!\n");
2657                 return ret;
2658         }
2659
2660         osnoise_init_hotplug_support();
2661
2662         INIT_LIST_HEAD_RCU(&osnoise_instances);
2663
2664         init_tracefs();
2665
2666         return 0;
2667 }
2668 late_initcall(init_osnoise_tracer);