// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
 */

#include <stdlib.h>
#include <errno.h>
#include "utils.h"
#include "osnoise.h"
#include "timerlat.h"
#include <unistd.h>

enum timelat_state {
	TIMERLAT_INIT = 0,
	TIMERLAT_WAITING_IRQ,
	TIMERLAT_WAITING_THREAD,
};

#define MAX_COMM		24

/*
 * Per-CPU statistics and data.
 */
struct timerlat_aa_data {
	/* Current CPU state */
	int			curr_state;

	/* timerlat IRQ latency */
	unsigned long long	tlat_irq_seqnum;
	unsigned long long	tlat_irq_latency;
	unsigned long long	tlat_irq_timstamp;

	/* timerlat thread latency */
	unsigned long long	tlat_thread_seqnum;
	unsigned long long	tlat_thread_latency;
	unsigned long long	tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 *
	 * Note: "unsigned long long" because the values are fetched
	 * using tep_get_field_val().
	 */
	unsigned long long	run_thread_pid;
	char			run_thread_comm[MAX_COMM];
	unsigned long long	thread_blocking_duration;
	unsigned long long	max_exit_idle_latency;

	/* Information about the timerlat timer IRQ */
	unsigned long long	timer_irq_start_time;
	unsigned long long	timer_irq_start_delay;
	unsigned long long	timer_irq_duration;
	unsigned long long	timer_exit_from_idle;

	/*
	 * Information about the last IRQ that ran before the timerlat
	 * IRQ arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * the timerlat IRQ latency. Otherwise, ignore it.
	 */
	unsigned long long	prev_irq_duration;
	unsigned long long	prev_irq_timstamp;

	/*
	 * Interference sums.
	 */
	unsigned long long	thread_nmi_sum;
	unsigned long long	thread_irq_sum;
	unsigned long long	thread_softirq_sum;
	unsigned long long	thread_thread_sum;

	/*
	 * Interference task information.
	 */
	struct trace_seq	*prev_irqs_seq;
	struct trace_seq	*nmi_seq;
	struct trace_seq	*irqs_seq;
	struct trace_seq	*softirqs_seq;
	struct trace_seq	*threads_seq;
	struct trace_seq	*stack_seq;

	/*
	 * Current thread.
	 */
	char			current_comm[MAX_COMM];
	unsigned long long	current_pid;

	/*
	 * Is the system running a kworker?
	 */
	unsigned long long	kworker;
	unsigned long long	kworker_func;
};

/*
 * The analysis context and system-wide view.
 */
struct timerlat_aa_context {
	int nr_cpus;
	int dump_tasks;

	/* per CPU data */
	struct timerlat_aa_data *taa_data;

	/*
	 * Required to translate function names and to register
	 * events.
	 */
	struct osnoise_tool *tool;
};

/*
 * The data is stored in a file-local (static) variable, and accessed via a
 * helper function.
 *
 * It could be stored inside the trace context, but every access would
 * require container_of() + a series of pointers. Do we need it? Not sure.
 *
 * For now, keep it simple. If needed, store it in the tool, add the *context
 * as a parameter to timerlat_aa_get_ctx() and do the magic there.
 */
static struct timerlat_aa_context *__timerlat_aa_ctx;

static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}

/*
 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
 */
static struct timerlat_aa_data
*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
{
	return &taa_ctx->taa_data[cpu];
}

/*
 * timerlat_aa_irq_latency - Handles the timerlat IRQ event
 */
static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
				   struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event)
{
	/*
	 * For interference accounting: from this point on, look for things
	 * that can delay the thread.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
	taa_data->tlat_irq_timstamp = record->ts;

	/*
	 * Zero values.
	 */
	taa_data->thread_nmi_sum = 0;
	taa_data->thread_irq_sum = 0;
	taa_data->thread_softirq_sum = 0;
	taa_data->thread_blocking_duration = 0;
	taa_data->timer_irq_start_time = 0;
	taa_data->timer_irq_duration = 0;
	taa_data->timer_exit_from_idle = 0;

	/*
	 * Zero interference tasks.
	 */
	trace_seq_reset(taa_data->nmi_seq);
	trace_seq_reset(taa_data->irqs_seq);
	trace_seq_reset(taa_data->softirqs_seq);
	trace_seq_reset(taa_data->threads_seq);

	/* IRQ latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);

	/* The thread that can cause blocking */
	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);

	/*
	 * Get the exit from idle case.
	 *
	 * If it was not the idle thread that was interrupted:
	 */
	if (taa_data->run_thread_pid)
		return 0;

	/*
	 * If the latency is shorter than the known exit from idle latency:
	 */
	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
		return 0;

	/*
	 * To be safe, ignore the cases in which an IRQ/NMI could have
	 * interfered with the timerlat IRQ.
	 */
	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
		return 0;

	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;

	return 0;
}

/*
 * timerlat_aa_thread_latency - Handles the timerlat thread event
 */
static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
				      struct trace_seq *s, struct tep_record *record,
				      struct tep_event *event)
{
	/*
	 * For interference accounting: from this point on, look for things
	 * that can delay the IRQ of the next cycle.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_IRQ;
	taa_data->tlat_thread_timstamp = record->ts;

	/* Thread latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);

	return 0;
}

/*
 * timerlat_aa_handler - Handle timerlat events
 *
 * This function is called to handle timerlat events, recording statistics.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
			       struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long thread;

	if (!taa_data)
		return -1;

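	/*
	 * The "context" field tells if the event was generated by the
	 * timerlat IRQ (0) or by the timerlat thread.
	 */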
	tep_get_field_val(s, event, "context", record, &thread, 1);
	if (!thread)
		return timerlat_aa_irq_latency(taa_data, s, record, event);
	else
		return timerlat_aa_thread_latency(taa_data, s, record, event);
}

/*
 * timerlat_aa_nmi_handler - Handles NMI noise
 *
 * It is used to collect information about interference from NMIs. It is
 * hooked to the osnoise:nmi_noise event.
 */
static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long duration;
	unsigned long long start;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);

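	/*
	 * An NMI that hits before the timerlat IRQ is saved as the previous
	 * noise; one that hits after it is accounted as NMI interference on
	 * the thread.
	 */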
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s       \t\t\t%9.2f us\n",
				 "nmi", ns_to_usf(duration));
		return 0;
	}

	taa_data->thread_nmi_sum += duration;
	trace_seq_printf(taa_data->nmi_seq, "   %24s    \t\t\t%9.2f us\n",
			 "nmi", ns_to_usf(duration));

	return 0;
}

/*
 * timerlat_aa_irq_handler - Handles IRQ noise
 *
 * It is used to collect information about interference from IRQs. It is
 * hooked to the osnoise:irq_noise event.
 *
 * It is a little bit more complex than the others because it measures:
 *	- The IRQs that ran before the timer IRQ and could have delayed it.
 *	- The timerlat IRQ handler itself.
 *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
 *	  (IRQ interference).
 */
static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long expected_start;
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;
	char *desc;
	int val;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);
	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);

	/*
	 * Before the timerlat IRQ.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu        \t\t%9.2f us\n",
				 desc, vector, ns_to_usf(duration));
		return 0;
	}

	/*
	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed when
	 * the timerlat IRQ event is handled, so a zero value means this is
	 * the first irq_noise event after it: the timerlat IRQ itself.
	 */
	if (!taa_data->timer_irq_start_time) {
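		/*
		 * The expected start is when the timer should have fired:
		 * the timerlat IRQ timestamp minus the measured IRQ latency.
		 */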
		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;

		taa_data->timer_irq_start_time = start;
		taa_data->timer_irq_duration = duration;

		taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;

		/*
		 * Not an exit from idle.
		 */
		if (taa_data->run_thread_pid)
			return 0;

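		/*
		 * If no other IRQ/NMI ran right before the expected start,
		 * the start delay can be attributed to the exit from idle.
		 */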
		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;

		return 0;
	}

	/*
	 * IRQ interference.
	 */
	taa_data->thread_irq_sum += duration;
	trace_seq_printf(taa_data->irqs_seq, "  %24s:%-3llu     \t      %9.2f us\n",
			 desc, vector, ns_to_usf(duration));

	return 0;
}

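/*
 * Softirq vector names, in the same order as the kernel's softirq enum;
 * indexed by the "vector" field of the osnoise:softirq_noise event.
 */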
static char *softirq_name[] = { "HI", "TIMER",  "NET_TX", "NET_RX", "BLOCK",
				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };

/*
 * timerlat_aa_softirq_handler - Handles softirq noise
 *
 * It is used to collect information about interference from softirqs. It is
 * hooked to the osnoise:softirq_noise event.
 *
 * Softirq noise is only printed on non-RT kernels, as softirqs become
 * threads on RT.
 */
static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
				       struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;

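	/*
	 * Only softirqs that run between the timerlat IRQ and the timerlat
	 * thread are accounted as thread interference.
	 */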
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
		return 0;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);

	taa_data->thread_softirq_sum += duration;

	trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu \t      %9.2f us\n",
			 softirq_name[vector], vector, ns_to_usf(duration));
	return 0;
}

/*
 * timerlat_aa_thread_handler - Handles thread noise
 *
 * It is used to collect information about interference from other threads.
 * It is hooked to the osnoise:thread_noise event.
 *
 * Note: if you see thread noise, your timerlat thread was not the highest
 * prio one.
 */
static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
				      struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long duration;
	unsigned long long start;
	unsigned long long pid;
	const char *comm;
	int val;

	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
		return 0;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);

	tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
	comm = tep_get_field_raw(s, event, "comm", record, &val, 1);

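	/*
	 * The first noise from the thread that was running when the timerlat
	 * IRQ hit is the blocking duration; noise from any other thread is
	 * accounted as thread interference.
	 */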
	if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
		taa_data->thread_blocking_duration = duration;

		if (comm)
			strncpy(taa_data->run_thread_comm, comm, MAX_COMM - 1);
		else
			sprintf(taa_data->run_thread_comm, "<...>");

	} else {
		taa_data->thread_thread_sum += duration;

		trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu  \t\t%9.2f us\n",
				 comm, pid, ns_to_usf(duration));
	}

	return 0;
}

/*
 * timerlat_aa_stack_handler - Handles the timerlat IRQ stack trace
 *
 * Saves and parses the stack trace generated by the timerlat IRQ.
 */
static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
				     struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long *caller;
	const char *function;
	int val, i;

	trace_seq_reset(taa_data->stack_seq);

	trace_seq_printf(taa_data->stack_seq, "    Blocking thread stack trace\n");
	caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
	if (caller) {
		for (i = 0; ; i++) {
			function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
			if (!function)
				break;
			trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
		}
	}
	return 0;
}

/*
 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
 *
 * Handles the sched:sched_switch event to track the current thread running on
 * the CPU. It is used to display the threads running on the other CPUs when
 * the trace stops.
 */
static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
					    struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	const char *comm;
	int val;

	tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
	comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);

	strncpy(taa_data->current_comm, comm, MAX_COMM - 1);

	/*
	 * Zero the kworker information: it is set again by the workqueue
	 * handler if the next thread turns out to be a kworker.
	 */
	taa_data->kworker = 0;
	taa_data->kworker_func = 0;

	return 0;
}

/*
 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
 *
 * Handles the workqueue:workqueue_execute_start event, keeping track of
 * the job that a kworker could be doing on the CPU.
 *
 * This has already helped to catch hardware-related latencies caused by work
 * queues running driver code that stalls the hardware, for example with DRM
 * drivers.
 */
static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
					     struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);

	tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
	tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
	return 0;
}

/*
 * timerlat_thread_analysis - Prints the analysis of a CPU that hit the stop tracing condition
 *
 * This is the core of the analysis.
 */
static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
				     int irq_thresh, int thread_thresh)
{
	unsigned long long exp_irq_ts;
	int total;
	int irq;

	/*
	 * IRQ latency or thread latency? If the IRQ seqnum is ahead of the
	 * thread seqnum, the trace stopped before the thread event for that
	 * activation was recorded, so the IRQ latency triggered the stop.
	 */
	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
		irq = 1;
		total = taa_data->tlat_irq_latency;
	} else {
		irq = 0;
		total = taa_data->tlat_thread_latency;
	}

	/*
	 * Expected IRQ arrival time, using the trace clock as the base.
	 */
	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;

	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
		printf("  Previous IRQ interference:    \t\t up to  %9.2f us\n",
			ns_to_usf(taa_data->prev_irq_duration));

	/*
	 * The delay that the IRQ suffered before starting.
	 */
	printf("  IRQ handler delay:            %16s    %9.2f us (%.2f %%)\n",
		(ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
		ns_to_usf(taa_data->timer_irq_start_delay),
		ns_to_per(total, taa_data->timer_irq_start_delay));

	/*
	 * Timerlat IRQ.
	 */
	printf("  IRQ latency:  \t\t\t\t        %9.2f us\n",
		ns_to_usf(taa_data->tlat_irq_latency));

	if (irq) {
		/*
		 * If the trace stopped due to the IRQ latency, the other
		 * events did not happen because... the trace stopped :-).
		 *
		 * That is all, folks: the stack trace was printed before the
		 * stop, so it will be displayed. It is the key information.
		 */
		printf("  Blocking thread:\n");
		printf("        %24s:%-9llu\n",
			taa_data->run_thread_comm, taa_data->run_thread_pid);
	} else {
		/*
		 * The duration of the IRQ handler that handled the timerlat IRQ.
		 */
		printf("  Timerlat IRQ duration:        \t\t    %9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->timer_irq_duration),
			ns_to_per(total, taa_data->timer_irq_duration));

		/*
		 * The amount of time that the thread running when the IRQ
		 * arrived postponed the scheduler.
		 *
		 * Recall that this value is already net of the NMI/IRQ/softirq
		 * interference, so there is no need to subtract them here.
		 */
		printf("  Blocking thread:      \t\t\t  %9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_blocking_duration),
			ns_to_per(total, taa_data->thread_blocking_duration));

		printf("        %24s:%-9llu             %9.2f us\n",
			taa_data->run_thread_comm, taa_data->run_thread_pid,
			ns_to_usf(taa_data->thread_blocking_duration));
	}

	/*
	 * Print the stack trace!
	 */
	trace_seq_do_printf(taa_data->stack_seq);

	/*
	 * NMIs can happen during the IRQ, so they are always possible.
	 */
	if (taa_data->thread_nmi_sum)
		printf("  NMI interference      \t\t\t  %9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_nmi_sum),
			ns_to_per(total, taa_data->thread_nmi_sum));

	/*
	 * If it is an IRQ latency, the other factors can be skipped.
	 */
	if (irq)
		goto print_total;

	/*
	 * Prints the interference caused by IRQs to the thread latency.
	 */
	if (taa_data->thread_irq_sum) {
		printf("  IRQ interference      \t\t\t  %9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_irq_sum),
			ns_to_per(total, taa_data->thread_irq_sum));

		trace_seq_do_printf(taa_data->irqs_seq);
	}

	/*
	 * Prints the interference caused by softirqs to the thread latency.
	 */
	if (taa_data->thread_softirq_sum) {
		printf("  Softirq interference  \t\t\t  %9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_softirq_sum),
			ns_to_per(total, taa_data->thread_softirq_sum));

		trace_seq_do_printf(taa_data->softirqs_seq);
	}

	/*
	 * Prints the interference caused by other threads to the thread
	 * latency.
	 *
	 * If this happens, your timerlat thread is not the highest prio one.
	 * OK, the migration thread can happen. But otherwise, you are no
	 * longer measuring only the "scheduling latency", and this is the
	 * difference between scheduling latency and timer-handling latency.
	 */
	if (taa_data->thread_thread_sum) {
		printf("  Thread interference   \t\t\t  %9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_thread_sum),
			ns_to_per(total, taa_data->thread_thread_sum));

		trace_seq_do_printf(taa_data->threads_seq);
	}

	/*
	 * Done.
	 */
print_total:
	printf("------------------------------------------------------------------------\n");
	printf("  %s latency:   \t\t\t  %9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
		ns_to_usf(total));
}

static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
{
	struct trace_instance *trace = &taa_ctx->tool->trace;
	int retval;

	retval = tracefs_iterate_raw_events(trace->tep,
					    trace->inst,
					    NULL,
					    0,
					    collect_registered_events,
					    trace);
	if (retval < 0) {
		err_msg("Error iterating on events\n");
		return 0;
	}

	return 1;
}

/**
 * timerlat_auto_analysis - Analyze the collected data
 */
void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	unsigned long long max_exit_from_idle = 0;
	struct timerlat_aa_data *taa_data;
	int max_exit_from_idle_cpu;
	struct tep_handle *tep;
	int cpu;

	timerlat_auto_analysis_collect_trace(taa_ctx);

	/* Bring the stop tracing thresholds to the ns scale. */
	irq_thresh = irq_thresh * 1000;
	thread_thresh = thread_thresh * 1000;

	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
		taa_data = timerlat_aa_get_data(taa_ctx, cpu);

		if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
		} else if (thread_thresh && taa_data->tlat_thread_latency >= thread_thresh) {
			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
		}

		if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
			max_exit_from_idle = taa_data->max_exit_idle_latency;
			max_exit_from_idle_cpu = cpu;
		}
	}

	if (max_exit_from_idle) {
		printf("\n");
		printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
			ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
	}

	if (!taa_ctx->dump_tasks)
		return;

	printf("\n");
	printf("Printing CPU tasks:\n");
	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
		tep = taa_ctx->tool->trace.tep;

		printf("    [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);

		if (taa_data->kworker_func)
			printf(" kworker:%s:%s",
				tep_find_function(tep, taa_data->kworker) ? : "<...>",
				tep_find_function(tep, taa_data->kworker_func));
		printf("\n");
	}
}

/*
 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
 */
static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
{
	struct timerlat_aa_data *taa_data;
	int i;

	if (!taa_ctx->taa_data)
		return;

	for (i = 0; i < taa_ctx->nr_cpus; i++) {
		taa_data = timerlat_aa_get_data(taa_ctx, i);

		if (taa_data->prev_irqs_seq) {
			trace_seq_destroy(taa_data->prev_irqs_seq);
			free(taa_data->prev_irqs_seq);
		}

		if (taa_data->nmi_seq) {
			trace_seq_destroy(taa_data->nmi_seq);
			free(taa_data->nmi_seq);
		}

		if (taa_data->irqs_seq) {
			trace_seq_destroy(taa_data->irqs_seq);
			free(taa_data->irqs_seq);
		}

		if (taa_data->softirqs_seq) {
			trace_seq_destroy(taa_data->softirqs_seq);
			free(taa_data->softirqs_seq);
		}

		if (taa_data->threads_seq) {
			trace_seq_destroy(taa_data->threads_seq);
			free(taa_data->threads_seq);
		}

		if (taa_data->stack_seq) {
			trace_seq_destroy(taa_data->stack_seq);
			free(taa_data->stack_seq);
		}
	}
}

/*
 * timerlat_aa_init_seqs - Init seq files used to store parsed information
 *
 * Instead of keeping data structures to store raw data, use seq files to
 * store parsed data.
 *
 * Allocates and initializes the seq files.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
{
	struct timerlat_aa_data *taa_data;
	int i;

	for (i = 0; i < taa_ctx->nr_cpus; i++) {

		taa_data = timerlat_aa_get_data(taa_ctx, i);

		taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
		if (!taa_data->prev_irqs_seq)
			goto out_err;

		trace_seq_init(taa_data->prev_irqs_seq);

		taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
		if (!taa_data->nmi_seq)
			goto out_err;

		trace_seq_init(taa_data->nmi_seq);

		taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
		if (!taa_data->irqs_seq)
			goto out_err;

		trace_seq_init(taa_data->irqs_seq);

		taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
		if (!taa_data->softirqs_seq)
			goto out_err;

		trace_seq_init(taa_data->softirqs_seq);

		taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
		if (!taa_data->threads_seq)
			goto out_err;

		trace_seq_init(taa_data->threads_seq);

		taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
		if (!taa_data->stack_seq)
			goto out_err;

		trace_seq_init(taa_data->stack_seq);
	}

	return 0;

out_err:
	timerlat_aa_destroy_seqs(taa_ctx);
	return -1;
}

/*
 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 */
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{
	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				     timerlat_aa_handler, tool);

	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				     timerlat_aa_nmi_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				     timerlat_aa_irq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				     timerlat_aa_softirq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				     timerlat_aa_thread_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				     timerlat_aa_stack_handler, tool);

	if (!dump_tasks)
		return;

	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				     timerlat_aa_sched_switch_handler, tool);

	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				     timerlat_aa_kworker_start_handler, tool);
}

/*
 * timerlat_aa_register_events - Register events used in the auto-analysis
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
{
	int retval;

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				   timerlat_aa_handler, tool);

	/*
	 * Enable the osnoise events and register the auto-analysis handlers.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
	if (retval < 0 && !errno) {
		err_msg("Could not find osnoise events\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				   timerlat_aa_nmi_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				   timerlat_aa_irq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				   timerlat_aa_softirq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				   timerlat_aa_thread_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				   timerlat_aa_stack_handler, tool);

	if (!dump_tasks)
		return 0;

	/*
	 * Dump task events.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
	if (retval < 0 && !errno) {
		err_msg("Could not find sched_switch\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				   timerlat_aa_sched_switch_handler, tool);

	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	if (retval < 0 && !errno) {
		err_msg("Could not find workqueue_execute_start\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				   timerlat_aa_kworker_start_handler, tool);

	return 0;

out_err:
	timerlat_aa_unregister_events(tool, dump_tasks);
	return -1;
}

/**
 * timerlat_aa_destroy - Destroy timerlat auto-analysis
 */
void timerlat_aa_destroy(void)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();

	if (!taa_ctx)
		return;

	if (!taa_ctx->taa_data)
		goto out_ctx;

	timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
	timerlat_aa_destroy_seqs(taa_ctx);
	free(taa_ctx->taa_data);
out_ctx:
	free(taa_ctx);
}

/**
 * timerlat_aa_init - Initialize timerlat auto-analysis
 *
 * Returns 0 on success, -1 otherwise.
 */
int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
{
	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
	struct timerlat_aa_context *taa_ctx;
	int retval;

	taa_ctx = calloc(1, sizeof(*taa_ctx));
	if (!taa_ctx)
		return -1;

	__timerlat_aa_ctx = taa_ctx;

	taa_ctx->nr_cpus = nr_cpus;
	taa_ctx->tool = tool;
	taa_ctx->dump_tasks = dump_tasks;

	taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
	if (!taa_ctx->taa_data)
		goto out_err;

	retval = timerlat_aa_init_seqs(taa_ctx);
	if (retval)
		goto out_err;

	retval = timerlat_aa_register_events(tool, dump_tasks);
	if (retval)
		goto out_err;

	return 0;

out_err:
	timerlat_aa_destroy();
	return -1;
}
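
/*
 * Minimal usage sketch (hypothetical caller, for illustration only; the
 * actual wiring is done by the rtla timerlat tool):
 *
 *	if (timerlat_aa_init(tool, dump_tasks))
 *		return -1;
 *
 *	(run the timerlat tracer until a stop tracing threshold is hit)
 *
 *	timerlat_auto_analysis(stop_irq_us, stop_thread_us);
 *	timerlat_aa_destroy();
 */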