kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * than "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
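
/*
 * Illustrative sketch (not part of the original file): the saved maps are
 * laid out in chunks of [ head | map 0 ... map len-1 | tail ], with
 * tail.next chaining to the next chunk (or NULL), as described in the
 * comment above. Under trace_eval_mutex, a walk over all saved maps could
 * look roughly like this:
 *
 *	union trace_eval_map_item *ptr = trace_eval_maps;
 *
 *	while (ptr) {
 *		unsigned long len = ptr->head.length;
 *		union trace_eval_map_item *map = ptr + 1;
 *
 *		while (len--) {
 *			// use map->map.eval_string / map->map.eval_value
 *			map++;
 *		}
 *		ptr = map->tail.next;	// "map" now points at the tail item
 *	}
 */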
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189
190 static int __init set_cmdline_ftrace(char *str)
191 {
192         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
193         default_bootup_tracer = bootup_tracer_buf;
194         /* We are using ftrace early, expand it */
195         ring_buffer_expanded = true;
196         return 1;
197 }
198 __setup("ftrace=", set_cmdline_ftrace);
199
200 static int __init set_ftrace_dump_on_oops(char *str)
201 {
202         if (*str++ != '=' || !*str || !strcmp("1", str)) {
203                 ftrace_dump_on_oops = DUMP_ALL;
204                 return 1;
205         }
206
207         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
208                 ftrace_dump_on_oops = DUMP_ORIG;
209                 return 1;
210         }
211
212         return 0;
213 }
214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
215
216 static int __init stop_trace_on_warning(char *str)
217 {
218         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
219                 __disable_trace_on_warning = 1;
220         return 1;
221 }
222 __setup("traceoff_on_warning", stop_trace_on_warning);
223
224 static int __init boot_alloc_snapshot(char *str)
225 {
226         allocate_snapshot = true;
227         /* We also need the main ring buffer expanded */
228         ring_buffer_expanded = true;
229         return 1;
230 }
231 __setup("alloc_snapshot", boot_alloc_snapshot);
232
233
234 static int __init boot_snapshot(char *str)
235 {
236         snapshot_at_boot = true;
237         boot_alloc_snapshot(str);
238         return 1;
239 }
240 __setup("ftrace_boot_snapshot", boot_snapshot);
241
242
243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
244
245 static int __init set_trace_boot_options(char *str)
246 {
247         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
248         return 1;
249 }
250 __setup("trace_options=", set_trace_boot_options);
251
252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
253 static char *trace_boot_clock __initdata;
254
255 static int __init set_trace_boot_clock(char *str)
256 {
257         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
258         trace_boot_clock = trace_boot_clock_buf;
259         return 1;
260 }
261 __setup("trace_clock=", set_trace_boot_clock);
262
263 static int __init set_tracepoint_printk(char *str)
264 {
265         /* Ignore the "tp_printk_stop_on_boot" param */
266         if (*str == '_')
267                 return 0;
268
269         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
270                 tracepoint_printk = 1;
271         return 1;
272 }
273 __setup("tp_printk", set_tracepoint_printk);
274
275 static int __init set_tracepoint_printk_stop(char *str)
276 {
277         tracepoint_printk_stop_on_boot = true;
278         return 1;
279 }
280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
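
/*
 * Illustrative example (not part of the original file): the __setup()
 * handlers above consume kernel command line parameters such as:
 *
 *	ftrace=function_graph trace_options=sym-addr trace_clock=global
 *	alloc_snapshot ftrace_dump_on_oops=orig_cpu tp_printk
 *
 * The strings accepted by trace_options= and trace_clock= depend on the
 * configured tracers and clocks, so treat this line as a sketch rather
 * than a tested configuration.
 */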
281
282 unsigned long long ns2usecs(u64 nsec)
283 {
284         nsec += 500;
285         do_div(nsec, 1000);
286         return nsec;
287 }
288
289 static void
290 trace_process_export(struct trace_export *export,
291                struct ring_buffer_event *event, int flag)
292 {
293         struct trace_entry *entry;
294         unsigned int size = 0;
295
296         if (export->flags & flag) {
297                 entry = ring_buffer_event_data(event);
298                 size = ring_buffer_event_length(event);
299                 export->write(export, entry, size);
300         }
301 }
302
303 static DEFINE_MUTEX(ftrace_export_lock);
304
305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
306
307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
310
311 static inline void ftrace_exports_enable(struct trace_export *export)
312 {
313         if (export->flags & TRACE_EXPORT_FUNCTION)
314                 static_branch_inc(&trace_function_exports_enabled);
315
316         if (export->flags & TRACE_EXPORT_EVENT)
317                 static_branch_inc(&trace_event_exports_enabled);
318
319         if (export->flags & TRACE_EXPORT_MARKER)
320                 static_branch_inc(&trace_marker_exports_enabled);
321 }
322
323 static inline void ftrace_exports_disable(struct trace_export *export)
324 {
325         if (export->flags & TRACE_EXPORT_FUNCTION)
326                 static_branch_dec(&trace_function_exports_enabled);
327
328         if (export->flags & TRACE_EXPORT_EVENT)
329                 static_branch_dec(&trace_event_exports_enabled);
330
331         if (export->flags & TRACE_EXPORT_MARKER)
332                 static_branch_dec(&trace_marker_exports_enabled);
333 }
334
335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
336 {
337         struct trace_export *export;
338
339         preempt_disable_notrace();
340
341         export = rcu_dereference_raw_check(ftrace_exports_list);
342         while (export) {
343                 trace_process_export(export, event, flag);
344                 export = rcu_dereference_raw_check(export->next);
345         }
346
347         preempt_enable_notrace();
348 }
349
350 static inline void
351 add_trace_export(struct trace_export **list, struct trace_export *export)
352 {
353         rcu_assign_pointer(export->next, *list);
354         /*
355          * We are inserting export into the list, but another
356          * CPU might be walking that list. We need to make sure
357          * the export->next pointer is valid before another CPU sees
358          * the export pointer included in the list.
359          */
360         rcu_assign_pointer(*list, export);
361 }
362
363 static inline int
364 rm_trace_export(struct trace_export **list, struct trace_export *export)
365 {
366         struct trace_export **p;
367
368         for (p = list; *p != NULL; p = &(*p)->next)
369                 if (*p == export)
370                         break;
371
372         if (*p != export)
373                 return -1;
374
375         rcu_assign_pointer(*p, (*p)->next);
376
377         return 0;
378 }
379
380 static inline void
381 add_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383         ftrace_exports_enable(export);
384
385         add_trace_export(list, export);
386 }
387
388 static inline int
389 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
390 {
391         int ret;
392
393         ret = rm_trace_export(list, export);
394         ftrace_exports_disable(export);
395
396         return ret;
397 }
398
399 int register_ftrace_export(struct trace_export *export)
400 {
401         if (WARN_ON_ONCE(!export->write))
402                 return -1;
403
404         mutex_lock(&ftrace_export_lock);
405
406         add_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return 0;
411 }
412 EXPORT_SYMBOL_GPL(register_ftrace_export);
413
414 int unregister_ftrace_export(struct trace_export *export)
415 {
416         int ret;
417
418         mutex_lock(&ftrace_export_lock);
419
420         ret = rm_ftrace_export(&ftrace_exports_list, export);
421
422         mutex_unlock(&ftrace_export_lock);
423
424         return ret;
425 }
426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
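
/*
 * Illustrative sketch (not part of the original file): a minimal export
 * that forwards trace entries somewhere else. The "example_" identifiers
 * are hypothetical; the callback signature follows struct trace_export in
 * include/linux/trace.h.
 *
 *	static void example_write(struct trace_export *export,
 *				  const void *entry, unsigned int size)
 *	{
 *		// push the binary trace entry to an out-of-band channel
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	// register_ftrace_export(&example_export);
 *	// ...
 *	// unregister_ftrace_export(&example_export);
 */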
427
428 /* trace_flags holds trace_options default values */
429 #define TRACE_DEFAULT_FLAGS                                             \
430         (FUNCTION_DEFAULT_FLAGS |                                       \
431          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
432          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
433          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
434          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
435          TRACE_ITER_HASH_PTR)
436
437 /* trace_options that are only supported by global_trace */
438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
439                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
440
441 /* trace_flags that are default zero for instances */
442 #define ZEROED_TRACE_FLAGS \
443         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
444
445 /*
446  * The global_trace is the descriptor that holds the top-level tracing
447  * buffers for the live tracing.
448  */
449 static struct trace_array global_trace = {
450         .trace_flags = TRACE_DEFAULT_FLAGS,
451 };
452
453 LIST_HEAD(ftrace_trace_arrays);
454
455 int trace_array_get(struct trace_array *this_tr)
456 {
457         struct trace_array *tr;
458         int ret = -ENODEV;
459
460         mutex_lock(&trace_types_lock);
461         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
462                 if (tr == this_tr) {
463                         tr->ref++;
464                         ret = 0;
465                         break;
466                 }
467         }
468         mutex_unlock(&trace_types_lock);
469
470         return ret;
471 }
472
473 static void __trace_array_put(struct trace_array *this_tr)
474 {
475         WARN_ON(!this_tr->ref);
476         this_tr->ref--;
477 }
478
479 /**
480  * trace_array_put - Decrement the reference counter for this trace array.
481  * @this_tr : pointer to the trace array
482  *
483  * NOTE: Use this when we no longer need the trace array returned by
484  * trace_array_get_by_name(). This ensures the trace array can be later
485  * destroyed.
486  *
487  */
488 void trace_array_put(struct trace_array *this_tr)
489 {
490         if (!this_tr)
491                 return;
492
493         mutex_lock(&trace_types_lock);
494         __trace_array_put(this_tr);
495         mutex_unlock(&trace_types_lock);
496 }
497 EXPORT_SYMBOL_GPL(trace_array_put);
498
499 int tracing_check_open_get_tr(struct trace_array *tr)
500 {
501         int ret;
502
503         ret = security_locked_down(LOCKDOWN_TRACEFS);
504         if (ret)
505                 return ret;
506
507         if (tracing_disabled)
508                 return -ENODEV;
509
510         if (tr && trace_array_get(tr) < 0)
511                 return -ENODEV;
512
513         return 0;
514 }
515
516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
517                               struct trace_buffer *buffer,
518                               struct ring_buffer_event *event)
519 {
520         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
521             !filter_match_preds(call->filter, rec)) {
522                 __trace_event_discard_commit(buffer, event);
523                 return 1;
524         }
525
526         return 0;
527 }
528
529 /**
530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
531  * @filtered_pids: The list of pids to check
532  * @search_pid: The PID to find in @filtered_pids
533  *
534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
535  */
536 bool
537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
538 {
539         return trace_pid_list_is_set(filtered_pids, search_pid);
540 }
541
542 /**
543  * trace_ignore_this_task - should a task be ignored for tracing
544  * @filtered_pids: The list of pids to check
545  * @filtered_no_pids: The list of pids not to be traced
546  * @task: The task that should be ignored if not filtered
547  *
548  * Checks if @task should be traced or not from @filtered_pids.
549  * Returns true if @task should *NOT* be traced.
550  * Returns false if @task should be traced.
551  */
552 bool
553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
554                        struct trace_pid_list *filtered_no_pids,
555                        struct task_struct *task)
556 {
557         /*
558          * If filtered_no_pids is not empty, and the task's pid is listed
559          * in filtered_no_pids, then return true.
560          * Otherwise, if filtered_pids is empty, that means we can
561          * trace all tasks. If it has content, then only trace pids
562          * within filtered_pids.
563          */
564
565         return (filtered_pids &&
566                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
567                 (filtered_no_pids &&
568                  trace_find_filtered_pid(filtered_no_pids, task->pid));
569 }
570
571 /**
572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
573  * @pid_list: The list to modify
574  * @self: The current task for fork or NULL for exit
575  * @task: The task to add or remove
576  *
577  * If adding a task, if @self is defined, the task is only added if @self
578  * is also included in @pid_list. This happens on fork and tasks should
579  * only be added when the parent is listed. If @self is NULL, then the
580  * @task pid will be removed from the list, which would happen on exit
581  * of a task.
582  */
583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
584                                   struct task_struct *self,
585                                   struct task_struct *task)
586 {
587         if (!pid_list)
588                 return;
589
590         /* For forks, we only add if the forking task is listed */
591         if (self) {
592                 if (!trace_find_filtered_pid(pid_list, self->pid))
593                         return;
594         }
595
596         /* "self" is set for forks, and NULL for exits */
597         if (self)
598                 trace_pid_list_set(pid_list, task->pid);
599         else
600                 trace_pid_list_clear(pid_list, task->pid);
601 }
602
603 /**
604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
605  * @pid_list: The pid list to show
606  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
607  * @pos: The position of the file
608  *
609  * This is used by the seq_file "next" operation to iterate the pids
610  * listed in a trace_pid_list structure.
611  *
612  * Returns the pid+1 as we want to display pid of zero, but NULL would
613  * stop the iteration.
614  */
615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
616 {
617         long pid = (unsigned long)v;
618         unsigned int next;
619
620         (*pos)++;
621
622         /* pid already is +1 of the actual previous bit */
623         if (trace_pid_list_next(pid_list, pid, &next) < 0)
624                 return NULL;
625
626         pid = next;
627
628         /* Return pid + 1 to allow zero to be represented */
629         return (void *)(pid + 1);
630 }
631
632 /**
633  * trace_pid_start - Used for seq_file to start reading pid lists
634  * @pid_list: The pid list to show
635  * @pos: The position of the file
636  *
637  * This is used by seq_file "start" operation to start the iteration
638  * of listing pids.
639  *
640  * Returns the pid+1 as we want to display pid of zero, but NULL would
641  * stop the iteration.
642  */
643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
644 {
645         unsigned long pid;
646         unsigned int first;
647         loff_t l = 0;
648
649         if (trace_pid_list_first(pid_list, &first) < 0)
650                 return NULL;
651
652         pid = first;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
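
/*
 * Illustrative sketch (not part of the original file): the three helpers
 * above are meant to be plugged into a seq_file. The wrapper that looks up
 * the pid list ("example_get_pid_list()") is hypothetical; the real users
 * are the set_event_pid/set_ftrace_pid style files.
 *
 *	static void *example_pids_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(example_get_pid_list(m), pos);
 *	}
 *
 *	static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(example_get_pid_list(m), v, pos);
 *	}
 *
 *	static void example_pids_stop(struct seq_file *m, void *v) { }
 *
 *	static const struct seq_operations example_pids_seq_ops = {
 *		.start	= example_pids_start,
 *		.next	= example_pids_next,
 *		.stop	= example_pids_stop,
 *		.show	= trace_pid_show,
 *	};
 */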
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always create a new list. The write is an all-or-nothing
698          * operation: new pids from the user always go into a fresh list,
699          * and if the operation fails, the current list is
700          * not modified.
701          */
702         pid_list = trace_pid_list_alloc();
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         if (filtered_pids) {
709                 /* copy the current bits to the new max */
710                 ret = trace_pid_list_first(filtered_pids, &pid);
711                 while (!ret) {
712                         trace_pid_list_set(pid_list, pid);
713                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
714                         nr_pids++;
715                 }
716         }
717
718         ret = 0;
719         while (cnt > 0) {
720
721                 pos = 0;
722
723                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
724                 if (ret < 0)
725                         break;
726
727                 read += ret;
728                 ubuf += ret;
729                 cnt -= ret;
730
731                 if (!trace_parser_loaded(&parser))
732                         break;
733
734                 ret = -EINVAL;
735                 if (kstrtoul(parser.buffer, 0, &val))
736                         break;
737
738                 pid = (pid_t)val;
739
740                 if (trace_pid_list_set(pid_list, pid) < 0) {
741                         ret = -1;
742                         break;
743                 }
744                 nr_pids++;
745
746                 trace_parser_clear(&parser);
747                 ret = 0;
748         }
749         trace_parser_put(&parser);
750
751         if (ret < 0) {
752                 trace_pid_list_free(pid_list);
753                 return ret;
754         }
755
756         if (!nr_pids) {
757                 /* Cleared the list of pids */
758                 trace_pid_list_free(pid_list);
759                 pid_list = NULL;
760         }
761
762         *new_pid_list = pid_list;
763
764         return read;
765 }
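
/*
 * Illustrative sketch (not part of the original file): a write() handler
 * layered on trace_pid_write(). Locking, RCU publication of the new list
 * and freeing of the old one are only hinted at; see the callers of this
 * helper (e.g. the set_event_pid file) for the real sequence. The
 * "example_" identifiers are hypothetical.
 *
 *	static ssize_t example_pid_write(struct file *filp,
 *					 const char __user *ubuf,
 *					 size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_pid_list *filtered = example_current_pid_list();
 *		struct trace_pid_list *new_list = NULL;
 *		ssize_t ret;
 *
 *		ret = trace_pid_write(filtered, &new_list, ubuf, cnt);
 *		if (ret < 0)
 *			return ret;
 *
 *		// publish new_list (e.g. rcu_assign_pointer()) and free the
 *		// old list once readers are done
 *		return ret;
 *	}
 */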
766
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769         u64 ts;
770
771         /* Early boot up does not have a buffer yet */
772         if (!buf->buffer)
773                 return trace_clock_local();
774
775         ts = ring_buffer_time_stamp(buf->buffer);
776         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777
778         return ts;
779 }
780
781 u64 ftrace_now(int cpu)
782 {
783         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785
786 /**
787  * tracing_is_enabled - Show if global_trace has been enabled
788  *
789  * Shows if the global trace has been enabled or not. It uses the
790  * mirror flag "buffer_disabled", which is meant for fast paths such as
791  * the irqsoff tracer. But it may be inaccurate due to races. If you
792  * need to know the accurate state, use tracing_is_on() which is a little
793  * slower, but accurate.
794  */
795 int tracing_is_enabled(void)
796 {
797         /*
798          * For quick access (irqsoff uses this in fast path), just
799          * return the mirror variable of the state of the ring buffer.
800          * It's a little racy, but we don't really care.
801          */
802         smp_rmb();
803         return !global_trace.buffer_disabled;
804 }
805
806 /*
807  * trace_buf_size is the size in bytes that is allocated
808  * for a buffer. Note, the number of bytes is always rounded
809  * to page size.
810  *
811  * This number is purposely set to a low number of 16384.
812  * If the dump on oops happens, it is much appreciated not to
813  * have to wait for all that output. Anyway, this is configurable
814  * at both boot time and run time.
815  */
816 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
817
818 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer            *trace_types __read_mostly;
822
823 /*
824  * trace_types_lock is used to protect the trace_types list.
825  */
826 DEFINE_MUTEX(trace_types_lock);
827
828 /*
829  * Serialize access to the ring buffer.
830  *
831  * The ring buffer serializes readers, but that is only low-level protection.
832  * The validity of the events (which are returned by ring_buffer_peek() etc.)
833  * is not protected by the ring buffer.
834  *
835  * The content of events may become garbage if we allow another process to
836  * consume these events concurrently:
837  *   A) the page of the consumed events may become a normal page
838  *      (not a reader page) in the ring buffer, and this page will be
839  *      rewritten by the event producer.
840  *   B) The page of the consumed events may become a page for splice_read,
841  *      and this page will be returned to the system.
842  *
843  * These primitives allow multiple processes to access different per-CPU
844  * ring buffers concurrently.
845  *
846  * These primitives don't distinguish read-only and read-consume access.
847  * Multiple read-only accesses are also serialized.
848  */
849
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853
854 static inline void trace_access_lock(int cpu)
855 {
856         if (cpu == RING_BUFFER_ALL_CPUS) {
857                 /* gain it for accessing the whole ring buffer. */
858                 down_write(&all_cpu_access_lock);
859         } else {
860                 /* gain it for accessing a cpu ring buffer. */
861
862                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863                 down_read(&all_cpu_access_lock);
864
865                 /* Secondly block other access to this @cpu ring buffer. */
866                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
867         }
868 }
869
870 static inline void trace_access_unlock(int cpu)
871 {
872         if (cpu == RING_BUFFER_ALL_CPUS) {
873                 up_write(&all_cpu_access_lock);
874         } else {
875                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876                 up_read(&all_cpu_access_lock);
877         }
878 }
879
880 static inline void trace_access_lock_init(void)
881 {
882         int cpu;
883
884         for_each_possible_cpu(cpu)
885                 mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887
888 #else
889
890 static DEFINE_MUTEX(access_lock);
891
892 static inline void trace_access_lock(int cpu)
893 {
894         (void)cpu;
895         mutex_lock(&access_lock);
896 }
897
898 static inline void trace_access_unlock(int cpu)
899 {
900         (void)cpu;
901         mutex_unlock(&access_lock);
902 }
903
904 static inline void trace_access_lock_init(void)
905 {
906 }
907
908 #endif
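
/*
 * Illustrative sketch (not part of the original file): a consuming reader
 * is expected to bracket its ring-buffer accesses with the helpers above,
 * roughly:
 *
 *	trace_access_lock(cpu);		// or RING_BUFFER_ALL_CPUS
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	...
 *	trace_access_unlock(cpu);
 *
 * so that per-cpu readers can run concurrently while a reader of all CPUs
 * gets exclusive access.
 */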
909
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912                                  unsigned int trace_ctx,
913                                  int skip, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915                                       struct trace_buffer *buffer,
916                                       unsigned int trace_ctx,
917                                       int skip, struct pt_regs *regs);
918
919 #else
920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
921                                         unsigned int trace_ctx,
922                                         int skip, struct pt_regs *regs)
923 {
924 }
925 static inline void ftrace_trace_stack(struct trace_array *tr,
926                                       struct trace_buffer *buffer,
927                                       unsigned long trace_ctx,
928                                       int skip, struct pt_regs *regs)
929 {
930 }
931
932 #endif
933
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936                   int type, unsigned int trace_ctx)
937 {
938         struct trace_entry *ent = ring_buffer_event_data(event);
939
940         tracing_generic_entry_update(ent, type, trace_ctx);
941 }
942
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945                           int type,
946                           unsigned long len,
947                           unsigned int trace_ctx)
948 {
949         struct ring_buffer_event *event;
950
951         event = ring_buffer_lock_reserve(buffer, len);
952         if (event != NULL)
953                 trace_event_setup(event, type, trace_ctx);
954
955         return event;
956 }
957
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960         if (tr->array_buffer.buffer)
961                 ring_buffer_record_on(tr->array_buffer.buffer);
962         /*
963          * This flag is looked at when buffers haven't been allocated
964          * yet, or by some tracers (like irqsoff) that just want to
965          * know if the ring buffer has been disabled. It can handle
966          * races where it gets disabled while we still do a record.
967          * As the check is in the fast path of the tracers, it is more
968          * important to be fast than accurate.
969          */
970         tr->buffer_disabled = 0;
971         /* Make the flag seen by readers */
972         smp_wmb();
973 }
974
975 /**
976  * tracing_on - enable tracing buffers
977  *
978  * This function enables tracing buffers that may have been
979  * disabled with tracing_off.
980  */
981 void tracing_on(void)
982 {
983         tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986
987
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991         __this_cpu_write(trace_taskinfo_save, true);
992
993         /* If this is the temp buffer, we need to commit fully */
994         if (this_cpu_read(trace_buffered_event) == event) {
995                 /* Length is in event->array[0] */
996                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
997                 /* Release the temp buffer */
998                 this_cpu_dec(trace_buffered_event_cnt);
999                 /* ring_buffer_unlock_commit() enables preemption */
1000                 preempt_enable_notrace();
1001         } else
1002                 ring_buffer_unlock_commit(buffer, event);
1003 }
1004
1005 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1006                        const char *str, int size)
1007 {
1008         struct ring_buffer_event *event;
1009         struct trace_buffer *buffer;
1010         struct print_entry *entry;
1011         unsigned int trace_ctx;
1012         int alloc;
1013
1014         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1015                 return 0;
1016
1017         if (unlikely(tracing_selftest_running || tracing_disabled))
1018                 return 0;
1019
1020         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1021
1022         trace_ctx = tracing_gen_ctx();
1023         buffer = tr->array_buffer.buffer;
1024         ring_buffer_nest_start(buffer);
1025         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1026                                             trace_ctx);
1027         if (!event) {
1028                 size = 0;
1029                 goto out;
1030         }
1031
1032         entry = ring_buffer_event_data(event);
1033         entry->ip = ip;
1034
1035         memcpy(&entry->buf, str, size);
1036
1037         /* Add a newline if necessary */
1038         if (entry->buf[size - 1] != '\n') {
1039                 entry->buf[size] = '\n';
1040                 entry->buf[size + 1] = '\0';
1041         } else
1042                 entry->buf[size] = '\0';
1043
1044         __buffer_unlock_commit(buffer, event);
1045         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1046  out:
1047         ring_buffer_nest_end(buffer);
1048         return size;
1049 }
1050 EXPORT_SYMBOL_GPL(__trace_array_puts);
1051
1052 /**
1053  * __trace_puts - write a constant string into the trace buffer.
1054  * @ip:    The address of the caller
1055  * @str:   The constant string to write
1056  * @size:  The size of the string.
1057  */
1058 int __trace_puts(unsigned long ip, const char *str, int size)
1059 {
1060         return __trace_array_puts(&global_trace, ip, str, size);
1061 }
1062 EXPORT_SYMBOL_GPL(__trace_puts);
1063
1064 /**
1065  * __trace_bputs - write the pointer to a constant string into trace buffer
1066  * @ip:    The address of the caller
1067  * @str:   The constant string to write to the buffer to
1068  */
1069 int __trace_bputs(unsigned long ip, const char *str)
1070 {
1071         struct ring_buffer_event *event;
1072         struct trace_buffer *buffer;
1073         struct bputs_entry *entry;
1074         unsigned int trace_ctx;
1075         int size = sizeof(struct bputs_entry);
1076         int ret = 0;
1077
1078         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1079                 return 0;
1080
1081         if (unlikely(tracing_selftest_running || tracing_disabled))
1082                 return 0;
1083
1084         trace_ctx = tracing_gen_ctx();
1085         buffer = global_trace.array_buffer.buffer;
1086
1087         ring_buffer_nest_start(buffer);
1088         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1089                                             trace_ctx);
1090         if (!event)
1091                 goto out;
1092
1093         entry = ring_buffer_event_data(event);
1094         entry->ip                       = ip;
1095         entry->str                      = str;
1096
1097         __buffer_unlock_commit(buffer, event);
1098         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1099
1100         ret = 1;
1101  out:
1102         ring_buffer_nest_end(buffer);
1103         return ret;
1104 }
1105 EXPORT_SYMBOL_GPL(__trace_bputs);
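
/*
 * Illustrative note (not part of the original file): callers normally do
 * not use __trace_puts()/__trace_bputs() directly, but go through the
 * trace_puts() macro, which picks __trace_bputs() for built-in constant
 * strings and __trace_puts() otherwise, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */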
1106
1107 #ifdef CONFIG_TRACER_SNAPSHOT
1108 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1109                                            void *cond_data)
1110 {
1111         struct tracer *tracer = tr->current_trace;
1112         unsigned long flags;
1113
1114         if (in_nmi()) {
1115                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1116                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1117                 return;
1118         }
1119
1120         if (!tr->allocated_snapshot) {
1121                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1122                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1123                 tracer_tracing_off(tr);
1124                 return;
1125         }
1126
1127         /* Note, snapshot can not be used when the tracer uses it */
1128         if (tracer->use_max_tr) {
1129                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1130                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1131                 return;
1132         }
1133
1134         local_irq_save(flags);
1135         update_max_tr(tr, current, smp_processor_id(), cond_data);
1136         local_irq_restore(flags);
1137 }
1138
1139 void tracing_snapshot_instance(struct trace_array *tr)
1140 {
1141         tracing_snapshot_instance_cond(tr, NULL);
1142 }
1143
1144 /**
1145  * tracing_snapshot - take a snapshot of the current buffer.
1146  *
1147  * This causes a swap between the snapshot buffer and the current live
1148  * tracing buffer. You can use this to take snapshots of the live
1149  * trace when some condition is triggered, but continue to trace.
1150  *
1151  * Note, make sure to allocate the snapshot with either
1152  * a tracing_snapshot_alloc(), or by doing it manually
1153  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1154  *
1155  * If the snapshot buffer is not allocated, it will stop tracing.
1156  * Basically making a permanent snapshot.
1157  */
1158 void tracing_snapshot(void)
1159 {
1160         struct trace_array *tr = &global_trace;
1161
1162         tracing_snapshot_instance(tr);
1163 }
1164 EXPORT_SYMBOL_GPL(tracing_snapshot);
1165
1166 /**
1167  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1168  * @tr:         The tracing instance to snapshot
1169  * @cond_data:  The data to be tested conditionally, and possibly saved
1170  *
1171  * This is the same as tracing_snapshot() except that the snapshot is
1172  * conditional - the snapshot will only happen if the
1173  * cond_snapshot.update() implementation receiving the cond_data
1174  * returns true, which means that the trace array's cond_snapshot
1175  * update() operation used the cond_data to determine whether the
1176  * snapshot should be taken, and if it was, presumably saved it along
1177  * with the snapshot.
1178  */
1179 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1180 {
1181         tracing_snapshot_instance_cond(tr, cond_data);
1182 }
1183 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1184
1185 /**
1186  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1187  * @tr:         The tracing instance
1188  *
1189  * When the user enables a conditional snapshot using
1190  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1191  * with the snapshot.  This accessor is used to retrieve it.
1192  *
1193  * Should not be called from cond_snapshot.update(), since it takes
1194  * the tr->max_lock lock, which the code calling
1195  * cond_snapshot.update() has already done.
1196  *
1197  * Returns the cond_data associated with the trace array's snapshot.
1198  */
1199 void *tracing_cond_snapshot_data(struct trace_array *tr)
1200 {
1201         void *cond_data = NULL;
1202
1203         local_irq_disable();
1204         arch_spin_lock(&tr->max_lock);
1205
1206         if (tr->cond_snapshot)
1207                 cond_data = tr->cond_snapshot->cond_data;
1208
1209         arch_spin_unlock(&tr->max_lock);
1210         local_irq_enable();
1211
1212         return cond_data;
1213 }
1214 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1215
1216 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1217                                         struct array_buffer *size_buf, int cpu_id);
1218 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1219
1220 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1221 {
1222         int ret;
1223
1224         if (!tr->allocated_snapshot) {
1225
1226                 /* allocate spare buffer */
1227                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1228                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1229                 if (ret < 0)
1230                         return ret;
1231
1232                 tr->allocated_snapshot = true;
1233         }
1234
1235         return 0;
1236 }
1237
1238 static void free_snapshot(struct trace_array *tr)
1239 {
1240         /*
1241          * We don't free the ring buffer. Instead, we resize it because
1242          * the max_tr ring buffer has some state (e.g. ring->clock) and
1243          * we want to preserve it.
1244          */
1245         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1246         set_buffer_entries(&tr->max_buffer, 1);
1247         tracing_reset_online_cpus(&tr->max_buffer);
1248         tr->allocated_snapshot = false;
1249 }
1250
1251 /**
1252  * tracing_alloc_snapshot - allocate snapshot buffer.
1253  *
1254  * This only allocates the snapshot buffer if it isn't already
1255  * allocated - it doesn't also take a snapshot.
1256  *
1257  * This is meant to be used in cases where the snapshot buffer needs
1258  * to be set up for events that can't sleep but need to be able to
1259  * trigger a snapshot.
1260  */
1261 int tracing_alloc_snapshot(void)
1262 {
1263         struct trace_array *tr = &global_trace;
1264         int ret;
1265
1266         ret = tracing_alloc_snapshot_instance(tr);
1267         WARN_ON(ret < 0);
1268
1269         return ret;
1270 }
1271 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1272
1273 /**
1274  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1275  *
1276  * This is similar to tracing_snapshot(), but it will allocate the
1277  * snapshot buffer if it isn't already allocated. Use this only
1278  * where it is safe to sleep, as the allocation may sleep.
1279  *
1280  * This causes a swap between the snapshot buffer and the current live
1281  * tracing buffer. You can use this to take snapshots of the live
1282  * trace when some condition is triggered, but continue to trace.
1283  */
1284 void tracing_snapshot_alloc(void)
1285 {
1286         int ret;
1287
1288         ret = tracing_alloc_snapshot();
1289         if (ret < 0)
1290                 return;
1291
1292         tracing_snapshot();
1293 }
1294 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
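
/*
 * Illustrative sketch (not part of the original file): a typical in-kernel
 * user allocates the snapshot buffer once from a context that may sleep,
 * then triggers snapshots from the hot path:
 *
 *	// setup (may sleep)
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *
 *	// later, when some rare condition is detected
 *	tracing_snapshot();
 */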
1295
1296 /**
1297  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1298  * @tr:         The tracing instance
1299  * @cond_data:  User data to associate with the snapshot
1300  * @update:     Implementation of the cond_snapshot update function
1301  *
1302  * Check whether the conditional snapshot for the given instance has
1303  * already been enabled, or if the current tracer is already using a
1304  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1305  * save the cond_data and update function inside.
1306  *
1307  * Returns 0 if successful, error otherwise.
1308  */
1309 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1310                                  cond_update_fn_t update)
1311 {
1312         struct cond_snapshot *cond_snapshot;
1313         int ret = 0;
1314
1315         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1316         if (!cond_snapshot)
1317                 return -ENOMEM;
1318
1319         cond_snapshot->cond_data = cond_data;
1320         cond_snapshot->update = update;
1321
1322         mutex_lock(&trace_types_lock);
1323
1324         ret = tracing_alloc_snapshot_instance(tr);
1325         if (ret)
1326                 goto fail_unlock;
1327
1328         if (tr->current_trace->use_max_tr) {
1329                 ret = -EBUSY;
1330                 goto fail_unlock;
1331         }
1332
1333         /*
1334          * The cond_snapshot can only change to NULL without the
1335          * trace_types_lock. We don't care if we race with it going
1336          * to NULL, but we want to make sure that it's not set to
1337          * something other than NULL when we get here, which we can
1338          * do safely with only holding the trace_types_lock and not
1339          * having to take the max_lock.
1340          */
1341         if (tr->cond_snapshot) {
1342                 ret = -EBUSY;
1343                 goto fail_unlock;
1344         }
1345
1346         local_irq_disable();
1347         arch_spin_lock(&tr->max_lock);
1348         tr->cond_snapshot = cond_snapshot;
1349         arch_spin_unlock(&tr->max_lock);
1350         local_irq_enable();
1351
1352         mutex_unlock(&trace_types_lock);
1353
1354         return ret;
1355
1356  fail_unlock:
1357         mutex_unlock(&trace_types_lock);
1358         kfree(cond_snapshot);
1359         return ret;
1360 }
1361 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1362
1363 /**
1364  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1365  * @tr:         The tracing instance
1366  *
1367  * Check whether the conditional snapshot for the given instance is
1368  * enabled; if so, free the cond_snapshot associated with it,
1369  * otherwise return -EINVAL.
1370  *
1371  * Returns 0 if successful, error otherwise.
1372  */
1373 int tracing_snapshot_cond_disable(struct trace_array *tr)
1374 {
1375         int ret = 0;
1376
1377         local_irq_disable();
1378         arch_spin_lock(&tr->max_lock);
1379
1380         if (!tr->cond_snapshot)
1381                 ret = -EINVAL;
1382         else {
1383                 kfree(tr->cond_snapshot);
1384                 tr->cond_snapshot = NULL;
1385         }
1386
1387         arch_spin_unlock(&tr->max_lock);
1388         local_irq_enable();
1389
1390         return ret;
1391 }
1392 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
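
/*
 * Illustrative sketch (not part of the original file): conditional
 * snapshots pair an update() callback with user cond_data; the callback
 * returns true when the snapshot should actually be taken. The
 * "example_" identifiers are hypothetical.
 *
 *	static bool example_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct example_state *state = cond_data;
 *
 *		return state->value > state->threshold;
 *	}
 *
 *	// tracing_snapshot_cond_enable(tr, state, example_update);
 *	// ... tracing_snapshot_cond(tr, state) from the hot path ...
 *	// tracing_snapshot_cond_disable(tr);
 */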
1393 #else
1394 void tracing_snapshot(void)
1395 {
1396         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1397 }
1398 EXPORT_SYMBOL_GPL(tracing_snapshot);
1399 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1400 {
1401         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1402 }
1403 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1404 int tracing_alloc_snapshot(void)
1405 {
1406         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1407         return -ENODEV;
1408 }
1409 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1410 void tracing_snapshot_alloc(void)
1411 {
1412         /* Give warning */
1413         tracing_snapshot();
1414 }
1415 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1416 void *tracing_cond_snapshot_data(struct trace_array *tr)
1417 {
1418         return NULL;
1419 }
1420 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1421 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1422 {
1423         return -ENODEV;
1424 }
1425 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1426 int tracing_snapshot_cond_disable(struct trace_array *tr)
1427 {
1428         return false;
1429 }
1430 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1431 #define free_snapshot(tr)       do { } while (0)
1432 #endif /* CONFIG_TRACER_SNAPSHOT */
1433
1434 void tracer_tracing_off(struct trace_array *tr)
1435 {
1436         if (tr->array_buffer.buffer)
1437                 ring_buffer_record_off(tr->array_buffer.buffer);
1438         /*
1439          * This flag is looked at when buffers haven't been allocated
1440          * yet, or by some tracers (like irqsoff) that just want to
1441          * know if the ring buffer has been disabled. It can handle
1442          * races where it gets disabled while we still do a record.
1443          * As the check is in the fast path of the tracers, it is more
1444          * important to be fast than accurate.
1445          */
1446         tr->buffer_disabled = 1;
1447         /* Make the flag seen by readers */
1448         smp_wmb();
1449 }
1450
1451 /**
1452  * tracing_off - turn off tracing buffers
1453  *
1454  * This function stops the tracing buffers from recording data.
1455  * It does not disable any overhead the tracers themselves may
1456  * be causing. This function simply causes all recording to
1457  * the ring buffers to fail.
1458  */
1459 void tracing_off(void)
1460 {
1461         tracer_tracing_off(&global_trace);
1462 }
1463 EXPORT_SYMBOL_GPL(tracing_off);
1464
1465 void disable_trace_on_warning(void)
1466 {
1467         if (__disable_trace_on_warning) {
1468                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1469                         "Disabling tracing due to warning\n");
1470                 tracing_off();
1471         }
1472 }
1473
1474 /**
1475  * tracer_tracing_is_on - show real state of ring buffer enabled
1476  * @tr : the trace array to know if ring buffer is enabled
1477  *
1478  * Shows real state of the ring buffer if it is enabled or not.
1479  */
1480 bool tracer_tracing_is_on(struct trace_array *tr)
1481 {
1482         if (tr->array_buffer.buffer)
1483                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1484         return !tr->buffer_disabled;
1485 }
1486
1487 /**
1488  * tracing_is_on - show state of ring buffers enabled
1489  */
1490 int tracing_is_on(void)
1491 {
1492         return tracer_tracing_is_on(&global_trace);
1493 }
1494 EXPORT_SYMBOL_GPL(tracing_is_on);
1495
1496 static int __init set_buf_size(char *str)
1497 {
1498         unsigned long buf_size;
1499
1500         if (!str)
1501                 return 0;
1502         buf_size = memparse(str, &str);
1503         /*
1504          * nr_entries can not be zero and the startup
1505          * tests require some buffer space. Therefore
1506          * ensure we have at least 4096 bytes of buffer.
1507          */
1508         trace_buf_size = max(4096UL, buf_size);
1509         return 1;
1510 }
1511 __setup("trace_buf_size=", set_buf_size);
1512
1513 static int __init set_tracing_thresh(char *str)
1514 {
1515         unsigned long threshold;
1516         int ret;
1517
1518         if (!str)
1519                 return 0;
1520         ret = kstrtoul(str, 0, &threshold);
1521         if (ret < 0)
1522                 return 0;
1523         tracing_thresh = threshold * 1000;
1524         return 1;
1525 }
1526 __setup("tracing_thresh=", set_tracing_thresh);
1527
1528 unsigned long nsecs_to_usecs(unsigned long nsecs)
1529 {
1530         return nsecs / 1000;
1531 }
1532
1533 /*
1534  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1535  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1536  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1537  * of strings in the order that the evals (enum) were defined.
1538  */
1539 #undef C
1540 #define C(a, b) b
1541
1542 /* These must match the bit positions in trace_iterator_flags */
1543 static const char *trace_options[] = {
1544         TRACE_FLAGS
1545         NULL
1546 };
1547
1548 static struct {
1549         u64 (*func)(void);
1550         const char *name;
1551         int in_ns;              /* is this clock in nanoseconds? */
1552 } trace_clocks[] = {
1553         { trace_clock_local,            "local",        1 },
1554         { trace_clock_global,           "global",       1 },
1555         { trace_clock_counter,          "counter",      0 },
1556         { trace_clock_jiffies,          "uptime",       0 },
1557         { trace_clock,                  "perf",         1 },
1558         { ktime_get_mono_fast_ns,       "mono",         1 },
1559         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1560         { ktime_get_boot_fast_ns,       "boot",         1 },
1561         { ktime_get_tai_fast_ns,        "tai",          1 },
1562         ARCH_TRACE_CLOCKS
1563 };
1564
1565 bool trace_clock_in_ns(struct trace_array *tr)
1566 {
1567         if (trace_clocks[tr->clock_id].in_ns)
1568                 return true;
1569
1570         return false;
1571 }
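
/*
 * Illustrative note (not part of the original file): the "trace_clock"
 * file selects an index into the trace_clocks[] array above (stored in
 * tr->clock_id), and the selected function, trace_clocks[tr->clock_id].func,
 * is what ends up timestamping the ring buffer. The in_ns flag reported by
 * trace_clock_in_ns() tells the output code whether those timestamps can
 * be converted to sec.usec form.
 */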
1572
1573 /*
1574  * trace_parser_get_init - gets the buffer for trace parser
1575  */
1576 int trace_parser_get_init(struct trace_parser *parser, int size)
1577 {
1578         memset(parser, 0, sizeof(*parser));
1579
1580         parser->buffer = kmalloc(size, GFP_KERNEL);
1581         if (!parser->buffer)
1582                 return 1;
1583
1584         parser->size = size;
1585         return 0;
1586 }
1587
1588 /*
1589  * trace_parser_put - frees the buffer for trace parser
1590  */
1591 void trace_parser_put(struct trace_parser *parser)
1592 {
1593         kfree(parser->buffer);
1594         parser->buffer = NULL;
1595 }
1596
1597 /*
1598  * trace_get_user - reads the user input string separated by  space
1599  * (matched by isspace(ch))
1600  *
1601  * For each string found the 'struct trace_parser' is updated,
1602  * and the function returns.
1603  *
1604  * Returns number of bytes read.
1605  *
1606  * See kernel/trace/trace.h for 'struct trace_parser' details.
1607  */
1608 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1609         size_t cnt, loff_t *ppos)
1610 {
1611         char ch;
1612         size_t read = 0;
1613         ssize_t ret;
1614
1615         if (!*ppos)
1616                 trace_parser_clear(parser);
1617
1618         ret = get_user(ch, ubuf++);
1619         if (ret)
1620                 goto out;
1621
1622         read++;
1623         cnt--;
1624
1625         /*
1626          * If the parser is not finished with the last write,
1627          * continue reading the user input without skipping spaces.
1628          */
1629         if (!parser->cont) {
1630                 /* skip white space */
1631                 while (cnt && isspace(ch)) {
1632                         ret = get_user(ch, ubuf++);
1633                         if (ret)
1634                                 goto out;
1635                         read++;
1636                         cnt--;
1637                 }
1638
1639                 parser->idx = 0;
1640
1641                 /* only spaces were written */
1642                 if (isspace(ch) || !ch) {
1643                         *ppos += read;
1644                         ret = read;
1645                         goto out;
1646                 }
1647         }
1648
1649         /* read the non-space input */
1650         while (cnt && !isspace(ch) && ch) {
1651                 if (parser->idx < parser->size - 1)
1652                         parser->buffer[parser->idx++] = ch;
1653                 else {
1654                         ret = -EINVAL;
1655                         goto out;
1656                 }
1657                 ret = get_user(ch, ubuf++);
1658                 if (ret)
1659                         goto out;
1660                 read++;
1661                 cnt--;
1662         }
1663
1664         /* We either got finished input or we have to wait for another call. */
1665         if (isspace(ch) || !ch) {
1666                 parser->buffer[parser->idx] = 0;
1667                 parser->cont = false;
1668         } else if (parser->idx < parser->size - 1) {
1669                 parser->cont = true;
1670                 parser->buffer[parser->idx++] = ch;
1671                 /* Make sure the parsed string always terminates with '\0'. */
1672                 parser->buffer[parser->idx] = 0;
1673         } else {
1674                 ret = -EINVAL;
1675                 goto out;
1676         }
1677
1678         *ppos += read;
1679         ret = read;
1680
1681 out:
1682         return ret;
1683 }
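
/*
 * Illustrative usage sketch (not part of this file; handle_token() and
 * the buffer size are hypothetical): a tracefs write handler typically
 * brackets repeated trace_get_user() calls with trace_parser_get_init()
 * and trace_parser_put(), pulling out one whitespace-separated token
 * per successful call:
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, 256))
 *		return -ENOMEM;
 *
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser))
 *		err = handle_token(parser.buffer);
 *
 *	trace_parser_put(&parser);
 *
 * On return, parser.buffer holds a single NUL-terminated word, or
 * parser.cont is set when the word continues into the next write.
 */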
1684
1685 /* TODO add a seq_buf_to_buffer() */
1686 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1687 {
1688         int len;
1689
1690         if (trace_seq_used(s) <= s->seq.readpos)
1691                 return -EBUSY;
1692
1693         len = trace_seq_used(s) - s->seq.readpos;
1694         if (cnt > len)
1695                 cnt = len;
1696         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1697
1698         s->seq.readpos += cnt;
1699         return cnt;
1700 }
1701
1702 unsigned long __read_mostly     tracing_thresh;
1703
1704 #ifdef CONFIG_TRACER_MAX_TRACE
1705 static const struct file_operations tracing_max_lat_fops;
1706
1707 #ifdef LATENCY_FS_NOTIFY
1708
1709 static struct workqueue_struct *fsnotify_wq;
1710
1711 static void latency_fsnotify_workfn(struct work_struct *work)
1712 {
1713         struct trace_array *tr = container_of(work, struct trace_array,
1714                                               fsnotify_work);
1715         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1716 }
1717
1718 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1719 {
1720         struct trace_array *tr = container_of(iwork, struct trace_array,
1721                                               fsnotify_irqwork);
1722         queue_work(fsnotify_wq, &tr->fsnotify_work);
1723 }
1724
1725 static void trace_create_maxlat_file(struct trace_array *tr,
1726                                      struct dentry *d_tracer)
1727 {
1728         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1729         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1730         tr->d_max_latency = trace_create_file("tracing_max_latency",
1731                                               TRACE_MODE_WRITE,
1732                                               d_tracer, tr,
1733                                               &tracing_max_lat_fops);
1734 }
1735
1736 __init static int latency_fsnotify_init(void)
1737 {
1738         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1739                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1740         if (!fsnotify_wq) {
1741                 pr_err("Unable to allocate tr_max_lat_wq\n");
1742                 return -ENOMEM;
1743         }
1744         return 0;
1745 }
1746
1747 late_initcall_sync(latency_fsnotify_init);
1748
1749 void latency_fsnotify(struct trace_array *tr)
1750 {
1751         if (!fsnotify_wq)
1752                 return;
1753         /*
1754          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1755          * possible that we are called from __schedule() or do_idle(), which
1756          * could cause a deadlock.
1757          */
1758         irq_work_queue(&tr->fsnotify_irqwork);
1759 }
1760
1761 #else /* !LATENCY_FS_NOTIFY */
1762
1763 #define trace_create_maxlat_file(tr, d_tracer)                          \
1764         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1765                           d_tracer, tr, &tracing_max_lat_fops)
1766
1767 #endif
1768
1769 /*
1770  * Copy the new maximum trace into the separate maximum-trace
1771  * structure. (this way the maximum trace is permanently saved,
1772  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1773  */
1774 static void
1775 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1776 {
1777         struct array_buffer *trace_buf = &tr->array_buffer;
1778         struct array_buffer *max_buf = &tr->max_buffer;
1779         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1780         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1781
1782         max_buf->cpu = cpu;
1783         max_buf->time_start = data->preempt_timestamp;
1784
1785         max_data->saved_latency = tr->max_latency;
1786         max_data->critical_start = data->critical_start;
1787         max_data->critical_end = data->critical_end;
1788
1789         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1790         max_data->pid = tsk->pid;
1791         /*
1792          * If tsk == current, then use current_uid(), as that does not use
1793          * RCU. The irq tracer can be called out of RCU scope.
1794          */
1795         if (tsk == current)
1796                 max_data->uid = current_uid();
1797         else
1798                 max_data->uid = task_uid(tsk);
1799
1800         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1801         max_data->policy = tsk->policy;
1802         max_data->rt_priority = tsk->rt_priority;
1803
1804         /* record this task's comm */
1805         tracing_record_cmdline(tsk);
1806         latency_fsnotify(tr);
1807 }
1808
1809 /**
1810  * update_max_tr - snapshot the trace buffers of @tr into its max_tr buffer
1811  * @tr: trace array to snapshot
1812  * @tsk: the task with the latency
1813  * @cpu: The cpu that initiated the trace.
1814  * @cond_data: User data associated with a conditional snapshot
1815  *
1816  * Flip the buffers between the @tr and the max_tr and record information
1817  * about which task was the cause of this latency.
1818  */
1819 void
1820 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1821               void *cond_data)
1822 {
1823         if (tr->stop_count)
1824                 return;
1825
1826         WARN_ON_ONCE(!irqs_disabled());
1827
1828         if (!tr->allocated_snapshot) {
1829                 /* Only the nop tracer should hit this when disabling */
1830                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1831                 return;
1832         }
1833
1834         arch_spin_lock(&tr->max_lock);
1835
1836         /* Inherit the recordable setting from array_buffer */
1837         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1838                 ring_buffer_record_on(tr->max_buffer.buffer);
1839         else
1840                 ring_buffer_record_off(tr->max_buffer.buffer);
1841
1842 #ifdef CONFIG_TRACER_SNAPSHOT
1843         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1844                 arch_spin_unlock(&tr->max_lock);
1845                 return;
1846         }
1847 #endif
1848         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1849
1850         __update_max_tr(tr, tsk, cpu);
1851
1852         arch_spin_unlock(&tr->max_lock);
1853 }
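
/*
 * Illustrative sketch (assumption, condensed from the latency tracers
 * such as the wakeup tracer): a tracer that measures a new worst-case
 * latency updates tr->max_latency and then snapshots the buffers:
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */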
1854
1855 /**
1856  * update_max_tr_single - only copy one trace over, and reset the rest
1857  * @tr: trace array
1858  * @tsk: task with the latency
1859  * @cpu: the cpu of the buffer to copy.
1860  *
1861  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1862  */
1863 void
1864 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1865 {
1866         int ret;
1867
1868         if (tr->stop_count)
1869                 return;
1870
1871         WARN_ON_ONCE(!irqs_disabled());
1872         if (!tr->allocated_snapshot) {
1873                 /* Only the nop tracer should hit this when disabling */
1874                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1875                 return;
1876         }
1877
1878         arch_spin_lock(&tr->max_lock);
1879
1880         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1881
1882         if (ret == -EBUSY) {
1883                 /*
1884                  * We failed to swap the buffer because a commit was
1885                  * taking place on this CPU, or because a resize is in
1886                  * progress. We fail to record, but we reset the max
1887                  * trace buffer (no one writes directly to it) and flag
1888                  * that it failed.
1889                  */
1890                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1891                         "Failed to swap buffers due to commit or resize in progress\n");
1892         }
1893
1894         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1895
1896         __update_max_tr(tr, tsk, cpu);
1897         arch_spin_unlock(&tr->max_lock);
1898 }
1899
1900 #endif /* CONFIG_TRACER_MAX_TRACE */
1901
1902 static int wait_on_pipe(struct trace_iterator *iter, int full)
1903 {
1904         /* Iterators are static, they should be filled or empty */
1905         if (trace_buffer_iter(iter, iter->cpu_file))
1906                 return 0;
1907
1908         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1909                                 full);
1910 }
1911
1912 #ifdef CONFIG_FTRACE_STARTUP_TEST
1913 static bool selftests_can_run;
1914
1915 struct trace_selftests {
1916         struct list_head                list;
1917         struct tracer                   *type;
1918 };
1919
1920 static LIST_HEAD(postponed_selftests);
1921
1922 static int save_selftest(struct tracer *type)
1923 {
1924         struct trace_selftests *selftest;
1925
1926         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1927         if (!selftest)
1928                 return -ENOMEM;
1929
1930         selftest->type = type;
1931         list_add(&selftest->list, &postponed_selftests);
1932         return 0;
1933 }
1934
1935 static int run_tracer_selftest(struct tracer *type)
1936 {
1937         struct trace_array *tr = &global_trace;
1938         struct tracer *saved_tracer = tr->current_trace;
1939         int ret;
1940
1941         if (!type->selftest || tracing_selftest_disabled)
1942                 return 0;
1943
1944         /*
1945          * If a tracer registers early in boot up (before scheduling is
1946          * initialized and such), then do not run its selftest yet.
1947          * Instead, run it a little later in the boot process.
1948          */
1949         if (!selftests_can_run)
1950                 return save_selftest(type);
1951
1952         if (!tracing_is_on()) {
1953                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1954                         type->name);
1955                 return 0;
1956         }
1957
1958         /*
1959          * Run a selftest on this tracer.
1960          * Here we reset the trace buffer, and set the current
1961          * tracer to be this tracer. The tracer can then run some
1962          * internal tracing to verify that everything is in order.
1963          * If we fail, we do not register this tracer.
1964          */
1965         tracing_reset_online_cpus(&tr->array_buffer);
1966
1967         tr->current_trace = type;
1968
1969 #ifdef CONFIG_TRACER_MAX_TRACE
1970         if (type->use_max_tr) {
1971                 /* If we expanded the buffers, make sure the max is expanded too */
1972                 if (ring_buffer_expanded)
1973                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1974                                            RING_BUFFER_ALL_CPUS);
1975                 tr->allocated_snapshot = true;
1976         }
1977 #endif
1978
1979         /* the test is responsible for initializing and enabling */
1980         pr_info("Testing tracer %s: ", type->name);
1981         ret = type->selftest(type, tr);
1982         /* the test is responsible for resetting too */
1983         tr->current_trace = saved_tracer;
1984         if (ret) {
1985                 printk(KERN_CONT "FAILED!\n");
1986                 /* Add the warning after printing 'FAILED' */
1987                 WARN_ON(1);
1988                 return -1;
1989         }
1990         /* Only reset on passing, to avoid touching corrupted buffers */
1991         tracing_reset_online_cpus(&tr->array_buffer);
1992
1993 #ifdef CONFIG_TRACER_MAX_TRACE
1994         if (type->use_max_tr) {
1995                 tr->allocated_snapshot = false;
1996
1997                 /* Shrink the max buffer again */
1998                 if (ring_buffer_expanded)
1999                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2000                                            RING_BUFFER_ALL_CPUS);
2001         }
2002 #endif
2003
2004         printk(KERN_CONT "PASSED\n");
2005         return 0;
2006 }
2007
2008 static __init int init_trace_selftests(void)
2009 {
2010         struct trace_selftests *p, *n;
2011         struct tracer *t, **last;
2012         int ret;
2013
2014         selftests_can_run = true;
2015
2016         mutex_lock(&trace_types_lock);
2017
2018         if (list_empty(&postponed_selftests))
2019                 goto out;
2020
2021         pr_info("Running postponed tracer tests:\n");
2022
2023         tracing_selftest_running = true;
2024         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2025                 /* This loop can take minutes when sanitizers are enabled, so
2026                  * let's make sure we allow RCU processing.
2027                  */
2028                 cond_resched();
2029                 ret = run_tracer_selftest(p->type);
2030                 /* If the test fails, then warn and remove from available_tracers */
2031                 if (ret < 0) {
2032                         WARN(1, "tracer: %s failed selftest, disabling\n",
2033                              p->type->name);
2034                         last = &trace_types;
2035                         for (t = trace_types; t; t = t->next) {
2036                                 if (t == p->type) {
2037                                         *last = t->next;
2038                                         break;
2039                                 }
2040                                 last = &t->next;
2041                         }
2042                 }
2043                 list_del(&p->list);
2044                 kfree(p);
2045         }
2046         tracing_selftest_running = false;
2047
2048  out:
2049         mutex_unlock(&trace_types_lock);
2050
2051         return 0;
2052 }
2053 core_initcall(init_trace_selftests);
2054 #else
2055 static inline int run_tracer_selftest(struct tracer *type)
2056 {
2057         return 0;
2058 }
2059 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2060
2061 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2062
2063 static void __init apply_trace_boot_options(void);
2064
2065 /**
2066  * register_tracer - register a tracer with the ftrace system.
2067  * @type: the plugin for the tracer
2068  *
2069  * Register a new plugin tracer.
2070  */
2071 int __init register_tracer(struct tracer *type)
2072 {
2073         struct tracer *t;
2074         int ret = 0;
2075
2076         if (!type->name) {
2077                 pr_info("Tracer must have a name\n");
2078                 return -1;
2079         }
2080
2081         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2082                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2083                 return -1;
2084         }
2085
2086         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2087                 pr_warn("Can not register tracer %s due to lockdown\n",
2088                            type->name);
2089                 return -EPERM;
2090         }
2091
2092         mutex_lock(&trace_types_lock);
2093
2094         tracing_selftest_running = true;
2095
2096         for (t = trace_types; t; t = t->next) {
2097                 if (strcmp(type->name, t->name) == 0) {
2098                         /* already found */
2099                         pr_info("Tracer %s already registered\n",
2100                                 type->name);
2101                         ret = -1;
2102                         goto out;
2103                 }
2104         }
2105
2106         if (!type->set_flag)
2107                 type->set_flag = &dummy_set_flag;
2108         if (!type->flags) {
2109                 /* allocate a dummy tracer_flags */
2110                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2111                 if (!type->flags) {
2112                         ret = -ENOMEM;
2113                         goto out;
2114                 }
2115                 type->flags->val = 0;
2116                 type->flags->opts = dummy_tracer_opt;
2117         } else
2118                 if (!type->flags->opts)
2119                         type->flags->opts = dummy_tracer_opt;
2120
2121         /* store the tracer for __set_tracer_option */
2122         type->flags->trace = type;
2123
2124         ret = run_tracer_selftest(type);
2125         if (ret < 0)
2126                 goto out;
2127
2128         type->next = trace_types;
2129         trace_types = type;
2130         add_tracer_options(&global_trace, type);
2131
2132  out:
2133         tracing_selftest_running = false;
2134         mutex_unlock(&trace_types_lock);
2135
2136         if (ret || !default_bootup_tracer)
2137                 goto out_unlock;
2138
2139         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2140                 goto out_unlock;
2141
2142         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2143         /* Do we want this tracer to start on bootup? */
2144         tracing_set_tracer(&global_trace, type->name);
2145         default_bootup_tracer = NULL;
2146
2147         apply_trace_boot_options();
2148
2149         /* disable other selftests, since this will break them. */
2150         disable_tracing_selftest("running a tracer");
2151
2152  out_unlock:
2153         return ret;
2154 }
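
/*
 * Illustrative sketch (hypothetical "example" tracer, not part of this
 * file): a plugin only needs a name plus init/reset callbacks before
 * handing itself to register_tracer() from an __init function:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static int __init init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */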
2155
2156 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2157 {
2158         struct trace_buffer *buffer = buf->buffer;
2159
2160         if (!buffer)
2161                 return;
2162
2163         ring_buffer_record_disable(buffer);
2164
2165         /* Make sure all commits have finished */
2166         synchronize_rcu();
2167         ring_buffer_reset_cpu(buffer, cpu);
2168
2169         ring_buffer_record_enable(buffer);
2170 }
2171
2172 void tracing_reset_online_cpus(struct array_buffer *buf)
2173 {
2174         struct trace_buffer *buffer = buf->buffer;
2175
2176         if (!buffer)
2177                 return;
2178
2179         ring_buffer_record_disable(buffer);
2180
2181         /* Make sure all commits have finished */
2182         synchronize_rcu();
2183
2184         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2185
2186         ring_buffer_reset_online_cpus(buffer);
2187
2188         ring_buffer_record_enable(buffer);
2189 }
2190
2191 /* Must have trace_types_lock held */
2192 void tracing_reset_all_online_cpus_unlocked(void)
2193 {
2194         struct trace_array *tr;
2195
2196         lockdep_assert_held(&trace_types_lock);
2197
2198         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2199                 if (!tr->clear_trace)
2200                         continue;
2201                 tr->clear_trace = false;
2202                 tracing_reset_online_cpus(&tr->array_buffer);
2203 #ifdef CONFIG_TRACER_MAX_TRACE
2204                 tracing_reset_online_cpus(&tr->max_buffer);
2205 #endif
2206         }
2207 }
2208
2209 void tracing_reset_all_online_cpus(void)
2210 {
2211         mutex_lock(&trace_types_lock);
2212         tracing_reset_all_online_cpus_unlocked();
2213         mutex_unlock(&trace_types_lock);
2214 }
2215
2216 /*
2217  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2218  * is the tgid last observed corresponding to pid=i.
2219  */
2220 static int *tgid_map;
2221
2222 /* The maximum valid index into tgid_map. */
2223 static size_t tgid_map_max;
2224
2225 #define SAVED_CMDLINES_DEFAULT 128
2226 #define NO_CMDLINE_MAP UINT_MAX
2227 /*
2228  * Preemption must be disabled before acquiring trace_cmdline_lock.
2229  * The various trace_arrays' max_lock must be acquired in a context
2230  * context where interrupts are disabled.
2231  */
2232 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2233 struct saved_cmdlines_buffer {
2234         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2235         unsigned *map_cmdline_to_pid;
2236         unsigned cmdline_num;
2237         int cmdline_idx;
2238         char *saved_cmdlines;
2239 };
2240 static struct saved_cmdlines_buffer *savedcmd;
2241
2242 static inline char *get_saved_cmdlines(int idx)
2243 {
2244         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2245 }
2246
2247 static inline void set_cmdline(int idx, const char *cmdline)
2248 {
2249         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2250 }
2251
2252 static int allocate_cmdlines_buffer(unsigned int val,
2253                                     struct saved_cmdlines_buffer *s)
2254 {
2255         s->map_cmdline_to_pid = kmalloc_array(val,
2256                                               sizeof(*s->map_cmdline_to_pid),
2257                                               GFP_KERNEL);
2258         if (!s->map_cmdline_to_pid)
2259                 return -ENOMEM;
2260
2261         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2262         if (!s->saved_cmdlines) {
2263                 kfree(s->map_cmdline_to_pid);
2264                 return -ENOMEM;
2265         }
2266
2267         s->cmdline_idx = 0;
2268         s->cmdline_num = val;
2269         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2270                sizeof(s->map_pid_to_cmdline));
2271         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2272                val * sizeof(*s->map_cmdline_to_pid));
2273
2274         return 0;
2275 }
2276
2277 static int trace_create_savedcmd(void)
2278 {
2279         int ret;
2280
2281         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2282         if (!savedcmd)
2283                 return -ENOMEM;
2284
2285         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2286         if (ret < 0) {
2287                 kfree(savedcmd);
2288                 savedcmd = NULL;
2289                 return -ENOMEM;
2290         }
2291
2292         return 0;
2293 }
2294
2295 int is_tracing_stopped(void)
2296 {
2297         return global_trace.stop_count;
2298 }
2299
2300 /**
2301  * tracing_start - quick start of the tracer
2302  *
2303  * If tracing is enabled but was stopped by tracing_stop,
2304  * this will start the tracer back up.
2305  */
2306 void tracing_start(void)
2307 {
2308         struct trace_buffer *buffer;
2309         unsigned long flags;
2310
2311         if (tracing_disabled)
2312                 return;
2313
2314         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2315         if (--global_trace.stop_count) {
2316                 if (global_trace.stop_count < 0) {
2317                         /* Someone screwed up their debugging */
2318                         WARN_ON_ONCE(1);
2319                         global_trace.stop_count = 0;
2320                 }
2321                 goto out;
2322         }
2323
2324         /* Prevent the buffers from switching */
2325         arch_spin_lock(&global_trace.max_lock);
2326
2327         buffer = global_trace.array_buffer.buffer;
2328         if (buffer)
2329                 ring_buffer_record_enable(buffer);
2330
2331 #ifdef CONFIG_TRACER_MAX_TRACE
2332         buffer = global_trace.max_buffer.buffer;
2333         if (buffer)
2334                 ring_buffer_record_enable(buffer);
2335 #endif
2336
2337         arch_spin_unlock(&global_trace.max_lock);
2338
2339  out:
2340         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2341 }
2342
2343 static void tracing_start_tr(struct trace_array *tr)
2344 {
2345         struct trace_buffer *buffer;
2346         unsigned long flags;
2347
2348         if (tracing_disabled)
2349                 return;
2350
2351         /* If global, we need to also start the max tracer */
2352         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2353                 return tracing_start();
2354
2355         raw_spin_lock_irqsave(&tr->start_lock, flags);
2356
2357         if (--tr->stop_count) {
2358                 if (tr->stop_count < 0) {
2359                         /* Someone screwed up their debugging */
2360                         WARN_ON_ONCE(1);
2361                         tr->stop_count = 0;
2362                 }
2363                 goto out;
2364         }
2365
2366         buffer = tr->array_buffer.buffer;
2367         if (buffer)
2368                 ring_buffer_record_enable(buffer);
2369
2370  out:
2371         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2372 }
2373
2374 /**
2375  * tracing_stop - quick stop of the tracer
2376  *
2377  * Light weight way to stop tracing. Use in conjunction with
2378  * tracing_start.
2379  */
2380 void tracing_stop(void)
2381 {
2382         struct trace_buffer *buffer;
2383         unsigned long flags;
2384
2385         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2386         if (global_trace.stop_count++)
2387                 goto out;
2388
2389         /* Prevent the buffers from switching */
2390         arch_spin_lock(&global_trace.max_lock);
2391
2392         buffer = global_trace.array_buffer.buffer;
2393         if (buffer)
2394                 ring_buffer_record_disable(buffer);
2395
2396 #ifdef CONFIG_TRACER_MAX_TRACE
2397         buffer = global_trace.max_buffer.buffer;
2398         if (buffer)
2399                 ring_buffer_record_disable(buffer);
2400 #endif
2401
2402         arch_spin_unlock(&global_trace.max_lock);
2403
2404  out:
2405         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2406 }
2407
2408 static void tracing_stop_tr(struct trace_array *tr)
2409 {
2410         struct trace_buffer *buffer;
2411         unsigned long flags;
2412
2413         /* If global, we need to also stop the max tracer */
2414         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2415                 return tracing_stop();
2416
2417         raw_spin_lock_irqsave(&tr->start_lock, flags);
2418         if (tr->stop_count++)
2419                 goto out;
2420
2421         buffer = tr->array_buffer.buffer;
2422         if (buffer)
2423                 ring_buffer_record_disable(buffer);
2424
2425  out:
2426         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2427 }
2428
2429 static int trace_save_cmdline(struct task_struct *tsk)
2430 {
2431         unsigned tpid, idx;
2432
2433         /* treat recording of idle task as a success */
2434         if (!tsk->pid)
2435                 return 1;
2436
2437         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2438
2439         /*
2440          * It's not the end of the world if we don't get
2441          * the lock, but we also don't want to spin,
2442          * nor do we want to disable interrupts,
2443          * so if we miss here, then better luck next time.
2444          *
2445          * This is called from the scheduler and wakeup paths, so interrupts
2446          * had better be disabled and the run queue lock held.
2447          */
2448         lockdep_assert_preemption_disabled();
2449         if (!arch_spin_trylock(&trace_cmdline_lock))
2450                 return 0;
2451
2452         idx = savedcmd->map_pid_to_cmdline[tpid];
2453         if (idx == NO_CMDLINE_MAP) {
2454                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2455
2456                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2457                 savedcmd->cmdline_idx = idx;
2458         }
2459
2460         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2461         set_cmdline(idx, tsk->comm);
2462
2463         arch_spin_unlock(&trace_cmdline_lock);
2464
2465         return 1;
2466 }
2467
2468 static void __trace_find_cmdline(int pid, char comm[])
2469 {
2470         unsigned map;
2471         int tpid;
2472
2473         if (!pid) {
2474                 strcpy(comm, "<idle>");
2475                 return;
2476         }
2477
2478         if (WARN_ON_ONCE(pid < 0)) {
2479                 strcpy(comm, "<XXX>");
2480                 return;
2481         }
2482
2483         tpid = pid & (PID_MAX_DEFAULT - 1);
2484         map = savedcmd->map_pid_to_cmdline[tpid];
2485         if (map != NO_CMDLINE_MAP) {
2486                 tpid = savedcmd->map_cmdline_to_pid[map];
2487                 if (tpid == pid) {
2488                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2489                         return;
2490                 }
2491         }
2492         strcpy(comm, "<...>");
2493 }
2494
2495 void trace_find_cmdline(int pid, char comm[])
2496 {
2497         preempt_disable();
2498         arch_spin_lock(&trace_cmdline_lock);
2499
2500         __trace_find_cmdline(pid, comm);
2501
2502         arch_spin_unlock(&trace_cmdline_lock);
2503         preempt_enable();
2504 }
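
/*
 * Illustrative sketch (assumption, mirroring the output code in
 * trace_output.c): the print side resolves a recorded pid back to a
 * comm, getting "<...>" when the saved_cmdlines slot has been reused:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-7d ", comm, entry->pid);
 */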
2505
2506 static int *trace_find_tgid_ptr(int pid)
2507 {
2508         /*
2509          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2510          * if we observe a non-NULL tgid_map then we also observe the correct
2511          * tgid_map_max.
2512          */
2513         int *map = smp_load_acquire(&tgid_map);
2514
2515         if (unlikely(!map || pid > tgid_map_max))
2516                 return NULL;
2517
2518         return &map[pid];
2519 }
2520
2521 int trace_find_tgid(int pid)
2522 {
2523         int *ptr = trace_find_tgid_ptr(pid);
2524
2525         return ptr ? *ptr : 0;
2526 }
2527
2528 static int trace_save_tgid(struct task_struct *tsk)
2529 {
2530         int *ptr;
2531
2532         /* treat recording of idle task as a success */
2533         if (!tsk->pid)
2534                 return 1;
2535
2536         ptr = trace_find_tgid_ptr(tsk->pid);
2537         if (!ptr)
2538                 return 0;
2539
2540         *ptr = tsk->tgid;
2541         return 1;
2542 }
2543
2544 static bool tracing_record_taskinfo_skip(int flags)
2545 {
2546         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2547                 return true;
2548         if (!__this_cpu_read(trace_taskinfo_save))
2549                 return true;
2550         return false;
2551 }
2552
2553 /**
2554  * tracing_record_taskinfo - record the task info of a task
2555  *
2556  * @task:  task to record
2557  * @flags: TRACE_RECORD_CMDLINE for recording comm
2558  *         TRACE_RECORD_TGID for recording tgid
2559  */
2560 void tracing_record_taskinfo(struct task_struct *task, int flags)
2561 {
2562         bool done;
2563
2564         if (tracing_record_taskinfo_skip(flags))
2565                 return;
2566
2567         /*
2568          * Record as much task information as possible. If some fail, continue
2569          * to try to record the others.
2570          */
2571         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2572         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2573
2574         /* If recording any information failed, retry again soon. */
2575         if (!done)
2576                 return;
2577
2578         __this_cpu_write(trace_taskinfo_save, false);
2579 }
2580
2581 /**
2582  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2583  *
2584  * @prev: previous task during sched_switch
2585  * @next: next task during sched_switch
2586  * @flags: TRACE_RECORD_CMDLINE for recording comm
2587  *         TRACE_RECORD_TGID for recording tgid
2588  */
2589 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2590                                           struct task_struct *next, int flags)
2591 {
2592         bool done;
2593
2594         if (tracing_record_taskinfo_skip(flags))
2595                 return;
2596
2597         /*
2598          * Record as much task information as possible. If some fail, continue
2599          * to try to record the others.
2600          */
2601         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2602         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2603         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2604         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2605
2606         /* If recording any information failed, retry again soon. */
2607         if (!done)
2608                 return;
2609
2610         __this_cpu_write(trace_taskinfo_save, false);
2611 }
2612
2613 /* Helpers to record a specific task information */
2614 void tracing_record_cmdline(struct task_struct *task)
2615 {
2616         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2617 }
2618
2619 void tracing_record_tgid(struct task_struct *task)
2620 {
2621         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2622 }
2623
2624 /*
2625  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2626  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2627  * simplifies those functions and keeps them in sync.
2628  */
2629 enum print_line_t trace_handle_return(struct trace_seq *s)
2630 {
2631         return trace_seq_has_overflowed(s) ?
2632                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2633 }
2634 EXPORT_SYMBOL_GPL(trace_handle_return);
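
/*
 * Illustrative sketch (hypothetical event, not from this file): output
 * callbacks funnel their trace_seq writes through trace_handle_return()
 * so the overflow check lives in one place:
 *
 *	static enum print_line_t
 *	trace_example_print(struct trace_iterator *iter, int flags,
 *			    struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example: %d\n", 42);
 *		return trace_handle_return(&iter->seq);
 *	}
 */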
2635
2636 static unsigned short migration_disable_value(void)
2637 {
2638 #if defined(CONFIG_SMP)
2639         return current->migration_disabled;
2640 #else
2641         return 0;
2642 #endif
2643 }
2644
2645 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2646 {
2647         unsigned int trace_flags = irqs_status;
2648         unsigned int pc;
2649
2650         pc = preempt_count();
2651
2652         if (pc & NMI_MASK)
2653                 trace_flags |= TRACE_FLAG_NMI;
2654         if (pc & HARDIRQ_MASK)
2655                 trace_flags |= TRACE_FLAG_HARDIRQ;
2656         if (in_serving_softirq())
2657                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2658         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2659                 trace_flags |= TRACE_FLAG_BH_OFF;
2660
2661         if (tif_need_resched())
2662                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2663         if (test_preempt_need_resched())
2664                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2665         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2666                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2667 }
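
/*
 * Layout of the returned trace_ctx word, as packed above (noted here
 * for reference):
 *
 *	bits  0- 3: preempt_count(), capped at 0xf
 *	bits  4- 7: migration-disable depth, capped at 0xf
 *	bits 16-31: TRACE_FLAG_* bits (irqs, NMI, hardirq, softirq, ...)
 *
 * A consumer could unpack it as:
 *
 *	unsigned int pc    = trace_ctx & 0xf;
 *	unsigned int migr  = (trace_ctx >> 4) & 0xf;
 *	unsigned int flags = trace_ctx >> 16;
 */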
2668
2669 struct ring_buffer_event *
2670 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2671                           int type,
2672                           unsigned long len,
2673                           unsigned int trace_ctx)
2674 {
2675         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2676 }
2677
2678 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2679 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2680 static int trace_buffered_event_ref;
2681
2682 /**
2683  * trace_buffered_event_enable - enable buffering events
2684  *
2685  * When events are being filtered, it is quicker to write the event
2686  * data into a temporary buffer if there's a likely chance that it
2687  * will not be committed. Discarding an event from the ring buffer
2688  * is not as fast as committing one, and is much slower than copying
2689  * the buffered data in as a single commit.
2690  *
2691  * When an event is to be filtered, allocate per-CPU buffers to write
2692  * the event data into. If the event is filtered and discarded, it is
2693  * simply dropped; otherwise the entire data is committed in one
2694  * shot.
2695  */
2696 void trace_buffered_event_enable(void)
2697 {
2698         struct ring_buffer_event *event;
2699         struct page *page;
2700         int cpu;
2701
2702         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2703
2704         if (trace_buffered_event_ref++)
2705                 return;
2706
2707         for_each_tracing_cpu(cpu) {
2708                 page = alloc_pages_node(cpu_to_node(cpu),
2709                                         GFP_KERNEL | __GFP_NORETRY, 0);
2710                 if (!page)
2711                         goto failed;
2712
2713                 event = page_address(page);
2714                 memset(event, 0, sizeof(*event));
2715
2716                 per_cpu(trace_buffered_event, cpu) = event;
2717
2718                 preempt_disable();
2719                 if (cpu == smp_processor_id() &&
2720                     __this_cpu_read(trace_buffered_event) !=
2721                     per_cpu(trace_buffered_event, cpu))
2722                         WARN_ON_ONCE(1);
2723                 preempt_enable();
2724         }
2725
2726         return;
2727  failed:
2728         trace_buffered_event_disable();
2729 }
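
/*
 * Illustrative sketch (assumption): the event filter code pairs the
 * enable/disable calls under event_mutex around the lifetime of a
 * filter, roughly:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install the filter on the event file ...
 *	mutex_unlock(&event_mutex);
 *
 * and calls trace_buffered_event_disable() the same way once the
 * filter is removed.
 */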
2730
2731 static void enable_trace_buffered_event(void *data)
2732 {
2733         /* Probably not needed, but do it anyway */
2734         smp_rmb();
2735         this_cpu_dec(trace_buffered_event_cnt);
2736 }
2737
2738 static void disable_trace_buffered_event(void *data)
2739 {
2740         this_cpu_inc(trace_buffered_event_cnt);
2741 }
2742
2743 /**
2744  * trace_buffered_event_disable - disable buffering events
2745  *
2746  * When a filter is removed, it is faster to not use the buffered
2747  * events, and to commit directly into the ring buffer. Free up
2748  * the temp buffers when there are no more users. This requires
2749  * special synchronization with current events.
2750  */
2751 void trace_buffered_event_disable(void)
2752 {
2753         int cpu;
2754
2755         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2756
2757         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2758                 return;
2759
2760         if (--trace_buffered_event_ref)
2761                 return;
2762
2763         preempt_disable();
2764         /* For each CPU, set the buffer as used. */
2765         smp_call_function_many(tracing_buffer_mask,
2766                                disable_trace_buffered_event, NULL, 1);
2767         preempt_enable();
2768
2769         /* Wait for all current users to finish */
2770         synchronize_rcu();
2771
2772         for_each_tracing_cpu(cpu) {
2773                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2774                 per_cpu(trace_buffered_event, cpu) = NULL;
2775         }
2776         /*
2777          * Make sure trace_buffered_event is NULL before clearing
2778          * trace_buffered_event_cnt.
2779          */
2780         smp_wmb();
2781
2782         preempt_disable();
2783         /* Do the work on each cpu */
2784         smp_call_function_many(tracing_buffer_mask,
2785                                enable_trace_buffered_event, NULL, 1);
2786         preempt_enable();
2787 }
2788
2789 static struct trace_buffer *temp_buffer;
2790
2791 struct ring_buffer_event *
2792 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2793                           struct trace_event_file *trace_file,
2794                           int type, unsigned long len,
2795                           unsigned int trace_ctx)
2796 {
2797         struct ring_buffer_event *entry;
2798         struct trace_array *tr = trace_file->tr;
2799         int val;
2800
2801         *current_rb = tr->array_buffer.buffer;
2802
2803         if (!tr->no_filter_buffering_ref &&
2804             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2805                 preempt_disable_notrace();
2806                 /*
2807                  * Filtering is on, so try to use the per cpu buffer first.
2808                  * This buffer will simulate a ring_buffer_event,
2809                  * where the type_len is zero and the array[0] will
2810                  * hold the full length.
2811                  * (see include/linux/ring_buffer.h for details on
2812                  *  how the ring_buffer_event is structured).
2813                  *
2814                  * Using a temp buffer during filtering and copying it
2815                  * on a matched filter is quicker than writing directly
2816                  * into the ring buffer and then discarding it when
2817                  * it doesn't match. That is because the discard
2818                  * requires several atomic operations to get right.
2819                  * Copying on a match and doing nothing on a failed match
2820                  * is still quicker than writing directly with no copy and
2821                  * having to discard out of the ring buffer on a failed match.
2822                  */
2823                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2824                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2825
2826                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2827
2828                         /*
2829                          * Preemption is disabled, but interrupts and NMIs
2830                          * can still come in now. If that happens after
2831                          * the above increment, then it will have to go
2832                          * back to the old method of allocating the event
2833                          * on the ring buffer, and if the filter fails, it
2834                          * will have to call ring_buffer_discard_commit()
2835                          * to remove it.
2836                          *
2837                          * Need to also check the unlikely case that the
2838                          * length is bigger than the temp buffer size.
2839                          * If that happens, then the reserve is pretty much
2840                          * guaranteed to fail, as the ring buffer currently
2841                          * only allows events less than a page. But that may
2842                          * change in the future, so let the ring buffer reserve
2843                          * handle the failure in that case.
2844                          */
2845                         if (val == 1 && likely(len <= max_len)) {
2846                                 trace_event_setup(entry, type, trace_ctx);
2847                                 entry->array[0] = len;
2848                                 /* Return with preemption disabled */
2849                                 return entry;
2850                         }
2851                         this_cpu_dec(trace_buffered_event_cnt);
2852                 }
2853                 /* __trace_buffer_lock_reserve() disables preemption */
2854                 preempt_enable_notrace();
2855         }
2856
2857         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2858                                             trace_ctx);
2859         /*
2860          * If tracing is off, but we have triggers enabled,
2861          * we still need to look at the event data. Use the temp_buffer
2862          * to store the trace event for the trigger to use. It's recursion
2863          * safe and will not be recorded anywhere.
2864          */
2865         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2866                 *current_rb = temp_buffer;
2867                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2868                                                     trace_ctx);
2869         }
2870         return entry;
2871 }
2872 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
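
/*
 * Illustrative sketch (simplified; the real callers are the generated
 * trace event probes): an event writer reserves space, fills the entry
 * and commits, with trace_event_buffer_lock_reserve() transparently
 * redirecting the write to the per-CPU temp buffer while filtering is
 * active:
 *
 *	fbuffer.event = trace_event_buffer_lock_reserve(&fbuffer.buffer,
 *					trace_file, event_type,
 *					sizeof(*entry), trace_ctx);
 *	if (!fbuffer.event)
 *		return;
 *	entry = ring_buffer_event_data(fbuffer.event);
 *	... fill in the entry fields ...
 *	trace_event_buffer_commit(&fbuffer);
 */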
2873
2874 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2875 static DEFINE_MUTEX(tracepoint_printk_mutex);
2876
2877 static void output_printk(struct trace_event_buffer *fbuffer)
2878 {
2879         struct trace_event_call *event_call;
2880         struct trace_event_file *file;
2881         struct trace_event *event;
2882         unsigned long flags;
2883         struct trace_iterator *iter = tracepoint_print_iter;
2884
2885         /* We should never get here if iter is NULL */
2886         if (WARN_ON_ONCE(!iter))
2887                 return;
2888
2889         event_call = fbuffer->trace_file->event_call;
2890         if (!event_call || !event_call->event.funcs ||
2891             !event_call->event.funcs->trace)
2892                 return;
2893
2894         file = fbuffer->trace_file;
2895         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2896             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2897              !filter_match_preds(file->filter, fbuffer->entry)))
2898                 return;
2899
2900         event = &fbuffer->trace_file->event_call->event;
2901
2902         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2903         trace_seq_init(&iter->seq);
2904         iter->ent = fbuffer->entry;
2905         event_call->event.funcs->trace(iter, 0, event);
2906         trace_seq_putc(&iter->seq, 0);
2907         printk("%s", iter->seq.buffer);
2908
2909         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2910 }
2911
2912 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2913                              void *buffer, size_t *lenp,
2914                              loff_t *ppos)
2915 {
2916         int save_tracepoint_printk;
2917         int ret;
2918
2919         mutex_lock(&tracepoint_printk_mutex);
2920         save_tracepoint_printk = tracepoint_printk;
2921
2922         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2923
2924         /*
2925          * This will force exiting early, as tracepoint_printk
2926          * is always zero when tracepoint_print_iter is not allocated.
2927          */
2928         if (!tracepoint_print_iter)
2929                 tracepoint_printk = 0;
2930
2931         if (save_tracepoint_printk == tracepoint_printk)
2932                 goto out;
2933
2934         if (tracepoint_printk)
2935                 static_key_enable(&tracepoint_printk_key.key);
2936         else
2937                 static_key_disable(&tracepoint_printk_key.key);
2938
2939  out:
2940         mutex_unlock(&tracepoint_printk_mutex);
2941
2942         return ret;
2943 }
2944
2945 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2946 {
2947         enum event_trigger_type tt = ETT_NONE;
2948         struct trace_event_file *file = fbuffer->trace_file;
2949
2950         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2951                         fbuffer->entry, &tt))
2952                 goto discard;
2953
2954         if (static_key_false(&tracepoint_printk_key.key))
2955                 output_printk(fbuffer);
2956
2957         if (static_branch_unlikely(&trace_event_exports_enabled))
2958                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2959
2960         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2961                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2962
2963 discard:
2964         if (tt)
2965                 event_triggers_post_call(file, tt);
2966
2967 }
2968 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2969
2970 /*
2971  * Skip 3:
2972  *
2973  *   trace_buffer_unlock_commit_regs()
2974  *   trace_event_buffer_commit()
2975  *   trace_event_raw_event_xxx()
2976  */
2977 # define STACK_SKIP 3
2978
2979 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2980                                      struct trace_buffer *buffer,
2981                                      struct ring_buffer_event *event,
2982                                      unsigned int trace_ctx,
2983                                      struct pt_regs *regs)
2984 {
2985         __buffer_unlock_commit(buffer, event);
2986
2987         /*
2988          * If regs is not set, then skip the necessary functions.
2989          * Note, we can still get here via blktrace, wakeup tracer
2990          * and mmiotrace, but that's ok if they lose a function or
2991          * two. They are not that meaningful.
2992          */
2993         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2994         ftrace_trace_userstack(tr, buffer, trace_ctx);
2995 }
2996
2997 /*
2998  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2999  */
3000 void
3001 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3002                                    struct ring_buffer_event *event)
3003 {
3004         __buffer_unlock_commit(buffer, event);
3005 }
3006
3007 void
3008 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3009                parent_ip, unsigned int trace_ctx)
3010 {
3011         struct trace_event_call *call = &event_function;
3012         struct trace_buffer *buffer = tr->array_buffer.buffer;
3013         struct ring_buffer_event *event;
3014         struct ftrace_entry *entry;
3015
3016         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3017                                             trace_ctx);
3018         if (!event)
3019                 return;
3020         entry   = ring_buffer_event_data(event);
3021         entry->ip                       = ip;
3022         entry->parent_ip                = parent_ip;
3023
3024         if (!call_filter_check_discard(call, entry, buffer, event)) {
3025                 if (static_branch_unlikely(&trace_function_exports_enabled))
3026                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3027                 __buffer_unlock_commit(buffer, event);
3028         }
3029 }
3030
3031 #ifdef CONFIG_STACKTRACE
3032
3033 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3034 #define FTRACE_KSTACK_NESTING   4
3035
3036 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3037
3038 struct ftrace_stack {
3039         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3040 };
3041
3042
3043 struct ftrace_stacks {
3044         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3045 };
3046
3047 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3048 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3049
3050 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3051                                  unsigned int trace_ctx,
3052                                  int skip, struct pt_regs *regs)
3053 {
3054         struct trace_event_call *call = &event_kernel_stack;
3055         struct ring_buffer_event *event;
3056         unsigned int size, nr_entries;
3057         struct ftrace_stack *fstack;
3058         struct stack_entry *entry;
3059         int stackidx;
3060
3061         /*
3062          * Add one, for this function and the call to stack_trace_save().
3063          * If regs is set, then these functions will not be in the way.
3064          */
3065 #ifndef CONFIG_UNWINDER_ORC
3066         if (!regs)
3067                 skip++;
3068 #endif
3069
3070         preempt_disable_notrace();
3071
3072         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3073
3074         /* This should never happen. If it does, yell once and skip */
3075         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3076                 goto out;
3077
3078         /*
3079          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3080          * interrupt will either see the value pre-increment or
3081          * post-increment. If the interrupt happens pre-increment, it will
3082          * have restored the counter when it returns. We just need a barrier to
3083          * keep gcc from moving things around.
3084          */
3085         barrier();
3086
3087         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3088         size = ARRAY_SIZE(fstack->calls);
3089
3090         if (regs) {
3091                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3092                                                    size, skip);
3093         } else {
3094                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3095         }
3096
3097         size = nr_entries * sizeof(unsigned long);
3098         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3099                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3100                                     trace_ctx);
3101         if (!event)
3102                 goto out;
3103         entry = ring_buffer_event_data(event);
3104
3105         memcpy(&entry->caller, fstack->calls, size);
3106         entry->size = nr_entries;
3107
3108         if (!call_filter_check_discard(call, entry, buffer, event))
3109                 __buffer_unlock_commit(buffer, event);
3110
3111  out:
3112         /* Again, don't let gcc optimize things here */
3113         barrier();
3114         __this_cpu_dec(ftrace_stack_reserve);
3115         preempt_enable_notrace();
3116
3117 }
3118
3119 static inline void ftrace_trace_stack(struct trace_array *tr,
3120                                       struct trace_buffer *buffer,
3121                                       unsigned int trace_ctx,
3122                                       int skip, struct pt_regs *regs)
3123 {
3124         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3125                 return;
3126
3127         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3128 }
3129
3130 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3131                    int skip)
3132 {
3133         struct trace_buffer *buffer = tr->array_buffer.buffer;
3134
3135         if (rcu_is_watching()) {
3136                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3137                 return;
3138         }
3139
3140         /*
3141          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3142          * but if the above rcu_is_watching() failed, then the NMI
3143          * triggered someplace critical, and ct_irq_enter() should
3144          * not be called from NMI.
3145          */
3146         if (unlikely(in_nmi()))
3147                 return;
3148
3149         ct_irq_enter_irqson();
3150         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3151         ct_irq_exit_irqson();
3152 }
3153
3154 /**
3155  * trace_dump_stack - record a stack back trace in the trace buffer
3156  * @skip: Number of functions to skip (helper handlers)
3157  */
3158 void trace_dump_stack(int skip)
3159 {
3160         if (tracing_disabled || tracing_selftest_running)
3161                 return;
3162
3163 #ifndef CONFIG_UNWINDER_ORC
3164         /* Skip 1 to skip this function. */
3165         skip++;
3166 #endif
3167         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3168                              tracing_gen_ctx(), skip, NULL);
3169 }
3170 EXPORT_SYMBOL_GPL(trace_dump_stack);
3171
3172 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3173 static DEFINE_PER_CPU(int, user_stack_count);
3174
3175 static void
3176 ftrace_trace_userstack(struct trace_array *tr,
3177                        struct trace_buffer *buffer, unsigned int trace_ctx)
3178 {
3179         struct trace_event_call *call = &event_user_stack;
3180         struct ring_buffer_event *event;
3181         struct userstack_entry *entry;
3182
3183         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3184                 return;
3185
3186         /*
3187          * NMIs can not handle page faults, even with fixups.
3188          * Saving the user stack can (and often does) fault.
3189          */
3190         if (unlikely(in_nmi()))
3191                 return;
3192
3193         /*
3194          * prevent recursion, since the user stack tracing may
3195          * trigger other kernel events.
3196          */
3197         preempt_disable();
3198         if (__this_cpu_read(user_stack_count))
3199                 goto out;
3200
3201         __this_cpu_inc(user_stack_count);
3202
3203         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3204                                             sizeof(*entry), trace_ctx);
3205         if (!event)
3206                 goto out_drop_count;
3207         entry   = ring_buffer_event_data(event);
3208
3209         entry->tgid             = current->tgid;
3210         memset(&entry->caller, 0, sizeof(entry->caller));
3211
3212         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3213         if (!call_filter_check_discard(call, entry, buffer, event))
3214                 __buffer_unlock_commit(buffer, event);
3215
3216  out_drop_count:
3217         __this_cpu_dec(user_stack_count);
3218  out:
3219         preempt_enable();
3220 }
3221 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3222 static void ftrace_trace_userstack(struct trace_array *tr,
3223                                    struct trace_buffer *buffer,
3224                                    unsigned int trace_ctx)
3225 {
3226 }
3227 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3228
3229 #endif /* CONFIG_STACKTRACE */
3230
3231 static inline void
3232 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3233                           unsigned long long delta)
3234 {
3235         entry->bottom_delta_ts = delta & U32_MAX;
3236         entry->top_delta_ts = (delta >> 32);
3237 }
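/*
 * The two halves stored above are put back together on the read side; a
 * minimal sketch of the reconstruction (matching the split done in
 * func_repeats_set_delta_ts()) is:
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */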
3238
3239 void trace_last_func_repeats(struct trace_array *tr,
3240                              struct trace_func_repeats *last_info,
3241                              unsigned int trace_ctx)
3242 {
3243         struct trace_buffer *buffer = tr->array_buffer.buffer;
3244         struct func_repeats_entry *entry;
3245         struct ring_buffer_event *event;
3246         u64 delta;
3247
3248         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3249                                             sizeof(*entry), trace_ctx);
3250         if (!event)
3251                 return;
3252
3253         delta = ring_buffer_event_time_stamp(buffer, event) -
3254                 last_info->ts_last_call;
3255
3256         entry = ring_buffer_event_data(event);
3257         entry->ip = last_info->ip;
3258         entry->parent_ip = last_info->parent_ip;
3259         entry->count = last_info->count;
3260         func_repeats_set_delta_ts(entry, delta);
3261
3262         __buffer_unlock_commit(buffer, event);
3263 }
3264
3265 /* created for use with alloc_percpu */
3266 struct trace_buffer_struct {
3267         int nesting;
3268         char buffer[4][TRACE_BUF_SIZE];
3269 };
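/*
 * The four buffers cover the contexts that can nest on a single CPU
 * (normal, softirq, irq and NMI), which is why get_trace_buf() below
 * refuses a nesting depth of four or more.
 */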
3270
3271 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3272
3273 /*
3274  * This allows for lockless recording.  If we're nested too deeply, then
3275  * this returns NULL.
3276  */
3277 static char *get_trace_buf(void)
3278 {
3279         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3280
3281         if (!trace_percpu_buffer || buffer->nesting >= 4)
3282                 return NULL;
3283
3284         buffer->nesting++;
3285
3286         /* Interrupts must see nesting incremented before we use the buffer */
3287         barrier();
3288         return &buffer->buffer[buffer->nesting - 1][0];
3289 }
3290
3291 static void put_trace_buf(void)
3292 {
3293         /* Don't let the decrement of nesting leak before this */
3294         barrier();
3295         this_cpu_dec(trace_percpu_buffer->nesting);
3296 }
3297
3298 static int alloc_percpu_trace_buffer(void)
3299 {
3300         struct trace_buffer_struct __percpu *buffers;
3301
3302         if (trace_percpu_buffer)
3303                 return 0;
3304
3305         buffers = alloc_percpu(struct trace_buffer_struct);
3306         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3307                 return -ENOMEM;
3308
3309         trace_percpu_buffer = buffers;
3310         return 0;
3311 }
3312
3313 static int buffers_allocated;
3314
3315 void trace_printk_init_buffers(void)
3316 {
3317         if (buffers_allocated)
3318                 return;
3319
3320         if (alloc_percpu_trace_buffer())
3321                 return;
3322
3323         /* trace_printk() is for debug use only. Don't use it in production. */
3324
3325         pr_warn("\n");
3326         pr_warn("**********************************************************\n");
3327         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3328         pr_warn("**                                                      **\n");
3329         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3330         pr_warn("**                                                      **\n");
3331         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3332         pr_warn("** unsafe for production use.                           **\n");
3333         pr_warn("**                                                      **\n");
3334         pr_warn("** If you see this message and you are not debugging    **\n");
3335         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3336         pr_warn("**                                                      **\n");
3337         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3338         pr_warn("**********************************************************\n");
3339
3340         /* Expand the buffers to set size */
3341         tracing_update_buffers();
3342
3343         buffers_allocated = 1;
3344
3345         /*
3346          * trace_printk_init_buffers() can be called by modules.
3347          * If that happens, then we need to start cmdline recording
3348          * directly here. If the global_trace.buffer is already
3349          * allocated here, then this was called by module code.
3350          */
3351         if (global_trace.array_buffer.buffer)
3352                 tracing_start_cmdline_record();
3353 }
3354 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3355
3356 void trace_printk_start_comm(void)
3357 {
3358         /* Start tracing comms if trace printk is set */
3359         if (!buffers_allocated)
3360                 return;
3361         tracing_start_cmdline_record();
3362 }
3363
3364 static void trace_printk_start_stop_comm(int enabled)
3365 {
3366         if (!buffers_allocated)
3367                 return;
3368
3369         if (enabled)
3370                 tracing_start_cmdline_record();
3371         else
3372                 tracing_stop_cmdline_record();
3373 }
3374
3375 /**
3376  * trace_vbprintk - write binary msg to tracing buffer
3377  * @ip:    The address of the caller
3378  * @fmt:   The string format to write to the buffer
3379  * @args:  Arguments for @fmt
3380  */
3381 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3382 {
3383         struct trace_event_call *call = &event_bprint;
3384         struct ring_buffer_event *event;
3385         struct trace_buffer *buffer;
3386         struct trace_array *tr = &global_trace;
3387         struct bprint_entry *entry;
3388         unsigned int trace_ctx;
3389         char *tbuffer;
3390         int len = 0, size;
3391
3392         if (unlikely(tracing_selftest_running || tracing_disabled))
3393                 return 0;
3394
3395         /* Don't pollute graph traces with trace_vprintk internals */
3396         pause_graph_tracing();
3397
3398         trace_ctx = tracing_gen_ctx();
3399         preempt_disable_notrace();
3400
3401         tbuffer = get_trace_buf();
3402         if (!tbuffer) {
3403                 len = 0;
3404                 goto out_nobuffer;
3405         }
3406
3407         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3408
3409         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3410                 goto out_put;
3411
3412         size = sizeof(*entry) + sizeof(u32) * len;
3413         buffer = tr->array_buffer.buffer;
3414         ring_buffer_nest_start(buffer);
3415         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3416                                             trace_ctx);
3417         if (!event)
3418                 goto out;
3419         entry = ring_buffer_event_data(event);
3420         entry->ip                       = ip;
3421         entry->fmt                      = fmt;
3422
3423         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3424         if (!call_filter_check_discard(call, entry, buffer, event)) {
3425                 __buffer_unlock_commit(buffer, event);
3426                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3427         }
3428
3429 out:
3430         ring_buffer_nest_end(buffer);
3431 out_put:
3432         put_trace_buf();
3433
3434 out_nobuffer:
3435         preempt_enable_notrace();
3436         unpause_graph_tracing();
3437
3438         return len;
3439 }
3440 EXPORT_SYMBOL_GPL(trace_vbprintk);
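/*
 * For illustration only: a plain trace_printk() with a constant format,
 * e.g.
 *
 *	trace_printk("queue depth: %d\n", depth);
 *
 * typically ends up here, with the arguments binary-encoded into the
 * ring buffer ("depth" is a made-up variable for the example).
 */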
3441
3442 __printf(3, 0)
3443 static int
3444 __trace_array_vprintk(struct trace_buffer *buffer,
3445                       unsigned long ip, const char *fmt, va_list args)
3446 {
3447         struct trace_event_call *call = &event_print;
3448         struct ring_buffer_event *event;
3449         int len = 0, size;
3450         struct print_entry *entry;
3451         unsigned int trace_ctx;
3452         char *tbuffer;
3453
3454         if (tracing_disabled || tracing_selftest_running)
3455                 return 0;
3456
3457         /* Don't pollute graph traces with trace_vprintk internals */
3458         pause_graph_tracing();
3459
3460         trace_ctx = tracing_gen_ctx();
3461         preempt_disable_notrace();
3462
3463
3464         tbuffer = get_trace_buf();
3465         if (!tbuffer) {
3466                 len = 0;
3467                 goto out_nobuffer;
3468         }
3469
3470         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3471
3472         size = sizeof(*entry) + len + 1;
3473         ring_buffer_nest_start(buffer);
3474         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3475                                             trace_ctx);
3476         if (!event)
3477                 goto out;
3478         entry = ring_buffer_event_data(event);
3479         entry->ip = ip;
3480
3481         memcpy(&entry->buf, tbuffer, len + 1);
3482         if (!call_filter_check_discard(call, entry, buffer, event)) {
3483                 __buffer_unlock_commit(buffer, event);
3484                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3485         }
3486
3487 out:
3488         ring_buffer_nest_end(buffer);
3489         put_trace_buf();
3490
3491 out_nobuffer:
3492         preempt_enable_notrace();
3493         unpause_graph_tracing();
3494
3495         return len;
3496 }
3497
3498 __printf(3, 0)
3499 int trace_array_vprintk(struct trace_array *tr,
3500                         unsigned long ip, const char *fmt, va_list args)
3501 {
3502         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3503 }
3504
3505 /**
3506  * trace_array_printk - Print a message to a specific instance
3507  * @tr: The instance trace_array descriptor
3508  * @ip: The instruction pointer that this is called from.
3509  * @fmt: The format to print (printf format)
3510  *
3511  * If a subsystem sets up its own instance, it may printk strings
3512  * into its tracing instance buffer using this function. Note, this
3513  * function will not write into the top level buffer (use
3514  * trace_printk() for that), as the top level buffer should only
3515  * contain events that can be individually disabled. trace_printk()
3516  * is only used for debugging a kernel, and should never be
3517  * incorporated into normal use.
3518  *
3519  * trace_array_printk() can be used, as it will not add noise to the
3520  * top level tracing buffer.
3521  *
3522  * Note, trace_array_init_printk() must be called on @tr before this
3523  * can be used.
3524  */
3525 __printf(3, 0)
3526 int trace_array_printk(struct trace_array *tr,
3527                        unsigned long ip, const char *fmt, ...)
3528 {
3529         int ret;
3530         va_list ap;
3531
3532         if (!tr)
3533                 return -ENOENT;
3534
3535         /* This is only allowed for created instances */
3536         if (tr == &global_trace)
3537                 return 0;
3538
3539         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3540                 return 0;
3541
3542         va_start(ap, fmt);
3543         ret = trace_array_vprintk(tr, ip, fmt, ap);
3544         va_end(ap);
3545         return ret;
3546 }
3547 EXPORT_SYMBOL_GPL(trace_array_printk);
3548
3549 /**
3550  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3551  * @tr: The trace array to initialize the buffers for
3552  *
3553  * As trace_array_printk() only writes into instances, calls to it are
3554  * OK to keep in the kernel (unlike trace_printk()). This needs to be
3555  * called before trace_array_printk() can be used on a trace_array.
3556  */
3557 int trace_array_init_printk(struct trace_array *tr)
3558 {
3559         if (!tr)
3560                 return -ENOENT;
3561
3562         /* This is only allowed for created instances */
3563         if (tr == &global_trace)
3564                 return -EINVAL;
3565
3566         return alloc_percpu_trace_buffer();
3567 }
3568 EXPORT_SYMBOL_GPL(trace_array_init_printk);
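/*
 * Usage sketch (illustrative; the instance name "my_subsys" and the
 * surrounding variables are made up for the example): a subsystem with
 * its own instance could do
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "reset count: %d\n", cnt);
 *
 * so that its messages land in the instance buffer rather than in the
 * top level trace.
 */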
3569
3570 __printf(3, 4)
3571 int trace_array_printk_buf(struct trace_buffer *buffer,
3572                            unsigned long ip, const char *fmt, ...)
3573 {
3574         int ret;
3575         va_list ap;
3576
3577         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3578                 return 0;
3579
3580         va_start(ap, fmt);
3581         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3582         va_end(ap);
3583         return ret;
3584 }
3585
3586 __printf(2, 0)
3587 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3588 {
3589         return trace_array_vprintk(&global_trace, ip, fmt, args);
3590 }
3591 EXPORT_SYMBOL_GPL(trace_vprintk);
3592
3593 static void trace_iterator_increment(struct trace_iterator *iter)
3594 {
3595         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3596
3597         iter->idx++;
3598         if (buf_iter)
3599                 ring_buffer_iter_advance(buf_iter);
3600 }
3601
3602 static struct trace_entry *
3603 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3604                 unsigned long *lost_events)
3605 {
3606         struct ring_buffer_event *event;
3607         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3608
3609         if (buf_iter) {
3610                 event = ring_buffer_iter_peek(buf_iter, ts);
3611                 if (lost_events)
3612                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3613                                 (unsigned long)-1 : 0;
3614         } else {
3615                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3616                                          lost_events);
3617         }
3618
3619         if (event) {
3620                 iter->ent_size = ring_buffer_event_length(event);
3621                 return ring_buffer_event_data(event);
3622         }
3623         iter->ent_size = 0;
3624         return NULL;
3625 }
3626
3627 static struct trace_entry *
3628 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3629                   unsigned long *missing_events, u64 *ent_ts)
3630 {
3631         struct trace_buffer *buffer = iter->array_buffer->buffer;
3632         struct trace_entry *ent, *next = NULL;
3633         unsigned long lost_events = 0, next_lost = 0;
3634         int cpu_file = iter->cpu_file;
3635         u64 next_ts = 0, ts;
3636         int next_cpu = -1;
3637         int next_size = 0;
3638         int cpu;
3639
3640         /*
3641          * If we are in a per_cpu trace file, don't bother iterating over
3642          * all CPUs; just peek at that CPU directly.
3643          */
3644         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3645                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3646                         return NULL;
3647                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3648                 if (ent_cpu)
3649                         *ent_cpu = cpu_file;
3650
3651                 return ent;
3652         }
3653
3654         for_each_tracing_cpu(cpu) {
3655
3656                 if (ring_buffer_empty_cpu(buffer, cpu))
3657                         continue;
3658
3659                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3660
3661                 /*
3662                  * Pick the entry with the smallest timestamp:
3663                  */
3664                 if (ent && (!next || ts < next_ts)) {
3665                         next = ent;
3666                         next_cpu = cpu;
3667                         next_ts = ts;
3668                         next_lost = lost_events;
3669                         next_size = iter->ent_size;
3670                 }
3671         }
3672
3673         iter->ent_size = next_size;
3674
3675         if (ent_cpu)
3676                 *ent_cpu = next_cpu;
3677
3678         if (ent_ts)
3679                 *ent_ts = next_ts;
3680
3681         if (missing_events)
3682                 *missing_events = next_lost;
3683
3684         return next;
3685 }
3686
3687 #define STATIC_FMT_BUF_SIZE     128
3688 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3689
3690 static char *trace_iter_expand_format(struct trace_iterator *iter)
3691 {
3692         char *tmp;
3693
3694         /*
3695          * iter->tr is NULL when used with tp_printk, which means this
3696          * can get called where it is not safe to call krealloc().
3697          */
3698         if (!iter->tr || iter->fmt == static_fmt_buf)
3699                 return NULL;
3700
3701         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3702                        GFP_KERNEL);
3703         if (tmp) {
3704                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3705                 iter->fmt = tmp;
3706         }
3707
3708         return tmp;
3709 }
3710
3711 /* Returns true if the string is safe to dereference from an event */
3712 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3713                            bool star, int len)
3714 {
3715         unsigned long addr = (unsigned long)str;
3716         struct trace_event *trace_event;
3717         struct trace_event_call *event;
3718
3719         /* Ignore strings with no length */
3720         if (star && !len)
3721                 return true;
3722
3723         /* OK if part of the event data */
3724         if ((addr >= (unsigned long)iter->ent) &&
3725             (addr < (unsigned long)iter->ent + iter->ent_size))
3726                 return true;
3727
3728         /* OK if part of the temp seq buffer */
3729         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3730             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3731                 return true;
3732
3733         /* Core rodata can not be freed */
3734         if (is_kernel_rodata(addr))
3735                 return true;
3736
3737         if (trace_is_tracepoint_string(str))
3738                 return true;
3739
3740         /*
3741          * Now this could be a module event, referencing core module
3742          * data, which is OK.
3743          */
3744         if (!iter->ent)
3745                 return false;
3746
3747         trace_event = ftrace_find_event(iter->ent->type);
3748         if (!trace_event)
3749                 return false;
3750
3751         event = container_of(trace_event, struct trace_event_call, event);
3752         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3753                 return false;
3754
3755         /* Would rather have rodata, but this will suffice */
3756         if (within_module_core(addr, event->module))
3757                 return true;
3758
3759         return false;
3760 }
3761
3762 static const char *show_buffer(struct trace_seq *s)
3763 {
3764         struct seq_buf *seq = &s->seq;
3765
3766         seq_buf_terminate(seq);
3767
3768         return seq->buffer;
3769 }
3770
3771 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3772
3773 static int test_can_verify_check(const char *fmt, ...)
3774 {
3775         char buf[16];
3776         va_list ap;
3777         int ret;
3778
3779         /*
3780          * The verifier depends on vsnprintf() modifying the va_list
3781          * passed to it, i.e. on the va_list being passed by reference.
3782          * Some architectures (like x86_32) pass it by value, which means
3783          * that vsnprintf() does not advance the caller's va_list, and the
3784          * verifier would then need to understand every value that
3785          * vsnprintf() can consume. If it is passed by value, the verifier
3786          * is disabled.
3787          */
3788         va_start(ap, fmt);
3789         vsnprintf(buf, 16, "%d", ap);
3790         ret = va_arg(ap, int);
3791         va_end(ap);
3792
3793         return ret;
3794 }
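/*
 * Concretely: test_can_verify_check("%d %d", 0, 1) lets vsnprintf()
 * consume the first argument for "%d". If the architecture passes the
 * va_list by reference, the following va_arg() returns 1 and the
 * verifier stays enabled; if it is passed by value, va_arg() returns 0
 * and test_can_verify() below disables the verifier.
 */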
3795
3796 static void test_can_verify(void)
3797 {
3798         if (!test_can_verify_check("%d %d", 0, 1)) {
3799                 pr_info("trace event string verifier disabled\n");
3800                 static_branch_inc(&trace_no_verify);
3801         }
3802 }
3803
3804 /**
3805  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3806  * @iter: The iterator that holds the seq buffer and the event being printed
3807  * @fmt: The format used to print the event
3808  * @ap: The va_list holding the data to print from @fmt.
3809  *
3810  * This writes the data into the @iter->seq buffer using the data from
3811  * @fmt and @ap. If the format has a %s, then the source of the string
3812  * is examined to make sure it is safe to print, otherwise it will
3813  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3814  * pointer.
3815  */
3816 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3817                          va_list ap)
3818 {
3819         const char *p = fmt;
3820         const char *str;
3821         int i, j;
3822
3823         if (WARN_ON_ONCE(!fmt))
3824                 return;
3825
3826         if (static_branch_unlikely(&trace_no_verify))
3827                 goto print;
3828
3829         /* Don't bother checking when doing a ftrace_dump() */
3830         if (iter->fmt == static_fmt_buf)
3831                 goto print;
3832
3833         while (*p) {
3834                 bool star = false;
3835                 int len = 0;
3836
3837                 j = 0;
3838
3839                 /* We only care about %s and variants */
3840                 for (i = 0; p[i]; i++) {
3841                         if (i + 1 >= iter->fmt_size) {
3842                                 /*
3843                                  * If we can't expand the copy buffer,
3844                                  * just print it.
3845                                  */
3846                                 if (!trace_iter_expand_format(iter))
3847                                         goto print;
3848                         }
3849
3850                         if (p[i] == '\\' && p[i+1]) {
3851                                 i++;
3852                                 continue;
3853                         }
3854                         if (p[i] == '%') {
3855                                 /* Need to test cases like %08.*s */
3856                                 for (j = 1; p[i+j]; j++) {
3857                                         if (isdigit(p[i+j]) ||
3858                                             p[i+j] == '.')
3859                                                 continue;
3860                                         if (p[i+j] == '*') {
3861                                                 star = true;
3862                                                 continue;
3863                                         }
3864                                         break;
3865                                 }
3866                                 if (p[i+j] == 's')
3867                                         break;
3868                                 star = false;
3869                         }
3870                         j = 0;
3871                 }
3872                 /* If no %s found then just print normally */
3873                 if (!p[i])
3874                         break;
3875
3876                 /* Copy up to the %s, and print that */
3877                 strncpy(iter->fmt, p, i);
3878                 iter->fmt[i] = '\0';
3879                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3880
3881                 /*
3882                  * If iter->seq is full, the above call no longer guarantees
3883                  * that ap is in sync with fmt processing, and further calls
3884                  * to va_arg() can return wrong positional arguments.
3885                  *
3886                  * Ensure that ap is no longer used in this case.
3887                  */
3888                 if (iter->seq.full) {
3889                         p = "";
3890                         break;
3891                 }
3892
3893                 if (star)
3894                         len = va_arg(ap, int);
3895
3896                 /* The ap now points to the string data of the %s */
3897                 str = va_arg(ap, const char *);
3898
3899                 /*
3900                  * If you hit this warning, it is likely that the
3901                  * trace event in question used %s on a string that
3902                  * was saved at the time of the event, but may not be
3903                  * around when the trace is read. Use __string(),
3904                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3905                  * instead. See samples/trace_events/trace-events-sample.h
3906                  * for reference.
3907                  */
3908                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3909                               "fmt: '%s' current_buffer: '%s'",
3910                               fmt, show_buffer(&iter->seq))) {
3911                         int ret;
3912
3913                         /* Try to safely read the string */
3914                         if (star) {
3915                                 if (len + 1 > iter->fmt_size)
3916                                         len = iter->fmt_size - 1;
3917                                 if (len < 0)
3918                                         len = 0;
3919                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3920                                 iter->fmt[len] = 0;
3921                                 star = false;
3922                         } else {
3923                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3924                                                                   iter->fmt_size);
3925                         }
3926                         if (ret < 0)
3927                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3928                         else
3929                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3930                                                  str, iter->fmt);
3931                         str = "[UNSAFE-MEMORY]";
3932                         strcpy(iter->fmt, "%s");
3933                 } else {
3934                         strncpy(iter->fmt, p + i, j + 1);
3935                         iter->fmt[j+1] = '\0';
3936                 }
3937                 if (star)
3938                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3939                 else
3940                         trace_seq_printf(&iter->seq, iter->fmt, str);
3941
3942                 p += i + j + 1;
3943         }
3944  print:
3945         if (*p)
3946                 trace_seq_vprintf(&iter->seq, p, ap);
3947 }
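/*
 * A sketch of the safe pattern the warning above points people to (the
 * event and field names here are made up; see
 * samples/trace_events/trace-events-sample.h for the real reference):
 *
 *	TRACE_EVENT(foo_bar,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(__string(name, name)),
 *		TP_fast_assign(__assign_str(name, name);),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */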
3948
3949 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3950 {
3951         const char *p, *new_fmt;
3952         char *q;
3953
3954         if (WARN_ON_ONCE(!fmt))
3955                 return fmt;
3956
3957         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3958                 return fmt;
3959
3960         p = fmt;
3961         new_fmt = q = iter->fmt;
3962         while (*p) {
3963                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3964                         if (!trace_iter_expand_format(iter))
3965                                 return fmt;
3966
3967                         q += iter->fmt - new_fmt;
3968                         new_fmt = iter->fmt;
3969                 }
3970
3971                 *q++ = *p++;
3972
3973                 /* Replace %p with %px */
3974                 if (p[-1] == '%') {
3975                         if (p[0] == '%') {
3976                                 *q++ = *p++;
3977                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3978                                 *q++ = *p++;
3979                                 *q++ = 'x';
3980                         }
3981                 }
3982         }
3983         *q = '\0';
3984
3985         return new_fmt;
3986 }
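/*
 * For example, when hash-ptr printing is turned off a format such as
 * "client=%p flags=%pS pct=100%%" comes back as
 * "client=%px flags=%pS pct=100%%": only a bare %p is rewritten, while
 * %pS (an alphanumeric follows the 'p') and the literal %% are left
 * untouched.
 */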
3987
3988 #define STATIC_TEMP_BUF_SIZE    128
3989 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3990
3991 /* Find the next real entry, without updating the iterator itself */
3992 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3993                                           int *ent_cpu, u64 *ent_ts)
3994 {
3995         /* __find_next_entry will reset ent_size */
3996         int ent_size = iter->ent_size;
3997         struct trace_entry *entry;
3998
3999         /*
4000          * If called from ftrace_dump(), then the iter->temp buffer
4001          * will be the static_temp_buf and not created from kmalloc.
4002          * If the entry size is greater than the buffer, we cannot
4003          * save it. Just return NULL in that case. This is only
4004          * used to add markers when two consecutive events' time
4005          * stamps have a large delta. See trace_print_lat_context().
4006          */
4007         if (iter->temp == static_temp_buf &&
4008             STATIC_TEMP_BUF_SIZE < ent_size)
4009                 return NULL;
4010
4011         /*
4012          * The __find_next_entry() may call peek_next_entry(), which may
4013          * call ring_buffer_peek() that may make the contents of iter->ent
4014          * undefined. Need to copy iter->ent now.
4015          */
4016         if (iter->ent && iter->ent != iter->temp) {
4017                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4018                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4019                         void *temp;
4020                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4021                         if (!temp)
4022                                 return NULL;
4023                         kfree(iter->temp);
4024                         iter->temp = temp;
4025                         iter->temp_size = iter->ent_size;
4026                 }
4027                 memcpy(iter->temp, iter->ent, iter->ent_size);
4028                 iter->ent = iter->temp;
4029         }
4030         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4031         /* Put back the original ent_size */
4032         iter->ent_size = ent_size;
4033
4034         return entry;
4035 }
4036
4037 /* Find the next real entry, and increment the iterator to the next entry */
4038 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4039 {
4040         iter->ent = __find_next_entry(iter, &iter->cpu,
4041                                       &iter->lost_events, &iter->ts);
4042
4043         if (iter->ent)
4044                 trace_iterator_increment(iter);
4045
4046         return iter->ent ? iter : NULL;
4047 }
4048
4049 static void trace_consume(struct trace_iterator *iter)
4050 {
4051         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4052                             &iter->lost_events);
4053 }
4054
4055 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4056 {
4057         struct trace_iterator *iter = m->private;
4058         int i = (int)*pos;
4059         void *ent;
4060
4061         WARN_ON_ONCE(iter->leftover);
4062
4063         (*pos)++;
4064
4065         /* can't go backwards */
4066         if (iter->idx > i)
4067                 return NULL;
4068
4069         if (iter->idx < 0)
4070                 ent = trace_find_next_entry_inc(iter);
4071         else
4072                 ent = iter;
4073
4074         while (ent && iter->idx < i)
4075                 ent = trace_find_next_entry_inc(iter);
4076
4077         iter->pos = *pos;
4078
4079         return ent;
4080 }
4081
4082 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4083 {
4084         struct ring_buffer_iter *buf_iter;
4085         unsigned long entries = 0;
4086         u64 ts;
4087
4088         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4089
4090         buf_iter = trace_buffer_iter(iter, cpu);
4091         if (!buf_iter)
4092                 return;
4093
4094         ring_buffer_iter_reset(buf_iter);
4095
4096         /*
4097          * With the max latency tracers, it is possible that a reset
4098          * never took place on a cpu. This is evident from the timestamp
4099          * being before the start of the buffer.
4100          */
4101         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4102                 if (ts >= iter->array_buffer->time_start)
4103                         break;
4104                 entries++;
4105                 ring_buffer_iter_advance(buf_iter);
4106         }
4107
4108         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4109 }
4110
4111 /*
4112  * The current tracer is copied to avoid taking a global lock
4113  * all around.
4114  */
4115 static void *s_start(struct seq_file *m, loff_t *pos)
4116 {
4117         struct trace_iterator *iter = m->private;
4118         struct trace_array *tr = iter->tr;
4119         int cpu_file = iter->cpu_file;
4120         void *p = NULL;
4121         loff_t l = 0;
4122         int cpu;
4123
4124         /*
4125          * Copy the tracer to avoid using a global lock all around.
4126          * iter->trace is a copy of current_trace; the pointer to the
4127          * name may be compared instead of using strcmp(), as iter->trace->name
4128          * will point to the same string as current_trace->name.
4129          */
4130         mutex_lock(&trace_types_lock);
4131         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
4132                 /* Close iter->trace before switching to the new current tracer */
4133                 if (iter->trace->close)
4134                         iter->trace->close(iter);
4135                 *iter->trace = *tr->current_trace;
4136                 /* Reopen the new current tracer */
4137                 if (iter->trace->open)
4138                         iter->trace->open(iter);
4139         }
4140         mutex_unlock(&trace_types_lock);
4141
4142 #ifdef CONFIG_TRACER_MAX_TRACE
4143         if (iter->snapshot && iter->trace->use_max_tr)
4144                 return ERR_PTR(-EBUSY);
4145 #endif
4146
4147         if (*pos != iter->pos) {
4148                 iter->ent = NULL;
4149                 iter->cpu = 0;
4150                 iter->idx = -1;
4151
4152                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4153                         for_each_tracing_cpu(cpu)
4154                                 tracing_iter_reset(iter, cpu);
4155                 } else
4156                         tracing_iter_reset(iter, cpu_file);
4157
4158                 iter->leftover = 0;
4159                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4160                         ;
4161
4162         } else {
4163                 /*
4164                  * If we overflowed the seq_file before, then we want
4165                  * to just reuse the trace_seq buffer again.
4166                  */
4167                 if (iter->leftover)
4168                         p = iter;
4169                 else {
4170                         l = *pos - 1;
4171                         p = s_next(m, p, &l);
4172                 }
4173         }
4174
4175         trace_event_read_lock();
4176         trace_access_lock(cpu_file);
4177         return p;
4178 }
4179
4180 static void s_stop(struct seq_file *m, void *p)
4181 {
4182         struct trace_iterator *iter = m->private;
4183
4184 #ifdef CONFIG_TRACER_MAX_TRACE
4185         if (iter->snapshot && iter->trace->use_max_tr)
4186                 return;
4187 #endif
4188
4189         trace_access_unlock(iter->cpu_file);
4190         trace_event_read_unlock();
4191 }
4192
4193 static void
4194 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4195                       unsigned long *entries, int cpu)
4196 {
4197         unsigned long count;
4198
4199         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4200         /*
4201          * If this buffer has skipped entries, then we hold all
4202          * entries for the trace and we need to ignore the
4203          * ones before the time stamp.
4204          */
4205         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4206                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4207                 /* total is the same as the entries */
4208                 *total = count;
4209         } else
4210                 *total = count +
4211                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4212         *entries = count;
4213 }
4214
4215 static void
4216 get_total_entries(struct array_buffer *buf,
4217                   unsigned long *total, unsigned long *entries)
4218 {
4219         unsigned long t, e;
4220         int cpu;
4221
4222         *total = 0;
4223         *entries = 0;
4224
4225         for_each_tracing_cpu(cpu) {
4226                 get_total_entries_cpu(buf, &t, &e, cpu);
4227                 *total += t;
4228                 *entries += e;
4229         }
4230 }
4231
4232 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4233 {
4234         unsigned long total, entries;
4235
4236         if (!tr)
4237                 tr = &global_trace;
4238
4239         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4240
4241         return entries;
4242 }
4243
4244 unsigned long trace_total_entries(struct trace_array *tr)
4245 {
4246         unsigned long total, entries;
4247
4248         if (!tr)
4249                 tr = &global_trace;
4250
4251         get_total_entries(&tr->array_buffer, &total, &entries);
4252
4253         return entries;
4254 }
4255
4256 static void print_lat_help_header(struct seq_file *m)
4257 {
4258         seq_puts(m, "#                    _------=> CPU#            \n"
4259                     "#                   / _-----=> irqs-off/BH-disabled\n"
4260                     "#                  | / _----=> need-resched    \n"
4261                     "#                  || / _---=> hardirq/softirq \n"
4262                     "#                  ||| / _--=> preempt-depth   \n"
4263                     "#                  |||| / _-=> migrate-disable \n"
4264                     "#                  ||||| /     delay           \n"
4265                     "#  cmd     pid     |||||| time  |   caller     \n"
4266                     "#     \\   /        ||||||  \\    |    /       \n");
4267 }
4268
4269 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4270 {
4271         unsigned long total;
4272         unsigned long entries;
4273
4274         get_total_entries(buf, &total, &entries);
4275         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4276                    entries, total, num_online_cpus());
4277         seq_puts(m, "#\n");
4278 }
4279
4280 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4281                                    unsigned int flags)
4282 {
4283         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4284
4285         print_event_info(buf, m);
4286
4287         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4288         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4289 }
4290
4291 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4292                                        unsigned int flags)
4293 {
4294         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4295         static const char space[] = "            ";
4296         int prec = tgid ? 12 : 2;
4297
4298         print_event_info(buf, m);
4299
4300         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4301         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4302         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4303         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4304         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4305         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4306         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4307         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4308 }
4309
4310 void
4311 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4312 {
4313         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4314         struct array_buffer *buf = iter->array_buffer;
4315         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4316         struct tracer *type = iter->trace;
4317         unsigned long entries;
4318         unsigned long total;
4319         const char *name = type->name;
4320
4321         get_total_entries(buf, &total, &entries);
4322
4323         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4324                    name, UTS_RELEASE);
4325         seq_puts(m, "# -----------------------------------"
4326                  "---------------------------------\n");
4327         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4328                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4329                    nsecs_to_usecs(data->saved_latency),
4330                    entries,
4331                    total,
4332                    buf->cpu,
4333                    preempt_model_none()      ? "server" :
4334                    preempt_model_voluntary() ? "desktop" :
4335                    preempt_model_full()      ? "preempt" :
4336                    preempt_model_rt()        ? "preempt_rt" :
4337                    "unknown",
4338                    /* These are reserved for later use */
4339                    0, 0, 0, 0);
4340 #ifdef CONFIG_SMP
4341         seq_printf(m, " #P:%d)\n", num_online_cpus());
4342 #else
4343         seq_puts(m, ")\n");
4344 #endif
4345         seq_puts(m, "#    -----------------\n");
4346         seq_printf(m, "#    | task: %.16s-%d "
4347                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4348                    data->comm, data->pid,
4349                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4350                    data->policy, data->rt_priority);
4351         seq_puts(m, "#    -----------------\n");
4352
4353         if (data->critical_start) {
4354                 seq_puts(m, "#  => started at: ");
4355                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4356                 trace_print_seq(m, &iter->seq);
4357                 seq_puts(m, "\n#  => ended at:   ");
4358                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4359                 trace_print_seq(m, &iter->seq);
4360                 seq_puts(m, "\n#\n");
4361         }
4362
4363         seq_puts(m, "#\n");
4364 }
4365
4366 static void test_cpu_buff_start(struct trace_iterator *iter)
4367 {
4368         struct trace_seq *s = &iter->seq;
4369         struct trace_array *tr = iter->tr;
4370
4371         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4372                 return;
4373
4374         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4375                 return;
4376
4377         if (cpumask_available(iter->started) &&
4378             cpumask_test_cpu(iter->cpu, iter->started))
4379                 return;
4380
4381         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4382                 return;
4383
4384         if (cpumask_available(iter->started))
4385                 cpumask_set_cpu(iter->cpu, iter->started);
4386
4387         /* Don't print started cpu buffer for the first entry of the trace */
4388         if (iter->idx > 1)
4389                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4390                                 iter->cpu);
4391 }
4392
4393 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4394 {
4395         struct trace_array *tr = iter->tr;
4396         struct trace_seq *s = &iter->seq;
4397         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4398         struct trace_entry *entry;
4399         struct trace_event *event;
4400
4401         entry = iter->ent;
4402
4403         test_cpu_buff_start(iter);
4404
4405         event = ftrace_find_event(entry->type);
4406
4407         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4408                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4409                         trace_print_lat_context(iter);
4410                 else
4411                         trace_print_context(iter);
4412         }
4413
4414         if (trace_seq_has_overflowed(s))
4415                 return TRACE_TYPE_PARTIAL_LINE;
4416
4417         if (event)
4418                 return event->funcs->trace(iter, sym_flags, event);
4419
4420         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4421
4422         return trace_handle_return(s);
4423 }
4424
4425 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4426 {
4427         struct trace_array *tr = iter->tr;
4428         struct trace_seq *s = &iter->seq;
4429         struct trace_entry *entry;
4430         struct trace_event *event;
4431
4432         entry = iter->ent;
4433
4434         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4435                 trace_seq_printf(s, "%d %d %llu ",
4436                                  entry->pid, iter->cpu, iter->ts);
4437
4438         if (trace_seq_has_overflowed(s))
4439                 return TRACE_TYPE_PARTIAL_LINE;
4440
4441         event = ftrace_find_event(entry->type);
4442         if (event)
4443                 return event->funcs->raw(iter, 0, event);
4444
4445         trace_seq_printf(s, "%d ?\n", entry->type);
4446
4447         return trace_handle_return(s);
4448 }
4449
4450 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4451 {
4452         struct trace_array *tr = iter->tr;
4453         struct trace_seq *s = &iter->seq;
4454         unsigned char newline = '\n';
4455         struct trace_entry *entry;
4456         struct trace_event *event;
4457
4458         entry = iter->ent;
4459
4460         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4461                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4462                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4463                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4464                 if (trace_seq_has_overflowed(s))
4465                         return TRACE_TYPE_PARTIAL_LINE;
4466         }
4467
4468         event = ftrace_find_event(entry->type);
4469         if (event) {
4470                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4471                 if (ret != TRACE_TYPE_HANDLED)
4472                         return ret;
4473         }
4474
4475         SEQ_PUT_FIELD(s, newline);
4476
4477         return trace_handle_return(s);
4478 }
4479
4480 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4481 {
4482         struct trace_array *tr = iter->tr;
4483         struct trace_seq *s = &iter->seq;
4484         struct trace_entry *entry;
4485         struct trace_event *event;
4486
4487         entry = iter->ent;
4488
4489         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4490                 SEQ_PUT_FIELD(s, entry->pid);
4491                 SEQ_PUT_FIELD(s, iter->cpu);
4492                 SEQ_PUT_FIELD(s, iter->ts);
4493                 if (trace_seq_has_overflowed(s))
4494                         return TRACE_TYPE_PARTIAL_LINE;
4495         }
4496
4497         event = ftrace_find_event(entry->type);
4498         return event ? event->funcs->binary(iter, 0, event) :
4499                 TRACE_TYPE_HANDLED;
4500 }
4501
4502 int trace_empty(struct trace_iterator *iter)
4503 {
4504         struct ring_buffer_iter *buf_iter;
4505         int cpu;
4506
4507         /* If we are looking at one CPU buffer, only check that one */
4508         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4509                 cpu = iter->cpu_file;
4510                 buf_iter = trace_buffer_iter(iter, cpu);
4511                 if (buf_iter) {
4512                         if (!ring_buffer_iter_empty(buf_iter))
4513                                 return 0;
4514                 } else {
4515                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4516                                 return 0;
4517                 }
4518                 return 1;
4519         }
4520
4521         for_each_tracing_cpu(cpu) {
4522                 buf_iter = trace_buffer_iter(iter, cpu);
4523                 if (buf_iter) {
4524                         if (!ring_buffer_iter_empty(buf_iter))
4525                                 return 0;
4526                 } else {
4527                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4528                                 return 0;
4529                 }
4530         }
4531
4532         return 1;
4533 }
4534
4535 /*  Called with trace_event_read_lock() held. */
4536 enum print_line_t print_trace_line(struct trace_iterator *iter)
4537 {
4538         struct trace_array *tr = iter->tr;
4539         unsigned long trace_flags = tr->trace_flags;
4540         enum print_line_t ret;
4541
4542         if (iter->lost_events) {
4543                 if (iter->lost_events == (unsigned long)-1)
4544                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4545                                          iter->cpu);
4546                 else
4547                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4548                                          iter->cpu, iter->lost_events);
4549                 if (trace_seq_has_overflowed(&iter->seq))
4550                         return TRACE_TYPE_PARTIAL_LINE;
4551         }
4552
4553         if (iter->trace && iter->trace->print_line) {
4554                 ret = iter->trace->print_line(iter);
4555                 if (ret != TRACE_TYPE_UNHANDLED)
4556                         return ret;
4557         }
4558
4559         if (iter->ent->type == TRACE_BPUTS &&
4560                         trace_flags & TRACE_ITER_PRINTK &&
4561                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4562                 return trace_print_bputs_msg_only(iter);
4563
4564         if (iter->ent->type == TRACE_BPRINT &&
4565                         trace_flags & TRACE_ITER_PRINTK &&
4566                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4567                 return trace_print_bprintk_msg_only(iter);
4568
4569         if (iter->ent->type == TRACE_PRINT &&
4570                         trace_flags & TRACE_ITER_PRINTK &&
4571                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4572                 return trace_print_printk_msg_only(iter);
4573
4574         if (trace_flags & TRACE_ITER_BIN)
4575                 return print_bin_fmt(iter);
4576
4577         if (trace_flags & TRACE_ITER_HEX)
4578                 return print_hex_fmt(iter);
4579
4580         if (trace_flags & TRACE_ITER_RAW)
4581                 return print_raw_fmt(iter);
4582
4583         return print_trace_fmt(iter);
4584 }
4585
4586 void trace_latency_header(struct seq_file *m)
4587 {
4588         struct trace_iterator *iter = m->private;
4589         struct trace_array *tr = iter->tr;
4590
4591         /* print nothing if the buffers are empty */
4592         if (trace_empty(iter))
4593                 return;
4594
4595         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4596                 print_trace_header(m, iter);
4597
4598         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4599                 print_lat_help_header(m);
4600 }
4601
4602 void trace_default_header(struct seq_file *m)
4603 {
4604         struct trace_iterator *iter = m->private;
4605         struct trace_array *tr = iter->tr;
4606         unsigned long trace_flags = tr->trace_flags;
4607
4608         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4609                 return;
4610
4611         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4612                 /* print nothing if the buffers are empty */
4613                 if (trace_empty(iter))
4614                         return;
4615                 print_trace_header(m, iter);
4616                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4617                         print_lat_help_header(m);
4618         } else {
4619                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4620                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4621                                 print_func_help_header_irq(iter->array_buffer,
4622                                                            m, trace_flags);
4623                         else
4624                                 print_func_help_header(iter->array_buffer, m,
4625                                                        trace_flags);
4626                 }
4627         }
4628 }
4629
4630 static void test_ftrace_alive(struct seq_file *m)
4631 {
4632         if (!ftrace_is_dead())
4633                 return;
4634         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4635                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4636 }
4637
4638 #ifdef CONFIG_TRACER_MAX_TRACE
4639 static void show_snapshot_main_help(struct seq_file *m)
4640 {
4641         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4642                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4643                     "#                      Takes a snapshot of the main buffer.\n"
4644                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4645                     "#                      (Doesn't have to be '2'; works with any number that\n"
4646                     "#                       is not a '0' or '1')\n");
4647 }
4648
4649 static void show_snapshot_percpu_help(struct seq_file *m)
4650 {
4651         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4652 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4653         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4654                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4655 #else
4656         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4657                     "#                     Must use main snapshot file to allocate.\n");
4658 #endif
4659         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4660                     "#                      (Doesn't have to be '2'; works with any number that\n"
4661                     "#                       is not a '0' or '1')\n");
4662 }
4663
4664 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4665 {
4666         if (iter->tr->allocated_snapshot)
4667                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4668         else
4669                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4670
4671         seq_puts(m, "# Snapshot commands:\n");
4672         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4673                 show_snapshot_main_help(m);
4674         else
4675                 show_snapshot_percpu_help(m);
4676 }
4677 #else
4678 /* Should never be called */
4679 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4680 #endif
4681
4682 static int s_show(struct seq_file *m, void *v)
4683 {
4684         struct trace_iterator *iter = v;
4685         int ret;
4686
4687         if (iter->ent == NULL) {
4688                 if (iter->tr) {
4689                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4690                         seq_puts(m, "#\n");
4691                         test_ftrace_alive(m);
4692                 }
4693                 if (iter->snapshot && trace_empty(iter))
4694                         print_snapshot_help(m, iter);
4695                 else if (iter->trace && iter->trace->print_header)
4696                         iter->trace->print_header(m);
4697                 else
4698                         trace_default_header(m);
4699
4700         } else if (iter->leftover) {
4701                 /*
4702                  * If we filled the seq_file buffer earlier, we
4703                  * want to just show it now.
4704                  */
4705                 ret = trace_print_seq(m, &iter->seq);
4706
4707                 /* ret should this time be zero, but you never know */
4708                 iter->leftover = ret;
4709
4710         } else {
4711                 print_trace_line(iter);
4712                 ret = trace_print_seq(m, &iter->seq);
4713                 /*
4714                  * If we overflow the seq_file buffer, then it will
4715                  * ask us for this data again at start up.
4716                  * Use that instead.
4717                  *  ret is 0 if seq_file write succeeded.
4718                  *        -1 otherwise.
4719                  */
4720                 iter->leftover = ret;
4721         }
4722
4723         return 0;
4724 }
4725
4726 /*
4727  * Should be used after trace_array_get(); trace_types_lock
4728  * ensures that i_cdev was already initialized.
4729  */
4730 static inline int tracing_get_cpu(struct inode *inode)
4731 {
4732         if (inode->i_cdev) /* See trace_create_cpu_file() */
4733                 return (long)inode->i_cdev - 1;
4734         return RING_BUFFER_ALL_CPUS;
4735 }
4736
4737 static const struct seq_operations tracer_seq_ops = {
4738         .start          = s_start,
4739         .next           = s_next,
4740         .stop           = s_stop,
4741         .show           = s_show,
4742 };
4743
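/*
 * __tracing_open - set up a trace_iterator for reading the trace buffer
 *
 * Allocates the iterator plus one ring-buffer iterator per possible CPU,
 * copies the current tracer so a concurrent tracer switch cannot affect
 * this reader, and, when the pause-on-trace option is set and this is not
 * the snapshot file, stops tracing while the file is open.  Returns an
 * ERR_PTR() on failure.
 */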
4744 static struct trace_iterator *
4745 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4746 {
4747         struct trace_array *tr = inode->i_private;
4748         struct trace_iterator *iter;
4749         int cpu;
4750
4751         if (tracing_disabled)
4752                 return ERR_PTR(-ENODEV);
4753
4754         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4755         if (!iter)
4756                 return ERR_PTR(-ENOMEM);
4757
4758         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4759                                     GFP_KERNEL);
4760         if (!iter->buffer_iter)
4761                 goto release;
4762
4763         /*
4764          * trace_find_next_entry() may need to save off iter->ent.
4765          * It will place it into the iter->temp buffer. As most
4766          * events are less than 128 bytes, allocate a buffer of that size.
4767          * If one is greater, then trace_find_next_entry() will
4768          * allocate a new buffer to adjust for the bigger iter->ent.
4769          * It's not critical if it fails to get allocated here.
4770          */
4771         iter->temp = kmalloc(128, GFP_KERNEL);
4772         if (iter->temp)
4773                 iter->temp_size = 128;
4774
4775         /*
4776          * trace_event_printf() may need to modify the given format
4777          * string to replace %p with %px so that it shows the real address
4778          * instead of a hashed value. However, that is only needed for event
4779          * tracing; other tracers may not need it. Defer the allocation
4780          * until it is needed.
4781          */
4782         iter->fmt = NULL;
4783         iter->fmt_size = 0;
4784
4785         /*
4786          * We make a copy of the current tracer to avoid concurrent
4787          * changes on it while we are reading.
4788          */
4789         mutex_lock(&trace_types_lock);
4790         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4791         if (!iter->trace)
4792                 goto fail;
4793
4794         *iter->trace = *tr->current_trace;
4795
4796         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4797                 goto fail;
4798
4799         iter->tr = tr;
4800
4801 #ifdef CONFIG_TRACER_MAX_TRACE
4802         /* Currently only the top directory has a snapshot */
4803         if (tr->current_trace->print_max || snapshot)
4804                 iter->array_buffer = &tr->max_buffer;
4805         else
4806 #endif
4807                 iter->array_buffer = &tr->array_buffer;
4808         iter->snapshot = snapshot;
4809         iter->pos = -1;
4810         iter->cpu_file = tracing_get_cpu(inode);
4811         mutex_init(&iter->mutex);
4812
4813         /* Notify the tracer early; before we stop tracing. */
4814         if (iter->trace->open)
4815                 iter->trace->open(iter);
4816
4817         /* Annotate start of buffers if we had overruns */
4818         if (ring_buffer_overruns(iter->array_buffer->buffer))
4819                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4820
4821         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4822         if (trace_clocks[tr->clock_id].in_ns)
4823                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4824
4825         /*
4826          * If pause-on-trace is enabled, then stop the trace while
4827          * dumping, unless this is the "snapshot" file
4828          */
4829         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4830                 tracing_stop_tr(tr);
4831
4832         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4833                 for_each_tracing_cpu(cpu) {
4834                         iter->buffer_iter[cpu] =
4835                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4836                                                          cpu, GFP_KERNEL);
4837                 }
4838                 ring_buffer_read_prepare_sync();
4839                 for_each_tracing_cpu(cpu) {
4840                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4841                         tracing_iter_reset(iter, cpu);
4842                 }
4843         } else {
4844                 cpu = iter->cpu_file;
4845                 iter->buffer_iter[cpu] =
4846                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4847                                                  cpu, GFP_KERNEL);
4848                 ring_buffer_read_prepare_sync();
4849                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4850                 tracing_iter_reset(iter, cpu);
4851         }
4852
4853         mutex_unlock(&trace_types_lock);
4854
4855         return iter;
4856
4857  fail:
4858         mutex_unlock(&trace_types_lock);
4859         kfree(iter->trace);
4860         kfree(iter->temp);
4861         kfree(iter->buffer_iter);
4862 release:
4863         seq_release_private(inode, file);
4864         return ERR_PTR(-ENOMEM);
4865 }
4866
4867 int tracing_open_generic(struct inode *inode, struct file *filp)
4868 {
4869         int ret;
4870
4871         ret = tracing_check_open_get_tr(NULL);
4872         if (ret)
4873                 return ret;
4874
4875         filp->private_data = inode->i_private;
4876         return 0;
4877 }
4878
4879 bool tracing_is_disabled(void)
4880 {
4881         return tracing_disabled;
4882 }
4883
4884 /*
4885  * Open and update trace_array ref count.
4886  * Must have the current trace_array passed to it.
4887  */
4888 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4889 {
4890         struct trace_array *tr = inode->i_private;
4891         int ret;
4892
4893         ret = tracing_check_open_get_tr(tr);
4894         if (ret)
4895                 return ret;
4896
4897         filp->private_data = inode->i_private;
4898
4899         return 0;
4900 }
4901
4902 /*
4903  * The private pointer of the inode is the trace_event_file.
4904  * Update the tr ref count associated to it.
4905  */
4906 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4907 {
4908         struct trace_event_file *file = inode->i_private;
4909         int ret;
4910
4911         ret = tracing_check_open_get_tr(file->tr);
4912         if (ret)
4913                 return ret;
4914
4915         filp->private_data = inode->i_private;
4916
4917         return 0;
4918 }
4919
4920 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4921 {
4922         struct trace_event_file *file = inode->i_private;
4923
4924         trace_array_put(file->tr);
4925
4926         return 0;
4927 }
4928
4929 static int tracing_mark_open(struct inode *inode, struct file *filp)
4930 {
4931         stream_open(inode, filp);
4932         return tracing_open_generic_tr(inode, filp);
4933 }
4934
4935 static int tracing_release(struct inode *inode, struct file *file)
4936 {
4937         struct trace_array *tr = inode->i_private;
4938         struct seq_file *m = file->private_data;
4939         struct trace_iterator *iter;
4940         int cpu;
4941
4942         if (!(file->f_mode & FMODE_READ)) {
4943                 trace_array_put(tr);
4944                 return 0;
4945         }
4946
4947         /* Writes do not use seq_file */
4948         iter = m->private;
4949         mutex_lock(&trace_types_lock);
4950
4951         for_each_tracing_cpu(cpu) {
4952                 if (iter->buffer_iter[cpu])
4953                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4954         }
4955
4956         if (iter->trace && iter->trace->close)
4957                 iter->trace->close(iter);
4958
4959         if (!iter->snapshot && tr->stop_count)
4960                 /* reenable tracing if it was previously enabled */
4961                 tracing_start_tr(tr);
4962
4963         __trace_array_put(tr);
4964
4965         mutex_unlock(&trace_types_lock);
4966
4967         mutex_destroy(&iter->mutex);
4968         free_cpumask_var(iter->started);
4969         kfree(iter->fmt);
4970         kfree(iter->temp);
4971         kfree(iter->trace);
4972         kfree(iter->buffer_iter);
4973         seq_release_private(inode, file);
4974
4975         return 0;
4976 }
4977
4978 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4979 {
4980         struct trace_array *tr = inode->i_private;
4981
4982         trace_array_put(tr);
4983         return 0;
4984 }
4985
4986 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4987 {
4988         struct trace_array *tr = inode->i_private;
4989
4990         trace_array_put(tr);
4991
4992         return single_release(inode, file);
4993 }
4994
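/*
 * Open handler for the "trace" file.  Opening for write with O_TRUNC
 * resets the selected per-CPU buffer (or all CPUs); opening for read
 * builds a full iterator via __tracing_open().
 */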
4995 static int tracing_open(struct inode *inode, struct file *file)
4996 {
4997         struct trace_array *tr = inode->i_private;
4998         struct trace_iterator *iter;
4999         int ret;
5000
5001         ret = tracing_check_open_get_tr(tr);
5002         if (ret)
5003                 return ret;
5004
5005         /* If this file was open for write, then erase contents */
5006         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5007                 int cpu = tracing_get_cpu(inode);
5008                 struct array_buffer *trace_buf = &tr->array_buffer;
5009
5010 #ifdef CONFIG_TRACER_MAX_TRACE
5011                 if (tr->current_trace->print_max)
5012                         trace_buf = &tr->max_buffer;
5013 #endif
5014
5015                 if (cpu == RING_BUFFER_ALL_CPUS)
5016                         tracing_reset_online_cpus(trace_buf);
5017                 else
5018                         tracing_reset_cpu(trace_buf, cpu);
5019         }
5020
5021         if (file->f_mode & FMODE_READ) {
5022                 iter = __tracing_open(inode, file, false);
5023                 if (IS_ERR(iter))
5024                         ret = PTR_ERR(iter);
5025                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5026                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5027         }
5028
5029         if (ret < 0)
5030                 trace_array_put(tr);
5031
5032         return ret;
5033 }
5034
5035 /*
5036  * Some tracers are not suitable for instance buffers.
5037  * A tracer is always available for the global array (toplevel)
5038  * or if it explicitly states that it is.
5039  */
5040 static bool
5041 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5042 {
5043         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5044 }
5045
5046 /* Find the next tracer that this trace array may use */
5047 static struct tracer *
5048 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5049 {
5050         while (t && !trace_ok_for_array(t, tr))
5051                 t = t->next;
5052
5053         return t;
5054 }
5055
5056 static void *
5057 t_next(struct seq_file *m, void *v, loff_t *pos)
5058 {
5059         struct trace_array *tr = m->private;
5060         struct tracer *t = v;
5061
5062         (*pos)++;
5063
5064         if (t)
5065                 t = get_tracer_for_array(tr, t->next);
5066
5067         return t;
5068 }
5069
5070 static void *t_start(struct seq_file *m, loff_t *pos)
5071 {
5072         struct trace_array *tr = m->private;
5073         struct tracer *t;
5074         loff_t l = 0;
5075
5076         mutex_lock(&trace_types_lock);
5077
5078         t = get_tracer_for_array(tr, trace_types);
5079         for (; t && l < *pos; t = t_next(m, t, &l))
5080                         ;
5081
5082         return t;
5083 }
5084
5085 static void t_stop(struct seq_file *m, void *p)
5086 {
5087         mutex_unlock(&trace_types_lock);
5088 }
5089
5090 static int t_show(struct seq_file *m, void *v)
5091 {
5092         struct tracer *t = v;
5093
5094         if (!t)
5095                 return 0;
5096
5097         seq_puts(m, t->name);
5098         if (t->next)
5099                 seq_putc(m, ' ');
5100         else
5101                 seq_putc(m, '\n');
5102
5103         return 0;
5104 }
5105
5106 static const struct seq_operations show_traces_seq_ops = {
5107         .start          = t_start,
5108         .next           = t_next,
5109         .stop           = t_stop,
5110         .show           = t_show,
5111 };
5112
5113 static int show_traces_open(struct inode *inode, struct file *file)
5114 {
5115         struct trace_array *tr = inode->i_private;
5116         struct seq_file *m;
5117         int ret;
5118
5119         ret = tracing_check_open_get_tr(tr);
5120         if (ret)
5121                 return ret;
5122
5123         ret = seq_open(file, &show_traces_seq_ops);
5124         if (ret) {
5125                 trace_array_put(tr);
5126                 return ret;
5127         }
5128
5129         m = file->private_data;
5130         m->private = tr;
5131
5132         return 0;
5133 }
5134
5135 static int show_traces_release(struct inode *inode, struct file *file)
5136 {
5137         struct trace_array *tr = inode->i_private;
5138
5139         trace_array_put(tr);
5140         return seq_release(inode, file);
5141 }
5142
5143 static ssize_t
5144 tracing_write_stub(struct file *filp, const char __user *ubuf,
5145                    size_t count, loff_t *ppos)
5146 {
5147         return count;
5148 }
5149
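/*
 * tracing_lseek - llseek implementation shared by tracing files
 *
 * Readers are routed through seq_lseek(); write-only opens (used e.g. to
 * clear a buffer) simply have their file position reset to zero.
 */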
5150 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5151 {
5152         int ret;
5153
5154         if (file->f_mode & FMODE_READ)
5155                 ret = seq_lseek(file, offset, whence);
5156         else
5157                 file->f_pos = ret = 0;
5158
5159         return ret;
5160 }
5161
5162 static const struct file_operations tracing_fops = {
5163         .open           = tracing_open,
5164         .read           = seq_read,
5165         .read_iter      = seq_read_iter,
5166         .splice_read    = generic_file_splice_read,
5167         .write          = tracing_write_stub,
5168         .llseek         = tracing_lseek,
5169         .release        = tracing_release,
5170 };
5171
5172 static const struct file_operations show_traces_fops = {
5173         .open           = show_traces_open,
5174         .read           = seq_read,
5175         .llseek         = seq_lseek,
5176         .release        = show_traces_release,
5177 };
5178
5179 static ssize_t
5180 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5181                      size_t count, loff_t *ppos)
5182 {
5183         struct trace_array *tr = file_inode(filp)->i_private;
5184         char *mask_str;
5185         int len;
5186
5187         len = snprintf(NULL, 0, "%*pb\n",
5188                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5189         mask_str = kmalloc(len, GFP_KERNEL);
5190         if (!mask_str)
5191                 return -ENOMEM;
5192
5193         len = snprintf(mask_str, len, "%*pb\n",
5194                        cpumask_pr_args(tr->tracing_cpumask));
5195         if (len >= count) {
5196                 count = -EINVAL;
5197                 goto out_err;
5198         }
5199         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5200
5201 out_err:
5202         kfree(mask_str);
5203
5204         return count;
5205 }
5206
5207 int tracing_set_cpumask(struct trace_array *tr,
5208                         cpumask_var_t tracing_cpumask_new)
5209 {
5210         int cpu;
5211
5212         if (!tr)
5213                 return -EINVAL;
5214
5215         local_irq_disable();
5216         arch_spin_lock(&tr->max_lock);
5217         for_each_tracing_cpu(cpu) {
5218                 /*
5219                  * Increase/decrease the disabled counter if we are
5220                  * about to flip a bit in the cpumask:
5221                  */
5222                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5223                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5224                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5225                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5226 #ifdef CONFIG_TRACER_MAX_TRACE
5227                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5228 #endif
5229                 }
5230                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5231                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5232                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5233                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5234 #ifdef CONFIG_TRACER_MAX_TRACE
5235                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5236 #endif
5237                 }
5238         }
5239         arch_spin_unlock(&tr->max_lock);
5240         local_irq_enable();
5241
5242         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5243
5244         return 0;
5245 }
5246
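/*
 * Write handler for the "tracing_cpumask" file.  The user buffer is parsed
 * with cpumask_parse_user(), i.e. as a hexadecimal CPU mask.  A minimal
 * usage sketch (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   echo 3 > /sys/kernel/tracing/tracing_cpumask   # trace CPUs 0 and 1 only
 */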
5247 static ssize_t
5248 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5249                       size_t count, loff_t *ppos)
5250 {
5251         struct trace_array *tr = file_inode(filp)->i_private;
5252         cpumask_var_t tracing_cpumask_new;
5253         int err;
5254
5255         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5256                 return -ENOMEM;
5257
5258         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5259         if (err)
5260                 goto err_free;
5261
5262         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5263         if (err)
5264                 goto err_free;
5265
5266         free_cpumask_var(tracing_cpumask_new);
5267
5268         return count;
5269
5270 err_free:
5271         free_cpumask_var(tracing_cpumask_new);
5272
5273         return err;
5274 }
5275
5276 static const struct file_operations tracing_cpumask_fops = {
5277         .open           = tracing_open_generic_tr,
5278         .read           = tracing_cpumask_read,
5279         .write          = tracing_cpumask_write,
5280         .release        = tracing_release_generic_tr,
5281         .llseek         = generic_file_llseek,
5282 };
5283
5284 static int tracing_trace_options_show(struct seq_file *m, void *v)
5285 {
5286         struct tracer_opt *trace_opts;
5287         struct trace_array *tr = m->private;
5288         u32 tracer_flags;
5289         int i;
5290
5291         mutex_lock(&trace_types_lock);
5292         tracer_flags = tr->current_trace->flags->val;
5293         trace_opts = tr->current_trace->flags->opts;
5294
5295         for (i = 0; trace_options[i]; i++) {
5296                 if (tr->trace_flags & (1 << i))
5297                         seq_printf(m, "%s\n", trace_options[i]);
5298                 else
5299                         seq_printf(m, "no%s\n", trace_options[i]);
5300         }
5301
5302         for (i = 0; trace_opts[i].name; i++) {
5303                 if (tracer_flags & trace_opts[i].bit)
5304                         seq_printf(m, "%s\n", trace_opts[i].name);
5305                 else
5306                         seq_printf(m, "no%s\n", trace_opts[i].name);
5307         }
5308         mutex_unlock(&trace_types_lock);
5309
5310         return 0;
5311 }
5312
5313 static int __set_tracer_option(struct trace_array *tr,
5314                                struct tracer_flags *tracer_flags,
5315                                struct tracer_opt *opts, int neg)
5316 {
5317         struct tracer *trace = tracer_flags->trace;
5318         int ret;
5319
5320         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5321         if (ret)
5322                 return ret;
5323
5324         if (neg)
5325                 tracer_flags->val &= ~opts->bit;
5326         else
5327                 tracer_flags->val |= opts->bit;
5328         return 0;
5329 }
5330
5331 /* Try to assign a tracer specific option */
5332 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5333 {
5334         struct tracer *trace = tr->current_trace;
5335         struct tracer_flags *tracer_flags = trace->flags;
5336         struct tracer_opt *opts = NULL;
5337         int i;
5338
5339         for (i = 0; tracer_flags->opts[i].name; i++) {
5340                 opts = &tracer_flags->opts[i];
5341
5342                 if (strcmp(cmp, opts->name) == 0)
5343                         return __set_tracer_option(tr, trace->flags, opts, neg);
5344         }
5345
5346         return -EINVAL;
5347 }
5348
5349 /* Some tracers require overwrite to stay enabled */
5350 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5351 {
5352         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5353                 return -1;
5354
5355         return 0;
5356 }
5357
5358 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5359 {
5360         int *map;
5361
5362         if ((mask == TRACE_ITER_RECORD_TGID) ||
5363             (mask == TRACE_ITER_RECORD_CMD))
5364                 lockdep_assert_held(&event_mutex);
5365
5366         /* do nothing if flag is already set */
5367         if (!!(tr->trace_flags & mask) == !!enabled)
5368                 return 0;
5369
5370         /* Give the tracer a chance to approve the change */
5371         if (tr->current_trace->flag_changed)
5372                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5373                         return -EINVAL;
5374
5375         if (enabled)
5376                 tr->trace_flags |= mask;
5377         else
5378                 tr->trace_flags &= ~mask;
5379
5380         if (mask == TRACE_ITER_RECORD_CMD)
5381                 trace_event_enable_cmd_record(enabled);
5382
5383         if (mask == TRACE_ITER_RECORD_TGID) {
5384                 if (!tgid_map) {
5385                         tgid_map_max = pid_max;
5386                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5387                                        GFP_KERNEL);
5388
5389                         /*
5390                          * Pairs with smp_load_acquire() in
5391                          * trace_find_tgid_ptr() to ensure that if it observes
5392                          * the tgid_map we just allocated then it also observes
5393                          * the corresponding tgid_map_max value.
5394                          */
5395                         smp_store_release(&tgid_map, map);
5396                 }
5397                 if (!tgid_map) {
5398                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5399                         return -ENOMEM;
5400                 }
5401
5402                 trace_event_enable_tgid_record(enabled);
5403         }
5404
5405         if (mask == TRACE_ITER_EVENT_FORK)
5406                 trace_event_follow_fork(tr, enabled);
5407
5408         if (mask == TRACE_ITER_FUNC_FORK)
5409                 ftrace_pid_follow_fork(tr, enabled);
5410
5411         if (mask == TRACE_ITER_OVERWRITE) {
5412                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5413 #ifdef CONFIG_TRACER_MAX_TRACE
5414                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5415 #endif
5416         }
5417
5418         if (mask == TRACE_ITER_PRINTK) {
5419                 trace_printk_start_stop_comm(enabled);
5420                 trace_printk_control(enabled);
5421         }
5422
5423         return 0;
5424 }
5425
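/*
 * trace_set_options - apply one option string to a trace array
 *
 * The string is either a core trace flag, optionally prefixed with "no"
 * to clear it, or a tracer-specific option.  A minimal usage sketch
 * (writing a core flag through the trace_options file):
 *
 *   echo noprint-parent > trace_options
 */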
5426 int trace_set_options(struct trace_array *tr, char *option)
5427 {
5428         char *cmp;
5429         int neg = 0;
5430         int ret;
5431         size_t orig_len = strlen(option);
5432         int len;
5433
5434         cmp = strstrip(option);
5435
5436         len = str_has_prefix(cmp, "no");
5437         if (len)
5438                 neg = 1;
5439
5440         cmp += len;
5441
5442         mutex_lock(&event_mutex);
5443         mutex_lock(&trace_types_lock);
5444
5445         ret = match_string(trace_options, -1, cmp);
5446         /* If no option could be set, test the specific tracer options */
5447         if (ret < 0)
5448                 ret = set_tracer_option(tr, cmp, neg);
5449         else
5450                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5451
5452         mutex_unlock(&trace_types_lock);
5453         mutex_unlock(&event_mutex);
5454
5455         /*
5456          * If the first trailing whitespace is replaced with '\0' by strstrip,
5457          * turn it back into a space.
5458          */
5459         if (orig_len > strlen(option))
5460                 option[strlen(option)] = ' ';
5461
5462         return ret;
5463 }
5464
5465 static void __init apply_trace_boot_options(void)
5466 {
5467         char *buf = trace_boot_options_buf;
5468         char *option;
5469
5470         while (true) {
5471                 option = strsep(&buf, ",");
5472
5473                 if (!option)
5474                         break;
5475
5476                 if (*option)
5477                         trace_set_options(&global_trace, option);
5478
5479                 /* Put back the comma to allow this to be called again */
5480                 if (buf)
5481                         *(buf - 1) = ',';
5482         }
5483 }
5484
5485 static ssize_t
5486 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5487                         size_t cnt, loff_t *ppos)
5488 {
5489         struct seq_file *m = filp->private_data;
5490         struct trace_array *tr = m->private;
5491         char buf[64];
5492         int ret;
5493
5494         if (cnt >= sizeof(buf))
5495                 return -EINVAL;
5496
5497         if (copy_from_user(buf, ubuf, cnt))
5498                 return -EFAULT;
5499
5500         buf[cnt] = 0;
5501
5502         ret = trace_set_options(tr, buf);
5503         if (ret < 0)
5504                 return ret;
5505
5506         *ppos += cnt;
5507
5508         return cnt;
5509 }
5510
5511 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5512 {
5513         struct trace_array *tr = inode->i_private;
5514         int ret;
5515
5516         ret = tracing_check_open_get_tr(tr);
5517         if (ret)
5518                 return ret;
5519
5520         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5521         if (ret < 0)
5522                 trace_array_put(tr);
5523
5524         return ret;
5525 }
5526
5527 static const struct file_operations tracing_iter_fops = {
5528         .open           = tracing_trace_options_open,
5529         .read           = seq_read,
5530         .llseek         = seq_lseek,
5531         .release        = tracing_single_release_tr,
5532         .write          = tracing_trace_options_write,
5533 };
5534
5535 static const char readme_msg[] =
5536         "tracing mini-HOWTO:\n\n"
5537         "# echo 0 > tracing_on : quick way to disable tracing\n"
5538         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5539         " Important files:\n"
5540         "  trace\t\t\t- The static contents of the buffer\n"
5541         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5542         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5543         "  current_tracer\t- function and latency tracers\n"
5544         "  available_tracers\t- list of configured tracers for current_tracer\n"
5545         "  error_log\t- error log for failed commands (that support it)\n"
5546         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5547         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5548         "  trace_clock\t\t- change the clock used to order events\n"
5549         "       local:   Per cpu clock but may not be synced across CPUs\n"
5550         "      global:   Synced across CPUs but slows tracing down.\n"
5551         "     counter:   Not a clock, but just an increment\n"
5552         "      uptime:   Jiffy counter from time of boot\n"
5553         "        perf:   Same clock that perf events use\n"
5554 #ifdef CONFIG_X86_64
5555         "     x86-tsc:   TSC cycle counter\n"
5556 #endif
5557         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5558         "       delta:   Delta difference against a buffer-wide timestamp\n"
5559         "    absolute:   Absolute (standalone) timestamp\n"
5560         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5561         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5562         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5563         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5564         "\t\t\t  Remove sub-buffer with rmdir\n"
5565         "  trace_options\t\t- Set format or modify how tracing happens\n"
5566         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5567         "\t\t\t  option name\n"
5568         "  saved_cmdlines_size\t- echo a number in here to set the size of the saved comm-pid list\n"
5569 #ifdef CONFIG_DYNAMIC_FTRACE
5570         "\n  available_filter_functions - list of functions that can be filtered on\n"
5571         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5572         "\t\t\t  functions\n"
5573         "\t     accepts: func_full_name or glob-matching-pattern\n"
5574         "\t     modules: Can select a group via module\n"
5575         "\t      Format: :mod:<module-name>\n"
5576         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5577         "\t    triggers: a command to perform when function is hit\n"
5578         "\t      Format: <function>:<trigger>[:count]\n"
5579         "\t     trigger: traceon, traceoff\n"
5580         "\t\t      enable_event:<system>:<event>\n"
5581         "\t\t      disable_event:<system>:<event>\n"
5582 #ifdef CONFIG_STACKTRACE
5583         "\t\t      stacktrace\n"
5584 #endif
5585 #ifdef CONFIG_TRACER_SNAPSHOT
5586         "\t\t      snapshot\n"
5587 #endif
5588         "\t\t      dump\n"
5589         "\t\t      cpudump\n"
5590         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5591         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5592         "\t     The first one will disable tracing every time do_fault is hit\n"
5593         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5594         "\t       The first time do_trap is hit and it disables tracing, the\n"
5595         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5596         "\t       the counter will not decrement. It only decrements when the\n"
5597         "\t       trigger did work\n"
5598         "\t     To remove trigger without count:\n"
5599         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5600         "\t     To remove trigger with a count:\n"
5601         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5602         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5603         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5604         "\t    modules: Can select a group via module command :mod:\n"
5605         "\t    Does not accept triggers\n"
5606 #endif /* CONFIG_DYNAMIC_FTRACE */
5607 #ifdef CONFIG_FUNCTION_TRACER
5608         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5609         "\t\t    (function)\n"
5610         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5611         "\t\t    (function)\n"
5612 #endif
5613 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5614         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5615         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5616         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5617 #endif
5618 #ifdef CONFIG_TRACER_SNAPSHOT
5619         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5620         "\t\t\t  snapshot buffer. Read the contents for more\n"
5621         "\t\t\t  information\n"
5622 #endif
5623 #ifdef CONFIG_STACK_TRACER
5624         "  stack_trace\t\t- Shows the max stack trace when active\n"
5625         "  stack_max_size\t- Shows current max stack size that was traced\n"
5626         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5627         "\t\t\t  new trace)\n"
5628 #ifdef CONFIG_DYNAMIC_FTRACE
5629         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5630         "\t\t\t  traces\n"
5631 #endif
5632 #endif /* CONFIG_STACK_TRACER */
5633 #ifdef CONFIG_DYNAMIC_EVENTS
5634         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5635         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5636 #endif
5637 #ifdef CONFIG_KPROBE_EVENTS
5638         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5639         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5640 #endif
5641 #ifdef CONFIG_UPROBE_EVENTS
5642         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5643         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5644 #endif
5645 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5646         "\t  accepts: event-definitions (one definition per line)\n"
5647         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5648         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5649 #ifdef CONFIG_HIST_TRIGGERS
5650         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5651 #endif
5652         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5653         "\t           -:[<group>/][<event>]\n"
5654 #ifdef CONFIG_KPROBE_EVENTS
5655         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5656   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5657 #endif
5658 #ifdef CONFIG_UPROBE_EVENTS
5659   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5660 #endif
5661         "\t     args: <name>=fetcharg[:type]\n"
5662         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5663 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5664         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5665 #else
5666         "\t           $stack<index>, $stack, $retval, $comm,\n"
5667 #endif
5668         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5669         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5670         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5671         "\t           symstr, <type>\\[<array-size>\\]\n"
5672 #ifdef CONFIG_HIST_TRIGGERS
5673         "\t    field: <stype> <name>;\n"
5674         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5675         "\t           [unsigned] char/int/long\n"
5676 #endif
5677         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5678         "\t            of the <attached-group>/<attached-event>.\n"
5679 #endif
5680         "  events/\t\t- Directory containing all trace event subsystems:\n"
5681         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5682         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5683         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5684         "\t\t\t  events\n"
5685         "      filter\t\t- If set, only events passing filter are traced\n"
5686         "  events/<system>/<event>/\t- Directory containing control files for\n"
5687         "\t\t\t  <event>:\n"
5688         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5689         "      filter\t\t- If set, only events passing filter are traced\n"
5690         "      trigger\t\t- If set, a command to perform when event is hit\n"
5691         "\t    Format: <trigger>[:count][if <filter>]\n"
5692         "\t   trigger: traceon, traceoff\n"
5693         "\t            enable_event:<system>:<event>\n"
5694         "\t            disable_event:<system>:<event>\n"
5695 #ifdef CONFIG_HIST_TRIGGERS
5696         "\t            enable_hist:<system>:<event>\n"
5697         "\t            disable_hist:<system>:<event>\n"
5698 #endif
5699 #ifdef CONFIG_STACKTRACE
5700         "\t\t    stacktrace\n"
5701 #endif
5702 #ifdef CONFIG_TRACER_SNAPSHOT
5703         "\t\t    snapshot\n"
5704 #endif
5705 #ifdef CONFIG_HIST_TRIGGERS
5706         "\t\t    hist (see below)\n"
5707 #endif
5708         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5709         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5710         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5711         "\t                  events/block/block_unplug/trigger\n"
5712         "\t   The first disables tracing every time block_unplug is hit.\n"
5713         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5714         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5715         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5716         "\t   Like function triggers, the counter is only decremented if it\n"
5717         "\t    enabled or disabled tracing.\n"
5718         "\t   To remove a trigger without a count:\n"
5719         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5720         "\t   To remove a trigger with a count:\n"
5721         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5722         "\t   Filters can be ignored when removing a trigger.\n"
5723 #ifdef CONFIG_HIST_TRIGGERS
5724         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5725         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5726         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5727         "\t            [:values=<field1[,field2,...]>]\n"
5728         "\t            [:sort=<field1[,field2,...]>]\n"
5729         "\t            [:size=#entries]\n"
5730         "\t            [:pause][:continue][:clear]\n"
5731         "\t            [:name=histname1]\n"
5732         "\t            [:<handler>.<action>]\n"
5733         "\t            [if <filter>]\n\n"
5734         "\t    Note, special fields can be used as well:\n"
5735         "\t            common_timestamp - to record current timestamp\n"
5736         "\t            common_cpu - to record the CPU the event happened on\n"
5737         "\n"
5738         "\t    A hist trigger variable can be:\n"
5739         "\t        - a reference to a field: e.g. x=common_timestamp,\n"
5740         "\t        - a reference to another variable e.g. y=$x,\n"
5741         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5742         "\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5743         "\n"
5744         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5745         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5746         "\t    variable reference, field or numeric literal.\n"
5747         "\n"
5748         "\t    When a matching event is hit, an entry is added to a hash\n"
5749         "\t    table using the key(s) and value(s) named, and the value of a\n"
5750         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5751         "\t    correspond to fields in the event's format description.  Keys\n"
5752         "\t    can be any field, or the special string 'stacktrace'.\n"
5753         "\t    Compound keys consisting of up to two fields can be specified\n"
5754         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5755         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5756         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5757         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5758         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5759         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5760         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5761         "\t    its histogram data will be shared with other triggers of the\n"
5762         "\t    same name, and trigger hits will update this common data.\n\n"
5763         "\t    Reading the 'hist' file for the event will dump the hash\n"
5764         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5765         "\t    triggers attached to an event, there will be a table for each\n"
5766         "\t    trigger in the output.  The table displayed for a named\n"
5767         "\t    trigger will be the same as any other instance having the\n"
5768         "\t    same name.  The default format used to display a given field\n"
5769         "\t    can be modified by appending any of the following modifiers\n"
5770         "\t    to the field name, as applicable:\n\n"
5771         "\t            .hex        display a number as a hex value\n"
5772         "\t            .sym        display an address as a symbol\n"
5773         "\t            .sym-offset display an address as a symbol and offset\n"
5774         "\t            .execname   display a common_pid as a program name\n"
5775         "\t            .syscall    display a syscall id as a syscall name\n"
5776         "\t            .log2       display log2 value rather than raw number\n"
5777         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5778         "\t            .usecs      display a common_timestamp in microseconds\n"
5779         "\t            .percent    display a number as a percentage value\n"
5780         "\t            .graph      display a bar-graph of a value\n\n"
5781         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5782         "\t    trigger or to start a hist trigger but not log any events\n"
5783         "\t    until told to do so.  'continue' can be used to start or\n"
5784         "\t    restart a paused hist trigger.\n\n"
5785         "\t    The 'clear' parameter will clear the contents of a running\n"
5786         "\t    hist trigger and leave its current paused/active state\n"
5787         "\t    unchanged.\n\n"
5788         "\t    The enable_hist and disable_hist triggers can be used to\n"
5789         "\t    have one event conditionally start and stop another event's\n"
5790         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5791         "\t    the enable_event and disable_event triggers.\n\n"
5792         "\t    Hist trigger handlers and actions are executed whenever a\n"
5793         "\t    histogram entry is added or updated.  They take the form:\n\n"
5794         "\t        <handler>.<action>\n\n"
5795         "\t    The available handlers are:\n\n"
5796         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5797         "\t        onmax(var)               - invoke if var exceeds current max\n"
5798         "\t        onchange(var)            - invoke action if var changes\n\n"
5799         "\t    The available actions are:\n\n"
5800         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5801         "\t        save(field,...)                      - save current event fields\n"
5802 #ifdef CONFIG_TRACER_SNAPSHOT
5803         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5804 #endif
5805 #ifdef CONFIG_SYNTH_EVENTS
5806         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5807         "\t  Write into this file to define/undefine new synthetic events.\n"
5808         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5809 #endif
5810 #endif
5811 ;
5812
5813 static ssize_t
5814 tracing_readme_read(struct file *filp, char __user *ubuf,
5815                        size_t cnt, loff_t *ppos)
5816 {
5817         return simple_read_from_buffer(ubuf, cnt, ppos,
5818                                         readme_msg, strlen(readme_msg));
5819 }
5820
5821 static const struct file_operations tracing_readme_fops = {
5822         .open           = tracing_open_generic,
5823         .read           = tracing_readme_read,
5824         .llseek         = generic_file_llseek,
5825 };
5826
5827 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5828 {
5829         int pid = ++(*pos);
5830
5831         return trace_find_tgid_ptr(pid);
5832 }
5833
5834 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5835 {
5836         int pid = *pos;
5837
5838         return trace_find_tgid_ptr(pid);
5839 }
5840
5841 static void saved_tgids_stop(struct seq_file *m, void *v)
5842 {
5843 }
5844
5845 static int saved_tgids_show(struct seq_file *m, void *v)
5846 {
5847         int *entry = (int *)v;
5848         int pid = entry - tgid_map;
5849         int tgid = *entry;
5850
5851         if (tgid == 0)
5852                 return SEQ_SKIP;
5853
5854         seq_printf(m, "%d %d\n", pid, tgid);
5855         return 0;
5856 }
5857
5858 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5859         .start          = saved_tgids_start,
5860         .stop           = saved_tgids_stop,
5861         .next           = saved_tgids_next,
5862         .show           = saved_tgids_show,
5863 };
5864
5865 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5866 {
5867         int ret;
5868
5869         ret = tracing_check_open_get_tr(NULL);
5870         if (ret)
5871                 return ret;
5872
5873         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5874 }
5875
5876
5877 static const struct file_operations tracing_saved_tgids_fops = {
5878         .open           = tracing_saved_tgids_open,
5879         .read           = seq_read,
5880         .llseek         = seq_lseek,
5881         .release        = seq_release,
5882 };
5883
5884 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5885 {
5886         unsigned int *ptr = v;
5887
5888         if (*pos || m->count)
5889                 ptr++;
5890
5891         (*pos)++;
5892
5893         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5894              ptr++) {
5895                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5896                         continue;
5897
5898                 return ptr;
5899         }
5900
5901         return NULL;
5902 }
5903
5904 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5905 {
5906         void *v;
5907         loff_t l = 0;
5908
5909         preempt_disable();
5910         arch_spin_lock(&trace_cmdline_lock);
5911
5912         v = &savedcmd->map_cmdline_to_pid[0];
5913         while (l <= *pos) {
5914                 v = saved_cmdlines_next(m, v, &l);
5915                 if (!v)
5916                         return NULL;
5917         }
5918
5919         return v;
5920 }
5921
5922 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5923 {
5924         arch_spin_unlock(&trace_cmdline_lock);
5925         preempt_enable();
5926 }
5927
5928 static int saved_cmdlines_show(struct seq_file *m, void *v)
5929 {
5930         char buf[TASK_COMM_LEN];
5931         unsigned int *pid = v;
5932
5933         __trace_find_cmdline(*pid, buf);
5934         seq_printf(m, "%d %s\n", *pid, buf);
5935         return 0;
5936 }
5937
5938 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5939         .start          = saved_cmdlines_start,
5940         .next           = saved_cmdlines_next,
5941         .stop           = saved_cmdlines_stop,
5942         .show           = saved_cmdlines_show,
5943 };
5944
5945 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5946 {
5947         int ret;
5948
5949         ret = tracing_check_open_get_tr(NULL);
5950         if (ret)
5951                 return ret;
5952
5953         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5954 }
5955
5956 static const struct file_operations tracing_saved_cmdlines_fops = {
5957         .open           = tracing_saved_cmdlines_open,
5958         .read           = seq_read,
5959         .llseek         = seq_lseek,
5960         .release        = seq_release,
5961 };
5962
5963 static ssize_t
5964 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5965                                  size_t cnt, loff_t *ppos)
5966 {
5967         char buf[64];
5968         int r;
5969
5970         preempt_disable();
5971         arch_spin_lock(&trace_cmdline_lock);
5972         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5973         arch_spin_unlock(&trace_cmdline_lock);
5974         preempt_enable();
5975
5976         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5977 }
5978
5979 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5980 {
5981         kfree(s->saved_cmdlines);
5982         kfree(s->map_cmdline_to_pid);
5983         kfree(s);
5984 }
5985
5986 static int tracing_resize_saved_cmdlines(unsigned int val)
5987 {
5988         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5989
5990         s = kmalloc(sizeof(*s), GFP_KERNEL);
5991         if (!s)
5992                 return -ENOMEM;
5993
5994         if (allocate_cmdlines_buffer(val, s) < 0) {
5995                 kfree(s);
5996                 return -ENOMEM;
5997         }
5998
5999         preempt_disable();
6000         arch_spin_lock(&trace_cmdline_lock);
6001         savedcmd_temp = savedcmd;
6002         savedcmd = s;
6003         arch_spin_unlock(&trace_cmdline_lock);
6004         preempt_enable();
6005         free_saved_cmdlines_buffer(savedcmd_temp);
6006
6007         return 0;
6008 }
6009
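/*
 * Write handler for "saved_cmdlines_size".  The value must lie between 1
 * and PID_MAX_DEFAULT; on success the saved-cmdlines buffer is swapped for
 * a freshly allocated one of the requested size.  A minimal usage sketch:
 *
 *   echo 1024 > saved_cmdlines_size
 */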
6010 static ssize_t
6011 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6012                                   size_t cnt, loff_t *ppos)
6013 {
6014         unsigned long val;
6015         int ret;
6016
6017         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6018         if (ret)
6019                 return ret;
6020
6021         /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
6022         if (!val || val > PID_MAX_DEFAULT)
6023                 return -EINVAL;
6024
6025         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6026         if (ret < 0)
6027                 return ret;
6028
6029         *ppos += cnt;
6030
6031         return cnt;
6032 }
6033
6034 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6035         .open           = tracing_open_generic,
6036         .read           = tracing_saved_cmdlines_size_read,
6037         .write          = tracing_saved_cmdlines_size_write,
6038 };
6039
6040 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6041 static union trace_eval_map_item *
6042 update_eval_map(union trace_eval_map_item *ptr)
6043 {
6044         if (!ptr->map.eval_string) {
6045                 if (ptr->tail.next) {
6046                         ptr = ptr->tail.next;
6047                         /* Set ptr to the next real item (skip head) */
6048                         ptr++;
6049                 } else
6050                         return NULL;
6051         }
6052         return ptr;
6053 }
6054
6055 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6056 {
6057         union trace_eval_map_item *ptr = v;
6058
6059         /*
6060          * Paranoid! If ptr points to end, we don't want to increment past it.
6061          * This really should never happen.
6062          */
6063         (*pos)++;
6064         ptr = update_eval_map(ptr);
6065         if (WARN_ON_ONCE(!ptr))
6066                 return NULL;
6067
6068         ptr++;
6069         ptr = update_eval_map(ptr);
6070
6071         return ptr;
6072 }
6073
6074 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6075 {
6076         union trace_eval_map_item *v;
6077         loff_t l = 0;
6078
6079         mutex_lock(&trace_eval_mutex);
6080
6081         v = trace_eval_maps;
6082         if (v)
6083                 v++;
6084
6085         while (v && l < *pos) {
6086                 v = eval_map_next(m, v, &l);
6087         }
6088
6089         return v;
6090 }
6091
6092 static void eval_map_stop(struct seq_file *m, void *v)
6093 {
6094         mutex_unlock(&trace_eval_mutex);
6095 }
6096
6097 static int eval_map_show(struct seq_file *m, void *v)
6098 {
6099         union trace_eval_map_item *ptr = v;
6100
6101         seq_printf(m, "%s %ld (%s)\n",
6102                    ptr->map.eval_string, ptr->map.eval_value,
6103                    ptr->map.system);
6104
6105         return 0;
6106 }
6107
6108 static const struct seq_operations tracing_eval_map_seq_ops = {
6109         .start          = eval_map_start,
6110         .next           = eval_map_next,
6111         .stop           = eval_map_stop,
6112         .show           = eval_map_show,
6113 };
6114
6115 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6116 {
6117         int ret;
6118
6119         ret = tracing_check_open_get_tr(NULL);
6120         if (ret)
6121                 return ret;
6122
6123         return seq_open(filp, &tracing_eval_map_seq_ops);
6124 }
6125
6126 static const struct file_operations tracing_eval_map_fops = {
6127         .open           = tracing_eval_map_open,
6128         .read           = seq_read,
6129         .llseek         = seq_lseek,
6130         .release        = seq_release,
6131 };
6132
6133 static inline union trace_eval_map_item *
6134 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6135 {
6136         /* Return tail of array given the head */
6137         return ptr + ptr->head.length + 1;
6138 }
6139
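/*
 * Append a module's eval maps to the global trace_eval_maps list.  Each
 * appended array consists of a head item (module pointer and map count),
 * the copied maps themselves, and a zeroed tail item whose ->tail.next
 * links to the next array.
 */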
6140 static void
6141 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6142                            int len)
6143 {
6144         struct trace_eval_map **stop;
6145         struct trace_eval_map **map;
6146         union trace_eval_map_item *map_array;
6147         union trace_eval_map_item *ptr;
6148
6149         stop = start + len;
6150
6151         /*
6152          * The trace_eval_maps contains the map plus a head and tail item,
6153          * where the head holds the module and length of array, and the
6154          * tail holds a pointer to the next list.
6155          */
6156         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6157         if (!map_array) {
6158                 pr_warn("Unable to allocate trace eval mapping\n");
6159                 return;
6160         }
6161
6162         mutex_lock(&trace_eval_mutex);
6163
6164         if (!trace_eval_maps)
6165                 trace_eval_maps = map_array;
6166         else {
6167                 ptr = trace_eval_maps;
6168                 for (;;) {
6169                         ptr = trace_eval_jmp_to_tail(ptr);
6170                         if (!ptr->tail.next)
6171                                 break;
6172                         ptr = ptr->tail.next;
6173
6174                 }
6175                 ptr->tail.next = map_array;
6176         }
6177         map_array->head.mod = mod;
6178         map_array->head.length = len;
6179         map_array++;
6180
6181         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6182                 map_array->map = **map;
6183                 map_array++;
6184         }
6185         memset(map_array, 0, sizeof(*map_array));
6186
6187         mutex_unlock(&trace_eval_mutex);
6188 }
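/*
 * Roughly, the list built above looks like this, one block per module that
 * registered "len" eval maps:
 *
 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ] --> next block (or NULL)
 *
 * trace_eval_jmp_to_tail() jumps from a head item to its tail item, and the
 * seq_file iterators above walk the map items, skipping head/tail entries
 * via update_eval_map().
 */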
6189
6190 static void trace_create_eval_file(struct dentry *d_tracer)
6191 {
6192         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6193                           NULL, &tracing_eval_map_fops);
6194 }
6195
6196 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6197 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6198 static inline void trace_insert_eval_map_file(struct module *mod,
6199                               struct trace_eval_map **start, int len) { }
6200 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
6201
6202 static void trace_insert_eval_map(struct module *mod,
6203                                   struct trace_eval_map **start, int len)
6204 {
6205         struct trace_eval_map **map;
6206
6207         if (len <= 0)
6208                 return;
6209
6210         map = start;
6211
6212         trace_event_eval_update(map, len);
6213
6214         trace_insert_eval_map_file(mod, start, len);
6215 }
6216
6217 static ssize_t
6218 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6219                        size_t cnt, loff_t *ppos)
6220 {
6221         struct trace_array *tr = filp->private_data;
6222         char buf[MAX_TRACER_SIZE+2];
6223         int r;
6224
6225         mutex_lock(&trace_types_lock);
6226         r = sprintf(buf, "%s\n", tr->current_trace->name);
6227         mutex_unlock(&trace_types_lock);
6228
6229         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6230 }
6231
6232 int tracer_init(struct tracer *t, struct trace_array *tr)
6233 {
6234         tracing_reset_online_cpus(&tr->array_buffer);
6235         return t->init(tr);
6236 }
6237
6238 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6239 {
6240         int cpu;
6241
6242         for_each_tracing_cpu(cpu)
6243                 per_cpu_ptr(buf->data, cpu)->entries = val;
6244 }
6245
6246 #ifdef CONFIG_TRACER_MAX_TRACE
6247 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6248 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6249                                         struct array_buffer *size_buf, int cpu_id)
6250 {
6251         int cpu, ret = 0;
6252
6253         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6254                 for_each_tracing_cpu(cpu) {
6255                         ret = ring_buffer_resize(trace_buf->buffer,
6256                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6257                         if (ret < 0)
6258                                 break;
6259                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6260                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6261                 }
6262         } else {
6263                 ret = ring_buffer_resize(trace_buf->buffer,
6264                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6265                 if (ret == 0)
6266                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6267                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6268         }
6269
6270         return ret;
6271 }
6272 #endif /* CONFIG_TRACER_MAX_TRACE */
6273
6274 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6275                                         unsigned long size, int cpu)
6276 {
6277         int ret;
6278
6279         /*
6280          * If kernel or user changes the size of the ring buffer
6281          * we use the size that was given, and we can forget about
6282          * expanding it later.
6283          */
6284         ring_buffer_expanded = true;
6285
6286         /* May be called before buffers are initialized */
6287         if (!tr->array_buffer.buffer)
6288                 return 0;
6289
6290         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6291         if (ret < 0)
6292                 return ret;
6293
6294 #ifdef CONFIG_TRACER_MAX_TRACE
6295         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6296             !tr->current_trace->use_max_tr)
6297                 goto out;
6298
6299         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6300         if (ret < 0) {
6301                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6302                                                      &tr->array_buffer, cpu);
6303                 if (r < 0) {
6304                         /*
6305                          * AARGH! We are left with a max buffer of a
6306                          * different size!
6307                          * The max buffer is our "snapshot" buffer.
6308                          * When a tracer needs a snapshot (one of the
6309                          * latency tracers), it swaps the max buffer
6310                          * with the saved snapshot. We succeeded in
6311                          * updating the size of the main buffer, but
6312                          * failed to update the size of the max buffer.
6313                          * And when we tried to reset the main buffer to
6314                          * the original size, we failed there too. This
6315                          * is very unlikely to happen, but if it does,
6316                          * warn and kill all tracing.
6317                          */
6318                         WARN_ON(1);
6319                         tracing_disabled = 1;
6320                 }
6321                 return ret;
6322         }
6323
6324         if (cpu == RING_BUFFER_ALL_CPUS)
6325                 set_buffer_entries(&tr->max_buffer, size);
6326         else
6327                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6328
6329  out:
6330 #endif /* CONFIG_TRACER_MAX_TRACE */
6331
6332         if (cpu == RING_BUFFER_ALL_CPUS)
6333                 set_buffer_entries(&tr->array_buffer, size);
6334         else
6335                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6336
6337         return ret;
6338 }
6339
6340 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6341                                   unsigned long size, int cpu_id)
6342 {
6343         int ret;
6344
6345         mutex_lock(&trace_types_lock);
6346
6347         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6348                 /* make sure this CPU is enabled in the mask */
6349                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6350                         ret = -EINVAL;
6351                         goto out;
6352                 }
6353         }
6354
6355         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6356         if (ret < 0)
6357                 ret = -ENOMEM;
6358
6359 out:
6360         mutex_unlock(&trace_types_lock);
6361
6362         return ret;
6363 }
6364
6365
6366 /**
6367  * tracing_update_buffers - used by tracing facility to expand ring buffers
6368  *
6369  * To save on memory when the tracing is never used on a system with it
6370  * To save memory when tracing is configured in but never used, the
6371  * ring buffers are initially set to a minimum size. Once a user
6372  * starts to use the tracing facility, they need to grow to their
6373  * default size.
6374  * This function is to be called when a tracer is about to be used.
6375  */
6376 int tracing_update_buffers(void)
6377 {
6378         int ret = 0;
6379
6380         mutex_lock(&trace_types_lock);
6381         if (!ring_buffer_expanded)
6382                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6383                                                 RING_BUFFER_ALL_CPUS);
6384         mutex_unlock(&trace_types_lock);
6385
6386         return ret;
6387 }
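/*
 * Typical use, as seen in the write handlers further down in this file:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *
 * so the first real user of tracing pays the one-time cost of growing the
 * buffers from their minimal boot-time size to trace_buf_size.
 */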
6388
6389 struct trace_option_dentry;
6390
6391 static void
6392 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6393
6394 /*
6395  * Used to clear out the tracer before deletion of an instance.
6396  * Must have trace_types_lock held.
6397  */
6398 static void tracing_set_nop(struct trace_array *tr)
6399 {
6400         if (tr->current_trace == &nop_trace)
6401                 return;
6402
6403         tr->current_trace->enabled--;
6404
6405         if (tr->current_trace->reset)
6406                 tr->current_trace->reset(tr);
6407
6408         tr->current_trace = &nop_trace;
6409 }
6410
6411 static bool tracer_options_updated;
6412
6413 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6414 {
6415         /* Only enable if the directory has been created already. */
6416         if (!tr->dir)
6417                 return;
6418
6419         /* Only create trace option files after update_tracer_options finishes */
6420         if (!tracer_options_updated)
6421                 return;
6422
6423         create_trace_option_files(tr, t);
6424 }
6425
6426 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6427 {
6428         struct tracer *t;
6429 #ifdef CONFIG_TRACER_MAX_TRACE
6430         bool had_max_tr;
6431 #endif
6432         int ret = 0;
6433
6434         mutex_lock(&trace_types_lock);
6435
6436         if (!ring_buffer_expanded) {
6437                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6438                                                 RING_BUFFER_ALL_CPUS);
6439                 if (ret < 0)
6440                         goto out;
6441                 ret = 0;
6442         }
6443
6444         for (t = trace_types; t; t = t->next) {
6445                 if (strcmp(t->name, buf) == 0)
6446                         break;
6447         }
6448         if (!t) {
6449                 ret = -EINVAL;
6450                 goto out;
6451         }
6452         if (t == tr->current_trace)
6453                 goto out;
6454
6455 #ifdef CONFIG_TRACER_SNAPSHOT
6456         if (t->use_max_tr) {
6457                 local_irq_disable();
6458                 arch_spin_lock(&tr->max_lock);
6459                 if (tr->cond_snapshot)
6460                         ret = -EBUSY;
6461                 arch_spin_unlock(&tr->max_lock);
6462                 local_irq_enable();
6463                 if (ret)
6464                         goto out;
6465         }
6466 #endif
6467         /* Some tracers won't work on kernel command line */
6468         if (system_state < SYSTEM_RUNNING && t->noboot) {
6469                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6470                         t->name);
6471                 goto out;
6472         }
6473
6474         /* Some tracers are only allowed for the top level buffer */
6475         if (!trace_ok_for_array(t, tr)) {
6476                 ret = -EINVAL;
6477                 goto out;
6478         }
6479
6480         /* If trace pipe files are being read, we can't change the tracer */
6481         if (tr->trace_ref) {
6482                 ret = -EBUSY;
6483                 goto out;
6484         }
6485
6486         trace_branch_disable();
6487
6488         tr->current_trace->enabled--;
6489
6490         if (tr->current_trace->reset)
6491                 tr->current_trace->reset(tr);
6492
6493 #ifdef CONFIG_TRACER_MAX_TRACE
6494         had_max_tr = tr->current_trace->use_max_tr;
6495
6496         /* Current trace needs to be nop_trace before synchronize_rcu */
6497         tr->current_trace = &nop_trace;
6498
6499         if (had_max_tr && !t->use_max_tr) {
6500                 /*
6501                  * We need to make sure that the update_max_tr sees that
6502                  * current_trace changed to nop_trace to keep it from
6503                  * swapping the buffers after we resize it.
6504                  * update_max_tr() is called with interrupts disabled,
6505                  * so a synchronize_rcu() is sufficient.
6506                  */
6507                 synchronize_rcu();
6508                 free_snapshot(tr);
6509         }
6510
6511         if (t->use_max_tr && !tr->allocated_snapshot) {
6512                 ret = tracing_alloc_snapshot_instance(tr);
6513                 if (ret < 0)
6514                         goto out;
6515         }
6516 #else
6517         tr->current_trace = &nop_trace;
6518 #endif
6519
6520         if (t->init) {
6521                 ret = tracer_init(t, tr);
6522                 if (ret)
6523                         goto out;
6524         }
6525
6526         tr->current_trace = t;
6527         tr->current_trace->enabled++;
6528         trace_branch_enable(tr);
6529  out:
6530         mutex_unlock(&trace_types_lock);
6531
6532         return ret;
6533 }
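/*
 * This is the backend of the "current_tracer" tracefs file; for example
 * (with the function tracer configured in):
 *
 *	# echo function > /sys/kernel/tracing/current_tracer
 *	# echo nop > /sys/kernel/tracing/current_tracer
 *
 * selects a tracer by name, and -EINVAL is returned if the name does not
 * match any registered tracer.
 */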
6534
6535 static ssize_t
6536 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6537                         size_t cnt, loff_t *ppos)
6538 {
6539         struct trace_array *tr = filp->private_data;
6540         char buf[MAX_TRACER_SIZE+1];
6541         char *name;
6542         size_t ret;
6543         int err;
6544
6545         ret = cnt;
6546
6547         if (cnt > MAX_TRACER_SIZE)
6548                 cnt = MAX_TRACER_SIZE;
6549
6550         if (copy_from_user(buf, ubuf, cnt))
6551                 return -EFAULT;
6552
6553         buf[cnt] = 0;
6554
6555         name = strim(buf);
6556
6557         err = tracing_set_tracer(tr, name);
6558         if (err)
6559                 return err;
6560
6561         *ppos += ret;
6562
6563         return ret;
6564 }
6565
6566 static ssize_t
6567 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6568                    size_t cnt, loff_t *ppos)
6569 {
6570         char buf[64];
6571         int r;
6572
6573         r = snprintf(buf, sizeof(buf), "%ld\n",
6574                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6575         if (r > sizeof(buf))
6576                 r = sizeof(buf);
6577         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6578 }
6579
6580 static ssize_t
6581 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6582                     size_t cnt, loff_t *ppos)
6583 {
6584         unsigned long val;
6585         int ret;
6586
6587         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6588         if (ret)
6589                 return ret;
6590
6591         *ptr = val * 1000;
6592
6593         return cnt;
6594 }
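/*
 * Note the unit convention of the two helpers above: the files expose
 * values in microseconds while the backing variables hold nanoseconds.
 * Writing "100" stores 100000 (100 * 1000), and a stored value of
 * (unsigned long)-1 reads back as "-1" without conversion.
 */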
6595
6596 static ssize_t
6597 tracing_thresh_read(struct file *filp, char __user *ubuf,
6598                     size_t cnt, loff_t *ppos)
6599 {
6600         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6601 }
6602
6603 static ssize_t
6604 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6605                      size_t cnt, loff_t *ppos)
6606 {
6607         struct trace_array *tr = filp->private_data;
6608         int ret;
6609
6610         mutex_lock(&trace_types_lock);
6611         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6612         if (ret < 0)
6613                 goto out;
6614
6615         if (tr->current_trace->update_thresh) {
6616                 ret = tr->current_trace->update_thresh(tr);
6617                 if (ret < 0)
6618                         goto out;
6619         }
6620
6621         ret = cnt;
6622 out:
6623         mutex_unlock(&trace_types_lock);
6624
6625         return ret;
6626 }
6627
6628 #ifdef CONFIG_TRACER_MAX_TRACE
6629
6630 static ssize_t
6631 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6632                      size_t cnt, loff_t *ppos)
6633 {
6634         struct trace_array *tr = filp->private_data;
6635
6636         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6637 }
6638
6639 static ssize_t
6640 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6641                       size_t cnt, loff_t *ppos)
6642 {
6643         struct trace_array *tr = filp->private_data;
6644
6645         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6646 }
6647
6648 #endif
6649
6650 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6651 {
6652         if (cpu == RING_BUFFER_ALL_CPUS) {
6653                 if (cpumask_empty(tr->pipe_cpumask)) {
6654                         cpumask_setall(tr->pipe_cpumask);
6655                         return 0;
6656                 }
6657         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6658                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6659                 return 0;
6660         }
6661         return -EBUSY;
6662 }
6663
6664 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6665 {
6666         if (cpu == RING_BUFFER_ALL_CPUS) {
6667                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6668                 cpumask_clear(tr->pipe_cpumask);
6669         } else {
6670                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6671                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6672         }
6673 }
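/*
 * Together, open_pipe_on_cpu() and close_pipe_on_cpu() enforce exclusive
 * access to trace_pipe: the "all CPUs" pipe can only be opened while no
 * per-CPU pipe is open (it then claims every bit in pipe_cpumask), and
 * each per-CPU pipe can have at most one reader. Conflicting opens fail
 * with -EBUSY.
 */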
6674
6675 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6676 {
6677         struct trace_array *tr = inode->i_private;
6678         struct trace_iterator *iter;
6679         int cpu;
6680         int ret;
6681
6682         ret = tracing_check_open_get_tr(tr);
6683         if (ret)
6684                 return ret;
6685
6686         mutex_lock(&trace_types_lock);
6687         cpu = tracing_get_cpu(inode);
6688         ret = open_pipe_on_cpu(tr, cpu);
6689         if (ret)
6690                 goto fail_pipe_on_cpu;
6691
6692         /* create a buffer to store the information to pass to userspace */
6693         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6694         if (!iter) {
6695                 ret = -ENOMEM;
6696                 goto fail_alloc_iter;
6697         }
6698
6699         trace_seq_init(&iter->seq);
6700         iter->trace = tr->current_trace;
6701
6702         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6703                 ret = -ENOMEM;
6704                 goto fail;
6705         }
6706
6707         /* trace pipe does not show start of buffer */
6708         cpumask_setall(iter->started);
6709
6710         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6711                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6712
6713         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6714         if (trace_clocks[tr->clock_id].in_ns)
6715                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6716
6717         iter->tr = tr;
6718         iter->array_buffer = &tr->array_buffer;
6719         iter->cpu_file = cpu;
6720         mutex_init(&iter->mutex);
6721         filp->private_data = iter;
6722
6723         if (iter->trace->pipe_open)
6724                 iter->trace->pipe_open(iter);
6725
6726         nonseekable_open(inode, filp);
6727
6728         tr->trace_ref++;
6729
6730         mutex_unlock(&trace_types_lock);
6731         return ret;
6732
6733 fail:
6734         kfree(iter);
6735 fail_alloc_iter:
6736         close_pipe_on_cpu(tr, cpu);
6737 fail_pipe_on_cpu:
6738         __trace_array_put(tr);
6739         mutex_unlock(&trace_types_lock);
6740         return ret;
6741 }
6742
6743 static int tracing_release_pipe(struct inode *inode, struct file *file)
6744 {
6745         struct trace_iterator *iter = file->private_data;
6746         struct trace_array *tr = inode->i_private;
6747
6748         mutex_lock(&trace_types_lock);
6749
6750         tr->trace_ref--;
6751
6752         if (iter->trace->pipe_close)
6753                 iter->trace->pipe_close(iter);
6754         close_pipe_on_cpu(tr, iter->cpu_file);
6755         mutex_unlock(&trace_types_lock);
6756
6757         free_cpumask_var(iter->started);
6758         kfree(iter->fmt);
6759         kfree(iter->temp);
6760         mutex_destroy(&iter->mutex);
6761         kfree(iter);
6762
6763         trace_array_put(tr);
6764
6765         return 0;
6766 }
6767
6768 static __poll_t
6769 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6770 {
6771         struct trace_array *tr = iter->tr;
6772
6773         /* Iterators are static, they should be filled or empty */
6774         if (trace_buffer_iter(iter, iter->cpu_file))
6775                 return EPOLLIN | EPOLLRDNORM;
6776
6777         if (tr->trace_flags & TRACE_ITER_BLOCK)
6778                 /*
6779                  * Always select as readable when in blocking mode
6780                  */
6781                 return EPOLLIN | EPOLLRDNORM;
6782         else
6783                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6784                                              filp, poll_table, iter->tr->buffer_percent);
6785 }
6786
6787 static __poll_t
6788 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6789 {
6790         struct trace_iterator *iter = filp->private_data;
6791
6792         return trace_poll(iter, filp, poll_table);
6793 }
6794
6795 /* Must be called with iter->mutex held. */
6796 static int tracing_wait_pipe(struct file *filp)
6797 {
6798         struct trace_iterator *iter = filp->private_data;
6799         int ret;
6800
6801         while (trace_empty(iter)) {
6802
6803                 if ((filp->f_flags & O_NONBLOCK)) {
6804                         return -EAGAIN;
6805                 }
6806
6807                 /*
6808                  * We block until we read something and tracing is disabled.
6809                  * We still block if tracing is disabled, but we have never
6810                  * read anything. This allows a user to cat this file, and
6811                  * then enable tracing. But after we have read something,
6812                  * we give an EOF when tracing is again disabled.
6813                  *
6814                  * iter->pos will be 0 if we haven't read anything.
6815                  */
6816                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6817                         break;
6818
6819                 mutex_unlock(&iter->mutex);
6820
6821                 ret = wait_on_pipe(iter, 0);
6822
6823                 mutex_lock(&iter->mutex);
6824
6825                 if (ret)
6826                         return ret;
6827         }
6828
6829         return 1;
6830 }
6831
6832 /*
6833  * Consumer reader.
6834  */
6835 static ssize_t
6836 tracing_read_pipe(struct file *filp, char __user *ubuf,
6837                   size_t cnt, loff_t *ppos)
6838 {
6839         struct trace_iterator *iter = filp->private_data;
6840         ssize_t sret;
6841
6842         /*
6843          * Avoid more than one consumer on a single file descriptor.
6844          * This is just a matter of trace coherency; the ring buffer
6845          * itself is protected.
6846          */
6847         mutex_lock(&iter->mutex);
6848
6849         /* return any leftover data */
6850         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6851         if (sret != -EBUSY)
6852                 goto out;
6853
6854         trace_seq_init(&iter->seq);
6855
6856         if (iter->trace->read) {
6857                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6858                 if (sret)
6859                         goto out;
6860         }
6861
6862 waitagain:
6863         sret = tracing_wait_pipe(filp);
6864         if (sret <= 0)
6865                 goto out;
6866
6867         /* stop when tracing is finished */
6868         if (trace_empty(iter)) {
6869                 sret = 0;
6870                 goto out;
6871         }
6872
6873         if (cnt >= PAGE_SIZE)
6874                 cnt = PAGE_SIZE - 1;
6875
6876         /* reset all but tr, trace, and overruns */
6877         trace_iterator_reset(iter);
6878         cpumask_clear(iter->started);
6879         trace_seq_init(&iter->seq);
6880
6881         trace_event_read_lock();
6882         trace_access_lock(iter->cpu_file);
6883         while (trace_find_next_entry_inc(iter) != NULL) {
6884                 enum print_line_t ret;
6885                 int save_len = iter->seq.seq.len;
6886
6887                 ret = print_trace_line(iter);
6888                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6889                         /*
6890                          * If one print_trace_line() fills the entire trace_seq in one
6891                          * shot, trace_seq_to_user() will return -EBUSY because save_len
6892                          * is 0. In this case, we need to consume the event; otherwise
6893                          * the loop will peek it again next time, causing an infinite loop.
6894                          */
6895                         if (save_len == 0) {
6896                                 iter->seq.full = 0;
6897                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6898                                 trace_consume(iter);
6899                                 break;
6900                         }
6901
6902                         /* In other cases, don't print partial lines */
6903                         iter->seq.seq.len = save_len;
6904                         break;
6905                 }
6906                 if (ret != TRACE_TYPE_NO_CONSUME)
6907                         trace_consume(iter);
6908
6909                 if (trace_seq_used(&iter->seq) >= cnt)
6910                         break;
6911
6912                 /*
6913                  * Setting the full flag means we reached the trace_seq buffer
6914                  * size and we should leave by partial output condition above.
6915                  * One of the trace_seq_* functions is not used properly.
6916                  */
6917                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6918                           iter->ent->type);
6919         }
6920         trace_access_unlock(iter->cpu_file);
6921         trace_event_read_unlock();
6922
6923         /* Now copy what we have to the user */
6924         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6925         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6926                 trace_seq_init(&iter->seq);
6927
6928         /*
6929          * If there was nothing to send to user, in spite of consuming trace
6930          * entries, go back to wait for more entries.
6931          */
6932         if (sret == -EBUSY)
6933                 goto waitagain;
6934
6935 out:
6936         mutex_unlock(&iter->mutex);
6937
6938         return sret;
6939 }
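/*
 * In practice this is the consuming read behind "trace_pipe", e.g.:
 *
 *	# cat /sys/kernel/tracing/trace_pipe
 *
 * The read blocks (unless O_NONBLOCK is set) until entries arrive, consumes
 * entries as they are formatted, and iter->mutex keeps it to one consumer
 * per open file descriptor.
 */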
6940
6941 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6942                                      unsigned int idx)
6943 {
6944         __free_page(spd->pages[idx]);
6945 }
6946
6947 static size_t
6948 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6949 {
6950         size_t count;
6951         int save_len;
6952         int ret;
6953
6954         /* Seq buffer is page-sized, exactly what we need. */
6955         for (;;) {
6956                 save_len = iter->seq.seq.len;
6957                 ret = print_trace_line(iter);
6958
6959                 if (trace_seq_has_overflowed(&iter->seq)) {
6960                         iter->seq.seq.len = save_len;
6961                         break;
6962                 }
6963
6964                 /*
6965                  * This should not be hit, because it should only
6966                  * be set if the iter->seq overflowed. But check it
6967                  * anyway to be safe.
6968                  */
6969                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6970                         iter->seq.seq.len = save_len;
6971                         break;
6972                 }
6973
6974                 count = trace_seq_used(&iter->seq) - save_len;
6975                 if (rem < count) {
6976                         rem = 0;
6977                         iter->seq.seq.len = save_len;
6978                         break;
6979                 }
6980
6981                 if (ret != TRACE_TYPE_NO_CONSUME)
6982                         trace_consume(iter);
6983                 rem -= count;
6984                 if (!trace_find_next_entry_inc(iter))   {
6985                         rem = 0;
6986                         iter->ent = NULL;
6987                         break;
6988                 }
6989         }
6990
6991         return rem;
6992 }
6993
6994 static ssize_t tracing_splice_read_pipe(struct file *filp,
6995                                         loff_t *ppos,
6996                                         struct pipe_inode_info *pipe,
6997                                         size_t len,
6998                                         unsigned int flags)
6999 {
7000         struct page *pages_def[PIPE_DEF_BUFFERS];
7001         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7002         struct trace_iterator *iter = filp->private_data;
7003         struct splice_pipe_desc spd = {
7004                 .pages          = pages_def,
7005                 .partial        = partial_def,
7006                 .nr_pages       = 0, /* This gets updated below. */
7007                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7008                 .ops            = &default_pipe_buf_ops,
7009                 .spd_release    = tracing_spd_release_pipe,
7010         };
7011         ssize_t ret;
7012         size_t rem;
7013         unsigned int i;
7014
7015         if (splice_grow_spd(pipe, &spd))
7016                 return -ENOMEM;
7017
7018         mutex_lock(&iter->mutex);
7019
7020         if (iter->trace->splice_read) {
7021                 ret = iter->trace->splice_read(iter, filp,
7022                                                ppos, pipe, len, flags);
7023                 if (ret)
7024                         goto out_err;
7025         }
7026
7027         ret = tracing_wait_pipe(filp);
7028         if (ret <= 0)
7029                 goto out_err;
7030
7031         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7032                 ret = -EFAULT;
7033                 goto out_err;
7034         }
7035
7036         trace_event_read_lock();
7037         trace_access_lock(iter->cpu_file);
7038
7039         /* Fill as many pages as possible. */
7040         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7041                 spd.pages[i] = alloc_page(GFP_KERNEL);
7042                 if (!spd.pages[i])
7043                         break;
7044
7045                 rem = tracing_fill_pipe_page(rem, iter);
7046
7047                 /* Copy the data into the page, so we can start over. */
7048                 ret = trace_seq_to_buffer(&iter->seq,
7049                                           page_address(spd.pages[i]),
7050                                           trace_seq_used(&iter->seq));
7051                 if (ret < 0) {
7052                         __free_page(spd.pages[i]);
7053                         break;
7054                 }
7055                 spd.partial[i].offset = 0;
7056                 spd.partial[i].len = trace_seq_used(&iter->seq);
7057
7058                 trace_seq_init(&iter->seq);
7059         }
7060
7061         trace_access_unlock(iter->cpu_file);
7062         trace_event_read_unlock();
7063         mutex_unlock(&iter->mutex);
7064
7065         spd.nr_pages = i;
7066
7067         if (i)
7068                 ret = splice_to_pipe(pipe, &spd);
7069         else
7070                 ret = 0;
7071 out:
7072         splice_shrink_spd(&spd);
7073         return ret;
7074
7075 out_err:
7076         mutex_unlock(&iter->mutex);
7077         goto out;
7078 }
7079
7080 static ssize_t
7081 tracing_entries_read(struct file *filp, char __user *ubuf,
7082                      size_t cnt, loff_t *ppos)
7083 {
7084         struct inode *inode = file_inode(filp);
7085         struct trace_array *tr = inode->i_private;
7086         int cpu = tracing_get_cpu(inode);
7087         char buf[64];
7088         int r = 0;
7089         ssize_t ret;
7090
7091         mutex_lock(&trace_types_lock);
7092
7093         if (cpu == RING_BUFFER_ALL_CPUS) {
7094                 int cpu, buf_size_same;
7095                 unsigned long size;
7096
7097                 size = 0;
7098                 buf_size_same = 1;
7099                 /* check if all cpu sizes are same */
7100                 for_each_tracing_cpu(cpu) {
7101                         /* fill in the size from first enabled cpu */
7102                         if (size == 0)
7103                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7104                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7105                                 buf_size_same = 0;
7106                                 break;
7107                         }
7108                 }
7109
7110                 if (buf_size_same) {
7111                         if (!ring_buffer_expanded)
7112                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7113                                             size >> 10,
7114                                             trace_buf_size >> 10);
7115                         else
7116                                 r = sprintf(buf, "%lu\n", size >> 10);
7117                 } else
7118                         r = sprintf(buf, "X\n");
7119         } else
7120                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7121
7122         mutex_unlock(&trace_types_lock);
7123
7124         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7125         return ret;
7126 }
7127
7128 static ssize_t
7129 tracing_entries_write(struct file *filp, const char __user *ubuf,
7130                       size_t cnt, loff_t *ppos)
7131 {
7132         struct inode *inode = file_inode(filp);
7133         struct trace_array *tr = inode->i_private;
7134         unsigned long val;
7135         int ret;
7136
7137         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7138         if (ret)
7139                 return ret;
7140
7141         /* must have at least 1 entry */
7142         if (!val)
7143                 return -EINVAL;
7144
7145         /* value is in KB */
7146         val <<= 10;
7147         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7148         if (ret < 0)
7149                 return ret;
7150
7151         *ppos += cnt;
7152
7153         return cnt;
7154 }
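/*
 * These handlers back the "buffer_size_kb" files. The value is per CPU and
 * in kilobytes, e.g.:
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * resizes every CPU's buffer to 4 MB, while writing to
 * per_cpu/cpuN/buffer_size_kb resizes only that CPU's buffer.
 */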
7155
7156 static ssize_t
7157 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7158                                 size_t cnt, loff_t *ppos)
7159 {
7160         struct trace_array *tr = filp->private_data;
7161         char buf[64];
7162         int r, cpu;
7163         unsigned long size = 0, expanded_size = 0;
7164
7165         mutex_lock(&trace_types_lock);
7166         for_each_tracing_cpu(cpu) {
7167                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7168                 if (!ring_buffer_expanded)
7169                         expanded_size += trace_buf_size >> 10;
7170         }
7171         if (ring_buffer_expanded)
7172                 r = sprintf(buf, "%lu\n", size);
7173         else
7174                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7175         mutex_unlock(&trace_types_lock);
7176
7177         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7178 }
7179
7180 static ssize_t
7181 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7182                           size_t cnt, loff_t *ppos)
7183 {
7184         /*
7185          * There is no need to read what the user has written; this function
7186          * only makes sure that there is no error when "echo" is used.
7187          */
7188
7189         *ppos += cnt;
7190
7191         return cnt;
7192 }
7193
7194 static int
7195 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7196 {
7197         struct trace_array *tr = inode->i_private;
7198
7199         /* disable tracing? */
7200         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7201                 tracer_tracing_off(tr);
7202         /* resize the ring buffer to 0 */
7203         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7204
7205         trace_array_put(tr);
7206
7207         return 0;
7208 }
7209
7210 static ssize_t
7211 tracing_mark_write(struct file *filp, const char __user *ubuf,
7212                                         size_t cnt, loff_t *fpos)
7213 {
7214         struct trace_array *tr = filp->private_data;
7215         struct ring_buffer_event *event;
7216         enum event_trigger_type tt = ETT_NONE;
7217         struct trace_buffer *buffer;
7218         struct print_entry *entry;
7219         ssize_t written;
7220         int size;
7221         int len;
7222
7223 /* Used in tracing_mark_raw_write() as well */
7224 #define FAULTED_STR "<faulted>"
7225 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7226
7227         if (tracing_disabled)
7228                 return -EINVAL;
7229
7230         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7231                 return -EINVAL;
7232
7233         if (cnt > TRACE_BUF_SIZE)
7234                 cnt = TRACE_BUF_SIZE;
7235
7236         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7237
7238         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7239
7240         /* If less than "<faulted>", then make sure we can still add that */
7241         if (cnt < FAULTED_SIZE)
7242                 size += FAULTED_SIZE - cnt;
7243
7244         buffer = tr->array_buffer.buffer;
7245         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7246                                             tracing_gen_ctx());
7247         if (unlikely(!event))
7248                 /* Ring buffer disabled, return as if not open for write */
7249                 return -EBADF;
7250
7251         entry = ring_buffer_event_data(event);
7252         entry->ip = _THIS_IP_;
7253
7254         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7255         if (len) {
7256                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7257                 cnt = FAULTED_SIZE;
7258                 written = -EFAULT;
7259         } else
7260                 written = cnt;
7261
7262         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7263                 /* do not add \n before testing triggers, but add \0 */
7264                 entry->buf[cnt] = '\0';
7265                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7266         }
7267
7268         if (entry->buf[cnt - 1] != '\n') {
7269                 entry->buf[cnt] = '\n';
7270                 entry->buf[cnt + 1] = '\0';
7271         } else
7272                 entry->buf[cnt] = '\0';
7273
7274         if (static_branch_unlikely(&trace_marker_exports_enabled))
7275                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7276         __buffer_unlock_commit(buffer, event);
7277
7278         if (tt)
7279                 event_triggers_post_call(tr->trace_marker_file, tt);
7280
7281         return written;
7282 }
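/*
 * This is the write handler for "trace_marker"; for example
 *
 *	# echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * injects a TRACE_PRINT entry into the ring buffer. Writes larger than
 * TRACE_BUF_SIZE are truncated to that size, and a faulting copy records
 * the "<faulted>" placeholder instead of the user string.
 */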
7283
7284 /* Limit it for now to 3K (including tag) */
7285 #define RAW_DATA_MAX_SIZE (1024*3)
7286
7287 static ssize_t
7288 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7289                                         size_t cnt, loff_t *fpos)
7290 {
7291         struct trace_array *tr = filp->private_data;
7292         struct ring_buffer_event *event;
7293         struct trace_buffer *buffer;
7294         struct raw_data_entry *entry;
7295         ssize_t written;
7296         int size;
7297         int len;
7298
7299 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7300
7301         if (tracing_disabled)
7302                 return -EINVAL;
7303
7304         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7305                 return -EINVAL;
7306
7307         /* The marker must at least have a tag id */
7308         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7309                 return -EINVAL;
7310
7311         if (cnt > TRACE_BUF_SIZE)
7312                 cnt = TRACE_BUF_SIZE;
7313
7314         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7315
7316         size = sizeof(*entry) + cnt;
7317         if (cnt < FAULT_SIZE_ID)
7318                 size += FAULT_SIZE_ID - cnt;
7319
7320         buffer = tr->array_buffer.buffer;
7321         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7322                                             tracing_gen_ctx());
7323         if (!event)
7324                 /* Ring buffer disabled, return as if not open for write */
7325                 return -EBADF;
7326
7327         entry = ring_buffer_event_data(event);
7328
7329         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7330         if (len) {
7331                 entry->id = -1;
7332                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7333                 written = -EFAULT;
7334         } else
7335                 written = cnt;
7336
7337         __buffer_unlock_commit(buffer, event);
7338
7339         return written;
7340 }
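/*
 * The raw marker ("trace_marker_raw") expects binary data: the first
 * sizeof(unsigned int) bytes are a user-chosen tag id and the rest is an
 * opaque payload, up to RAW_DATA_MAX_SIZE bytes in total. A write without
 * at least the tag id is rejected with -EINVAL.
 */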
7341
7342 static int tracing_clock_show(struct seq_file *m, void *v)
7343 {
7344         struct trace_array *tr = m->private;
7345         int i;
7346
7347         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7348                 seq_printf(m,
7349                         "%s%s%s%s", i ? " " : "",
7350                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7351                         i == tr->clock_id ? "]" : "");
7352         seq_putc(m, '\n');
7353
7354         return 0;
7355 }
7356
7357 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7358 {
7359         int i;
7360
7361         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7362                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7363                         break;
7364         }
7365         if (i == ARRAY_SIZE(trace_clocks))
7366                 return -EINVAL;
7367
7368         mutex_lock(&trace_types_lock);
7369
7370         tr->clock_id = i;
7371
7372         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7373
7374         /*
7375          * New clock may not be consistent with the previous clock.
7376          * Reset the buffer so that it doesn't have incomparable timestamps.
7377          */
7378         tracing_reset_online_cpus(&tr->array_buffer);
7379
7380 #ifdef CONFIG_TRACER_MAX_TRACE
7381         if (tr->max_buffer.buffer)
7382                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7383         tracing_reset_online_cpus(&tr->max_buffer);
7384 #endif
7385
7386         mutex_unlock(&trace_types_lock);
7387
7388         return 0;
7389 }
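/*
 * The corresponding "trace_clock" file lists the available clocks with the
 * current one in brackets, e.g. "[local] global counter ...", and a write
 * such as
 *
 *	# echo global > /sys/kernel/tracing/trace_clock
 *
 * switches the clock. The buffers are reset on a switch because timestamps
 * taken with different clocks are not comparable.
 */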
7390
7391 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7392                                    size_t cnt, loff_t *fpos)
7393 {
7394         struct seq_file *m = filp->private_data;
7395         struct trace_array *tr = m->private;
7396         char buf[64];
7397         const char *clockstr;
7398         int ret;
7399
7400         if (cnt >= sizeof(buf))
7401                 return -EINVAL;
7402
7403         if (copy_from_user(buf, ubuf, cnt))
7404                 return -EFAULT;
7405
7406         buf[cnt] = 0;
7407
7408         clockstr = strstrip(buf);
7409
7410         ret = tracing_set_clock(tr, clockstr);
7411         if (ret)
7412                 return ret;
7413
7414         *fpos += cnt;
7415
7416         return cnt;
7417 }
7418
7419 static int tracing_clock_open(struct inode *inode, struct file *file)
7420 {
7421         struct trace_array *tr = inode->i_private;
7422         int ret;
7423
7424         ret = tracing_check_open_get_tr(tr);
7425         if (ret)
7426                 return ret;
7427
7428         ret = single_open(file, tracing_clock_show, inode->i_private);
7429         if (ret < 0)
7430                 trace_array_put(tr);
7431
7432         return ret;
7433 }
7434
7435 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7436 {
7437         struct trace_array *tr = m->private;
7438
7439         mutex_lock(&trace_types_lock);
7440
7441         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7442                 seq_puts(m, "delta [absolute]\n");
7443         else
7444                 seq_puts(m, "[delta] absolute\n");
7445
7446         mutex_unlock(&trace_types_lock);
7447
7448         return 0;
7449 }
7450
7451 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7452 {
7453         struct trace_array *tr = inode->i_private;
7454         int ret;
7455
7456         ret = tracing_check_open_get_tr(tr);
7457         if (ret)
7458                 return ret;
7459
7460         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7461         if (ret < 0)
7462                 trace_array_put(tr);
7463
7464         return ret;
7465 }
7466
7467 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7468 {
7469         if (rbe == this_cpu_read(trace_buffered_event))
7470                 return ring_buffer_time_stamp(buffer);
7471
7472         return ring_buffer_event_time_stamp(buffer, rbe);
7473 }
7474
7475 /*
7476  * Set or disable using the per-CPU trace_buffered_event when possible.
7477  */
7478 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7479 {
7480         int ret = 0;
7481
7482         mutex_lock(&trace_types_lock);
7483
7484         if (set && tr->no_filter_buffering_ref++)
7485                 goto out;
7486
7487         if (!set) {
7488                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7489                         ret = -EINVAL;
7490                         goto out;
7491                 }
7492
7493                 --tr->no_filter_buffering_ref;
7494         }
7495  out:
7496         mutex_unlock(&trace_types_lock);
7497
7498         return ret;
7499 }
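/*
 * no_filter_buffering_ref is a plain reference count: every caller that
 * needs the unbuffered path (set == true) takes a reference, and the
 * buffered per-CPU trace_buffered_event is used again only after all such
 * callers have dropped their references (set == false).
 */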
7500
7501 struct ftrace_buffer_info {
7502         struct trace_iterator   iter;
7503         void                    *spare;
7504         unsigned int            spare_cpu;
7505         unsigned int            read;
7506 };
7507
7508 #ifdef CONFIG_TRACER_SNAPSHOT
7509 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7510 {
7511         struct trace_array *tr = inode->i_private;
7512         struct trace_iterator *iter;
7513         struct seq_file *m;
7514         int ret;
7515
7516         ret = tracing_check_open_get_tr(tr);
7517         if (ret)
7518                 return ret;
7519
7520         if (file->f_mode & FMODE_READ) {
7521                 iter = __tracing_open(inode, file, true);
7522                 if (IS_ERR(iter))
7523                         ret = PTR_ERR(iter);
7524         } else {
7525                 /* Writes still need the seq_file to hold the private data */
7526                 ret = -ENOMEM;
7527                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7528                 if (!m)
7529                         goto out;
7530                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7531                 if (!iter) {
7532                         kfree(m);
7533                         goto out;
7534                 }
7535                 ret = 0;
7536
7537                 iter->tr = tr;
7538                 iter->array_buffer = &tr->max_buffer;
7539                 iter->cpu_file = tracing_get_cpu(inode);
7540                 m->private = iter;
7541                 file->private_data = m;
7542         }
7543 out:
7544         if (ret < 0)
7545                 trace_array_put(tr);
7546
7547         return ret;
7548 }
7549
7550 static void tracing_swap_cpu_buffer(void *tr)
7551 {
7552         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7553 }
7554
7555 static ssize_t
7556 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7557                        loff_t *ppos)
7558 {
7559         struct seq_file *m = filp->private_data;
7560         struct trace_iterator *iter = m->private;
7561         struct trace_array *tr = iter->tr;
7562         unsigned long val;
7563         int ret;
7564
7565         ret = tracing_update_buffers();
7566         if (ret < 0)
7567                 return ret;
7568
7569         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7570         if (ret)
7571                 return ret;
7572
7573         mutex_lock(&trace_types_lock);
7574
7575         if (tr->current_trace->use_max_tr) {
7576                 ret = -EBUSY;
7577                 goto out;
7578         }
7579
7580         local_irq_disable();
7581         arch_spin_lock(&tr->max_lock);
7582         if (tr->cond_snapshot)
7583                 ret = -EBUSY;
7584         arch_spin_unlock(&tr->max_lock);
7585         local_irq_enable();
7586         if (ret)
7587                 goto out;
7588
7589         switch (val) {
7590         case 0:
7591                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7592                         ret = -EINVAL;
7593                         break;
7594                 }
7595                 if (tr->allocated_snapshot)
7596                         free_snapshot(tr);
7597                 break;
7598         case 1:
7599 /* Only allow per-cpu swap if the ring buffer supports it */
7600 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7601                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7602                         ret = -EINVAL;
7603                         break;
7604                 }
7605 #endif
7606                 if (tr->allocated_snapshot)
7607                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7608                                         &tr->array_buffer, iter->cpu_file);
7609                 else
7610                         ret = tracing_alloc_snapshot_instance(tr);
7611                 if (ret < 0)
7612                         break;
7613                 /* Now, we're going to swap */
7614                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7615                         local_irq_disable();
7616                         update_max_tr(tr, current, smp_processor_id(), NULL);
7617                         local_irq_enable();
7618                 } else {
7619                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7620                                                  (void *)tr, 1);
7621                 }
7622                 break;
7623         default:
7624                 if (tr->allocated_snapshot) {
7625                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7626                                 tracing_reset_online_cpus(&tr->max_buffer);
7627                         else
7628                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7629                 }
7630                 break;
7631         }
7632
7633         if (ret >= 0) {
7634                 *ppos += cnt;
7635                 ret = cnt;
7636         }
7637 out:
7638         mutex_unlock(&trace_types_lock);
7639         return ret;
7640 }
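/*
 * Summary of the values accepted by the "snapshot" file:
 *
 *	echo 0 > snapshot	free the snapshot buffer (all-CPUs file only)
 *	echo 1 > snapshot	allocate the buffer if needed and swap it with
 *				the live buffer (per-CPU swap only when
 *				CONFIG_RING_BUFFER_ALLOW_SWAP is enabled)
 *	echo 2 > snapshot	(or any other value) clear the snapshot buffer
 *				without freeing it
 */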
7641
7642 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7643 {
7644         struct seq_file *m = file->private_data;
7645         int ret;
7646
7647         ret = tracing_release(inode, file);
7648
7649         if (file->f_mode & FMODE_READ)
7650                 return ret;
7651
7652         /* If write only, the seq_file is just a stub */
7653         if (m)
7654                 kfree(m->private);
7655         kfree(m);
7656
7657         return 0;
7658 }
7659
7660 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7661 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7662                                     size_t count, loff_t *ppos);
7663 static int tracing_buffers_release(struct inode *inode, struct file *file);
7664 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7665                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7666
7667 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7668 {
7669         struct ftrace_buffer_info *info;
7670         int ret;
7671
7672         /* The following checks for tracefs lockdown */
7673         ret = tracing_buffers_open(inode, filp);
7674         if (ret < 0)
7675                 return ret;
7676
7677         info = filp->private_data;
7678
7679         if (info->iter.trace->use_max_tr) {
7680                 tracing_buffers_release(inode, filp);
7681                 return -EBUSY;
7682         }
7683
7684         info->iter.snapshot = true;
7685         info->iter.array_buffer = &info->iter.tr->max_buffer;
7686
7687         return ret;
7688 }
7689
7690 #endif /* CONFIG_TRACER_SNAPSHOT */
7691
7692
7693 static const struct file_operations tracing_thresh_fops = {
7694         .open           = tracing_open_generic,
7695         .read           = tracing_thresh_read,
7696         .write          = tracing_thresh_write,
7697         .llseek         = generic_file_llseek,
7698 };
7699
7700 #ifdef CONFIG_TRACER_MAX_TRACE
7701 static const struct file_operations tracing_max_lat_fops = {
7702         .open           = tracing_open_generic_tr,
7703         .read           = tracing_max_lat_read,
7704         .write          = tracing_max_lat_write,
7705         .llseek         = generic_file_llseek,
7706         .release        = tracing_release_generic_tr,
7707 };
7708 #endif
7709
7710 static const struct file_operations set_tracer_fops = {
7711         .open           = tracing_open_generic,
7712         .read           = tracing_set_trace_read,
7713         .write          = tracing_set_trace_write,
7714         .llseek         = generic_file_llseek,
7715 };
7716
7717 static const struct file_operations tracing_pipe_fops = {
7718         .open           = tracing_open_pipe,
7719         .poll           = tracing_poll_pipe,
7720         .read           = tracing_read_pipe,
7721         .splice_read    = tracing_splice_read_pipe,
7722         .release        = tracing_release_pipe,
7723         .llseek         = no_llseek,
7724 };
7725
7726 static const struct file_operations tracing_entries_fops = {
7727         .open           = tracing_open_generic_tr,
7728         .read           = tracing_entries_read,
7729         .write          = tracing_entries_write,
7730         .llseek         = generic_file_llseek,
7731         .release        = tracing_release_generic_tr,
7732 };
7733
7734 static const struct file_operations tracing_total_entries_fops = {
7735         .open           = tracing_open_generic_tr,
7736         .read           = tracing_total_entries_read,
7737         .llseek         = generic_file_llseek,
7738         .release        = tracing_release_generic_tr,
7739 };
7740
7741 static const struct file_operations tracing_free_buffer_fops = {
7742         .open           = tracing_open_generic_tr,
7743         .write          = tracing_free_buffer_write,
7744         .release        = tracing_free_buffer_release,
7745 };
7746
7747 static const struct file_operations tracing_mark_fops = {
7748         .open           = tracing_mark_open,
7749         .write          = tracing_mark_write,
7750         .release        = tracing_release_generic_tr,
7751 };
7752
7753 static const struct file_operations tracing_mark_raw_fops = {
7754         .open           = tracing_mark_open,
7755         .write          = tracing_mark_raw_write,
7756         .release        = tracing_release_generic_tr,
7757 };
7758
7759 static const struct file_operations trace_clock_fops = {
7760         .open           = tracing_clock_open,
7761         .read           = seq_read,
7762         .llseek         = seq_lseek,
7763         .release        = tracing_single_release_tr,
7764         .write          = tracing_clock_write,
7765 };
7766
7767 static const struct file_operations trace_time_stamp_mode_fops = {
7768         .open           = tracing_time_stamp_mode_open,
7769         .read           = seq_read,
7770         .llseek         = seq_lseek,
7771         .release        = tracing_single_release_tr,
7772 };
7773
7774 #ifdef CONFIG_TRACER_SNAPSHOT
7775 static const struct file_operations snapshot_fops = {
7776         .open           = tracing_snapshot_open,
7777         .read           = seq_read,
7778         .write          = tracing_snapshot_write,
7779         .llseek         = tracing_lseek,
7780         .release        = tracing_snapshot_release,
7781 };
7782
7783 static const struct file_operations snapshot_raw_fops = {
7784         .open           = snapshot_raw_open,
7785         .read           = tracing_buffers_read,
7786         .release        = tracing_buffers_release,
7787         .splice_read    = tracing_buffers_splice_read,
7788         .llseek         = no_llseek,
7789 };
7790
7791 #endif /* CONFIG_TRACER_SNAPSHOT */
7792
7793 /*
7794  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7795  * @filp: The active open file structure
7796  * @ubuf: The userspace provided buffer to read the value from
7797  * @cnt: The maximum number of bytes to read from @ubuf
7798  * @ppos: The current "file" position
7799  *
7800  * This function implements the write interface for a struct trace_min_max_param.
7801  * The filp->private_data must point to a trace_min_max_param structure that
7802  * defines where to write the value, the min and the max acceptable values,
7803  * and a lock to protect the write.
7804  */
7805 static ssize_t
7806 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7807 {
7808         struct trace_min_max_param *param = filp->private_data;
7809         u64 val;
7810         int err;
7811
7812         if (!param)
7813                 return -EFAULT;
7814
7815         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7816         if (err)
7817                 return err;
7818
7819         if (param->lock)
7820                 mutex_lock(param->lock);
7821
7822         if (param->min && val < *param->min)
7823                 err = -EINVAL;
7824
7825         if (param->max && val > *param->max)
7826                 err = -EINVAL;
7827
7828         if (!err)
7829                 *param->val = val;
7830
7831         if (param->lock)
7832                 mutex_unlock(param->lock);
7833
7834         if (err)
7835                 return err;
7836
7837         return cnt;
7838 }
7839
7840 /*
7841  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7842  * @filp: The active open file structure
7843  * @ubuf: The userspace provided buffer to read value into
7844  * @cnt: The maximum number of bytes to read
7845  * @ppos: The current "file" position
7846  *
7847  * This function implements the read interface for a struct trace_min_max_param.
7848  * The filp->private_data must point to a trace_min_max_param struct with valid
7849  * data.
7850  */
7851 static ssize_t
7852 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7853 {
7854         struct trace_min_max_param *param = filp->private_data;
7855         char buf[U64_STR_SIZE];
7856         int len;
7857         u64 val;
7858
7859         if (!param)
7860                 return -EFAULT;
7861
7862         val = *param->val;
7863
7864         if (cnt > sizeof(buf))
7865                 cnt = sizeof(buf);
7866
7867         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7868
7869         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7870 }
7871
7872 const struct file_operations trace_min_max_fops = {
7873         .open           = tracing_open_generic,
7874         .read           = trace_min_max_read,
7875         .write          = trace_min_max_write,
7876 };
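
/*
 * Illustrative sketch (not part of this file): wiring a bounded u64 knob
 * up to trace_min_max_fops.  The field names assumed here (lock, val, min,
 * max pointers) follow struct trace_min_max_param as used by the handlers
 * above; the example_* names are hypothetical.
 *
 *	static u64 example_val = 50;
 *	static u64 example_min = 1;
 *	static u64 example_max = 100;
 *	static DEFINE_MUTEX(example_lock);
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_lock,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	static void example_create_knob(struct dentry *parent)
 *	{
 *		trace_create_file("example_knob", TRACE_MODE_WRITE, parent,
 *				  &example_param, &trace_min_max_fops);
 *	}
 *
 * With this, writes outside [1, 100] are rejected with -EINVAL by
 * trace_min_max_write() above, and reads return the current value followed
 * by a newline.
 */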
7877
7878 #define TRACING_LOG_ERRS_MAX    8
7879 #define TRACING_LOG_LOC_MAX     128
7880
7881 #define CMD_PREFIX "  Command: "
7882
7883 struct err_info {
7884         const char      **errs; /* ptr to loc-specific array of err strings */
7885         u8              type;   /* index into errs -> specific err string */
7886         u16             pos;    /* caret position */
7887         u64             ts;
7888 };
7889
7890 struct tracing_log_err {
7891         struct list_head        list;
7892         struct err_info         info;
7893         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7894         char                    *cmd;                     /* what caused err */
7895 };
7896
7897 static DEFINE_MUTEX(tracing_err_log_lock);
7898
7899 static struct tracing_log_err *alloc_tracing_log_err(int len)
7900 {
7901         struct tracing_log_err *err;
7902
7903         err = kzalloc(sizeof(*err), GFP_KERNEL);
7904         if (!err)
7905                 return ERR_PTR(-ENOMEM);
7906
7907         err->cmd = kzalloc(len, GFP_KERNEL);
7908         if (!err->cmd) {
7909                 kfree(err);
7910                 return ERR_PTR(-ENOMEM);
7911         }
7912
7913         return err;
7914 }
7915
7916 static void free_tracing_log_err(struct tracing_log_err *err)
7917 {
7918         kfree(err->cmd);
7919         kfree(err);
7920 }
7921
7922 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7923                                                    int len)
7924 {
7925         struct tracing_log_err *err;
7926         char *cmd;
7927
7928         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7929                 err = alloc_tracing_log_err(len);
7930                 if (PTR_ERR(err) != -ENOMEM)
7931                         tr->n_err_log_entries++;
7932
7933                 return err;
7934         }
7935         cmd = kzalloc(len, GFP_KERNEL);
7936         if (!cmd)
7937                 return ERR_PTR(-ENOMEM);
7938         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7939         kfree(err->cmd);
7940         err->cmd = cmd;
7941         list_del(&err->list);
7942
7943         return err;
7944 }
7945
7946 /**
7947  * err_pos - find the position of a string within a command for error careting
7948  * @cmd: The tracing command that caused the error
7949  * @str: The string to position the caret at within @cmd
7950  *
7951  * Finds the position of the first occurrence of @str within @cmd.  The
7952  * return value can be passed to tracing_log_err() for caret placement
7953  * within @cmd.
7954  *
7955  * Returns the index within @cmd of the first occurrence of @str or 0
7956  * if @str was not found.
7957  */
7958 unsigned int err_pos(char *cmd, const char *str)
7959 {
7960         char *found;
7961
7962         if (WARN_ON(!strlen(cmd)))
7963                 return 0;
7964
7965         found = strstr(cmd, str);
7966         if (found)
7967                 return found - cmd;
7968
7969         return 0;
7970 }
7971
7972 /**
7973  * tracing_log_err - write an error to the tracing error log
7974  * @tr: The associated trace array for the error (NULL for top level array)
7975  * @loc: A string describing where the error occurred
7976  * @cmd: The tracing command that caused the error
7977  * @errs: The array of loc-specific static error strings
7978  * @type: The index into errs[], which produces the specific static err string
7979  * @pos: The position the caret should be placed in the cmd
7980  *
7981  * Writes an error into tracing/error_log of the form:
7982  *
7983  * <loc>: error: <text>
7984  *   Command: <cmd>
7985  *              ^
7986  *
7987  * tracing/error_log is a small log file containing the last
7988  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7989  * unless there has been a tracing error, and the error log can be
7990  * cleared and have its memory freed by writing the empty string in
7991  * truncation mode to it i.e. echo > tracing/error_log.
7992  *
7993  * NOTE: the @errs array along with the @type param are used to
7994  * produce a static error string - this string is not copied and saved
7995  * when the error is logged - only a pointer to it is saved.  See
7996  * existing callers for examples of how static strings are typically
7997  * defined for use with tracing_log_err().
7998  */
7999 void tracing_log_err(struct trace_array *tr,
8000                      const char *loc, const char *cmd,
8001                      const char **errs, u8 type, u16 pos)
8002 {
8003         struct tracing_log_err *err;
8004         int len = 0;
8005
8006         if (!tr)
8007                 tr = &global_trace;
8008
8009         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8010
8011         mutex_lock(&tracing_err_log_lock);
8012         err = get_tracing_log_err(tr, len);
8013         if (PTR_ERR(err) == -ENOMEM) {
8014                 mutex_unlock(&tracing_err_log_lock);
8015                 return;
8016         }
8017
8018         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8019         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8020
8021         err->info.errs = errs;
8022         err->info.type = type;
8023         err->info.pos = pos;
8024         err->info.ts = local_clock();
8025
8026         list_add_tail(&err->list, &tr->err_log);
8027         mutex_unlock(&tracing_err_log_lock);
8028 }
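
/*
 * Illustrative sketch (not part of this file): the typical calling pattern
 * for tracing_log_err().  The error strings are static and loc-specific,
 * and only a pointer to the array is stored, as noted above.  The
 * example_* names and the "example: parse" location string are
 * hypothetical.
 *
 *	static const char *example_errs[] = {
 *		"No error",
 *		"Invalid argument",
 *	};
 *
 *	enum { EXAMPLE_ERR_NONE, EXAMPLE_ERR_BAD_ARG };
 *
 *	static void example_report(struct trace_array *tr, char *cmd,
 *				   const char *bad_token)
 *	{
 *		tracing_log_err(tr, "example: parse", cmd, example_errs,
 *				EXAMPLE_ERR_BAD_ARG, err_pos(cmd, bad_token));
 *	}
 *
 * The resulting entry shows up in tracing/error_log with the caret placed
 * under the first occurrence of bad_token in cmd, and the log is emptied
 * with "echo > tracing/error_log".
 */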
8029
8030 static void clear_tracing_err_log(struct trace_array *tr)
8031 {
8032         struct tracing_log_err *err, *next;
8033
8034         mutex_lock(&tracing_err_log_lock);
8035         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8036                 list_del(&err->list);
8037                 free_tracing_log_err(err);
8038         }
8039
8040         tr->n_err_log_entries = 0;
8041         mutex_unlock(&tracing_err_log_lock);
8042 }
8043
8044 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8045 {
8046         struct trace_array *tr = m->private;
8047
8048         mutex_lock(&tracing_err_log_lock);
8049
8050         return seq_list_start(&tr->err_log, *pos);
8051 }
8052
8053 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8054 {
8055         struct trace_array *tr = m->private;
8056
8057         return seq_list_next(v, &tr->err_log, pos);
8058 }
8059
8060 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8061 {
8062         mutex_unlock(&tracing_err_log_lock);
8063 }
8064
8065 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8066 {
8067         u16 i;
8068
8069         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8070                 seq_putc(m, ' ');
8071         for (i = 0; i < pos; i++)
8072                 seq_putc(m, ' ');
8073         seq_puts(m, "^\n");
8074 }
8075
8076 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8077 {
8078         struct tracing_log_err *err = v;
8079
8080         if (err) {
8081                 const char *err_text = err->info.errs[err->info.type];
8082                 u64 sec = err->info.ts;
8083                 u32 nsec;
8084
8085                 nsec = do_div(sec, NSEC_PER_SEC);
8086                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8087                            err->loc, err_text);
8088                 seq_printf(m, "%s", err->cmd);
8089                 tracing_err_log_show_pos(m, err->info.pos);
8090         }
8091
8092         return 0;
8093 }
8094
8095 static const struct seq_operations tracing_err_log_seq_ops = {
8096         .start  = tracing_err_log_seq_start,
8097         .next   = tracing_err_log_seq_next,
8098         .stop   = tracing_err_log_seq_stop,
8099         .show   = tracing_err_log_seq_show
8100 };
8101
8102 static int tracing_err_log_open(struct inode *inode, struct file *file)
8103 {
8104         struct trace_array *tr = inode->i_private;
8105         int ret = 0;
8106
8107         ret = tracing_check_open_get_tr(tr);
8108         if (ret)
8109                 return ret;
8110
8111         /* If this file was opened for write, then erase contents */
8112         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8113                 clear_tracing_err_log(tr);
8114
8115         if (file->f_mode & FMODE_READ) {
8116                 ret = seq_open(file, &tracing_err_log_seq_ops);
8117                 if (!ret) {
8118                         struct seq_file *m = file->private_data;
8119                         m->private = tr;
8120                 } else {
8121                         trace_array_put(tr);
8122                 }
8123         }
8124         return ret;
8125 }
8126
8127 static ssize_t tracing_err_log_write(struct file *file,
8128                                      const char __user *buffer,
8129                                      size_t count, loff_t *ppos)
8130 {
8131         return count;
8132 }
8133
8134 static int tracing_err_log_release(struct inode *inode, struct file *file)
8135 {
8136         struct trace_array *tr = inode->i_private;
8137
8138         trace_array_put(tr);
8139
8140         if (file->f_mode & FMODE_READ)
8141                 seq_release(inode, file);
8142
8143         return 0;
8144 }
8145
8146 static const struct file_operations tracing_err_log_fops = {
8147         .open           = tracing_err_log_open,
8148         .write          = tracing_err_log_write,
8149         .read           = seq_read,
8150         .llseek         = tracing_lseek,
8151         .release        = tracing_err_log_release,
8152 };
8153
8154 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8155 {
8156         struct trace_array *tr = inode->i_private;
8157         struct ftrace_buffer_info *info;
8158         int ret;
8159
8160         ret = tracing_check_open_get_tr(tr);
8161         if (ret)
8162                 return ret;
8163
8164         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8165         if (!info) {
8166                 trace_array_put(tr);
8167                 return -ENOMEM;
8168         }
8169
8170         mutex_lock(&trace_types_lock);
8171
8172         info->iter.tr           = tr;
8173         info->iter.cpu_file     = tracing_get_cpu(inode);
8174         info->iter.trace        = tr->current_trace;
8175         info->iter.array_buffer = &tr->array_buffer;
8176         info->spare             = NULL;
8177         /* Force reading ring buffer for first read */
8178         info->read              = (unsigned int)-1;
8179
8180         filp->private_data = info;
8181
8182         tr->trace_ref++;
8183
8184         mutex_unlock(&trace_types_lock);
8185
8186         ret = nonseekable_open(inode, filp);
8187         if (ret < 0)
8188                 trace_array_put(tr);
8189
8190         return ret;
8191 }
8192
8193 static __poll_t
8194 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8195 {
8196         struct ftrace_buffer_info *info = filp->private_data;
8197         struct trace_iterator *iter = &info->iter;
8198
8199         return trace_poll(iter, filp, poll_table);
8200 }
8201
8202 static ssize_t
8203 tracing_buffers_read(struct file *filp, char __user *ubuf,
8204                      size_t count, loff_t *ppos)
8205 {
8206         struct ftrace_buffer_info *info = filp->private_data;
8207         struct trace_iterator *iter = &info->iter;
8208         ssize_t ret = 0;
8209         ssize_t size;
8210
8211         if (!count)
8212                 return 0;
8213
8214 #ifdef CONFIG_TRACER_MAX_TRACE
8215         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8216                 return -EBUSY;
8217 #endif
8218
8219         if (!info->spare) {
8220                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8221                                                           iter->cpu_file);
8222                 if (IS_ERR(info->spare)) {
8223                         ret = PTR_ERR(info->spare);
8224                         info->spare = NULL;
8225                 } else {
8226                         info->spare_cpu = iter->cpu_file;
8227                 }
8228         }
8229         if (!info->spare)
8230                 return ret;
8231
8232         /* Do we have previous read data to read? */
8233         if (info->read < PAGE_SIZE)
8234                 goto read;
8235
8236  again:
8237         trace_access_lock(iter->cpu_file);
8238         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8239                                     &info->spare,
8240                                     count,
8241                                     iter->cpu_file, 0);
8242         trace_access_unlock(iter->cpu_file);
8243
8244         if (ret < 0) {
8245                 if (trace_empty(iter)) {
8246                         if ((filp->f_flags & O_NONBLOCK))
8247                                 return -EAGAIN;
8248
8249                         ret = wait_on_pipe(iter, 0);
8250                         if (ret)
8251                                 return ret;
8252
8253                         goto again;
8254                 }
8255                 return 0;
8256         }
8257
8258         info->read = 0;
8259  read:
8260         size = PAGE_SIZE - info->read;
8261         if (size > count)
8262                 size = count;
8263
8264         ret = copy_to_user(ubuf, info->spare + info->read, size);
8265         if (ret == size)
8266                 return -EFAULT;
8267
8268         size -= ret;
8269
8270         *ppos += size;
8271         info->read += size;
8272
8273         return size;
8274 }
8275
8276 static int tracing_buffers_release(struct inode *inode, struct file *file)
8277 {
8278         struct ftrace_buffer_info *info = file->private_data;
8279         struct trace_iterator *iter = &info->iter;
8280
8281         mutex_lock(&trace_types_lock);
8282
8283         iter->tr->trace_ref--;
8284
8285         __trace_array_put(iter->tr);
8286
8287         iter->wait_index++;
8288         /* Make sure the waiters see the new wait_index */
8289         smp_wmb();
8290
8291         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8292
8293         if (info->spare)
8294                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8295                                            info->spare_cpu, info->spare);
8296         kvfree(info);
8297
8298         mutex_unlock(&trace_types_lock);
8299
8300         return 0;
8301 }
8302
8303 struct buffer_ref {
8304         struct trace_buffer     *buffer;
8305         void                    *page;
8306         int                     cpu;
8307         refcount_t              refcount;
8308 };
8309
8310 static void buffer_ref_release(struct buffer_ref *ref)
8311 {
8312         if (!refcount_dec_and_test(&ref->refcount))
8313                 return;
8314         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8315         kfree(ref);
8316 }
8317
8318 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8319                                     struct pipe_buffer *buf)
8320 {
8321         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8322
8323         buffer_ref_release(ref);
8324         buf->private = 0;
8325 }
8326
8327 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8328                                 struct pipe_buffer *buf)
8329 {
8330         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8331
8332         if (refcount_read(&ref->refcount) > INT_MAX/2)
8333                 return false;
8334
8335         refcount_inc(&ref->refcount);
8336         return true;
8337 }
8338
8339 /* Pipe buffer operations for a buffer. */
8340 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8341         .release                = buffer_pipe_buf_release,
8342         .get                    = buffer_pipe_buf_get,
8343 };
8344
8345 /*
8346  * Callback from splice_to_pipe(): release any pages left at the end
8347  * of the spd if we errored out while filling the pipe.
8348  */
8349 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8350 {
8351         struct buffer_ref *ref =
8352                 (struct buffer_ref *)spd->partial[i].private;
8353
8354         buffer_ref_release(ref);
8355         spd->partial[i].private = 0;
8356 }
8357
8358 static ssize_t
8359 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8360                             struct pipe_inode_info *pipe, size_t len,
8361                             unsigned int flags)
8362 {
8363         struct ftrace_buffer_info *info = file->private_data;
8364         struct trace_iterator *iter = &info->iter;
8365         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8366         struct page *pages_def[PIPE_DEF_BUFFERS];
8367         struct splice_pipe_desc spd = {
8368                 .pages          = pages_def,
8369                 .partial        = partial_def,
8370                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8371                 .ops            = &buffer_pipe_buf_ops,
8372                 .spd_release    = buffer_spd_release,
8373         };
8374         struct buffer_ref *ref;
8375         int entries, i;
8376         ssize_t ret = 0;
8377
8378 #ifdef CONFIG_TRACER_MAX_TRACE
8379         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8380                 return -EBUSY;
8381 #endif
8382
8383         if (*ppos & (PAGE_SIZE - 1))
8384                 return -EINVAL;
8385
8386         if (len & (PAGE_SIZE - 1)) {
8387                 if (len < PAGE_SIZE)
8388                         return -EINVAL;
8389                 len &= PAGE_MASK;
8390         }
8391
8392         if (splice_grow_spd(pipe, &spd))
8393                 return -ENOMEM;
8394
8395  again:
8396         trace_access_lock(iter->cpu_file);
8397         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8398
8399         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8400                 struct page *page;
8401                 int r;
8402
8403                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8404                 if (!ref) {
8405                         ret = -ENOMEM;
8406                         break;
8407                 }
8408
8409                 refcount_set(&ref->refcount, 1);
8410                 ref->buffer = iter->array_buffer->buffer;
8411                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8412                 if (IS_ERR(ref->page)) {
8413                         ret = PTR_ERR(ref->page);
8414                         ref->page = NULL;
8415                         kfree(ref);
8416                         break;
8417                 }
8418                 ref->cpu = iter->cpu_file;
8419
8420                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8421                                           len, iter->cpu_file, 1);
8422                 if (r < 0) {
8423                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8424                                                    ref->page);
8425                         kfree(ref);
8426                         break;
8427                 }
8428
8429                 page = virt_to_page(ref->page);
8430
8431                 spd.pages[i] = page;
8432                 spd.partial[i].len = PAGE_SIZE;
8433                 spd.partial[i].offset = 0;
8434                 spd.partial[i].private = (unsigned long)ref;
8435                 spd.nr_pages++;
8436                 *ppos += PAGE_SIZE;
8437
8438                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8439         }
8440
8441         trace_access_unlock(iter->cpu_file);
8442         spd.nr_pages = i;
8443
8444         /* did we read anything? */
8445         if (!spd.nr_pages) {
8446                 long wait_index;
8447
8448                 if (ret)
8449                         goto out;
8450
8451                 ret = -EAGAIN;
8452                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8453                         goto out;
8454
8455                 wait_index = READ_ONCE(iter->wait_index);
8456
8457                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8458                 if (ret)
8459                         goto out;
8460
8461                 /* No need to wait after waking up when tracing is off */
8462                 if (!tracer_tracing_is_on(iter->tr))
8463                         goto out;
8464
8465                 /* Make sure we see the new wait_index */
8466                 smp_rmb();
8467                 if (wait_index != iter->wait_index)
8468                         goto out;
8469
8470                 goto again;
8471         }
8472
8473         ret = splice_to_pipe(pipe, &spd);
8474 out:
8475         splice_shrink_spd(&spd);
8476
8477         return ret;
8478 }
8479
8480 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8481 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8482 {
8483         struct ftrace_buffer_info *info = file->private_data;
8484         struct trace_iterator *iter = &info->iter;
8485
8486         if (cmd)
8487                 return -ENOIOCTLCMD;
8488
8489         mutex_lock(&trace_types_lock);
8490
8491         iter->wait_index++;
8492         /* Make sure the waiters see the new wait_index */
8493         smp_wmb();
8494
8495         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8496
8497         mutex_unlock(&trace_types_lock);
8498         return 0;
8499 }
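
/*
 * Illustrative user-space sketch (not part of this file): a reader that is
 * blocked in read() or splice() on a trace_pipe_raw file can be kicked by
 * another thread issuing the cmd-0 ioctl handled above on the same file.
 * The path below assumes the usual /sys/kernel/tracing mount point.
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <unistd.h>
 *
 *	static void wake_trace_pipe_raw_waiters(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			      O_RDONLY);
 *
 *		if (fd < 0)
 *			return;
 *		ioctl(fd, 0);
 *		close(fd);
 *	}
 */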
8500
8501 static const struct file_operations tracing_buffers_fops = {
8502         .open           = tracing_buffers_open,
8503         .read           = tracing_buffers_read,
8504         .poll           = tracing_buffers_poll,
8505         .release        = tracing_buffers_release,
8506         .splice_read    = tracing_buffers_splice_read,
8507         .unlocked_ioctl = tracing_buffers_ioctl,
8508         .llseek         = no_llseek,
8509 };
8510
8511 static ssize_t
8512 tracing_stats_read(struct file *filp, char __user *ubuf,
8513                    size_t count, loff_t *ppos)
8514 {
8515         struct inode *inode = file_inode(filp);
8516         struct trace_array *tr = inode->i_private;
8517         struct array_buffer *trace_buf = &tr->array_buffer;
8518         int cpu = tracing_get_cpu(inode);
8519         struct trace_seq *s;
8520         unsigned long cnt;
8521         unsigned long long t;
8522         unsigned long usec_rem;
8523
8524         s = kmalloc(sizeof(*s), GFP_KERNEL);
8525         if (!s)
8526                 return -ENOMEM;
8527
8528         trace_seq_init(s);
8529
8530         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8531         trace_seq_printf(s, "entries: %ld\n", cnt);
8532
8533         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8534         trace_seq_printf(s, "overrun: %ld\n", cnt);
8535
8536         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8537         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8538
8539         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8540         trace_seq_printf(s, "bytes: %ld\n", cnt);
8541
8542         if (trace_clocks[tr->clock_id].in_ns) {
8543                 /* local or global for trace_clock */
8544                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8545                 usec_rem = do_div(t, USEC_PER_SEC);
8546                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8547                                                                 t, usec_rem);
8548
8549                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8550                 usec_rem = do_div(t, USEC_PER_SEC);
8551                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8552         } else {
8553                 /* counter or tsc mode for trace_clock */
8554                 trace_seq_printf(s, "oldest event ts: %llu\n",
8555                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8556
8557                 trace_seq_printf(s, "now ts: %llu\n",
8558                                 ring_buffer_time_stamp(trace_buf->buffer));
8559         }
8560
8561         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8562         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8563
8564         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8565         trace_seq_printf(s, "read events: %ld\n", cnt);
8566
8567         count = simple_read_from_buffer(ubuf, count, ppos,
8568                                         s->buffer, trace_seq_used(s));
8569
8570         kfree(s);
8571
8572         return count;
8573 }
8574
8575 static const struct file_operations tracing_stats_fops = {
8576         .open           = tracing_open_generic_tr,
8577         .read           = tracing_stats_read,
8578         .llseek         = generic_file_llseek,
8579         .release        = tracing_release_generic_tr,
8580 };
8581
8582 #ifdef CONFIG_DYNAMIC_FTRACE
8583
8584 static ssize_t
8585 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8586                   size_t cnt, loff_t *ppos)
8587 {
8588         ssize_t ret;
8589         char *buf;
8590         int r;
8591
8592         /* 256 should be plenty to hold the amount needed */
8593         buf = kmalloc(256, GFP_KERNEL);
8594         if (!buf)
8595                 return -ENOMEM;
8596
8597         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8598                       ftrace_update_tot_cnt,
8599                       ftrace_number_of_pages,
8600                       ftrace_number_of_groups);
8601
8602         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8603         kfree(buf);
8604         return ret;
8605 }
8606
8607 static const struct file_operations tracing_dyn_info_fops = {
8608         .open           = tracing_open_generic,
8609         .read           = tracing_read_dyn_info,
8610         .llseek         = generic_file_llseek,
8611 };
8612 #endif /* CONFIG_DYNAMIC_FTRACE */
8613
8614 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8615 static void
8616 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8617                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8618                 void *data)
8619 {
8620         tracing_snapshot_instance(tr);
8621 }
8622
8623 static void
8624 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8625                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8626                       void *data)
8627 {
8628         struct ftrace_func_mapper *mapper = data;
8629         long *count = NULL;
8630
8631         if (mapper)
8632                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8633
8634         if (count) {
8635
8636                 if (*count <= 0)
8637                         return;
8638
8639                 (*count)--;
8640         }
8641
8642         tracing_snapshot_instance(tr);
8643 }
8644
8645 static int
8646 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8647                       struct ftrace_probe_ops *ops, void *data)
8648 {
8649         struct ftrace_func_mapper *mapper = data;
8650         long *count = NULL;
8651
8652         seq_printf(m, "%ps:", (void *)ip);
8653
8654         seq_puts(m, "snapshot");
8655
8656         if (mapper)
8657                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8658
8659         if (count)
8660                 seq_printf(m, ":count=%ld\n", *count);
8661         else
8662                 seq_puts(m, ":unlimited\n");
8663
8664         return 0;
8665 }
8666
8667 static int
8668 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8669                      unsigned long ip, void *init_data, void **data)
8670 {
8671         struct ftrace_func_mapper *mapper = *data;
8672
8673         if (!mapper) {
8674                 mapper = allocate_ftrace_func_mapper();
8675                 if (!mapper)
8676                         return -ENOMEM;
8677                 *data = mapper;
8678         }
8679
8680         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8681 }
8682
8683 static void
8684 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8685                      unsigned long ip, void *data)
8686 {
8687         struct ftrace_func_mapper *mapper = data;
8688
8689         if (!ip) {
8690                 if (!mapper)
8691                         return;
8692                 free_ftrace_func_mapper(mapper, NULL);
8693                 return;
8694         }
8695
8696         ftrace_func_mapper_remove_ip(mapper, ip);
8697 }
8698
8699 static struct ftrace_probe_ops snapshot_probe_ops = {
8700         .func                   = ftrace_snapshot,
8701         .print                  = ftrace_snapshot_print,
8702 };
8703
8704 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8705         .func                   = ftrace_count_snapshot,
8706         .print                  = ftrace_snapshot_print,
8707         .init                   = ftrace_snapshot_init,
8708         .free                   = ftrace_snapshot_free,
8709 };
8710
8711 static int
8712 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8713                                char *glob, char *cmd, char *param, int enable)
8714 {
8715         struct ftrace_probe_ops *ops;
8716         void *count = (void *)-1;
8717         char *number;
8718         int ret;
8719
8720         if (!tr)
8721                 return -ENODEV;
8722
8723         /* hash funcs only work with set_ftrace_filter */
8724         if (!enable)
8725                 return -EINVAL;
8726
8727         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8728
8729         if (glob[0] == '!')
8730                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8731
8732         if (!param)
8733                 goto out_reg;
8734
8735         number = strsep(&param, ":");
8736
8737         if (!strlen(number))
8738                 goto out_reg;
8739
8740         /*
8741          * We use the callback data field (which is a pointer)
8742          * as our counter.
8743          */
8744         ret = kstrtoul(number, 0, (unsigned long *)&count);
8745         if (ret)
8746                 return ret;
8747
8748  out_reg:
8749         ret = tracing_alloc_snapshot_instance(tr);
8750         if (ret < 0)
8751                 goto out;
8752
8753         ret = register_ftrace_function_probe(glob, tr, ops, count);
8754
8755  out:
8756         return ret < 0 ? ret : 0;
8757 }
8758
8759 static struct ftrace_func_command ftrace_snapshot_cmd = {
8760         .name                   = "snapshot",
8761         .func                   = ftrace_trace_snapshot_callback,
8762 };
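
/*
 * Usage sketch for the "snapshot" command above, written to
 * set_ftrace_filter ("schedule" is only an example function):
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter      snapshot on every hit
 *	echo 'schedule:snapshot:5' > set_ftrace_filter    only for the next 5 hits
 *	echo '!schedule:snapshot' > set_ftrace_filter     remove the probe
 */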
8763
8764 static __init int register_snapshot_cmd(void)
8765 {
8766         return register_ftrace_command(&ftrace_snapshot_cmd);
8767 }
8768 #else
8769 static inline __init int register_snapshot_cmd(void) { return 0; }
8770 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8771
8772 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8773 {
8774         if (WARN_ON(!tr->dir))
8775                 return ERR_PTR(-ENODEV);
8776
8777         /* Top directory uses NULL as the parent */
8778         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8779                 return NULL;
8780
8781         /* All sub buffers have a descriptor */
8782         return tr->dir;
8783 }
8784
8785 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8786 {
8787         struct dentry *d_tracer;
8788
8789         if (tr->percpu_dir)
8790                 return tr->percpu_dir;
8791
8792         d_tracer = tracing_get_dentry(tr);
8793         if (IS_ERR(d_tracer))
8794                 return NULL;
8795
8796         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8797
8798         MEM_FAIL(!tr->percpu_dir,
8799                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8800
8801         return tr->percpu_dir;
8802 }
8803
8804 static struct dentry *
8805 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8806                       void *data, long cpu, const struct file_operations *fops)
8807 {
8808         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8809
8810         if (ret) /* See tracing_get_cpu() */
8811                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8812         return ret;
8813 }
8814
8815 static void
8816 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8817 {
8818         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8819         struct dentry *d_cpu;
8820         char cpu_dir[30]; /* 30 characters should be more than enough */
8821
8822         if (!d_percpu)
8823                 return;
8824
8825         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8826         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8827         if (!d_cpu) {
8828                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8829                 return;
8830         }
8831
8832         /* per cpu trace_pipe */
8833         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8834                                 tr, cpu, &tracing_pipe_fops);
8835
8836         /* per cpu trace */
8837         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8838                                 tr, cpu, &tracing_fops);
8839
8840         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8841                                 tr, cpu, &tracing_buffers_fops);
8842
8843         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8844                                 tr, cpu, &tracing_stats_fops);
8845
8846         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8847                                 tr, cpu, &tracing_entries_fops);
8848
8849 #ifdef CONFIG_TRACER_SNAPSHOT
8850         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8851                                 tr, cpu, &snapshot_fops);
8852
8853         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8854                                 tr, cpu, &snapshot_raw_fops);
8855 #endif
8856 }
8857
8858 #ifdef CONFIG_FTRACE_SELFTEST
8859 /* Let selftest have access to static functions in this file */
8860 #include "trace_selftest.c"
8861 #endif
8862
8863 static ssize_t
8864 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8865                         loff_t *ppos)
8866 {
8867         struct trace_option_dentry *topt = filp->private_data;
8868         char *buf;
8869
8870         if (topt->flags->val & topt->opt->bit)
8871                 buf = "1\n";
8872         else
8873                 buf = "0\n";
8874
8875         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8876 }
8877
8878 static ssize_t
8879 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8880                          loff_t *ppos)
8881 {
8882         struct trace_option_dentry *topt = filp->private_data;
8883         unsigned long val;
8884         int ret;
8885
8886         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8887         if (ret)
8888                 return ret;
8889
8890         if (val != 0 && val != 1)
8891                 return -EINVAL;
8892
8893         if (!!(topt->flags->val & topt->opt->bit) != val) {
8894                 mutex_lock(&trace_types_lock);
8895                 ret = __set_tracer_option(topt->tr, topt->flags,
8896                                           topt->opt, !val);
8897                 mutex_unlock(&trace_types_lock);
8898                 if (ret)
8899                         return ret;
8900         }
8901
8902         *ppos += cnt;
8903
8904         return cnt;
8905 }
8906
8907
8908 static const struct file_operations trace_options_fops = {
8909         .open = tracing_open_generic,
8910         .read = trace_options_read,
8911         .write = trace_options_write,
8912         .llseek = generic_file_llseek,
8913 };
8914
8915 /*
8916  * In order to pass in both the trace_array descriptor and the index
8917  * of the flag that the trace option file represents, the trace_array
8918  * has a character array of trace_flags_index[], which holds the index
8919  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8920  * The address of the element for that flag is passed to the flag option
8921  * file read/write callbacks.
8922  *
8923  * In order to extract both the index and the trace_array descriptor,
8924  * get_tr_index() uses the following algorithm.
8925  *
8926  *   idx = *ptr;
8927  *
8928  * As the pointer points at the array element whose value equals its own
8929  * index (remember index[1] == 1), dereferencing it yields the index.
8930  *
8931  * Then, to get the trace_array descriptor, we subtract that index
8932  * from the ptr, which takes us back to the start of the array.
8933  *
8934  *   ptr - idx == &index[0]
8935  *
8936  * Then a simple container_of() from that pointer gets us to the
8937  * trace_array descriptor.
8938  */
8939 static void get_tr_index(void *data, struct trace_array **ptr,
8940                          unsigned int *pindex)
8941 {
8942         *pindex = *(unsigned char *)data;
8943
8944         *ptr = container_of(data - *pindex, struct trace_array,
8945                             trace_flags_index);
8946 }
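
/*
 * Worked example for the scheme above: if data points at
 * &tr->trace_flags_index[3], then *data == 3 and data - 3 points back at
 * &tr->trace_flags_index[0], so container_of(data - 3, struct trace_array,
 * trace_flags_index) recovers tr.
 */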
8947
8948 static ssize_t
8949 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8950                         loff_t *ppos)
8951 {
8952         void *tr_index = filp->private_data;
8953         struct trace_array *tr;
8954         unsigned int index;
8955         char *buf;
8956
8957         get_tr_index(tr_index, &tr, &index);
8958
8959         if (tr->trace_flags & (1 << index))
8960                 buf = "1\n";
8961         else
8962                 buf = "0\n";
8963
8964         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8965 }
8966
8967 static ssize_t
8968 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8969                          loff_t *ppos)
8970 {
8971         void *tr_index = filp->private_data;
8972         struct trace_array *tr;
8973         unsigned int index;
8974         unsigned long val;
8975         int ret;
8976
8977         get_tr_index(tr_index, &tr, &index);
8978
8979         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8980         if (ret)
8981                 return ret;
8982
8983         if (val != 0 && val != 1)
8984                 return -EINVAL;
8985
8986         mutex_lock(&event_mutex);
8987         mutex_lock(&trace_types_lock);
8988         ret = set_tracer_flag(tr, 1 << index, val);
8989         mutex_unlock(&trace_types_lock);
8990         mutex_unlock(&event_mutex);
8991
8992         if (ret < 0)
8993                 return ret;
8994
8995         *ppos += cnt;
8996
8997         return cnt;
8998 }
8999
9000 static const struct file_operations trace_options_core_fops = {
9001         .open = tracing_open_generic,
9002         .read = trace_options_core_read,
9003         .write = trace_options_core_write,
9004         .llseek = generic_file_llseek,
9005 };
9006
9007 struct dentry *trace_create_file(const char *name,
9008                                  umode_t mode,
9009                                  struct dentry *parent,
9010                                  void *data,
9011                                  const struct file_operations *fops)
9012 {
9013         struct dentry *ret;
9014
9015         ret = tracefs_create_file(name, mode, parent, data, fops);
9016         if (!ret)
9017                 pr_warn("Could not create tracefs '%s' entry\n", name);
9018
9019         return ret;
9020 }
9021
9022
9023 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9024 {
9025         struct dentry *d_tracer;
9026
9027         if (tr->options)
9028                 return tr->options;
9029
9030         d_tracer = tracing_get_dentry(tr);
9031         if (IS_ERR(d_tracer))
9032                 return NULL;
9033
9034         tr->options = tracefs_create_dir("options", d_tracer);
9035         if (!tr->options) {
9036                 pr_warn("Could not create tracefs directory 'options'\n");
9037                 return NULL;
9038         }
9039
9040         return tr->options;
9041 }
9042
9043 static void
9044 create_trace_option_file(struct trace_array *tr,
9045                          struct trace_option_dentry *topt,
9046                          struct tracer_flags *flags,
9047                          struct tracer_opt *opt)
9048 {
9049         struct dentry *t_options;
9050
9051         t_options = trace_options_init_dentry(tr);
9052         if (!t_options)
9053                 return;
9054
9055         topt->flags = flags;
9056         topt->opt = opt;
9057         topt->tr = tr;
9058
9059         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9060                                         t_options, topt, &trace_options_fops);
9061
9062 }
9063
9064 static void
9065 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9066 {
9067         struct trace_option_dentry *topts;
9068         struct trace_options *tr_topts;
9069         struct tracer_flags *flags;
9070         struct tracer_opt *opts;
9071         int cnt;
9072         int i;
9073
9074         if (!tracer)
9075                 return;
9076
9077         flags = tracer->flags;
9078
9079         if (!flags || !flags->opts)
9080                 return;
9081
9082         /*
9083          * If this is an instance, only create flags for tracers
9084          * the instance may have.
9085          */
9086         if (!trace_ok_for_array(tracer, tr))
9087                 return;
9088
9089         for (i = 0; i < tr->nr_topts; i++) {
9090                 /* Make sure there are no duplicate flags. */
9091                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9092                         return;
9093         }
9094
9095         opts = flags->opts;
9096
9097         for (cnt = 0; opts[cnt].name; cnt++)
9098                 ;
9099
9100         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9101         if (!topts)
9102                 return;
9103
9104         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9105                             GFP_KERNEL);
9106         if (!tr_topts) {
9107                 kfree(topts);
9108                 return;
9109         }
9110
9111         tr->topts = tr_topts;
9112         tr->topts[tr->nr_topts].tracer = tracer;
9113         tr->topts[tr->nr_topts].topts = topts;
9114         tr->nr_topts++;
9115
9116         for (cnt = 0; opts[cnt].name; cnt++) {
9117                 create_trace_option_file(tr, &topts[cnt], flags,
9118                                          &opts[cnt]);
9119                 MEM_FAIL(topts[cnt].entry == NULL,
9120                           "Failed to create trace option: %s",
9121                           opts[cnt].name);
9122         }
9123 }
9124
9125 static struct dentry *
9126 create_trace_option_core_file(struct trace_array *tr,
9127                               const char *option, long index)
9128 {
9129         struct dentry *t_options;
9130
9131         t_options = trace_options_init_dentry(tr);
9132         if (!t_options)
9133                 return NULL;
9134
9135         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9136                                  (void *)&tr->trace_flags_index[index],
9137                                  &trace_options_core_fops);
9138 }
9139
9140 static void create_trace_options_dir(struct trace_array *tr)
9141 {
9142         struct dentry *t_options;
9143         bool top_level = tr == &global_trace;
9144         int i;
9145
9146         t_options = trace_options_init_dentry(tr);
9147         if (!t_options)
9148                 return;
9149
9150         for (i = 0; trace_options[i]; i++) {
9151                 if (top_level ||
9152                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9153                         create_trace_option_core_file(tr, trace_options[i], i);
9154         }
9155 }
9156
9157 static ssize_t
9158 rb_simple_read(struct file *filp, char __user *ubuf,
9159                size_t cnt, loff_t *ppos)
9160 {
9161         struct trace_array *tr = filp->private_data;
9162         char buf[64];
9163         int r;
9164
9165         r = tracer_tracing_is_on(tr);
9166         r = sprintf(buf, "%d\n", r);
9167
9168         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9169 }
9170
9171 static ssize_t
9172 rb_simple_write(struct file *filp, const char __user *ubuf,
9173                 size_t cnt, loff_t *ppos)
9174 {
9175         struct trace_array *tr = filp->private_data;
9176         struct trace_buffer *buffer = tr->array_buffer.buffer;
9177         unsigned long val;
9178         int ret;
9179
9180         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9181         if (ret)
9182                 return ret;
9183
9184         if (buffer) {
9185                 mutex_lock(&trace_types_lock);
9186                 if (!!val == tracer_tracing_is_on(tr)) {
9187                         val = 0; /* do nothing */
9188                 } else if (val) {
9189                         tracer_tracing_on(tr);
9190                         if (tr->current_trace->start)
9191                                 tr->current_trace->start(tr);
9192                 } else {
9193                         tracer_tracing_off(tr);
9194                         if (tr->current_trace->stop)
9195                                 tr->current_trace->stop(tr);
9196                         /* Wake up any waiters */
9197                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9198                 }
9199                 mutex_unlock(&trace_types_lock);
9200         }
9201
9202         (*ppos)++;
9203
9204         return cnt;
9205 }
9206
9207 static const struct file_operations rb_simple_fops = {
9208         .open           = tracing_open_generic_tr,
9209         .read           = rb_simple_read,
9210         .write          = rb_simple_write,
9211         .release        = tracing_release_generic_tr,
9212         .llseek         = default_llseek,
9213 };
9214
9215 static ssize_t
9216 buffer_percent_read(struct file *filp, char __user *ubuf,
9217                     size_t cnt, loff_t *ppos)
9218 {
9219         struct trace_array *tr = filp->private_data;
9220         char buf[64];
9221         int r;
9222
9223         r = tr->buffer_percent;
9224         r = sprintf(buf, "%d\n", r);
9225
9226         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9227 }
9228
9229 static ssize_t
9230 buffer_percent_write(struct file *filp, const char __user *ubuf,
9231                      size_t cnt, loff_t *ppos)
9232 {
9233         struct trace_array *tr = filp->private_data;
9234         unsigned long val;
9235         int ret;
9236
9237         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9238         if (ret)
9239                 return ret;
9240
9241         if (val > 100)
9242                 return -EINVAL;
9243
9244         tr->buffer_percent = val;
9245
9246         (*ppos)++;
9247
9248         return cnt;
9249 }
9250
9251 static const struct file_operations buffer_percent_fops = {
9252         .open           = tracing_open_generic_tr,
9253         .read           = buffer_percent_read,
9254         .write          = buffer_percent_write,
9255         .release        = tracing_release_generic_tr,
9256         .llseek         = default_llseek,
9257 };
9258
9259 static struct dentry *trace_instance_dir;
9260
9261 static void
9262 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9263
9264 static int
9265 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9266 {
9267         enum ring_buffer_flags rb_flags;
9268
9269         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9270
9271         buf->tr = tr;
9272
9273         buf->buffer = ring_buffer_alloc(size, rb_flags);
9274         if (!buf->buffer)
9275                 return -ENOMEM;
9276
9277         buf->data = alloc_percpu(struct trace_array_cpu);
9278         if (!buf->data) {
9279                 ring_buffer_free(buf->buffer);
9280                 buf->buffer = NULL;
9281                 return -ENOMEM;
9282         }
9283
9284         /* Allocate the first page for all buffers */
9285         set_buffer_entries(&tr->array_buffer,
9286                            ring_buffer_size(tr->array_buffer.buffer, 0));
9287
9288         return 0;
9289 }
9290
9291 static void free_trace_buffer(struct array_buffer *buf)
9292 {
9293         if (buf->buffer) {
9294                 ring_buffer_free(buf->buffer);
9295                 buf->buffer = NULL;
9296                 free_percpu(buf->data);
9297                 buf->data = NULL;
9298         }
9299 }
9300
9301 static int allocate_trace_buffers(struct trace_array *tr, int size)
9302 {
9303         int ret;
9304
9305         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9306         if (ret)
9307                 return ret;
9308
9309 #ifdef CONFIG_TRACER_MAX_TRACE
9310         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9311                                     allocate_snapshot ? size : 1);
9312         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9313                 free_trace_buffer(&tr->array_buffer);
9314                 return -ENOMEM;
9315         }
9316         tr->allocated_snapshot = allocate_snapshot;
9317
9318         /*
9319          * Only the top level trace array gets its snapshot allocated
9320          * from the kernel command line.
9321          */
9322         allocate_snapshot = false;
9323 #endif
9324
9325         return 0;
9326 }
9327
9328 static void free_trace_buffers(struct trace_array *tr)
9329 {
9330         if (!tr)
9331                 return;
9332
9333         free_trace_buffer(&tr->array_buffer);
9334
9335 #ifdef CONFIG_TRACER_MAX_TRACE
9336         free_trace_buffer(&tr->max_buffer);
9337 #endif
9338 }
9339
9340 static void init_trace_flags_index(struct trace_array *tr)
9341 {
9342         int i;
9343
9344         /* Used by the trace options files */
9345         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9346                 tr->trace_flags_index[i] = i;
9347 }
9348
9349 static void __update_tracer_options(struct trace_array *tr)
9350 {
9351         struct tracer *t;
9352
9353         for (t = trace_types; t; t = t->next)
9354                 add_tracer_options(tr, t);
9355 }
9356
9357 static void update_tracer_options(struct trace_array *tr)
9358 {
9359         mutex_lock(&trace_types_lock);
9360         tracer_options_updated = true;
9361         __update_tracer_options(tr);
9362         mutex_unlock(&trace_types_lock);
9363 }
9364
9365 /* Must have trace_types_lock held */
9366 struct trace_array *trace_array_find(const char *instance)
9367 {
9368         struct trace_array *tr, *found = NULL;
9369
9370         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9371                 if (tr->name && strcmp(tr->name, instance) == 0) {
9372                         found = tr;
9373                         break;
9374                 }
9375         }
9376
9377         return found;
9378 }
9379
9380 struct trace_array *trace_array_find_get(const char *instance)
9381 {
9382         struct trace_array *tr;
9383
9384         mutex_lock(&trace_types_lock);
9385         tr = trace_array_find(instance);
9386         if (tr)
9387                 tr->ref++;
9388         mutex_unlock(&trace_types_lock);
9389
9390         return tr;
9391 }
9392
9393 static int trace_array_create_dir(struct trace_array *tr)
9394 {
9395         int ret;
9396
9397         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9398         if (!tr->dir)
9399                 return -EINVAL;
9400
9401         ret = event_trace_add_tracer(tr->dir, tr);
9402         if (ret) {
9403                 tracefs_remove(tr->dir);
9404                 return ret;
9405         }
9406
9407         init_tracer_tracefs(tr, tr->dir);
9408         __update_tracer_options(tr);
9409
9410         return ret;
9411 }
9412
9413 static struct trace_array *trace_array_create(const char *name)
9414 {
9415         struct trace_array *tr;
9416         int ret;
9417
9418         ret = -ENOMEM;
9419         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9420         if (!tr)
9421                 return ERR_PTR(ret);
9422
9423         tr->name = kstrdup(name, GFP_KERNEL);
9424         if (!tr->name)
9425                 goto out_free_tr;
9426
9427         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9428                 goto out_free_tr;
9429
9430         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9431                 goto out_free_tr;
9432
9433         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9434
9435         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9436
9437         raw_spin_lock_init(&tr->start_lock);
9438
9439         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9440
9441         tr->current_trace = &nop_trace;
9442
9443         INIT_LIST_HEAD(&tr->systems);
9444         INIT_LIST_HEAD(&tr->events);
9445         INIT_LIST_HEAD(&tr->hist_vars);
9446         INIT_LIST_HEAD(&tr->err_log);
9447
9448         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9449                 goto out_free_tr;
9450
9451         if (ftrace_allocate_ftrace_ops(tr) < 0)
9452                 goto out_free_tr;
9453
9454         ftrace_init_trace_array(tr);
9455
9456         init_trace_flags_index(tr);
9457
9458         if (trace_instance_dir) {
9459                 ret = trace_array_create_dir(tr);
9460                 if (ret)
9461                         goto out_free_tr;
9462         } else
9463                 __trace_early_add_events(tr);
9464
9465         list_add(&tr->list, &ftrace_trace_arrays);
9466
9467         tr->ref++;
9468
9469         return tr;
9470
9471  out_free_tr:
9472         ftrace_free_ftrace_ops(tr);
9473         free_trace_buffers(tr);
9474         free_cpumask_var(tr->pipe_cpumask);
9475         free_cpumask_var(tr->tracing_cpumask);
9476         kfree(tr->name);
9477         kfree(tr);
9478
9479         return ERR_PTR(ret);
9480 }
9481
9482 static int instance_mkdir(const char *name)
9483 {
9484         struct trace_array *tr;
9485         int ret;
9486
9487         mutex_lock(&event_mutex);
9488         mutex_lock(&trace_types_lock);
9489
9490         ret = -EEXIST;
9491         if (trace_array_find(name))
9492                 goto out_unlock;
9493
9494         tr = trace_array_create(name);
9495
9496         ret = PTR_ERR_OR_ZERO(tr);
9497
9498 out_unlock:
9499         mutex_unlock(&trace_types_lock);
9500         mutex_unlock(&event_mutex);
9501         return ret;
9502 }
9503
9504 /**
9505  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9506  * @name: The name of the trace array to be looked up/created.
9507  *
9508  * Returns a pointer to the trace array with the given name; the array is
9509  * created if it does not already exist. Returns NULL if it cannot be created.
9510  *
9511  * NOTE: This function increments the reference counter associated with the
9512  * trace array returned. This makes sure it cannot be freed while in use.
9513  * Use trace_array_put() once the trace array is no longer needed.
9514  * If the trace_array is to be freed, trace_array_destroy() needs to
9515  * be called after the trace_array_put(), or simply let user space delete
9516  * it from the tracefs instances directory. But until the
9517  * trace_array_put() is called, user space cannot delete it.
9518  *
9519  */
9520 struct trace_array *trace_array_get_by_name(const char *name)
9521 {
9522         struct trace_array *tr;
9523
9524         mutex_lock(&event_mutex);
9525         mutex_lock(&trace_types_lock);
9526
9527         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9528                 if (tr->name && strcmp(tr->name, name) == 0)
9529                         goto out_unlock;
9530         }
9531
9532         tr = trace_array_create(name);
9533
9534         if (IS_ERR(tr))
9535                 tr = NULL;
9536 out_unlock:
9537         if (tr)
9538                 tr->ref++;
9539
9540         mutex_unlock(&trace_types_lock);
9541         mutex_unlock(&event_mutex);
9542         return tr;
9543 }
9544 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
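
/*
 * Illustrative sketch (not part of this file): a kernel module that wants a
 * private tracing instance pairs the lookup above with trace_array_put(),
 * and with trace_array_destroy() if it also wants the instance removed.
 * The instance name "my_driver" is made up for the example.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_driver");
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 */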
9545
9546 static int __remove_instance(struct trace_array *tr)
9547 {
9548         int i;
9549
9550         /* Reference counter for a newly created trace array = 1. */
9551         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9552                 return -EBUSY;
9553
9554         list_del(&tr->list);
9555
9556         /* Disable all the flags that were enabled coming in */
9557         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9558                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9559                         set_tracer_flag(tr, 1 << i, 0);
9560         }
9561
9562         tracing_set_nop(tr);
9563         clear_ftrace_function_probes(tr);
9564         event_trace_del_tracer(tr);
9565         ftrace_clear_pids(tr);
9566         ftrace_destroy_function_files(tr);
9567         tracefs_remove(tr->dir);
9568         free_percpu(tr->last_func_repeats);
9569         free_trace_buffers(tr);
9570         clear_tracing_err_log(tr);
9571
9572         for (i = 0; i < tr->nr_topts; i++) {
9573                 kfree(tr->topts[i].topts);
9574         }
9575         kfree(tr->topts);
9576
9577         free_cpumask_var(tr->pipe_cpumask);
9578         free_cpumask_var(tr->tracing_cpumask);
9579         kfree(tr->name);
9580         kfree(tr);
9581
9582         return 0;
9583 }
9584
9585 int trace_array_destroy(struct trace_array *this_tr)
9586 {
9587         struct trace_array *tr;
9588         int ret;
9589
9590         if (!this_tr)
9591                 return -EINVAL;
9592
9593         mutex_lock(&event_mutex);
9594         mutex_lock(&trace_types_lock);
9595
9596         ret = -ENODEV;
9597
9598         /* Make sure the trace array exists before destroying it. */
9599         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9600                 if (tr == this_tr) {
9601                         ret = __remove_instance(tr);
9602                         break;
9603                 }
9604         }
9605
9606         mutex_unlock(&trace_types_lock);
9607         mutex_unlock(&event_mutex);
9608
9609         return ret;
9610 }
9611 EXPORT_SYMBOL_GPL(trace_array_destroy);
9612
9613 static int instance_rmdir(const char *name)
9614 {
9615         struct trace_array *tr;
9616         int ret;
9617
9618         mutex_lock(&event_mutex);
9619         mutex_lock(&trace_types_lock);
9620
9621         ret = -ENODEV;
9622         tr = trace_array_find(name);
9623         if (tr)
9624                 ret = __remove_instance(tr);
9625
9626         mutex_unlock(&trace_types_lock);
9627         mutex_unlock(&event_mutex);
9628
9629         return ret;
9630 }
9631
9632 static __init void create_trace_instances(struct dentry *d_tracer)
9633 {
9634         struct trace_array *tr;
9635
9636         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9637                                                          instance_mkdir,
9638                                                          instance_rmdir);
9639         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9640                 return;
9641
9642         mutex_lock(&event_mutex);
9643         mutex_lock(&trace_types_lock);
9644
9645         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9646                 if (!tr->name)
9647                         continue;
9648                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9649                              "Failed to create instance directory\n"))
9650                         break;
9651         }
9652
9653         mutex_unlock(&trace_types_lock);
9654         mutex_unlock(&event_mutex);
9655 }
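
/*
 * Illustrative sketch of the user-space side (paths assume tracefs is
 * mounted at /sys/kernel/tracing; the instance name "foo" is made up):
 * creating and removing a directory under instances/ ends up in
 * instance_mkdir() and instance_rmdir() above.
 *
 *	mkdir("/sys/kernel/tracing/instances/foo", 0755);
 *	...
 *	rmdir("/sys/kernel/tracing/instances/foo");
 */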
9656
9657 static void
9658 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9659 {
9660         struct trace_event_file *file;
9661         int cpu;
9662
9663         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9664                         tr, &show_traces_fops);
9665
9666         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9667                         tr, &set_tracer_fops);
9668
9669         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9670                           tr, &tracing_cpumask_fops);
9671
9672         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9673                           tr, &tracing_iter_fops);
9674
9675         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9676                           tr, &tracing_fops);
9677
9678         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9679                           tr, &tracing_pipe_fops);
9680
9681         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9682                           tr, &tracing_entries_fops);
9683
9684         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9685                           tr, &tracing_total_entries_fops);
9686
9687         trace_create_file("free_buffer", 0200, d_tracer,
9688                           tr, &tracing_free_buffer_fops);
9689
9690         trace_create_file("trace_marker", 0220, d_tracer,
9691                           tr, &tracing_mark_fops);
9692
9693         file = __find_event_file(tr, "ftrace", "print");
9694         if (file && file->dir)
9695                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9696                                   file, &event_trigger_fops);
9697         tr->trace_marker_file = file;
9698
9699         trace_create_file("trace_marker_raw", 0220, d_tracer,
9700                           tr, &tracing_mark_raw_fops);
9701
9702         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9703                           &trace_clock_fops);
9704
9705         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9706                           tr, &rb_simple_fops);
9707
9708         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9709                           &trace_time_stamp_mode_fops);
9710
9711         tr->buffer_percent = 50;
9712
9713         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9714                         tr, &buffer_percent_fops);
9715
9716         create_trace_options_dir(tr);
9717
9718 #ifdef CONFIG_TRACER_MAX_TRACE
9719         trace_create_maxlat_file(tr, d_tracer);
9720 #endif
9721
9722         if (ftrace_create_function_files(tr, d_tracer))
9723                 MEM_FAIL(1, "Could not allocate function filter files");
9724
9725 #ifdef CONFIG_TRACER_SNAPSHOT
9726         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9727                           tr, &snapshot_fops);
9728 #endif
9729
9730         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9731                           tr, &tracing_err_log_fops);
9732
9733         for_each_tracing_cpu(cpu)
9734                 tracing_init_tracefs_percpu(tr, cpu);
9735
9736         ftrace_init_tracefs(tr, d_tracer);
9737 }
9738
9739 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9740 {
9741         struct vfsmount *mnt;
9742         struct file_system_type *type;
9743
9744         /*
9745          * To maintain backward compatibility for tools that mount
9746          * debugfs to get to the tracing facility, tracefs is automatically
9747          * mounted to the debugfs/tracing directory.
9748          */
9749         type = get_fs_type("tracefs");
9750         if (!type)
9751                 return NULL;
9752         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9753         put_filesystem(type);
9754         if (IS_ERR(mnt))
9755                 return NULL;
9756         mntget(mnt);
9757
9758         return mnt;
9759 }
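
/*
 * Illustrative sketch (assuming debugfs is mounted at /sys/kernel/debug):
 * because of the automount above, a legacy open of the debugfs path is
 * transparently redirected into tracefs.
 *
 *	fd = open("/sys/kernel/debug/tracing/trace", O_RDONLY);
 */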
9760
9761 /**
9762  * tracing_init_dentry - initialize top level trace array
9763  *
9764  * This is called when creating files or directories in the tracing
9765  * directory. It is called via fs_initcall() by any of the boot up code
9766  * directory. It is called via fs_initcall() by any of the boot up code;
9767  * it returns 0 on success and a negative errno on failure.
9768 int tracing_init_dentry(void)
9769 {
9770         struct trace_array *tr = &global_trace;
9771
9772         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9773                 pr_warn("Tracing disabled due to lockdown\n");
9774                 return -EPERM;
9775         }
9776
9777         /* The top level trace array uses NULL as parent */
9778         if (tr->dir)
9779                 return 0;
9780
9781         if (WARN_ON(!tracefs_initialized()))
9782                 return -ENODEV;
9783
9784         /*
9785          * As there may still be users that expect the tracing
9786          * files to exist in debugfs/tracing, we must automount
9787          * the tracefs file system there, so older tools still
9788          * work with the newer kernel.
9789          */
9790         tr->dir = debugfs_create_automount("tracing", NULL,
9791                                            trace_automount, NULL);
9792
9793         return 0;
9794 }
9795
9796 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9797 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9798
9799 static struct workqueue_struct *eval_map_wq __initdata;
9800 static struct work_struct eval_map_work __initdata;
9801 static struct work_struct tracerfs_init_work __initdata;
9802
9803 static void __init eval_map_work_func(struct work_struct *work)
9804 {
9805         int len;
9806
9807         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9808         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9809 }
9810
9811 static int __init trace_eval_init(void)
9812 {
9813         INIT_WORK(&eval_map_work, eval_map_work_func);
9814
9815         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9816         if (!eval_map_wq) {
9817                 pr_err("Unable to allocate eval_map_wq\n");
9818                 /* Fall back to doing the work synchronously */
9819                 eval_map_work_func(&eval_map_work);
9820                 return -ENOMEM;
9821         }
9822
9823         queue_work(eval_map_wq, &eval_map_work);
9824         return 0;
9825 }
9826
9827 subsys_initcall(trace_eval_init);
9828
9829 static int __init trace_eval_sync(void)
9830 {
9831         /* Make sure the eval map updates are finished */
9832         if (eval_map_wq)
9833                 destroy_workqueue(eval_map_wq);
9834         return 0;
9835 }
9836
9837 late_initcall_sync(trace_eval_sync);
9838
9840 #ifdef CONFIG_MODULES
9841 static void trace_module_add_evals(struct module *mod)
9842 {
9843         if (!mod->num_trace_evals)
9844                 return;
9845
9846         /*
9847          * Modules with bad taint do not have events created; do
9848          * not bother with their enums either.
9849          */
9850         if (trace_module_has_bad_taint(mod))
9851                 return;
9852
9853         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9854 }
9855
9856 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9857 static void trace_module_remove_evals(struct module *mod)
9858 {
9859         union trace_eval_map_item *map;
9860         union trace_eval_map_item **last = &trace_eval_maps;
9861
9862         if (!mod->num_trace_evals)
9863                 return;
9864
9865         mutex_lock(&trace_eval_mutex);
9866
9867         map = trace_eval_maps;
9868
9869         while (map) {
9870                 if (map->head.mod == mod)
9871                         break;
9872                 map = trace_eval_jmp_to_tail(map);
9873                 last = &map->tail.next;
9874                 map = map->tail.next;
9875         }
9876         if (!map)
9877                 goto out;
9878
9879         *last = trace_eval_jmp_to_tail(map)->tail.next;
9880         kfree(map);
9881  out:
9882         mutex_unlock(&trace_eval_mutex);
9883 }
9884 #else
9885 static inline void trace_module_remove_evals(struct module *mod) { }
9886 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9887
9888 static int trace_module_notify(struct notifier_block *self,
9889                                unsigned long val, void *data)
9890 {
9891         struct module *mod = data;
9892
9893         switch (val) {
9894         case MODULE_STATE_COMING:
9895                 trace_module_add_evals(mod);
9896                 break;
9897         case MODULE_STATE_GOING:
9898                 trace_module_remove_evals(mod);
9899                 break;
9900         }
9901
9902         return NOTIFY_OK;
9903 }
9904
9905 static struct notifier_block trace_module_nb = {
9906         .notifier_call = trace_module_notify,
9907         .priority = 0,
9908 };
9909 #endif /* CONFIG_MODULES */
9910
9911 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9912 {
9913
9914         event_trace_init();
9915
9916         init_tracer_tracefs(&global_trace, NULL);
9917         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9918
9919         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9920                         &global_trace, &tracing_thresh_fops);
9921
9922         trace_create_file("README", TRACE_MODE_READ, NULL,
9923                         NULL, &tracing_readme_fops);
9924
9925         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9926                         NULL, &tracing_saved_cmdlines_fops);
9927
9928         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9929                           NULL, &tracing_saved_cmdlines_size_fops);
9930
9931         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9932                         NULL, &tracing_saved_tgids_fops);
9933
9934         trace_create_eval_file(NULL);
9935
9936 #ifdef CONFIG_MODULES
9937         register_module_notifier(&trace_module_nb);
9938 #endif
9939
9940 #ifdef CONFIG_DYNAMIC_FTRACE
9941         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9942                         NULL, &tracing_dyn_info_fops);
9943 #endif
9944
9945         create_trace_instances(NULL);
9946
9947         update_tracer_options(&global_trace);
9948 }
9949
9950 static __init int tracer_init_tracefs(void)
9951 {
9952         int ret;
9953
9954         trace_access_lock_init();
9955
9956         ret = tracing_init_dentry();
9957         if (ret)
9958                 return 0;
9959
9960         if (eval_map_wq) {
9961                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9962                 queue_work(eval_map_wq, &tracerfs_init_work);
9963         } else {
9964                 tracer_init_tracefs_work_func(NULL);
9965         }
9966
9967         rv_init_interface();
9968
9969         return 0;
9970 }
9971
9972 fs_initcall(tracer_init_tracefs);
9973
9974 static int trace_panic_handler(struct notifier_block *this,
9975                                unsigned long event, void *unused)
9976 {
9977         if (ftrace_dump_on_oops)
9978                 ftrace_dump(ftrace_dump_on_oops);
9979         return NOTIFY_OK;
9980 }
9981
9982 static struct notifier_block trace_panic_notifier = {
9983         .notifier_call  = trace_panic_handler,
9984         .next           = NULL,
9985         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9986 };
9987
9988 static int trace_die_handler(struct notifier_block *self,
9989                              unsigned long val,
9990                              void *data)
9991 {
9992         switch (val) {
9993         case DIE_OOPS:
9994                 if (ftrace_dump_on_oops)
9995                         ftrace_dump(ftrace_dump_on_oops);
9996                 break;
9997         default:
9998                 break;
9999         }
10000         return NOTIFY_OK;
10001 }
10002
10003 static struct notifier_block trace_die_notifier = {
10004         .notifier_call = trace_die_handler,
10005         .priority = 200
10006 };
10007
10008 /*
10009  * printk is set to a max of 1024; we really don't need it that big.
10010  * Nothing should be printing 1000 characters anyway.
10011  */
10012 #define TRACE_MAX_PRINT         1000
10013
10014 /*
10015  * Define here KERN_TRACE so that we have one place to modify
10016  * it if we decide to change what log level the ftrace dump
10017  * should be at.
10018  */
10019 #define KERN_TRACE              KERN_EMERG
10020
10021 void
10022 trace_printk_seq(struct trace_seq *s)
10023 {
10024         /* Probably should print a warning here. */
10025         if (s->seq.len >= TRACE_MAX_PRINT)
10026                 s->seq.len = TRACE_MAX_PRINT;
10027
10028         /*
10029          * More paranoid code. Although the buffer size is set to
10030          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10031          * an extra layer of protection.
10032          */
10033         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10034                 s->seq.len = s->seq.size - 1;
10035
10036         /* Should be NUL terminated, but we are paranoid. */
10037         s->buffer[s->seq.len] = 0;
10038
10039         printk(KERN_TRACE "%s", s->buffer);
10040
10041         trace_seq_init(s);
10042 }
10043
10044 void trace_init_global_iter(struct trace_iterator *iter)
10045 {
10046         iter->tr = &global_trace;
10047         iter->trace = iter->tr->current_trace;
10048         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10049         iter->array_buffer = &global_trace.array_buffer;
10050
10051         if (iter->trace && iter->trace->open)
10052                 iter->trace->open(iter);
10053
10054         /* Annotate start of buffers if we had overruns */
10055         if (ring_buffer_overruns(iter->array_buffer->buffer))
10056                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10057
10058         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10059         if (trace_clocks[iter->tr->clock_id].in_ns)
10060                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10061
10062         /* Cannot use kmalloc for iter.temp and iter.fmt */
10063         iter->temp = static_temp_buf;
10064         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10065         iter->fmt = static_fmt_buf;
10066         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10067 }
10068
10069 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10070 {
10071         /* use static because iter can be a bit big for the stack */
10072         static struct trace_iterator iter;
10073         static atomic_t dump_running;
10074         struct trace_array *tr = &global_trace;
10075         unsigned int old_userobj;
10076         unsigned long flags;
10077         int cnt = 0, cpu;
10078
10079         /* Only allow one dump user at a time. */
10080         if (atomic_inc_return(&dump_running) != 1) {
10081                 atomic_dec(&dump_running);
10082                 return;
10083         }
10084
10085         /*
10086          * Always turn off tracing when we dump.
10087          * We don't need to show trace output of what happens
10088          * between multiple crashes.
10089          *
10090          * If the user does a sysrq-z, then they can re-enable
10091          * tracing with echo 1 > tracing_on.
10092          */
10093         tracing_off();
10094
10095         local_irq_save(flags);
10096
10097         /* Simulate the iterator */
10098         trace_init_global_iter(&iter);
10099
10100         for_each_tracing_cpu(cpu) {
10101                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10102         }
10103
10104         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10105
10106         /* don't look at user memory in panic mode */
10107         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10108
10109         switch (oops_dump_mode) {
10110         case DUMP_ALL:
10111                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10112                 break;
10113         case DUMP_ORIG:
10114                 iter.cpu_file = raw_smp_processor_id();
10115                 break;
10116         case DUMP_NONE:
10117                 goto out_enable;
10118         default:
10119                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10120                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10121         }
10122
10123         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10124
10125         /* Did function tracer already get disabled? */
10126         if (ftrace_is_dead()) {
10127                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10128                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10129         }
10130
10131         /*
10132          * We need to stop all tracing on all CPUs to read
10133          * the next buffer. This is a bit expensive, but is
10134          * not done often. We print everything we can read,
10135          * and then release the locks again.
10136          */
10137
10138         while (!trace_empty(&iter)) {
10139
10140                 if (!cnt)
10141                         printk(KERN_TRACE "---------------------------------\n");
10142
10143                 cnt++;
10144
10145                 trace_iterator_reset(&iter);
10146                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10147
10148                 if (trace_find_next_entry_inc(&iter) != NULL) {
10149                         int ret;
10150
10151                         ret = print_trace_line(&iter);
10152                         if (ret != TRACE_TYPE_NO_CONSUME)
10153                                 trace_consume(&iter);
10154                 }
10155                 touch_nmi_watchdog();
10156
10157                 trace_printk_seq(&iter.seq);
10158         }
10159
10160         if (!cnt)
10161                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10162         else
10163                 printk(KERN_TRACE "---------------------------------\n");
10164
10165  out_enable:
10166         tr->trace_flags |= old_userobj;
10167
10168         for_each_tracing_cpu(cpu) {
10169                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10170         }
10171         atomic_dec(&dump_running);
10172         local_irq_restore(flags);
10173 }
10174 EXPORT_SYMBOL_GPL(ftrace_dump);
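
/*
 * Illustrative sketch (not part of this file): a subsystem chasing a rare
 * failure could dump the ftrace ring buffer to the console before bailing
 * out; "broken_state" is a made-up condition. DUMP_ALL and DUMP_ORIG are
 * the modes handled above.
 *
 *	if (WARN_ON(broken_state))
 *		ftrace_dump(DUMP_ALL);
 */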
10175
10176 #define WRITE_BUFSIZE  4096
10177
10178 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10179                                 size_t count, loff_t *ppos,
10180                                 int (*createfn)(const char *))
10181 {
10182         char *kbuf, *buf, *tmp;
10183         int ret = 0;
10184         size_t done = 0;
10185         size_t size;
10186
10187         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10188         if (!kbuf)
10189                 return -ENOMEM;
10190
10191         while (done < count) {
10192                 size = count - done;
10193
10194                 if (size >= WRITE_BUFSIZE)
10195                         size = WRITE_BUFSIZE - 1;
10196
10197                 if (copy_from_user(kbuf, buffer + done, size)) {
10198                         ret = -EFAULT;
10199                         goto out;
10200                 }
10201                 kbuf[size] = '\0';
10202                 buf = kbuf;
10203                 do {
10204                         tmp = strchr(buf, '\n');
10205                         if (tmp) {
10206                                 *tmp = '\0';
10207                                 size = tmp - buf + 1;
10208                         } else {
10209                                 size = strlen(buf);
10210                                 if (done + size < count) {
10211                                         if (buf != kbuf)
10212                                                 break;
10213                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10214                                         pr_warn("Line length is too long: Should be less than %d\n",
10215                                                 WRITE_BUFSIZE - 2);
10216                                         ret = -EINVAL;
10217                                         goto out;
10218                                 }
10219                         }
10220                         done += size;
10221
10222                         /* Remove comments */
10223                         tmp = strchr(buf, '#');
10224
10225                         if (tmp)
10226                                 *tmp = '\0';
10227
10228                         ret = createfn(buf);
10229                         if (ret)
10230                                 goto out;
10231                         buf += size;
10232
10233                 } while (done < count);
10234         }
10235         ret = done;
10236
10237 out:
10238         kfree(kbuf);
10239
10240         return ret;
10241 }
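
/*
 * Sketch of a typical caller (foo_write() and create_foo_command() are
 * illustrative names): a tracefs file's write handler feeds each
 * newline-separated, '#'-comment-stripped line to its own parser.
 *
 *	static ssize_t foo_write(struct file *file, const char __user *ubuf,
 *				 size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, count, ppos,
 *					       create_foo_command);
 *	}
 */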
10242
10243 __init static int tracer_alloc_buffers(void)
10244 {
10245         int ring_buf_size;
10246         int ret = -ENOMEM;
10247
10249         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10250                 pr_warn("Tracing disabled due to lockdown\n");
10251                 return -EPERM;
10252         }
10253
10254         /*
10255          * Make sure we don't accidentally add more trace options
10256          * than we have bits for.
10257          */
10258         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10259
10260         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10261                 goto out;
10262
10263         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10264                 goto out_free_buffer_mask;
10265
10266         /* Only allocate trace_printk buffers if a trace_printk exists */
10267         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10268                 /* Must be called before global_trace.buffer is allocated */
10269                 trace_printk_init_buffers();
10270
10271         /* To save memory, keep the ring buffer size to its minimum */
10272         if (ring_buffer_expanded)
10273                 ring_buf_size = trace_buf_size;
10274         else
10275                 ring_buf_size = 1;
10276
10277         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10278         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10279
10280         raw_spin_lock_init(&global_trace.start_lock);
10281
10282         /*
10283          * The prepare callback allocates some memory for the ring buffer. We
10284          * don't free the buffer if the CPU goes down. If we were to free
10285          * the buffer, then the user would lose any trace that was in the
10286          * buffer. The memory will be removed once the "instance" is removed.
10287          */
10288         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10289                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10290                                       NULL);
10291         if (ret < 0)
10292                 goto out_free_cpumask;
10293         /* Used for event triggers */
10294         ret = -ENOMEM;
10295         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10296         if (!temp_buffer)
10297                 goto out_rm_hp_state;
10298
10299         if (trace_create_savedcmd() < 0)
10300                 goto out_free_temp_buffer;
10301
10302         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10303                 goto out_free_savedcmd;
10304
10305         /* TODO: make the number of buffers hot pluggable with CPUs */
10306         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10307                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10308                 goto out_free_pipe_cpumask;
10309         }
10310         if (global_trace.buffer_disabled)
10311                 tracing_off();
10312
10313         if (trace_boot_clock) {
10314                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10315                 if (ret < 0)
10316                         pr_warn("Trace clock %s not defined, going back to default\n",
10317                                 trace_boot_clock);
10318         }
10319
10320         /*
10321          * register_tracer() might reference current_trace, so it
10322          * needs to be set before we register anything. This is
10323          * just a bootstrap of current_trace anyway.
10324          */
10325         global_trace.current_trace = &nop_trace;
10326
10327         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10328
10329         ftrace_init_global_array_ops(&global_trace);
10330
10331         init_trace_flags_index(&global_trace);
10332
10333         register_tracer(&nop_trace);
10334
10335         /* Function tracing may start here (via kernel command line) */
10336         init_function_trace();
10337
10338         /* All seems OK, enable tracing */
10339         tracing_disabled = 0;
10340
10341         atomic_notifier_chain_register(&panic_notifier_list,
10342                                        &trace_panic_notifier);
10343
10344         register_die_notifier(&trace_die_notifier);
10345
10346         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10347
10348         INIT_LIST_HEAD(&global_trace.systems);
10349         INIT_LIST_HEAD(&global_trace.events);
10350         INIT_LIST_HEAD(&global_trace.hist_vars);
10351         INIT_LIST_HEAD(&global_trace.err_log);
10352         list_add(&global_trace.list, &ftrace_trace_arrays);
10353
10354         apply_trace_boot_options();
10355
10356         register_snapshot_cmd();
10357
10358         test_can_verify();
10359
10360         return 0;
10361
10362 out_free_pipe_cpumask:
10363         free_cpumask_var(global_trace.pipe_cpumask);
10364 out_free_savedcmd:
10365         free_saved_cmdlines_buffer(savedcmd);
10366 out_free_temp_buffer:
10367         ring_buffer_free(temp_buffer);
10368 out_rm_hp_state:
10369         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10370 out_free_cpumask:
10371         free_cpumask_var(global_trace.tracing_cpumask);
10372 out_free_buffer_mask:
10373         free_cpumask_var(tracing_buffer_mask);
10374 out:
10375         return ret;
10376 }
10377
10378 void __init ftrace_boot_snapshot(void)
10379 {
10380         if (snapshot_at_boot) {
10381                 tracing_snapshot();
10382                 internal_trace_puts("** Boot snapshot taken **\n");
10383         }
10384 }
10385
10386 void __init early_trace_init(void)
10387 {
10388         if (tracepoint_printk) {
10389                 tracepoint_print_iter =
10390                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10391                 if (MEM_FAIL(!tracepoint_print_iter,
10392                              "Failed to allocate trace iterator\n"))
10393                         tracepoint_printk = 0;
10394                 else
10395                         static_key_enable(&tracepoint_printk_key.key);
10396         }
10397         tracer_alloc_buffers();
10398
10399         init_events();
10400 }
10401
10402 void __init trace_init(void)
10403 {
10404         trace_event_init();
10405 }
10406
10407 __init static void clear_boot_tracer(void)
10408 {
10409         /*
10410          * The default bootup tracer name points into an init section.
10411          * This function is called from a late initcall. If the boot
10412          * tracer was never registered, clear the pointer out to prevent
10413          * a later registration from accessing the buffer that is
10414          * about to be freed.
10415          */
10416         if (!default_bootup_tracer)
10417                 return;
10418
10419         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10420                default_bootup_tracer);
10421         default_bootup_tracer = NULL;
10422 }
10423
10424 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10425 __init static void tracing_set_default_clock(void)
10426 {
10427         /* sched_clock_stable() is determined in late_initcall */
10428         if (!trace_boot_clock && !sched_clock_stable()) {
10429                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10430                         pr_warn("Can not set tracing clock due to lockdown\n");
10431                         return;
10432                 }
10433
10434                 printk(KERN_WARNING
10435                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10436                        "If you want to keep using the local clock, then add:\n"
10437                        "  \"trace_clock=local\"\n"
10438                        "on the kernel command line\n");
10439                 tracing_set_clock(&global_trace, "global");
10440         }
10441 }
10442 #else
10443 static inline void tracing_set_default_clock(void) { }
10444 #endif
10445
10446 __init static int late_trace_init(void)
10447 {
10448         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10449                 static_key_disable(&tracepoint_printk_key.key);
10450                 tracepoint_printk = 0;
10451         }
10452
10453         tracing_set_default_clock();
10454         clear_boot_tracer();
10455         return 0;
10456 }
10457
10458 late_initcall_sync(late_trace_init);