ring-buffer: Fix 32-bit rb_time_read() race with rb_time_cmpxchg()
kernel/trace/trace.c (platform/kernel/linux-starfive.git)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 /*
58  * On boot up, the ring buffer is set to the minimum size, so that
59  * we do not waste memory on systems that are not using tracing.
60  */
61 bool ring_buffer_expanded;
62
63 #ifdef CONFIG_FTRACE_STARTUP_TEST
64 /*
65  * We need to change this state when a selftest is running.
66  * A selftest peeks into the ring buffer to count the entries
67  * inserted during the selftest, although concurrent insertions
68  * into the ring buffer, such as trace_printk(), could occur at
69  * the same time and give false positive or negative results.
70  */
71 static bool __read_mostly tracing_selftest_running;
72
73 /*
74  * If boot-time tracing including tracers/events via kernel cmdline
75  * is running, we do not want to run SELFTEST.
76  */
77 bool __read_mostly tracing_selftest_disabled;
78
79 void __init disable_tracing_selftest(const char *reason)
80 {
81         if (!tracing_selftest_disabled) {
82                 tracing_selftest_disabled = true;
83                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
84         }
85 }
86 #else
87 #define tracing_selftest_running        0
88 #define tracing_selftest_disabled       0
89 #endif
90
91 /* Pipe tracepoints to printk */
92 static struct trace_iterator *tracepoint_print_iter;
93 int tracepoint_printk;
94 static bool tracepoint_printk_stop_on_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96
97 /* For tracers that don't implement custom flags */
98 static struct tracer_opt dummy_tracer_opt[] = {
99         { }
100 };
101
102 static int
103 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
104 {
105         return 0;
106 }
107
108 /*
109  * To prevent the comm cache from being overwritten when no
110  * tracing is active, only save the comm when a trace event
111  * occurred.
112  */
113 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
114
115 /*
116  * Kill all tracing for good (never come back).
117  * It is initialized to 1 but will be set to zero if the initialization
118  * of the tracer is successful, and that is the only place that sets
119  * it back to zero.
120  */
121 static int tracing_disabled = 1;
122
123 cpumask_var_t __read_mostly     tracing_buffer_mask;
124
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
131  * capturing traces that lead to crashes and outputting them to a
132  * serial console.
133  *
134  * It is off by default, but you can enable it either by specifying
135  * "ftrace_dump_on_oops" on the kernel command line, or by setting
136  * /proc/sys/kernel/ftrace_dump_on_oops
137  * Set to 1 to dump the buffers of all CPUs
138  * Set to 2 to dump only the buffer of the CPU that triggered the oops
139  */
140
141 enum ftrace_dump_mode ftrace_dump_on_oops;
142
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
145
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
148 struct trace_eval_map_head {
149         struct module                   *mod;
150         unsigned long                   length;
151 };
152
153 union trace_eval_map_item;
154
155 struct trace_eval_map_tail {
156         /*
157          * "end" is first and points to NULL as it must be different
158          * than "mod" or "eval_string"
159          */
160         union trace_eval_map_item       *next;
161         const char                      *end;   /* points to NULL */
162 };
163
164 static DEFINE_MUTEX(trace_eval_mutex);
165
166 /*
167  * The trace_eval_maps are saved in an array with two extra elements,
168  * one at the beginning, and one at the end. The beginning item contains
169  * the count of the saved maps (head.length), and the module they
170  * belong to if not built in (head.mod). The ending item contains a
171  * pointer to the next array of saved eval_map items.
172  */
173 union trace_eval_map_item {
174         struct trace_eval_map           map;
175         struct trace_eval_map_head      head;
176         struct trace_eval_map_tail      tail;
177 };
178
179 static union trace_eval_map_item *trace_eval_maps;
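
/*
 * A minimal sketch of walking the [head][map 0 .. length-1][tail] layout
 * described above: skip the head element, visit head.length map elements,
 * then follow tail.next to the next saved array. The helper name is
 * hypothetical and the function is not used anywhere; it only illustrates
 * the layout.
 */
static void __maybe_unused trace_eval_maps_walk_example(void)
{
	union trace_eval_map_item *ptr = trace_eval_maps;

	while (ptr) {
		unsigned long len = ptr->head.length;

		/* step past the head element to the first map */
		ptr++;
		while (len--) {
			/* ptr->map.eval_string maps to ptr->map.eval_value */
			ptr++;
		}
		/* ptr now points at the tail element; chain to the next array */
		ptr = ptr->tail.next;
	}
}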
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
181
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184                                    struct trace_buffer *buffer,
185                                    unsigned int trace_ctx);
186
187 #define MAX_TRACER_SIZE         100
188 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
189 static char *default_bootup_tracer;
190
191 static bool allocate_snapshot;
192 static bool snapshot_at_boot;
193
194 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
195 static int boot_instance_index;
196
197 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
198 static int boot_snapshot_index;
199
200 static int __init set_cmdline_ftrace(char *str)
201 {
202         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
203         default_bootup_tracer = bootup_tracer_buf;
204         /* We are using ftrace early, expand it */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("ftrace=", set_cmdline_ftrace);
209
210 static int __init set_ftrace_dump_on_oops(char *str)
211 {
212         if (*str++ != '=' || !*str || !strcmp("1", str)) {
213                 ftrace_dump_on_oops = DUMP_ALL;
214                 return 1;
215         }
216
217         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
218                 ftrace_dump_on_oops = DUMP_ORIG;
219                 return 1;
220         }
221
222         return 0;
223 }
224 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
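
/*
 * A quick reference derived from the handler above; any other value is
 * rejected:
 *
 *	ftrace_dump_on_oops		-> DUMP_ALL
 *	ftrace_dump_on_oops=1		-> DUMP_ALL
 *	ftrace_dump_on_oops=orig_cpu	-> DUMP_ORIG
 *	ftrace_dump_on_oops=2		-> DUMP_ORIG
 */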
225
226 static int __init stop_trace_on_warning(char *str)
227 {
228         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
229                 __disable_trace_on_warning = 1;
230         return 1;
231 }
232 __setup("traceoff_on_warning", stop_trace_on_warning);
233
234 static int __init boot_alloc_snapshot(char *str)
235 {
236         char *slot = boot_snapshot_info + boot_snapshot_index;
237         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
238         int ret;
239
240         if (str[0] == '=') {
241                 str++;
242                 if (strlen(str) >= left)
243                         return -1;
244
245                 ret = snprintf(slot, left, "%s\t", str);
246                 boot_snapshot_index += ret;
247         } else {
248                 allocate_snapshot = true;
249                 /* We also need the main ring buffer expanded */
250                 ring_buffer_expanded = true;
251         }
252         return 1;
253 }
254 __setup("alloc_snapshot", boot_alloc_snapshot);
255
256
257 static int __init boot_snapshot(char *str)
258 {
259         snapshot_at_boot = true;
260         boot_alloc_snapshot(str);
261         return 1;
262 }
263 __setup("ftrace_boot_snapshot", boot_snapshot);
264
265
266 static int __init boot_instance(char *str)
267 {
268         char *slot = boot_instance_info + boot_instance_index;
269         int left = sizeof(boot_instance_info) - boot_instance_index;
270         int ret;
271
272         if (strlen(str) >= left)
273                 return -1;
274
275         ret = snprintf(slot, left, "%s\t", str);
276         boot_instance_index += ret;
277
278         return 1;
279 }
280 __setup("trace_instance=", boot_instance);
281
282
283 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
284
285 static int __init set_trace_boot_options(char *str)
286 {
287         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
288         return 1;
289 }
290 __setup("trace_options=", set_trace_boot_options);
291
292 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
293 static char *trace_boot_clock __initdata;
294
295 static int __init set_trace_boot_clock(char *str)
296 {
297         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
298         trace_boot_clock = trace_boot_clock_buf;
299         return 1;
300 }
301 __setup("trace_clock=", set_trace_boot_clock);
302
303 static int __init set_tracepoint_printk(char *str)
304 {
305         /* Ignore the "tp_printk_stop_on_boot" param */
306         if (*str == '_')
307                 return 0;
308
309         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
310                 tracepoint_printk = 1;
311         return 1;
312 }
313 __setup("tp_printk", set_tracepoint_printk);
314
315 static int __init set_tracepoint_printk_stop(char *str)
316 {
317         tracepoint_printk_stop_on_boot = true;
318         return 1;
319 }
320 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
321
322 unsigned long long ns2usecs(u64 nsec)
323 {
324         nsec += 500;
325         do_div(nsec, 1000);
326         return nsec;
327 }
328
329 static void
330 trace_process_export(struct trace_export *export,
331                struct ring_buffer_event *event, int flag)
332 {
333         struct trace_entry *entry;
334         unsigned int size = 0;
335
336         if (export->flags & flag) {
337                 entry = ring_buffer_event_data(event);
338                 size = ring_buffer_event_length(event);
339                 export->write(export, entry, size);
340         }
341 }
342
343 static DEFINE_MUTEX(ftrace_export_lock);
344
345 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
346
347 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
348 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
350
351 static inline void ftrace_exports_enable(struct trace_export *export)
352 {
353         if (export->flags & TRACE_EXPORT_FUNCTION)
354                 static_branch_inc(&trace_function_exports_enabled);
355
356         if (export->flags & TRACE_EXPORT_EVENT)
357                 static_branch_inc(&trace_event_exports_enabled);
358
359         if (export->flags & TRACE_EXPORT_MARKER)
360                 static_branch_inc(&trace_marker_exports_enabled);
361 }
362
363 static inline void ftrace_exports_disable(struct trace_export *export)
364 {
365         if (export->flags & TRACE_EXPORT_FUNCTION)
366                 static_branch_dec(&trace_function_exports_enabled);
367
368         if (export->flags & TRACE_EXPORT_EVENT)
369                 static_branch_dec(&trace_event_exports_enabled);
370
371         if (export->flags & TRACE_EXPORT_MARKER)
372                 static_branch_dec(&trace_marker_exports_enabled);
373 }
374
375 static void ftrace_exports(struct ring_buffer_event *event, int flag)
376 {
377         struct trace_export *export;
378
379         preempt_disable_notrace();
380
381         export = rcu_dereference_raw_check(ftrace_exports_list);
382         while (export) {
383                 trace_process_export(export, event, flag);
384                 export = rcu_dereference_raw_check(export->next);
385         }
386
387         preempt_enable_notrace();
388 }
389
390 static inline void
391 add_trace_export(struct trace_export **list, struct trace_export *export)
392 {
393         rcu_assign_pointer(export->next, *list);
394         /*
395          * We are inserting export into the list, but another
396          * CPU might be walking that list. We need to make sure
397          * the export->next pointer is valid before another CPU sees
398          * the export pointer inserted into the list.
399          */
400         rcu_assign_pointer(*list, export);
401 }
402
403 static inline int
404 rm_trace_export(struct trace_export **list, struct trace_export *export)
405 {
406         struct trace_export **p;
407
408         for (p = list; *p != NULL; p = &(*p)->next)
409                 if (*p == export)
410                         break;
411
412         if (*p != export)
413                 return -1;
414
415         rcu_assign_pointer(*p, (*p)->next);
416
417         return 0;
418 }
419
420 static inline void
421 add_ftrace_export(struct trace_export **list, struct trace_export *export)
422 {
423         ftrace_exports_enable(export);
424
425         add_trace_export(list, export);
426 }
427
428 static inline int
429 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
430 {
431         int ret;
432
433         ret = rm_trace_export(list, export);
434         ftrace_exports_disable(export);
435
436         return ret;
437 }
438
439 int register_ftrace_export(struct trace_export *export)
440 {
441         if (WARN_ON_ONCE(!export->write))
442                 return -1;
443
444         mutex_lock(&ftrace_export_lock);
445
446         add_ftrace_export(&ftrace_exports_list, export);
447
448         mutex_unlock(&ftrace_export_lock);
449
450         return 0;
451 }
452 EXPORT_SYMBOL_GPL(register_ftrace_export);
453
454 int unregister_ftrace_export(struct trace_export *export)
455 {
456         int ret;
457
458         mutex_lock(&ftrace_export_lock);
459
460         ret = rm_ftrace_export(&ftrace_exports_list, export);
461
462         mutex_unlock(&ftrace_export_lock);
463
464         return ret;
465 }
466 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
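
/*
 * A minimal usage sketch for the export API above. The names
 * example_export_write() and example_export are illustrative only and
 * are not referenced elsewhere; a real user would forward the entry
 * bytes to its own transport.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int len)
{
	/* hand the 'len' bytes of this trace entry to some backend */
}

static struct trace_export example_export __maybe_unused = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_FUNCTION,
};

/*
 * A caller would then pair:
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */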
467
468 /* trace_flags holds trace_options default values */
469 #define TRACE_DEFAULT_FLAGS                                             \
470         (FUNCTION_DEFAULT_FLAGS |                                       \
471          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
472          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
473          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
474          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
475          TRACE_ITER_HASH_PTR)
476
477 /* trace_options that are only supported by global_trace */
478 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
479                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
480
481 /* trace_flags that are default zero for instances */
482 #define ZEROED_TRACE_FLAGS \
483         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
484
485 /*
486  * The global_trace is the descriptor that holds the top-level tracing
487  * buffers for the live tracing.
488  */
489 static struct trace_array global_trace = {
490         .trace_flags = TRACE_DEFAULT_FLAGS,
491 };
492
493 LIST_HEAD(ftrace_trace_arrays);
494
495 int trace_array_get(struct trace_array *this_tr)
496 {
497         struct trace_array *tr;
498         int ret = -ENODEV;
499
500         mutex_lock(&trace_types_lock);
501         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
502                 if (tr == this_tr) {
503                         tr->ref++;
504                         ret = 0;
505                         break;
506                 }
507         }
508         mutex_unlock(&trace_types_lock);
509
510         return ret;
511 }
512
513 static void __trace_array_put(struct trace_array *this_tr)
514 {
515         WARN_ON(!this_tr->ref);
516         this_tr->ref--;
517 }
518
519 /**
520  * trace_array_put - Decrement the reference counter for this trace array.
521  * @this_tr : pointer to the trace array
522  *
523  * NOTE: Use this when we no longer need the trace array returned by
524  * trace_array_get_by_name(). This ensures the trace array can be later
525  * destroyed.
526  *
527  */
528 void trace_array_put(struct trace_array *this_tr)
529 {
530         if (!this_tr)
531                 return;
532
533         mutex_lock(&trace_types_lock);
534         __trace_array_put(this_tr);
535         mutex_unlock(&trace_types_lock);
536 }
537 EXPORT_SYMBOL_GPL(trace_array_put);
538
539 int tracing_check_open_get_tr(struct trace_array *tr)
540 {
541         int ret;
542
543         ret = security_locked_down(LOCKDOWN_TRACEFS);
544         if (ret)
545                 return ret;
546
547         if (tracing_disabled)
548                 return -ENODEV;
549
550         if (tr && trace_array_get(tr) < 0)
551                 return -ENODEV;
552
553         return 0;
554 }
555
556 int call_filter_check_discard(struct trace_event_call *call, void *rec,
557                               struct trace_buffer *buffer,
558                               struct ring_buffer_event *event)
559 {
560         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
561             !filter_match_preds(call->filter, rec)) {
562                 __trace_event_discard_commit(buffer, event);
563                 return 1;
564         }
565
566         return 0;
567 }
568
569 /**
570  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
571  * @filtered_pids: The list of pids to check
572  * @search_pid: The PID to find in @filtered_pids
573  *
574  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
575  */
576 bool
577 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
578 {
579         return trace_pid_list_is_set(filtered_pids, search_pid);
580 }
581
582 /**
583  * trace_ignore_this_task - should a task be ignored for tracing
584  * @filtered_pids: The list of pids to check
585  * @filtered_no_pids: The list of pids not to be traced
586  * @task: The task that should be ignored if not filtered
587  *
588  * Checks if @task should be traced or not from @filtered_pids.
589  * Returns true if @task should *NOT* be traced.
590  * Returns false if @task should be traced.
591  */
592 bool
593 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
594                        struct trace_pid_list *filtered_no_pids,
595                        struct task_struct *task)
596 {
597         /*
598          * If filtered_no_pids is not empty, and the task's pid is listed
599          * in filtered_no_pids, then return true.
600          * Otherwise, if filtered_pids is empty, that means we can
601          * trace all tasks. If it has content, then only trace pids
602          * within filtered_pids.
603          */
604
605         return (filtered_pids &&
606                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
607                 (filtered_no_pids &&
608                  trace_find_filtered_pid(filtered_no_pids, task->pid));
609 }
610
611 /**
612  * trace_filter_add_remove_task - Add or remove a task from a pid_list
613  * @pid_list: The list to modify
614  * @self: The current task for fork or NULL for exit
615  * @task: The task to add or remove
616  *
617  * If adding a task and @self is defined, the task is only added if @self
618  * is also included in @pid_list. This happens on fork and tasks should
619  * only be added when the parent is listed. If @self is NULL, then the
620  * @task pid will be removed from the list, which would happen on exit
621  * of a task.
622  */
623 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
624                                   struct task_struct *self,
625                                   struct task_struct *task)
626 {
627         if (!pid_list)
628                 return;
629
630         /* For forks, we only add if the forking task is listed */
631         if (self) {
632                 if (!trace_find_filtered_pid(pid_list, self->pid))
633                         return;
634         }
635
636         /* "self" is set for forks, and NULL for exits */
637         if (self)
638                 trace_pid_list_set(pid_list, task->pid);
639         else
640                 trace_pid_list_clear(pid_list, task->pid);
641 }
642
643 /**
644  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
645  * @pid_list: The pid list to show
646  * @v: The last pid that was shown (+1 of the actual pid, so zero can be displayed)
647  * @pos: The position of the file
648  *
649  * This is used by the seq_file "next" operation to iterate the pids
650  * listed in a trace_pid_list structure.
651  *
652  * Returns the pid+1 as we want to display pid of zero, but NULL would
653  * stop the iteration.
654  */
655 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
656 {
657         long pid = (unsigned long)v;
658         unsigned int next;
659
660         (*pos)++;
661
662         /* pid already is +1 of the actual previous bit */
663         if (trace_pid_list_next(pid_list, pid, &next) < 0)
664                 return NULL;
665
666         pid = next;
667
668         /* Return pid + 1 to allow zero to be represented */
669         return (void *)(pid + 1);
670 }
671
672 /**
673  * trace_pid_start - Used for seq_file to start reading pid lists
674  * @pid_list: The pid list to show
675  * @pos: The position of the file
676  *
677  * This is used by seq_file "start" operation to start the iteration
678  * of listing pids.
679  *
680  * Returns the pid+1 as we want to display pid of zero, but NULL would
681  * stop the iteration.
682  */
683 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
684 {
685         unsigned long pid;
686         unsigned int first;
687         loff_t l = 0;
688
689         if (trace_pid_list_first(pid_list, &first) < 0)
690                 return NULL;
691
692         pid = first;
693
694         /* Return pid + 1 so that zero can be the exit value */
695         for (pid++; pid && l < *pos;
696              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
697                 ;
698         return (void *)pid;
699 }
700
701 /**
702  * trace_pid_show - show the current pid in seq_file processing
703  * @m: The seq_file structure to write into
704  * @v: A void pointer of the pid (+1) value to display
705  *
706  * Can be directly used by seq_file operations to display the current
707  * pid value.
708  */
709 int trace_pid_show(struct seq_file *m, void *v)
710 {
711         unsigned long pid = (unsigned long)v - 1;
712
713         seq_printf(m, "%lu\n", pid);
714         return 0;
715 }
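
/*
 * A sketch of how the three helpers above typically back a seq_file.
 * The names below are hypothetical; a real user would also pin the
 * pid list (e.g. under RCU or a mutex) for the duration of the read.
 */
static void *example_pid_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;	/* set at open time */

	return trace_pid_start(pid_list, pos);
}

static void *example_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_next(pid_list, v, pos);
}

static void example_pid_seq_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_sops __maybe_unused = {
	.start	= example_pid_seq_start,
	.next	= example_pid_seq_next,
	.stop	= example_pid_seq_stop,
	.show	= trace_pid_show,
};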
716
717 /* 128 should be much more than enough */
718 #define PID_BUF_SIZE            127
719
720 int trace_pid_write(struct trace_pid_list *filtered_pids,
721                     struct trace_pid_list **new_pid_list,
722                     const char __user *ubuf, size_t cnt)
723 {
724         struct trace_pid_list *pid_list;
725         struct trace_parser parser;
726         unsigned long val;
727         int nr_pids = 0;
728         ssize_t read = 0;
729         ssize_t ret;
730         loff_t pos;
731         pid_t pid;
732
733         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
734                 return -ENOMEM;
735
736         /*
737          * Always create a new array when the user adds new pids:
738          * the write is an all-or-nothing operation. If the
739          * operation fails, then the current list is not
740          * modified.
741          */
742         pid_list = trace_pid_list_alloc();
743         if (!pid_list) {
744                 trace_parser_put(&parser);
745                 return -ENOMEM;
746         }
747
748         if (filtered_pids) {
749                 /* copy the current bits to the new max */
750                 ret = trace_pid_list_first(filtered_pids, &pid);
751                 while (!ret) {
752                         trace_pid_list_set(pid_list, pid);
753                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
754                         nr_pids++;
755                 }
756         }
757
758         ret = 0;
759         while (cnt > 0) {
760
761                 pos = 0;
762
763                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
764                 if (ret < 0)
765                         break;
766
767                 read += ret;
768                 ubuf += ret;
769                 cnt -= ret;
770
771                 if (!trace_parser_loaded(&parser))
772                         break;
773
774                 ret = -EINVAL;
775                 if (kstrtoul(parser.buffer, 0, &val))
776                         break;
777
778                 pid = (pid_t)val;
779
780                 if (trace_pid_list_set(pid_list, pid) < 0) {
781                         ret = -1;
782                         break;
783                 }
784                 nr_pids++;
785
786                 trace_parser_clear(&parser);
787                 ret = 0;
788         }
789         trace_parser_put(&parser);
790
791         if (ret < 0) {
792                 trace_pid_list_free(pid_list);
793                 return ret;
794         }
795
796         if (!nr_pids) {
797                 /* Cleared the list of pids */
798                 trace_pid_list_free(pid_list);
799                 pid_list = NULL;
800         }
801
802         *new_pid_list = pid_list;
803
804         return read;
805 }
806
807 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
808 {
809         u64 ts;
810
811         /* Early boot up does not have a buffer yet */
812         if (!buf->buffer)
813                 return trace_clock_local();
814
815         ts = ring_buffer_time_stamp(buf->buffer);
816         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
817
818         return ts;
819 }
820
821 u64 ftrace_now(int cpu)
822 {
823         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
824 }
825
826 /**
827  * tracing_is_enabled - Show if global_trace has been enabled
828  *
829  * Shows whether the global trace has been enabled or not. It uses the
830  * mirror flag "buffer_disabled", which can be checked in fast paths
831  * such as the irqsoff tracer. But it may be inaccurate due to races. If
832  * you need to know the accurate state, use tracing_is_on(), which is a
833  * little slower but accurate.
834  */
835 int tracing_is_enabled(void)
836 {
837         /*
838          * For quick access (irqsoff uses this in fast path), just
839          * return the mirror variable of the state of the ring buffer.
840          * It's a little racy, but we don't really care.
841          */
842         smp_rmb();
843         return !global_trace.buffer_disabled;
844 }
845
846 /*
847  * trace_buf_size is the size in bytes that is allocated
848  * for a buffer. Note, the number of bytes is always rounded
849  * to page size.
850  *
851  * This number is purposely set to the low value of 16384.
852  * If a dump on oops happens, it is much appreciated not to
853  * have to wait for all that output. In any case, this is
854  * configurable at both boot time and run time.
855  */
856 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
857
858 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
859
860 /* trace_types holds a link list of available tracers. */
861 static struct tracer            *trace_types __read_mostly;
862
863 /*
864  * trace_types_lock is used to protect the trace_types list.
865  */
866 DEFINE_MUTEX(trace_types_lock);
867
868 /*
869  * Serialize access to the ring buffer.
870  *
871  * The ring buffer serializes readers, but that is only low-level protection.
872  * The validity of the events (returned by ring_buffer_peek() etc.)
873  * is not protected by the ring buffer.
874  *
875  * The content of events may become garbage if we allow other processes to
876  * consume these events concurrently:
877  *   A) the page of the consumed events may become a normal page
878  *      (not a reader page) in the ring buffer, and this page will be
879  *      rewritten by the event producer.
880  *   B) the page of the consumed events may become a page for splice_read,
881  *      and this page will be returned to the system.
882  *
883  * These primitives allow multiple processes to access different CPU ring
884  * buffers concurrently.
885  *
886  * These primitives don't distinguish read-only and read-consume access.
887  * Multiple read-only accesses are also serialized.
888  */
889
890 #ifdef CONFIG_SMP
891 static DECLARE_RWSEM(all_cpu_access_lock);
892 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
893
894 static inline void trace_access_lock(int cpu)
895 {
896         if (cpu == RING_BUFFER_ALL_CPUS) {
897                 /* gain it for accessing the whole ring buffer. */
898                 down_write(&all_cpu_access_lock);
899         } else {
900                 /* gain it for accessing a cpu ring buffer. */
901
902                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
903                 down_read(&all_cpu_access_lock);
904
905                 /* Secondly block other access to this @cpu ring buffer. */
906                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
907         }
908 }
909
910 static inline void trace_access_unlock(int cpu)
911 {
912         if (cpu == RING_BUFFER_ALL_CPUS) {
913                 up_write(&all_cpu_access_lock);
914         } else {
915                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
916                 up_read(&all_cpu_access_lock);
917         }
918 }
919
920 static inline void trace_access_lock_init(void)
921 {
922         int cpu;
923
924         for_each_possible_cpu(cpu)
925                 mutex_init(&per_cpu(cpu_access_lock, cpu));
926 }
927
928 #else
929
930 static DEFINE_MUTEX(access_lock);
931
932 static inline void trace_access_lock(int cpu)
933 {
934         (void)cpu;
935         mutex_lock(&access_lock);
936 }
937
938 static inline void trace_access_unlock(int cpu)
939 {
940         (void)cpu;
941         mutex_unlock(&access_lock);
942 }
943
944 static inline void trace_access_lock_init(void)
945 {
946 }
947
948 #endif
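
/*
 * Typical pairing for the primitives above (a sketch only; the real
 * callers appear later in this file around ring buffer consumption):
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead of a CPU number excludes every
 * per-cpu reader at once.
 */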
949
950 #ifdef CONFIG_STACKTRACE
951 static void __ftrace_trace_stack(struct trace_buffer *buffer,
952                                  unsigned int trace_ctx,
953                                  int skip, struct pt_regs *regs);
954 static inline void ftrace_trace_stack(struct trace_array *tr,
955                                       struct trace_buffer *buffer,
956                                       unsigned int trace_ctx,
957                                       int skip, struct pt_regs *regs);
958
959 #else
960 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
961                                         unsigned int trace_ctx,
962                                         int skip, struct pt_regs *regs)
963 {
964 }
965 static inline void ftrace_trace_stack(struct trace_array *tr,
966                                       struct trace_buffer *buffer,
967                                       unsigned long trace_ctx,
968                                       int skip, struct pt_regs *regs)
969 {
970 }
971
972 #endif
973
974 static __always_inline void
975 trace_event_setup(struct ring_buffer_event *event,
976                   int type, unsigned int trace_ctx)
977 {
978         struct trace_entry *ent = ring_buffer_event_data(event);
979
980         tracing_generic_entry_update(ent, type, trace_ctx);
981 }
982
983 static __always_inline struct ring_buffer_event *
984 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
985                           int type,
986                           unsigned long len,
987                           unsigned int trace_ctx)
988 {
989         struct ring_buffer_event *event;
990
991         event = ring_buffer_lock_reserve(buffer, len);
992         if (event != NULL)
993                 trace_event_setup(event, type, trace_ctx);
994
995         return event;
996 }
997
998 void tracer_tracing_on(struct trace_array *tr)
999 {
1000         if (tr->array_buffer.buffer)
1001                 ring_buffer_record_on(tr->array_buffer.buffer);
1002         /*
1003          * This flag is looked at when buffers haven't been allocated
1004          * yet, or by some tracers (like irqsoff) that just want to
1005          * know if the ring buffer has been disabled, but can handle
1006          * races where it gets disabled while we still do a record.
1007          * As the check is in the fast path of the tracers, it is more
1008          * important to be fast than accurate.
1009          */
1010         tr->buffer_disabled = 0;
1011         /* Make the flag seen by readers */
1012         smp_wmb();
1013 }
1014
1015 /**
1016  * tracing_on - enable tracing buffers
1017  *
1018  * This function enables tracing buffers that may have been
1019  * disabled with tracing_off.
1020  */
1021 void tracing_on(void)
1022 {
1023         tracer_tracing_on(&global_trace);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_on);
1026
1027
1028 static __always_inline void
1029 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1030 {
1031         __this_cpu_write(trace_taskinfo_save, true);
1032
1033         /* If this is the temp buffer, we need to commit fully */
1034         if (this_cpu_read(trace_buffered_event) == event) {
1035                 /* Length is in event->array[0] */
1036                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1037                 /* Release the temp buffer */
1038                 this_cpu_dec(trace_buffered_event_cnt);
1039                 /* ring_buffer_unlock_commit() enables preemption */
1040                 preempt_enable_notrace();
1041         } else
1042                 ring_buffer_unlock_commit(buffer);
1043 }
1044
1045 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1046                        const char *str, int size)
1047 {
1048         struct ring_buffer_event *event;
1049         struct trace_buffer *buffer;
1050         struct print_entry *entry;
1051         unsigned int trace_ctx;
1052         int alloc;
1053
1054         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1055                 return 0;
1056
1057         if (unlikely(tracing_selftest_running && tr == &global_trace))
1058                 return 0;
1059
1060         if (unlikely(tracing_disabled))
1061                 return 0;
1062
1063         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1064
1065         trace_ctx = tracing_gen_ctx();
1066         buffer = tr->array_buffer.buffer;
1067         ring_buffer_nest_start(buffer);
1068         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1069                                             trace_ctx);
1070         if (!event) {
1071                 size = 0;
1072                 goto out;
1073         }
1074
1075         entry = ring_buffer_event_data(event);
1076         entry->ip = ip;
1077
1078         memcpy(&entry->buf, str, size);
1079
1080         /* Add a newline if necessary */
1081         if (entry->buf[size - 1] != '\n') {
1082                 entry->buf[size] = '\n';
1083                 entry->buf[size + 1] = '\0';
1084         } else
1085                 entry->buf[size] = '\0';
1086
1087         __buffer_unlock_commit(buffer, event);
1088         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1089  out:
1090         ring_buffer_nest_end(buffer);
1091         return size;
1092 }
1093 EXPORT_SYMBOL_GPL(__trace_array_puts);
1094
1095 /**
1096  * __trace_puts - write a constant string into the trace buffer.
1097  * @ip:    The address of the caller
1098  * @str:   The constant string to write
1099  * @size:  The size of the string.
1100  */
1101 int __trace_puts(unsigned long ip, const char *str, int size)
1102 {
1103         return __trace_array_puts(&global_trace, ip, str, size);
1104 }
1105 EXPORT_SYMBOL_GPL(__trace_puts);
1106
1107 /**
1108  * __trace_bputs - write the pointer to a constant string into trace buffer
1109  * @ip:    The address of the caller
1110  * @str:   The constant string whose address is written to the buffer
1111  */
1112 int __trace_bputs(unsigned long ip, const char *str)
1113 {
1114         struct ring_buffer_event *event;
1115         struct trace_buffer *buffer;
1116         struct bputs_entry *entry;
1117         unsigned int trace_ctx;
1118         int size = sizeof(struct bputs_entry);
1119         int ret = 0;
1120
1121         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1122                 return 0;
1123
1124         if (unlikely(tracing_selftest_running || tracing_disabled))
1125                 return 0;
1126
1127         trace_ctx = tracing_gen_ctx();
1128         buffer = global_trace.array_buffer.buffer;
1129
1130         ring_buffer_nest_start(buffer);
1131         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1132                                             trace_ctx);
1133         if (!event)
1134                 goto out;
1135
1136         entry = ring_buffer_event_data(event);
1137         entry->ip                       = ip;
1138         entry->str                      = str;
1139
1140         __buffer_unlock_commit(buffer, event);
1141         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1142
1143         ret = 1;
1144  out:
1145         ring_buffer_nest_end(buffer);
1146         return ret;
1147 }
1148 EXPORT_SYMBOL_GPL(__trace_bputs);
1149
1150 #ifdef CONFIG_TRACER_SNAPSHOT
1151 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1152                                            void *cond_data)
1153 {
1154         struct tracer *tracer = tr->current_trace;
1155         unsigned long flags;
1156
1157         if (in_nmi()) {
1158                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1159                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1160                 return;
1161         }
1162
1163         if (!tr->allocated_snapshot) {
1164                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1165                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1166                 tracer_tracing_off(tr);
1167                 return;
1168         }
1169
1170         /* Note, the snapshot can not be used when the current tracer is already using it */
1171         if (tracer->use_max_tr) {
1172                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1173                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1174                 return;
1175         }
1176
1177         local_irq_save(flags);
1178         update_max_tr(tr, current, smp_processor_id(), cond_data);
1179         local_irq_restore(flags);
1180 }
1181
1182 void tracing_snapshot_instance(struct trace_array *tr)
1183 {
1184         tracing_snapshot_instance_cond(tr, NULL);
1185 }
1186
1187 /**
1188  * tracing_snapshot - take a snapshot of the current buffer.
1189  *
1190  * This causes a swap between the snapshot buffer and the current live
1191  * tracing buffer. You can use this to take snapshots of the live
1192  * trace when some condition is triggered, but continue to trace.
1193  *
1194  * Note, make sure to allocate the snapshot either with
1195  * tracing_snapshot_alloc(), or manually with:
1196  * echo 1 > /sys/kernel/tracing/snapshot
1197  *
1198  * If the snapshot buffer is not allocated, it will stop tracing.
1199  * Basically making a permanent snapshot.
1200  */
1201 void tracing_snapshot(void)
1202 {
1203         struct trace_array *tr = &global_trace;
1204
1205         tracing_snapshot_instance(tr);
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot);
1208
1209 /**
1210  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1211  * @tr:         The tracing instance to snapshot
1212  * @cond_data:  The data to be tested conditionally, and possibly saved
1213  *
1214  * This is the same as tracing_snapshot() except that the snapshot is
1215  * conditional - the snapshot will only happen if the
1216  * cond_snapshot.update() implementation receiving the cond_data
1217  * returns true, which means that the trace array's cond_snapshot
1218  * update() operation used the cond_data to determine whether the
1219  * snapshot should be taken, and if it was, presumably saved it along
1220  * with the snapshot.
1221  */
1222 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1223 {
1224         tracing_snapshot_instance_cond(tr, cond_data);
1225 }
1226 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1227
1228 /**
1229  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1230  * @tr:         The tracing instance
1231  *
1232  * When the user enables a conditional snapshot using
1233  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1234  * with the snapshot.  This accessor is used to retrieve it.
1235  *
1236  * Should not be called from cond_snapshot.update(), since it takes
1237  * the tr->max_lock lock, which the code calling
1238  * cond_snapshot.update() already holds.
1239  *
1240  * Returns the cond_data associated with the trace array's snapshot.
1241  */
1242 void *tracing_cond_snapshot_data(struct trace_array *tr)
1243 {
1244         void *cond_data = NULL;
1245
1246         local_irq_disable();
1247         arch_spin_lock(&tr->max_lock);
1248
1249         if (tr->cond_snapshot)
1250                 cond_data = tr->cond_snapshot->cond_data;
1251
1252         arch_spin_unlock(&tr->max_lock);
1253         local_irq_enable();
1254
1255         return cond_data;
1256 }
1257 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1258
1259 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1260                                         struct array_buffer *size_buf, int cpu_id);
1261 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1262
1263 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1264 {
1265         int ret;
1266
1267         if (!tr->allocated_snapshot) {
1268
1269                 /* allocate spare buffer */
1270                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1271                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1272                 if (ret < 0)
1273                         return ret;
1274
1275                 tr->allocated_snapshot = true;
1276         }
1277
1278         return 0;
1279 }
1280
1281 static void free_snapshot(struct trace_array *tr)
1282 {
1283         /*
1284          * We don't free the ring buffer; instead, we resize it because
1285          * the max_tr ring buffer has some state (e.g. ring->clock) that
1286          * we want to preserve.
1287          */
1288         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1289         set_buffer_entries(&tr->max_buffer, 1);
1290         tracing_reset_online_cpus(&tr->max_buffer);
1291         tr->allocated_snapshot = false;
1292 }
1293
1294 /**
1295  * tracing_alloc_snapshot - allocate snapshot buffer.
1296  *
1297  * This only allocates the snapshot buffer if it isn't already
1298  * allocated - it doesn't also take a snapshot.
1299  *
1300  * This is meant to be used in cases where the snapshot buffer needs
1301  * to be set up for events that can't sleep but need to be able to
1302  * trigger a snapshot.
1303  */
1304 int tracing_alloc_snapshot(void)
1305 {
1306         struct trace_array *tr = &global_trace;
1307         int ret;
1308
1309         ret = tracing_alloc_snapshot_instance(tr);
1310         WARN_ON(ret < 0);
1311
1312         return ret;
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1315
1316 /**
1317  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1318  *
1319  * This is similar to tracing_snapshot(), but it will allocate the
1320  * snapshot buffer if it isn't already allocated. Use this only
1321  * where it is safe to sleep, as the allocation may sleep.
1322  *
1323  * This causes a swap between the snapshot buffer and the current live
1324  * tracing buffer. You can use this to take snapshots of the live
1325  * trace when some condition is triggered, but continue to trace.
1326  */
1327 void tracing_snapshot_alloc(void)
1328 {
1329         int ret;
1330
1331         ret = tracing_alloc_snapshot();
1332         if (ret < 0)
1333                 return;
1334
1335         tracing_snapshot();
1336 }
1337 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
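
/*
 * Illustrative use of the allocation helpers above together with
 * tracing_snapshot() (hypothetical caller): allocate the spare buffer
 * once from a context that may sleep, then snapshot cheaply whenever
 * the interesting condition fires.
 *
 *	if (tracing_alloc_snapshot() == 0) {
 *		...
 *		if (interesting_condition())
 *			tracing_snapshot();
 *	}
 */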
1338
1339 /**
1340  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1341  * @tr:         The tracing instance
1342  * @cond_data:  User data to associate with the snapshot
1343  * @update:     Implementation of the cond_snapshot update function
1344  *
1345  * Check whether the conditional snapshot for the given instance has
1346  * already been enabled, or if the current tracer is already using a
1347  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1348  * save the cond_data and update function inside.
1349  *
1350  * Returns 0 if successful, error otherwise.
1351  */
1352 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1353                                  cond_update_fn_t update)
1354 {
1355         struct cond_snapshot *cond_snapshot;
1356         int ret = 0;
1357
1358         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1359         if (!cond_snapshot)
1360                 return -ENOMEM;
1361
1362         cond_snapshot->cond_data = cond_data;
1363         cond_snapshot->update = update;
1364
1365         mutex_lock(&trace_types_lock);
1366
1367         ret = tracing_alloc_snapshot_instance(tr);
1368         if (ret)
1369                 goto fail_unlock;
1370
1371         if (tr->current_trace->use_max_tr) {
1372                 ret = -EBUSY;
1373                 goto fail_unlock;
1374         }
1375
1376         /*
1377          * The cond_snapshot can only change to NULL without the
1378          * trace_types_lock. We don't care if we race with it going
1379          * to NULL, but we want to make sure that it's not set to
1380          * something other than NULL when we get here, which we can
1381          * do safely with only holding the trace_types_lock and not
1382          * having to take the max_lock.
1383          */
1384         if (tr->cond_snapshot) {
1385                 ret = -EBUSY;
1386                 goto fail_unlock;
1387         }
1388
1389         local_irq_disable();
1390         arch_spin_lock(&tr->max_lock);
1391         tr->cond_snapshot = cond_snapshot;
1392         arch_spin_unlock(&tr->max_lock);
1393         local_irq_enable();
1394
1395         mutex_unlock(&trace_types_lock);
1396
1397         return ret;
1398
1399  fail_unlock:
1400         mutex_unlock(&trace_types_lock);
1401         kfree(cond_snapshot);
1402         return ret;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
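
/*
 * Sketch of a conditional-snapshot user; all names below are
 * hypothetical. The update() callback decides, for each
 * tracing_snapshot_cond() call, whether the buffer swap happens,
 * based on the cond_data passed to that call.
 */
static bool __maybe_unused example_snapshot_update(struct trace_array *tr,
						   void *cond_data)
{
	unsigned long *value = cond_data;

	/* only allow the snapshot when the caller's value is non-zero */
	return *value != 0;
}

/*
 * A caller would then do something like:
 *	tracing_snapshot_cond_enable(tr, &saved_data, example_snapshot_update);
 *	...
 *	tracing_snapshot_cond(tr, &current_value);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */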
1405
1406 /**
1407  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1408  * @tr:         The tracing instance
1409  *
1410  * Check whether the conditional snapshot for the given instance is
1411  * enabled; if so, free the cond_snapshot associated with it,
1412  * otherwise return -EINVAL.
1413  *
1414  * Returns 0 if successful, error otherwise.
1415  */
1416 int tracing_snapshot_cond_disable(struct trace_array *tr)
1417 {
1418         int ret = 0;
1419
1420         local_irq_disable();
1421         arch_spin_lock(&tr->max_lock);
1422
1423         if (!tr->cond_snapshot)
1424                 ret = -EINVAL;
1425         else {
1426                 kfree(tr->cond_snapshot);
1427                 tr->cond_snapshot = NULL;
1428         }
1429
1430         arch_spin_unlock(&tr->max_lock);
1431         local_irq_enable();
1432
1433         return ret;
1434 }
1435 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1436 #else
1437 void tracing_snapshot(void)
1438 {
1439         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1440 }
1441 EXPORT_SYMBOL_GPL(tracing_snapshot);
1442 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1443 {
1444         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1447 int tracing_alloc_snapshot(void)
1448 {
1449         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1450         return -ENODEV;
1451 }
1452 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1453 void tracing_snapshot_alloc(void)
1454 {
1455         /* Give warning */
1456         tracing_snapshot();
1457 }
1458 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1459 void *tracing_cond_snapshot_data(struct trace_array *tr)
1460 {
1461         return NULL;
1462 }
1463 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1464 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1465 {
1466         return -ENODEV;
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1469 int tracing_snapshot_cond_disable(struct trace_array *tr)
1470 {
1471         return false;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1474 #define free_snapshot(tr)       do { } while (0)
1475 #endif /* CONFIG_TRACER_SNAPSHOT */
1476
1477 void tracer_tracing_off(struct trace_array *tr)
1478 {
1479         if (tr->array_buffer.buffer)
1480                 ring_buffer_record_off(tr->array_buffer.buffer);
1481         /*
1482          * This flag is looked at when buffers haven't been allocated
1483          * yet, or by some tracers (like irqsoff) that just want to
1484          * know if the ring buffer has been disabled, but can handle
1485          * races where it gets disabled while we still do a record.
1486          * As the check is in the fast path of the tracers, it is more
1487          * important to be fast than accurate.
1488          */
1489         tr->buffer_disabled = 1;
1490         /* Make the flag seen by readers */
1491         smp_wmb();
1492 }
1493
1494 /**
1495  * tracing_off - turn off tracing buffers
1496  *
1497  * This function stops the tracing buffers from recording data.
1498  * It does not disable any overhead the tracers themselves may
1499  * be causing. This function simply causes all recording to
1500  * the ring buffers to fail.
1501  */
1502 void tracing_off(void)
1503 {
1504         tracer_tracing_off(&global_trace);
1505 }
1506 EXPORT_SYMBOL_GPL(tracing_off);
1507
1508 void disable_trace_on_warning(void)
1509 {
1510         if (__disable_trace_on_warning) {
1511                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1512                         "Disabling tracing due to warning\n");
1513                 tracing_off();
1514         }
1515 }
1516
1517 /**
1518  * tracer_tracing_is_on - show real state of ring buffer enabled
1519  * @tr : the trace array to know if ring buffer is enabled
1520  *
1521  * Shows real state of the ring buffer if it is enabled or not.
1522  */
1523 bool tracer_tracing_is_on(struct trace_array *tr)
1524 {
1525         if (tr->array_buffer.buffer)
1526                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1527         return !tr->buffer_disabled;
1528 }
1529
1530 /**
1531  * tracing_is_on - show state of ring buffers enabled
1532  */
1533 int tracing_is_on(void)
1534 {
1535         return tracer_tracing_is_on(&global_trace);
1536 }
1537 EXPORT_SYMBOL_GPL(tracing_is_on);
1538
1539 static int __init set_buf_size(char *str)
1540 {
1541         unsigned long buf_size;
1542
1543         if (!str)
1544                 return 0;
1545         buf_size = memparse(str, &str);
1546         /*
1547          * nr_entries can not be zero and the startup
1548          * tests require some buffer space. Therefore
1549          * ensure we have at least 4096 bytes of buffer.
1550          */
1551         trace_buf_size = max(4096UL, buf_size);
1552         return 1;
1553 }
1554 __setup("trace_buf_size=", set_buf_size);
1555
1556 static int __init set_tracing_thresh(char *str)
1557 {
1558         unsigned long threshold;
1559         int ret;
1560
1561         if (!str)
1562                 return 0;
1563         ret = kstrtoul(str, 0, &threshold);
1564         if (ret < 0)
1565                 return 0;
1566         tracing_thresh = threshold * 1000;
1567         return 1;
1568 }
1569 __setup("tracing_thresh=", set_tracing_thresh);
1570
1571 unsigned long nsecs_to_usecs(unsigned long nsecs)
1572 {
1573         return nsecs / 1000;
1574 }
1575
1576 /*
1577  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1578  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1579  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1580  * of strings in the order that the evals (enum) were defined.
1581  */
1582 #undef C
1583 #define C(a, b) b
1584
1585 /* These must match the bit positions in trace_iterator_flags */
1586 static const char *trace_options[] = {
1587         TRACE_FLAGS
1588         NULL
1589 };
1590
1591 static struct {
1592         u64 (*func)(void);
1593         const char *name;
1594         int in_ns;              /* is this clock in nanoseconds? */
1595 } trace_clocks[] = {
1596         { trace_clock_local,            "local",        1 },
1597         { trace_clock_global,           "global",       1 },
1598         { trace_clock_counter,          "counter",      0 },
1599         { trace_clock_jiffies,          "uptime",       0 },
1600         { trace_clock,                  "perf",         1 },
1601         { ktime_get_mono_fast_ns,       "mono",         1 },
1602         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1603         { ktime_get_boot_fast_ns,       "boot",         1 },
1604         { ktime_get_tai_fast_ns,        "tai",          1 },
1605         ARCH_TRACE_CLOCKS
1606 };
1607
1608 bool trace_clock_in_ns(struct trace_array *tr)
1609 {
1610         if (trace_clocks[tr->clock_id].in_ns)
1611                 return true;
1612
1613         return false;
1614 }
1615
1616 /*
1617  * trace_parser_get_init - gets the buffer for trace parser
1618  */
1619 int trace_parser_get_init(struct trace_parser *parser, int size)
1620 {
1621         memset(parser, 0, sizeof(*parser));
1622
1623         parser->buffer = kmalloc(size, GFP_KERNEL);
1624         if (!parser->buffer)
1625                 return 1;
1626
1627         parser->size = size;
1628         return 0;
1629 }
1630
1631 /*
1632  * trace_parser_put - frees the buffer for trace parser
1633  */
1634 void trace_parser_put(struct trace_parser *parser)
1635 {
1636         kfree(parser->buffer);
1637         parser->buffer = NULL;
1638 }
1639
1640 /*
1641  * trace_get_user - reads the user input string separated by space
1642  * (matched by isspace(ch))
1643  *
1644  * For each string found the 'struct trace_parser' is updated,
1645  * and the function returns.
1646  *
1647  * Returns number of bytes read.
1648  *
1649  * See kernel/trace/trace.h for 'struct trace_parser' details.
1650  */
1651 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1652         size_t cnt, loff_t *ppos)
1653 {
1654         char ch;
1655         size_t read = 0;
1656         ssize_t ret;
1657
1658         if (!*ppos)
1659                 trace_parser_clear(parser);
1660
1661         ret = get_user(ch, ubuf++);
1662         if (ret)
1663                 goto out;
1664
1665         read++;
1666         cnt--;
1667
1668         /*
1669          * The parser is not finished with the last write,
1670          * continue reading the user input without skipping spaces.
1671          */
1672         if (!parser->cont) {
1673                 /* skip white space */
1674                 while (cnt && isspace(ch)) {
1675                         ret = get_user(ch, ubuf++);
1676                         if (ret)
1677                                 goto out;
1678                         read++;
1679                         cnt--;
1680                 }
1681
1682                 parser->idx = 0;
1683
1684                 /* only spaces were written */
1685                 if (isspace(ch) || !ch) {
1686                         *ppos += read;
1687                         ret = read;
1688                         goto out;
1689                 }
1690         }
1691
1692         /* read the non-space input */
1693         while (cnt && !isspace(ch) && ch) {
1694                 if (parser->idx < parser->size - 1)
1695                         parser->buffer[parser->idx++] = ch;
1696                 else {
1697                         ret = -EINVAL;
1698                         goto out;
1699                 }
1700                 ret = get_user(ch, ubuf++);
1701                 if (ret)
1702                         goto out;
1703                 read++;
1704                 cnt--;
1705         }
1706
1707         /* We either got finished input or we have to wait for another call. */
1708         if (isspace(ch) || !ch) {
1709                 parser->buffer[parser->idx] = 0;
1710                 parser->cont = false;
1711         } else if (parser->idx < parser->size - 1) {
1712                 parser->cont = true;
1713                 parser->buffer[parser->idx++] = ch;
1714                 /* Make sure the parsed string always terminates with '\0'. */
1715                 parser->buffer[parser->idx] = 0;
1716         } else {
1717                 ret = -EINVAL;
1718                 goto out;
1719         }
1720
1721         *ppos += read;
1722         ret = read;
1723
1724 out:
1725         return ret;
1726 }
1727
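/*
 * Illustrative sketch of a typical trace_get_user() caller: a ->write()
 * handler that pulls one whitespace-separated token per call using the
 * parser helpers above. The function name and the handle_token() hook are
 * hypothetical.
 */
static __maybe_unused ssize_t
example_token_write(struct file *filp, const char __user *ubuf,
                    size_t cnt, loff_t *ppos)
{
        struct trace_parser parser;
        ssize_t ret;

        if (trace_parser_get_init(&parser, TASK_COMM_LEN))
                return -ENOMEM;

        ret = trace_get_user(&parser, ubuf, cnt, ppos);
        if (ret > 0 && trace_parser_loaded(&parser)) {
                /* parser.buffer now holds one NUL-terminated token */
                /* handle_token(parser.buffer); */
        }

        trace_parser_put(&parser);
        return ret;
}
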
1728 /* TODO add a seq_buf_to_buffer() */
1729 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1730 {
1731         int len;
1732
1733         if (trace_seq_used(s) <= s->seq.readpos)
1734                 return -EBUSY;
1735
1736         len = trace_seq_used(s) - s->seq.readpos;
1737         if (cnt > len)
1738                 cnt = len;
1739         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1740
1741         s->seq.readpos += cnt;
1742         return cnt;
1743 }
1744
1745 unsigned long __read_mostly     tracing_thresh;
1746
1747 #ifdef CONFIG_TRACER_MAX_TRACE
1748 static const struct file_operations tracing_max_lat_fops;
1749
1750 #ifdef LATENCY_FS_NOTIFY
1751
1752 static struct workqueue_struct *fsnotify_wq;
1753
1754 static void latency_fsnotify_workfn(struct work_struct *work)
1755 {
1756         struct trace_array *tr = container_of(work, struct trace_array,
1757                                               fsnotify_work);
1758         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1759 }
1760
1761 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1762 {
1763         struct trace_array *tr = container_of(iwork, struct trace_array,
1764                                               fsnotify_irqwork);
1765         queue_work(fsnotify_wq, &tr->fsnotify_work);
1766 }
1767
1768 static void trace_create_maxlat_file(struct trace_array *tr,
1769                                      struct dentry *d_tracer)
1770 {
1771         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1772         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1773         tr->d_max_latency = trace_create_file("tracing_max_latency",
1774                                               TRACE_MODE_WRITE,
1775                                               d_tracer, tr,
1776                                               &tracing_max_lat_fops);
1777 }
1778
1779 __init static int latency_fsnotify_init(void)
1780 {
1781         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1782                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1783         if (!fsnotify_wq) {
1784                 pr_err("Unable to allocate tr_max_lat_wq\n");
1785                 return -ENOMEM;
1786         }
1787         return 0;
1788 }
1789
1790 late_initcall_sync(latency_fsnotify_init);
1791
1792 void latency_fsnotify(struct trace_array *tr)
1793 {
1794         if (!fsnotify_wq)
1795                 return;
1796         /*
1797          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1798          * possible that we are called from __schedule() or do_idle(), which
1799          * could cause a deadlock.
1800          */
1801         irq_work_queue(&tr->fsnotify_irqwork);
1802 }
1803
1804 #else /* !LATENCY_FS_NOTIFY */
1805
1806 #define trace_create_maxlat_file(tr, d_tracer)                          \
1807         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1808                           d_tracer, tr, &tracing_max_lat_fops)
1809
1810 #endif
1811
1812 /*
1813  * Copy the new maximum trace into the separate maximum-trace
1814  * structure. (this way the maximum trace is permanently saved,
1815  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1816  */
1817 static void
1818 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1819 {
1820         struct array_buffer *trace_buf = &tr->array_buffer;
1821         struct array_buffer *max_buf = &tr->max_buffer;
1822         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1823         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1824
1825         max_buf->cpu = cpu;
1826         max_buf->time_start = data->preempt_timestamp;
1827
1828         max_data->saved_latency = tr->max_latency;
1829         max_data->critical_start = data->critical_start;
1830         max_data->critical_end = data->critical_end;
1831
1832         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1833         max_data->pid = tsk->pid;
1834         /*
1835          * If tsk == current, then use current_uid(), as that does not use
1836          * RCU. The irq tracer can be called out of RCU scope.
1837          */
1838         if (tsk == current)
1839                 max_data->uid = current_uid();
1840         else
1841                 max_data->uid = task_uid(tsk);
1842
1843         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1844         max_data->policy = tsk->policy;
1845         max_data->rt_priority = tsk->rt_priority;
1846
1847         /* record this task's comm */
1848         tracing_record_cmdline(tsk);
1849         latency_fsnotify(tr);
1850 }
1851
1852 /**
1853  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1854  * @tr: the trace array to snapshot
1855  * @tsk: the task with the latency
1856  * @cpu: The cpu that initiated the trace.
1857  * @cond_data: User data associated with a conditional snapshot
1858  *
1859  * Flip the buffers between the @tr and the max_tr and record information
1860  * about which task was the cause of this latency.
1861  */
1862 void
1863 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1864               void *cond_data)
1865 {
1866         if (tr->stop_count)
1867                 return;
1868
1869         WARN_ON_ONCE(!irqs_disabled());
1870
1871         if (!tr->allocated_snapshot) {
1872                 /* Only the nop tracer should hit this when disabling */
1873                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1874                 return;
1875         }
1876
1877         arch_spin_lock(&tr->max_lock);
1878
1879         /* Inherit the recordable setting from array_buffer */
1880         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1881                 ring_buffer_record_on(tr->max_buffer.buffer);
1882         else
1883                 ring_buffer_record_off(tr->max_buffer.buffer);
1884
1885 #ifdef CONFIG_TRACER_SNAPSHOT
1886         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1887                 arch_spin_unlock(&tr->max_lock);
1888                 return;
1889         }
1890 #endif
1891         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1892
1893         __update_max_tr(tr, tsk, cpu);
1894
1895         arch_spin_unlock(&tr->max_lock);
1896 }
1897
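/*
 * Illustrative sketch (condensed from what the latency tracers do; the
 * helper name and the way delta is obtained are hypothetical): once a new
 * maximum latency is seen, the snapshot is taken via update_max_tr().
 */
static __maybe_unused void
example_report_latency(struct trace_array *tr, u64 delta, int cpu)
{
        if (delta <= tr->max_latency)
                return;

        tr->max_latency = delta;
        update_max_tr(tr, current, cpu, NULL);
}
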
1898 /**
1899  * update_max_tr_single - only copy one trace over, and reset the rest
1900  * @tr: the trace array to snapshot
1901  * @tsk: task with the latency
1902  * @cpu: the cpu of the buffer to copy.
1903  *
1904  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1905  */
1906 void
1907 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1908 {
1909         int ret;
1910
1911         if (tr->stop_count)
1912                 return;
1913
1914         WARN_ON_ONCE(!irqs_disabled());
1915         if (!tr->allocated_snapshot) {
1916                 /* Only the nop tracer should hit this when disabling */
1917                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1918                 return;
1919         }
1920
1921         arch_spin_lock(&tr->max_lock);
1922
1923         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1924
1925         if (ret == -EBUSY) {
1926                 /*
1927                  * We failed to swap the buffer due to a commit taking
1928                  * place on this CPU, or because a resize is in progress.
1929                  * We fail to record this latency, but we reset the max
1930                  * trace buffer (no one writes directly to it) and flag
1931                  * that the swap failed.
1932                  */
1933                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1934                         "Failed to swap buffers due to commit or resize in progress\n");
1935         }
1936
1937         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1938
1939         __update_max_tr(tr, tsk, cpu);
1940         arch_spin_unlock(&tr->max_lock);
1941 }
1942
1943 #endif /* CONFIG_TRACER_MAX_TRACE */
1944
1945 static int wait_on_pipe(struct trace_iterator *iter, int full)
1946 {
1947         /* Iterators are static, they should be filled or empty */
1948         if (trace_buffer_iter(iter, iter->cpu_file))
1949                 return 0;
1950
1951         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1952                                 full);
1953 }
1954
1955 #ifdef CONFIG_FTRACE_STARTUP_TEST
1956 static bool selftests_can_run;
1957
1958 struct trace_selftests {
1959         struct list_head                list;
1960         struct tracer                   *type;
1961 };
1962
1963 static LIST_HEAD(postponed_selftests);
1964
1965 static int save_selftest(struct tracer *type)
1966 {
1967         struct trace_selftests *selftest;
1968
1969         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1970         if (!selftest)
1971                 return -ENOMEM;
1972
1973         selftest->type = type;
1974         list_add(&selftest->list, &postponed_selftests);
1975         return 0;
1976 }
1977
1978 static int run_tracer_selftest(struct tracer *type)
1979 {
1980         struct trace_array *tr = &global_trace;
1981         struct tracer *saved_tracer = tr->current_trace;
1982         int ret;
1983
1984         if (!type->selftest || tracing_selftest_disabled)
1985                 return 0;
1986
1987         /*
1988          * If a tracer registers early in boot up (before scheduling is
1989          * initialized and such), then do not run its selftests yet.
1990          * Instead, run it a little later in the boot process.
1991          */
1992         if (!selftests_can_run)
1993                 return save_selftest(type);
1994
1995         if (!tracing_is_on()) {
1996                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1997                         type->name);
1998                 return 0;
1999         }
2000
2001         /*
2002          * Run a selftest on this tracer.
2003          * Here we reset the trace buffer, and set the current
2004          * tracer to be this tracer. The tracer can then run some
2005          * internal tracing to verify that everything is in order.
2006          * If we fail, we do not register this tracer.
2007          */
2008         tracing_reset_online_cpus(&tr->array_buffer);
2009
2010         tr->current_trace = type;
2011
2012 #ifdef CONFIG_TRACER_MAX_TRACE
2013         if (type->use_max_tr) {
2014                 /* If we expanded the buffers, make sure the max is expanded too */
2015                 if (ring_buffer_expanded)
2016                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2017                                            RING_BUFFER_ALL_CPUS);
2018                 tr->allocated_snapshot = true;
2019         }
2020 #endif
2021
2022         /* the test is responsible for initializing and enabling */
2023         pr_info("Testing tracer %s: ", type->name);
2024         ret = type->selftest(type, tr);
2025         /* the test is responsible for resetting too */
2026         tr->current_trace = saved_tracer;
2027         if (ret) {
2028                 printk(KERN_CONT "FAILED!\n");
2029                 /* Add the warning after printing 'FAILED' */
2030                 WARN_ON(1);
2031                 return -1;
2032         }
2033         /* Only reset on passing, to avoid touching corrupted buffers */
2034         tracing_reset_online_cpus(&tr->array_buffer);
2035
2036 #ifdef CONFIG_TRACER_MAX_TRACE
2037         if (type->use_max_tr) {
2038                 tr->allocated_snapshot = false;
2039
2040                 /* Shrink the max buffer again */
2041                 if (ring_buffer_expanded)
2042                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2043                                            RING_BUFFER_ALL_CPUS);
2044         }
2045 #endif
2046
2047         printk(KERN_CONT "PASSED\n");
2048         return 0;
2049 }
2050
2051 static int do_run_tracer_selftest(struct tracer *type)
2052 {
2053         int ret;
2054
2055         /*
2056          * Tests can take a long time, especially if they are run one after the
2057          * other, as does happen during bootup when all the tracers are
2058          * registered. This could cause the soft lockup watchdog to trigger.
2059          */
2060         cond_resched();
2061
2062         tracing_selftest_running = true;
2063         ret = run_tracer_selftest(type);
2064         tracing_selftest_running = false;
2065
2066         return ret;
2067 }
2068
2069 static __init int init_trace_selftests(void)
2070 {
2071         struct trace_selftests *p, *n;
2072         struct tracer *t, **last;
2073         int ret;
2074
2075         selftests_can_run = true;
2076
2077         mutex_lock(&trace_types_lock);
2078
2079         if (list_empty(&postponed_selftests))
2080                 goto out;
2081
2082         pr_info("Running postponed tracer tests:\n");
2083
2084         tracing_selftest_running = true;
2085         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2086                 /* This loop can take minutes when sanitizers are enabled, so
2087                  * let's make sure we allow RCU processing.
2088                  */
2089                 cond_resched();
2090                 ret = run_tracer_selftest(p->type);
2091                 /* If the test fails, then warn and remove from available_tracers */
2092                 if (ret < 0) {
2093                         WARN(1, "tracer: %s failed selftest, disabling\n",
2094                              p->type->name);
2095                         last = &trace_types;
2096                         for (t = trace_types; t; t = t->next) {
2097                                 if (t == p->type) {
2098                                         *last = t->next;
2099                                         break;
2100                                 }
2101                                 last = &t->next;
2102                         }
2103                 }
2104                 list_del(&p->list);
2105                 kfree(p);
2106         }
2107         tracing_selftest_running = false;
2108
2109  out:
2110         mutex_unlock(&trace_types_lock);
2111
2112         return 0;
2113 }
2114 core_initcall(init_trace_selftests);
2115 #else
2116 static inline int run_tracer_selftest(struct tracer *type)
2117 {
2118         return 0;
2119 }
2120 static inline int do_run_tracer_selftest(struct tracer *type)
2121 {
2122         return 0;
2123 }
2124 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2125
2126 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2127
2128 static void __init apply_trace_boot_options(void);
2129
2130 /**
2131  * register_tracer - register a tracer with the ftrace system.
2132  * @type: the plugin for the tracer
2133  *
2134  * Register a new plugin tracer.
2135  */
2136 int __init register_tracer(struct tracer *type)
2137 {
2138         struct tracer *t;
2139         int ret = 0;
2140
2141         if (!type->name) {
2142                 pr_info("Tracer must have a name\n");
2143                 return -1;
2144         }
2145
2146         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2147                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2148                 return -1;
2149         }
2150
2151         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2152                 pr_warn("Can not register tracer %s due to lockdown\n",
2153                            type->name);
2154                 return -EPERM;
2155         }
2156
2157         mutex_lock(&trace_types_lock);
2158
2159         for (t = trace_types; t; t = t->next) {
2160                 if (strcmp(type->name, t->name) == 0) {
2161                         /* already found */
2162                         pr_info("Tracer %s already registered\n",
2163                                 type->name);
2164                         ret = -1;
2165                         goto out;
2166                 }
2167         }
2168
2169         if (!type->set_flag)
2170                 type->set_flag = &dummy_set_flag;
2171         if (!type->flags) {
2172                 /* Allocate a dummy tracer_flags */
2173                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2174                 if (!type->flags) {
2175                         ret = -ENOMEM;
2176                         goto out;
2177                 }
2178                 type->flags->val = 0;
2179                 type->flags->opts = dummy_tracer_opt;
2180         } else
2181                 if (!type->flags->opts)
2182                         type->flags->opts = dummy_tracer_opt;
2183
2184         /* store the tracer for __set_tracer_option */
2185         type->flags->trace = type;
2186
2187         ret = do_run_tracer_selftest(type);
2188         if (ret < 0)
2189                 goto out;
2190
2191         type->next = trace_types;
2192         trace_types = type;
2193         add_tracer_options(&global_trace, type);
2194
2195  out:
2196         mutex_unlock(&trace_types_lock);
2197
2198         if (ret || !default_bootup_tracer)
2199                 goto out_unlock;
2200
2201         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2202                 goto out_unlock;
2203
2204         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2205         /* Do we want this tracer to start on bootup? */
2206         tracing_set_tracer(&global_trace, type->name);
2207         default_bootup_tracer = NULL;
2208
2209         apply_trace_boot_options();
2210
2211         /* Disable other selftests, since this will break them. */
2212         disable_tracing_selftest("running a tracer");
2213
2214  out_unlock:
2215         return ret;
2216 }
2217
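/*
 * Minimal registration sketch. The tracer name and callbacks below are
 * hypothetical; a real tracer normally also provides start/stop and output
 * hooks. register_tracer() is __init, so it must be called from an __init
 * path.
 */
static int example_tracer_init(struct trace_array *tr)
{
        return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __maybe_unused = {
        .name   = "example",
        .init   = example_tracer_init,
        .reset  = example_tracer_reset,
};

/* From an __init function: register_tracer(&example_tracer); */
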
2218 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2219 {
2220         struct trace_buffer *buffer = buf->buffer;
2221
2222         if (!buffer)
2223                 return;
2224
2225         ring_buffer_record_disable(buffer);
2226
2227         /* Make sure all commits have finished */
2228         synchronize_rcu();
2229         ring_buffer_reset_cpu(buffer, cpu);
2230
2231         ring_buffer_record_enable(buffer);
2232 }
2233
2234 void tracing_reset_online_cpus(struct array_buffer *buf)
2235 {
2236         struct trace_buffer *buffer = buf->buffer;
2237
2238         if (!buffer)
2239                 return;
2240
2241         ring_buffer_record_disable(buffer);
2242
2243         /* Make sure all commits have finished */
2244         synchronize_rcu();
2245
2246         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2247
2248         ring_buffer_reset_online_cpus(buffer);
2249
2250         ring_buffer_record_enable(buffer);
2251 }
2252
2253 /* Must have trace_types_lock held */
2254 void tracing_reset_all_online_cpus_unlocked(void)
2255 {
2256         struct trace_array *tr;
2257
2258         lockdep_assert_held(&trace_types_lock);
2259
2260         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2261                 if (!tr->clear_trace)
2262                         continue;
2263                 tr->clear_trace = false;
2264                 tracing_reset_online_cpus(&tr->array_buffer);
2265 #ifdef CONFIG_TRACER_MAX_TRACE
2266                 tracing_reset_online_cpus(&tr->max_buffer);
2267 #endif
2268         }
2269 }
2270
2271 void tracing_reset_all_online_cpus(void)
2272 {
2273         mutex_lock(&trace_types_lock);
2274         tracing_reset_all_online_cpus_unlocked();
2275         mutex_unlock(&trace_types_lock);
2276 }
2277
2278 /*
2279  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2280  * is the tgid last observed corresponding to pid=i.
2281  */
2282 static int *tgid_map;
2283
2284 /* The maximum valid index into tgid_map. */
2285 static size_t tgid_map_max;
2286
2287 #define SAVED_CMDLINES_DEFAULT 128
2288 #define NO_CMDLINE_MAP UINT_MAX
2289 /*
2290  * Preemption must be disabled before acquiring trace_cmdline_lock.
2291  * The various trace_arrays' max_lock must be acquired in a context
2292  * where interrupts are disabled.
2293  */
2294 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2295 struct saved_cmdlines_buffer {
2296         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2297         unsigned *map_cmdline_to_pid;
2298         unsigned cmdline_num;
2299         int cmdline_idx;
2300         char *saved_cmdlines;
2301 };
2302 static struct saved_cmdlines_buffer *savedcmd;
2303
2304 static inline char *get_saved_cmdlines(int idx)
2305 {
2306         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2307 }
2308
2309 static inline void set_cmdline(int idx, const char *cmdline)
2310 {
2311         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2312 }
2313
2314 static int allocate_cmdlines_buffer(unsigned int val,
2315                                     struct saved_cmdlines_buffer *s)
2316 {
2317         s->map_cmdline_to_pid = kmalloc_array(val,
2318                                               sizeof(*s->map_cmdline_to_pid),
2319                                               GFP_KERNEL);
2320         if (!s->map_cmdline_to_pid)
2321                 return -ENOMEM;
2322
2323         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2324         if (!s->saved_cmdlines) {
2325                 kfree(s->map_cmdline_to_pid);
2326                 return -ENOMEM;
2327         }
2328
2329         s->cmdline_idx = 0;
2330         s->cmdline_num = val;
2331         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2332                sizeof(s->map_pid_to_cmdline));
2333         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2334                val * sizeof(*s->map_cmdline_to_pid));
2335
2336         return 0;
2337 }
2338
2339 static int trace_create_savedcmd(void)
2340 {
2341         int ret;
2342
2343         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2344         if (!savedcmd)
2345                 return -ENOMEM;
2346
2347         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2348         if (ret < 0) {
2349                 kfree(savedcmd);
2350                 savedcmd = NULL;
2351                 return -ENOMEM;
2352         }
2353
2354         return 0;
2355 }
2356
2357 int is_tracing_stopped(void)
2358 {
2359         return global_trace.stop_count;
2360 }
2361
2362 static void tracing_start_tr(struct trace_array *tr)
2363 {
2364         struct trace_buffer *buffer;
2365         unsigned long flags;
2366
2367         if (tracing_disabled)
2368                 return;
2369
2370         raw_spin_lock_irqsave(&tr->start_lock, flags);
2371         if (--tr->stop_count) {
2372                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2373                         /* Someone screwed up their debugging */
2374                         tr->stop_count = 0;
2375                 }
2376                 goto out;
2377         }
2378
2379         /* Prevent the buffers from switching */
2380         arch_spin_lock(&tr->max_lock);
2381
2382         buffer = tr->array_buffer.buffer;
2383         if (buffer)
2384                 ring_buffer_record_enable(buffer);
2385
2386 #ifdef CONFIG_TRACER_MAX_TRACE
2387         buffer = tr->max_buffer.buffer;
2388         if (buffer)
2389                 ring_buffer_record_enable(buffer);
2390 #endif
2391
2392         arch_spin_unlock(&tr->max_lock);
2393
2394  out:
2395         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2396 }
2397
2398 /**
2399  * tracing_start - quick start of the tracer
2400  *
2401  * If tracing is enabled but was stopped by tracing_stop,
2402  * this will start the tracer back up.
2403  */
2404 void tracing_start(void)
2405 {
2407         return tracing_start_tr(&global_trace);
2408 }
2409
2410 static void tracing_stop_tr(struct trace_array *tr)
2411 {
2412         struct trace_buffer *buffer;
2413         unsigned long flags;
2414
2415         raw_spin_lock_irqsave(&tr->start_lock, flags);
2416         if (tr->stop_count++)
2417                 goto out;
2418
2419         /* Prevent the buffers from switching */
2420         arch_spin_lock(&tr->max_lock);
2421
2422         buffer = tr->array_buffer.buffer;
2423         if (buffer)
2424                 ring_buffer_record_disable(buffer);
2425
2426 #ifdef CONFIG_TRACER_MAX_TRACE
2427         buffer = tr->max_buffer.buffer;
2428         if (buffer)
2429                 ring_buffer_record_disable(buffer);
2430 #endif
2431
2432         arch_spin_unlock(&tr->max_lock);
2433
2434  out:
2435         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2436 }
2437
2438 /**
2439  * tracing_stop - quick stop of the tracer
2440  *
2441  * Lightweight way to stop tracing. Use in conjunction with
2442  * tracing_start.
2443  */
2444 void tracing_stop(void)
2445 {
2446         return tracing_stop_tr(&global_trace);
2447 }
2448
2449 static int trace_save_cmdline(struct task_struct *tsk)
2450 {
2451         unsigned tpid, idx;
2452
2453         /* treat recording of idle task as a success */
2454         if (!tsk->pid)
2455                 return 1;
2456
2457         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2458
2459         /*
2460          * It's not the end of the world if we don't get
2461          * the lock, but we also don't want to spin
2462          * nor do we want to disable interrupts,
2463          * so if we miss here, then better luck next time.
2464          *
2465          * This is called from the scheduler and wakeup paths, so interrupts
2466          * had better be disabled and the run queue lock held.
2467          */
2468         lockdep_assert_preemption_disabled();
2469         if (!arch_spin_trylock(&trace_cmdline_lock))
2470                 return 0;
2471
2472         idx = savedcmd->map_pid_to_cmdline[tpid];
2473         if (idx == NO_CMDLINE_MAP) {
2474                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2475
2476                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2477                 savedcmd->cmdline_idx = idx;
2478         }
2479
2480         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2481         set_cmdline(idx, tsk->comm);
2482
2483         arch_spin_unlock(&trace_cmdline_lock);
2484
2485         return 1;
2486 }
2487
2488 static void __trace_find_cmdline(int pid, char comm[])
2489 {
2490         unsigned map;
2491         int tpid;
2492
2493         if (!pid) {
2494                 strcpy(comm, "<idle>");
2495                 return;
2496         }
2497
2498         if (WARN_ON_ONCE(pid < 0)) {
2499                 strcpy(comm, "<XXX>");
2500                 return;
2501         }
2502
2503         tpid = pid & (PID_MAX_DEFAULT - 1);
2504         map = savedcmd->map_pid_to_cmdline[tpid];
2505         if (map != NO_CMDLINE_MAP) {
2506                 tpid = savedcmd->map_cmdline_to_pid[map];
2507                 if (tpid == pid) {
2508                         strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2509                         return;
2510                 }
2511         }
2512         strcpy(comm, "<...>");
2513 }
2514
2515 void trace_find_cmdline(int pid, char comm[])
2516 {
2517         preempt_disable();
2518         arch_spin_lock(&trace_cmdline_lock);
2519
2520         __trace_find_cmdline(pid, comm);
2521
2522         arch_spin_unlock(&trace_cmdline_lock);
2523         preempt_enable();
2524 }
2525
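/*
 * Minimal lookup sketch (the reporting helper is hypothetical): resolve a
 * recorded pid back to the comm saved by trace_save_cmdline().
 */
static __maybe_unused void example_print_comm(int pid)
{
        char comm[TASK_COMM_LEN];

        trace_find_cmdline(pid, comm);
        pr_info("pid %d last ran as %s\n", pid, comm);
}
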
2526 static int *trace_find_tgid_ptr(int pid)
2527 {
2528         /*
2529          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2530          * if we observe a non-NULL tgid_map then we also observe the correct
2531          * tgid_map_max.
2532          */
2533         int *map = smp_load_acquire(&tgid_map);
2534
2535         if (unlikely(!map || pid > tgid_map_max))
2536                 return NULL;
2537
2538         return &map[pid];
2539 }
2540
2541 int trace_find_tgid(int pid)
2542 {
2543         int *ptr = trace_find_tgid_ptr(pid);
2544
2545         return ptr ? *ptr : 0;
2546 }
2547
2548 static int trace_save_tgid(struct task_struct *tsk)
2549 {
2550         int *ptr;
2551
2552         /* treat recording of idle task as a success */
2553         if (!tsk->pid)
2554                 return 1;
2555
2556         ptr = trace_find_tgid_ptr(tsk->pid);
2557         if (!ptr)
2558                 return 0;
2559
2560         *ptr = tsk->tgid;
2561         return 1;
2562 }
2563
2564 static bool tracing_record_taskinfo_skip(int flags)
2565 {
2566         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2567                 return true;
2568         if (!__this_cpu_read(trace_taskinfo_save))
2569                 return true;
2570         return false;
2571 }
2572
2573 /**
2574  * tracing_record_taskinfo - record the task info of a task
2575  *
2576  * @task:  task to record
2577  * @flags: TRACE_RECORD_CMDLINE for recording comm
2578  *         TRACE_RECORD_TGID for recording tgid
2579  */
2580 void tracing_record_taskinfo(struct task_struct *task, int flags)
2581 {
2582         bool done;
2583
2584         if (tracing_record_taskinfo_skip(flags))
2585                 return;
2586
2587         /*
2588          * Record as much task information as possible. If some fail, continue
2589          * to try to record the others.
2590          */
2591         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2592         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2593
2594         /* If recording any information failed, retry again soon. */
2595         if (!done)
2596                 return;
2597
2598         __this_cpu_write(trace_taskinfo_save, false);
2599 }
2600
2601 /**
2602  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2603  *
2604  * @prev: previous task during sched_switch
2605  * @next: next task during sched_switch
2606  * @flags: TRACE_RECORD_CMDLINE for recording comm
2607  *         TRACE_RECORD_TGID for recording tgid
2608  */
2609 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2610                                           struct task_struct *next, int flags)
2611 {
2612         bool done;
2613
2614         if (tracing_record_taskinfo_skip(flags))
2615                 return;
2616
2617         /*
2618          * Record as much task information as possible. If some fail, continue
2619          * to try to record the others.
2620          */
2621         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2622         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2623         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2624         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2625
2626         /* If recording any information failed, retry again soon. */
2627         if (!done)
2628                 return;
2629
2630         __this_cpu_write(trace_taskinfo_save, false);
2631 }
2632
2633 /* Helpers to record a specific task information */
2634 void tracing_record_cmdline(struct task_struct *task)
2635 {
2636         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2637 }
2638
2639 void tracing_record_tgid(struct task_struct *task)
2640 {
2641         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2642 }
2643
2644 /*
2645  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2646  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2647  * simplifies those functions and keeps them in sync.
2648  */
2649 enum print_line_t trace_handle_return(struct trace_seq *s)
2650 {
2651         return trace_seq_has_overflowed(s) ?
2652                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2653 }
2654 EXPORT_SYMBOL_GPL(trace_handle_return);
2655
2656 static unsigned short migration_disable_value(void)
2657 {
2658 #if defined(CONFIG_SMP)
2659         return current->migration_disabled;
2660 #else
2661         return 0;
2662 #endif
2663 }
2664
2665 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2666 {
2667         unsigned int trace_flags = irqs_status;
2668         unsigned int pc;
2669
2670         pc = preempt_count();
2671
2672         if (pc & NMI_MASK)
2673                 trace_flags |= TRACE_FLAG_NMI;
2674         if (pc & HARDIRQ_MASK)
2675                 trace_flags |= TRACE_FLAG_HARDIRQ;
2676         if (in_serving_softirq())
2677                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2678         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2679                 trace_flags |= TRACE_FLAG_BH_OFF;
2680
2681         if (tif_need_resched())
2682                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2683         if (test_preempt_need_resched())
2684                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2685         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2686                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2687 }
2688
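/*
 * Sketch of how the word packed above can be decoded. The field layout is
 * taken directly from the return statement: preempt count in bits 0-3,
 * migration-disable depth in bits 4-7, TRACE_FLAG_* bits from bit 16 up.
 * The helper itself is hypothetical.
 */
static __maybe_unused void example_decode_trace_ctx(unsigned int trace_ctx)
{
        pr_debug("preempt=%u migrate_disable=%u flags=0x%x\n",
                 trace_ctx & 0xf, (trace_ctx >> 4) & 0xf, trace_ctx >> 16);
}
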
2689 struct ring_buffer_event *
2690 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2691                           int type,
2692                           unsigned long len,
2693                           unsigned int trace_ctx)
2694 {
2695         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2696 }
2697
2698 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2699 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2700 static int trace_buffered_event_ref;
2701
2702 /**
2703  * trace_buffered_event_enable - enable buffering events
2704  *
2705  * When events are being filtered, it is quicker to use a temporary
2706  * buffer to write the event data into if there's a likely chance
2707  * that it will not be committed. Discarding an event from the ring
2708  * buffer is not as fast as committing it, and is much slower than
2709  * copying the data first and committing only on a match.
2710  *
2711  * When an event is to be filtered, allocate per cpu buffers to
2712  * write the event data into, and if the event is filtered and discarded
2713  * it is simply dropped, otherwise, the entire data is to be committed
2714  * in one shot.
2715  */
2716 void trace_buffered_event_enable(void)
2717 {
2718         struct ring_buffer_event *event;
2719         struct page *page;
2720         int cpu;
2721
2722         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2723
2724         if (trace_buffered_event_ref++)
2725                 return;
2726
2727         for_each_tracing_cpu(cpu) {
2728                 page = alloc_pages_node(cpu_to_node(cpu),
2729                                         GFP_KERNEL | __GFP_NORETRY, 0);
2730                 /* This is just an optimization and can handle failures */
2731                 if (!page) {
2732                         pr_err("Failed to allocate event buffer\n");
2733                         break;
2734                 }
2735
2736                 event = page_address(page);
2737                 memset(event, 0, sizeof(*event));
2738
2739                 per_cpu(trace_buffered_event, cpu) = event;
2740
2741                 preempt_disable();
2742                 if (cpu == smp_processor_id() &&
2743                     __this_cpu_read(trace_buffered_event) !=
2744                     per_cpu(trace_buffered_event, cpu))
2745                         WARN_ON_ONCE(1);
2746                 preempt_enable();
2747         }
2748 }
2749
2750 static void enable_trace_buffered_event(void *data)
2751 {
2752         /* Probably not needed, but do it anyway */
2753         smp_rmb();
2754         this_cpu_dec(trace_buffered_event_cnt);
2755 }
2756
2757 static void disable_trace_buffered_event(void *data)
2758 {
2759         this_cpu_inc(trace_buffered_event_cnt);
2760 }
2761
2762 /**
2763  * trace_buffered_event_disable - disable buffering events
2764  *
2765  * When a filter is removed, it is faster to not use the buffered
2766  * events, and to commit directly into the ring buffer. Free up
2767  * the temp buffers when there are no more users. This requires
2768  * special synchronization with current events.
2769  */
2770 void trace_buffered_event_disable(void)
2771 {
2772         int cpu;
2773
2774         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2775
2776         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2777                 return;
2778
2779         if (--trace_buffered_event_ref)
2780                 return;
2781
2782         /* For each CPU, set the buffer as used. */
2783         on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2784                          NULL, true);
2785
2786         /* Wait for all current users to finish */
2787         synchronize_rcu();
2788
2789         for_each_tracing_cpu(cpu) {
2790                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2791                 per_cpu(trace_buffered_event, cpu) = NULL;
2792         }
2793
2794         /*
2795          * Wait for all CPUs that potentially started checking if they can use
2796          * their event buffer only after the previous synchronize_rcu() call and
2797          * they still read a valid pointer from trace_buffered_event. It must be
2798          * ensured they don't see cleared trace_buffered_event_cnt else they
2799          * could wrongly decide to use the pointed-to buffer which is now freed.
2800          */
2801         synchronize_rcu();
2802
2803         /* For each CPU, relinquish the buffer */
2804         on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2805                          true);
2806 }
2807
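/*
 * Illustrative pairing sketch (the attach/detach wrappers are hypothetical):
 * both calls above require event_mutex, and they are reference counted so
 * nested enable/disable pairs are fine.
 */
static __maybe_unused void example_event_filter_attach(void)
{
        mutex_lock(&event_mutex);
        trace_buffered_event_enable();
        mutex_unlock(&event_mutex);
}

static __maybe_unused void example_event_filter_detach(void)
{
        mutex_lock(&event_mutex);
        trace_buffered_event_disable();
        mutex_unlock(&event_mutex);
}
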
2808 static struct trace_buffer *temp_buffer;
2809
2810 struct ring_buffer_event *
2811 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2812                           struct trace_event_file *trace_file,
2813                           int type, unsigned long len,
2814                           unsigned int trace_ctx)
2815 {
2816         struct ring_buffer_event *entry;
2817         struct trace_array *tr = trace_file->tr;
2818         int val;
2819
2820         *current_rb = tr->array_buffer.buffer;
2821
2822         if (!tr->no_filter_buffering_ref &&
2823             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2824                 preempt_disable_notrace();
2825                 /*
2826                  * Filtering is on, so try to use the per cpu buffer first.
2827                  * This buffer will simulate a ring_buffer_event,
2828                  * where the type_len is zero and the array[0] will
2829                  * hold the full length.
2830                  * (see include/linux/ring_buffer.h for details on
2831                  *  how the ring_buffer_event is structured).
2832                  *
2833                  * Using a temp buffer during filtering and copying it
2834                  * on a matched filter is quicker than writing directly
2835                  * into the ring buffer and then discarding it when
2836                  * it doesn't match. That is because the discard
2837                  * requires several atomic operations to get right.
2838                  * Copying on match and doing nothing on a failed match
2839                  * is still quicker than no copy on match, but having
2840                  * to discard out of the ring buffer on a failed match.
2841                  */
2842                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2843                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2844
2845                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2846
2847                         /*
2848                          * Preemption is disabled, but interrupts and NMIs
2849                          * can still come in now. If that happens after
2850                          * the above increment, then it will have to go
2851                          * back to the old method of allocating the event
2852                          * on the ring buffer, and if the filter fails, it
2853                          * will have to call ring_buffer_discard_commit()
2854                          * to remove it.
2855                          *
2856                          * Need to also check the unlikely case that the
2857                          * length is bigger than the temp buffer size.
2858                          * If that happens, then the reserve is pretty much
2859                          * guaranteed to fail, as the ring buffer currently
2860                          * only allows events less than a page. But that may
2861                          * change in the future, so let the ring buffer reserve
2862                          * handle the failure in that case.
2863                          */
2864                         if (val == 1 && likely(len <= max_len)) {
2865                                 trace_event_setup(entry, type, trace_ctx);
2866                                 entry->array[0] = len;
2867                                 /* Return with preemption disabled */
2868                                 return entry;
2869                         }
2870                         this_cpu_dec(trace_buffered_event_cnt);
2871                 }
2872                 /* __trace_buffer_lock_reserve() disables preemption */
2873                 preempt_enable_notrace();
2874         }
2875
2876         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2877                                             trace_ctx);
2878         /*
2879          * If tracing is off, but we have triggers enabled
2880          * we still need to look at the event data. Use the temp_buffer
2881          * to store the trace event for the trigger to use. It's recursion
2882          * safe and will not be recorded anywhere.
2883          */
2884         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2885                 *current_rb = temp_buffer;
2886                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2887                                                     trace_ctx);
2888         }
2889         return entry;
2890 }
2891 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2892
2893 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2894 static DEFINE_MUTEX(tracepoint_printk_mutex);
2895
2896 static void output_printk(struct trace_event_buffer *fbuffer)
2897 {
2898         struct trace_event_call *event_call;
2899         struct trace_event_file *file;
2900         struct trace_event *event;
2901         unsigned long flags;
2902         struct trace_iterator *iter = tracepoint_print_iter;
2903
2904         /* We should never get here if iter is NULL */
2905         if (WARN_ON_ONCE(!iter))
2906                 return;
2907
2908         event_call = fbuffer->trace_file->event_call;
2909         if (!event_call || !event_call->event.funcs ||
2910             !event_call->event.funcs->trace)
2911                 return;
2912
2913         file = fbuffer->trace_file;
2914         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2915             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2916              !filter_match_preds(file->filter, fbuffer->entry)))
2917                 return;
2918
2919         event = &fbuffer->trace_file->event_call->event;
2920
2921         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2922         trace_seq_init(&iter->seq);
2923         iter->ent = fbuffer->entry;
2924         event_call->event.funcs->trace(iter, 0, event);
2925         trace_seq_putc(&iter->seq, 0);
2926         printk("%s", iter->seq.buffer);
2927
2928         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2929 }
2930
2931 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2932                              void *buffer, size_t *lenp,
2933                              loff_t *ppos)
2934 {
2935         int save_tracepoint_printk;
2936         int ret;
2937
2938         mutex_lock(&tracepoint_printk_mutex);
2939         save_tracepoint_printk = tracepoint_printk;
2940
2941         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2942
2943         /*
2944          * This will force exiting early, as tracepoint_printk
2945          * is always zero when tracepoint_print_iter is not allocated.
2946          */
2947         if (!tracepoint_print_iter)
2948                 tracepoint_printk = 0;
2949
2950         if (save_tracepoint_printk == tracepoint_printk)
2951                 goto out;
2952
2953         if (tracepoint_printk)
2954                 static_key_enable(&tracepoint_printk_key.key);
2955         else
2956                 static_key_disable(&tracepoint_printk_key.key);
2957
2958  out:
2959         mutex_unlock(&tracepoint_printk_mutex);
2960
2961         return ret;
2962 }
2963
2964 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2965 {
2966         enum event_trigger_type tt = ETT_NONE;
2967         struct trace_event_file *file = fbuffer->trace_file;
2968
2969         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2970                         fbuffer->entry, &tt))
2971                 goto discard;
2972
2973         if (static_key_false(&tracepoint_printk_key.key))
2974                 output_printk(fbuffer);
2975
2976         if (static_branch_unlikely(&trace_event_exports_enabled))
2977                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2978
2979         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2980                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2981
2982 discard:
2983         if (tt)
2984                 event_triggers_post_call(file, tt);
2985
2986 }
2987 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2988
2989 /*
2990  * Skip 3:
2991  *
2992  *   trace_buffer_unlock_commit_regs()
2993  *   trace_event_buffer_commit()
2994  *   trace_event_raw_event_xxx()
2995  */
2996 # define STACK_SKIP 3
2997
2998 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2999                                      struct trace_buffer *buffer,
3000                                      struct ring_buffer_event *event,
3001                                      unsigned int trace_ctx,
3002                                      struct pt_regs *regs)
3003 {
3004         __buffer_unlock_commit(buffer, event);
3005
3006         /*
3007          * If regs is not set, then skip the necessary functions.
3008          * Note, we can still get here via blktrace, wakeup tracer
3009          * and mmiotrace, but that's ok if they lose a function or
3010          * two. They are not that meaningful.
3011          */
3012         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3013         ftrace_trace_userstack(tr, buffer, trace_ctx);
3014 }
3015
3016 /*
3017  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3018  */
3019 void
3020 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3021                                    struct ring_buffer_event *event)
3022 {
3023         __buffer_unlock_commit(buffer, event);
3024 }
3025
3026 void
3027 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3028                parent_ip, unsigned int trace_ctx)
3029 {
3030         struct trace_event_call *call = &event_function;
3031         struct trace_buffer *buffer = tr->array_buffer.buffer;
3032         struct ring_buffer_event *event;
3033         struct ftrace_entry *entry;
3034
3035         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3036                                             trace_ctx);
3037         if (!event)
3038                 return;
3039         entry   = ring_buffer_event_data(event);
3040         entry->ip                       = ip;
3041         entry->parent_ip                = parent_ip;
3042
3043         if (!call_filter_check_discard(call, entry, buffer, event)) {
3044                 if (static_branch_unlikely(&trace_function_exports_enabled))
3045                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3046                 __buffer_unlock_commit(buffer, event);
3047         }
3048 }
3049
3050 #ifdef CONFIG_STACKTRACE
3051
3052 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3053 #define FTRACE_KSTACK_NESTING   4
3054
3055 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3056
3057 struct ftrace_stack {
3058         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3059 };
3060
3061
3062 struct ftrace_stacks {
3063         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3064 };
3065
3066 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3067 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3068
3069 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3070                                  unsigned int trace_ctx,
3071                                  int skip, struct pt_regs *regs)
3072 {
3073         struct trace_event_call *call = &event_kernel_stack;
3074         struct ring_buffer_event *event;
3075         unsigned int size, nr_entries;
3076         struct ftrace_stack *fstack;
3077         struct stack_entry *entry;
3078         int stackidx;
3079
3080         /*
3081          * Add one, for this function and the call to stack_trace_save().
3082          * If regs is set, then these functions will not be in the way.
3083          */
3084 #ifndef CONFIG_UNWINDER_ORC
3085         if (!regs)
3086                 skip++;
3087 #endif
3088
3089         preempt_disable_notrace();
3090
3091         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3092
3093         /* This should never happen. If it does, yell once and skip */
3094         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3095                 goto out;
3096
3097         /*
3098          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3099          * interrupt will either see the value pre increment or post
3100          * increment. If the interrupt happens pre increment it will have
3101          * restored the counter when it returns.  We just need a barrier to
3102          * keep gcc from moving things around.
3103          */
3104         barrier();
3105
3106         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3107         size = ARRAY_SIZE(fstack->calls);
3108
3109         if (regs) {
3110                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3111                                                    size, skip);
3112         } else {
3113                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3114         }
3115
3116         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3117                                     struct_size(entry, caller, nr_entries),
3118                                     trace_ctx);
3119         if (!event)
3120                 goto out;
3121         entry = ring_buffer_event_data(event);
3122
3123         entry->size = nr_entries;
3124         memcpy(&entry->caller, fstack->calls,
3125                flex_array_size(entry, caller, nr_entries));
3126
3127         if (!call_filter_check_discard(call, entry, buffer, event))
3128                 __buffer_unlock_commit(buffer, event);
3129
3130  out:
3131         /* Again, don't let gcc optimize things here */
3132         barrier();
3133         __this_cpu_dec(ftrace_stack_reserve);
3134         preempt_enable_notrace();
3135
3136 }
3137
3138 static inline void ftrace_trace_stack(struct trace_array *tr,
3139                                       struct trace_buffer *buffer,
3140                                       unsigned int trace_ctx,
3141                                       int skip, struct pt_regs *regs)
3142 {
3143         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3144                 return;
3145
3146         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3147 }
3148
3149 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3150                    int skip)
3151 {
3152         struct trace_buffer *buffer = tr->array_buffer.buffer;
3153
3154         if (rcu_is_watching()) {
3155                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3156                 return;
3157         }
3158
3159         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3160                 return;
3161
3162         /*
3163          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3164          * but if the above rcu_is_watching() failed, then the NMI
3165          * triggered someplace critical, and ct_irq_enter() should
3166          * not be called from NMI.
3167          */
3168         if (unlikely(in_nmi()))
3169                 return;
3170
3171         ct_irq_enter_irqson();
3172         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3173         ct_irq_exit_irqson();
3174 }
3175
3176 /**
3177  * trace_dump_stack - record a stack back trace in the trace buffer
3178  * @skip: Number of functions to skip (helper handlers)
3179  */
3180 void trace_dump_stack(int skip)
3181 {
3182         if (tracing_disabled || tracing_selftest_running)
3183                 return;
3184
3185 #ifndef CONFIG_UNWINDER_ORC
3186         /* Skip 1 to skip this function. */
3187         skip++;
3188 #endif
3189         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3190                              tracing_gen_ctx(), skip, NULL);
3191 }
3192 EXPORT_SYMBOL_GPL(trace_dump_stack);
3193
3194 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3195 static DEFINE_PER_CPU(int, user_stack_count);
3196
3197 static void
3198 ftrace_trace_userstack(struct trace_array *tr,
3199                        struct trace_buffer *buffer, unsigned int trace_ctx)
3200 {
3201         struct trace_event_call *call = &event_user_stack;
3202         struct ring_buffer_event *event;
3203         struct userstack_entry *entry;
3204
3205         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3206                 return;
3207
3208         /*
3209          * NMIs cannot handle page faults, even with fixups.
3210          * Saving the user stack can (and often does) fault.
3211          */
3212         if (unlikely(in_nmi()))
3213                 return;
3214
3215         /*
3216          * prevent recursion, since the user stack tracing may
3217          * trigger other kernel events.
3218          */
3219         preempt_disable();
3220         if (__this_cpu_read(user_stack_count))
3221                 goto out;
3222
3223         __this_cpu_inc(user_stack_count);
3224
3225         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3226                                             sizeof(*entry), trace_ctx);
3227         if (!event)
3228                 goto out_drop_count;
3229         entry   = ring_buffer_event_data(event);
3230
3231         entry->tgid             = current->tgid;
3232         memset(&entry->caller, 0, sizeof(entry->caller));
3233
3234         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3235         if (!call_filter_check_discard(call, entry, buffer, event))
3236                 __buffer_unlock_commit(buffer, event);
3237
3238  out_drop_count:
3239         __this_cpu_dec(user_stack_count);
3240  out:
3241         preempt_enable();
3242 }
3243 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3244 static void ftrace_trace_userstack(struct trace_array *tr,
3245                                    struct trace_buffer *buffer,
3246                                    unsigned int trace_ctx)
3247 {
3248 }
3249 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3250
3251 #endif /* CONFIG_STACKTRACE */
3252
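/* Store a 64-bit timestamp delta as the entry's two 32-bit halves. */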
3253 static inline void
3254 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3255                           unsigned long long delta)
3256 {
3257         entry->bottom_delta_ts = delta & U32_MAX;
3258         entry->top_delta_ts = (delta >> 32);
3259 }
3260
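/*
 * Write a TRACE_FUNC_REPEATS entry recording the function call saved in
 * @last_info, how many times it repeated, and the time elapsed since its
 * last recorded call.
 */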
3261 void trace_last_func_repeats(struct trace_array *tr,
3262                              struct trace_func_repeats *last_info,
3263                              unsigned int trace_ctx)
3264 {
3265         struct trace_buffer *buffer = tr->array_buffer.buffer;
3266         struct func_repeats_entry *entry;
3267         struct ring_buffer_event *event;
3268         u64 delta;
3269
3270         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3271                                             sizeof(*entry), trace_ctx);
3272         if (!event)
3273                 return;
3274
3275         delta = ring_buffer_event_time_stamp(buffer, event) -
3276                 last_info->ts_last_call;
3277
3278         entry = ring_buffer_event_data(event);
3279         entry->ip = last_info->ip;
3280         entry->parent_ip = last_info->parent_ip;
3281         entry->count = last_info->count;
3282         func_repeats_set_delta_ts(entry, delta);
3283
3284         __buffer_unlock_commit(buffer, event);
3285 }
3286
3287 /* created for use with alloc_percpu */
3288 struct trace_buffer_struct {
3289         int nesting;
3290         char buffer[4][TRACE_BUF_SIZE];
3291 };
3292
3293 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3294
3295 /*
3296  * This allows for lockless recording.  If we're nested too deeply, then
3297  * this returns NULL.
3298  */
3299 static char *get_trace_buf(void)
3300 {
3301         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3302
3303         if (!trace_percpu_buffer || buffer->nesting >= 4)
3304                 return NULL;
3305
3306         buffer->nesting++;
3307
3308         /* Interrupts must see nesting incremented before we use the buffer */
3309         barrier();
3310         return &buffer->buffer[buffer->nesting - 1][0];
3311 }
3312
3313 static void put_trace_buf(void)
3314 {
3315         /* Don't let the decrement of nesting leak before this */
3316         barrier();
3317         this_cpu_dec(trace_percpu_buffer->nesting);
3318 }
3319
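/*
 * Allocate the per-CPU trace_printk() scratch buffers.
 * This is a no-op if they have already been allocated.
 */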
3320 static int alloc_percpu_trace_buffer(void)
3321 {
3322         struct trace_buffer_struct __percpu *buffers;
3323
3324         if (trace_percpu_buffer)
3325                 return 0;
3326
3327         buffers = alloc_percpu(struct trace_buffer_struct);
3328         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3329                 return -ENOMEM;
3330
3331         trace_percpu_buffer = buffers;
3332         return 0;
3333 }
3334
3335 static int buffers_allocated;
3336
3337 void trace_printk_init_buffers(void)
3338 {
3339         if (buffers_allocated)
3340                 return;
3341
3342         if (alloc_percpu_trace_buffer())
3343                 return;
3344
3345         /* trace_printk() is for debug use only. Don't use it in production. */
3346
3347         pr_warn("\n");
3348         pr_warn("**********************************************************\n");
3349         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3350         pr_warn("**                                                      **\n");
3351         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3352         pr_warn("**                                                      **\n");
3353         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3354         pr_warn("** unsafe for production use.                           **\n");
3355         pr_warn("**                                                      **\n");
3356         pr_warn("** If you see this message and you are not debugging    **\n");
3357         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3358         pr_warn("**                                                      **\n");
3359         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3360         pr_warn("**********************************************************\n");
3361
3362         /* Expand the buffers to their set size */
3363         tracing_update_buffers();
3364
3365         buffers_allocated = 1;
3366
3367         /*
3368          * trace_printk_init_buffers() can be called by modules.
3369          * If that happens, then we need to start cmdline recording
3370          * directly here. If the global_trace.buffer is already
3371          * allocated here, then this was called by module code.
3372          */
3373         if (global_trace.array_buffer.buffer)
3374                 tracing_start_cmdline_record();
3375 }
3376 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3377
3378 void trace_printk_start_comm(void)
3379 {
3380         /* Start tracing comms if trace printk is set */
3381         if (!buffers_allocated)
3382                 return;
3383         tracing_start_cmdline_record();
3384 }
3385
3386 static void trace_printk_start_stop_comm(int enabled)
3387 {
3388         if (!buffers_allocated)
3389                 return;
3390
3391         if (enabled)
3392                 tracing_start_cmdline_record();
3393         else
3394                 tracing_stop_cmdline_record();
3395 }
3396
3397 /**
3398  * trace_vbprintk - write binary msg to tracing buffer
3399  * @ip:    The address of the caller
3400  * @fmt:   The string format to write to the buffer
3401  * @args:  Arguments for @fmt
3402  */
3403 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3404 {
3405         struct trace_event_call *call = &event_bprint;
3406         struct ring_buffer_event *event;
3407         struct trace_buffer *buffer;
3408         struct trace_array *tr = &global_trace;
3409         struct bprint_entry *entry;
3410         unsigned int trace_ctx;
3411         char *tbuffer;
3412         int len = 0, size;
3413
3414         if (unlikely(tracing_selftest_running || tracing_disabled))
3415                 return 0;
3416
3417         /* Don't pollute graph traces with trace_vprintk internals */
3418         pause_graph_tracing();
3419
3420         trace_ctx = tracing_gen_ctx();
3421         preempt_disable_notrace();
3422
3423         tbuffer = get_trace_buf();
3424         if (!tbuffer) {
3425                 len = 0;
3426                 goto out_nobuffer;
3427         }
3428
3429         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3430
3431         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3432                 goto out_put;
3433
3434         size = sizeof(*entry) + sizeof(u32) * len;
3435         buffer = tr->array_buffer.buffer;
3436         ring_buffer_nest_start(buffer);
3437         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3438                                             trace_ctx);
3439         if (!event)
3440                 goto out;
3441         entry = ring_buffer_event_data(event);
3442         entry->ip                       = ip;
3443         entry->fmt                      = fmt;
3444
3445         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3446         if (!call_filter_check_discard(call, entry, buffer, event)) {
3447                 __buffer_unlock_commit(buffer, event);
3448                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3449         }
3450
3451 out:
3452         ring_buffer_nest_end(buffer);
3453 out_put:
3454         put_trace_buf();
3455
3456 out_nobuffer:
3457         preempt_enable_notrace();
3458         unpause_graph_tracing();
3459
3460         return len;
3461 }
3462 EXPORT_SYMBOL_GPL(trace_vbprintk);
3463
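/*
 * Common worker for trace_array_vprintk() and trace_array_printk_buf():
 * format the message into a per-CPU scratch buffer and copy it into a
 * TRACE_PRINT event on @buffer.
 */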
3464 __printf(3, 0)
3465 static int
3466 __trace_array_vprintk(struct trace_buffer *buffer,
3467                       unsigned long ip, const char *fmt, va_list args)
3468 {
3469         struct trace_event_call *call = &event_print;
3470         struct ring_buffer_event *event;
3471         int len = 0, size;
3472         struct print_entry *entry;
3473         unsigned int trace_ctx;
3474         char *tbuffer;
3475
3476         if (tracing_disabled)
3477                 return 0;
3478
3479         /* Don't pollute graph traces with trace_vprintk internals */
3480         pause_graph_tracing();
3481
3482         trace_ctx = tracing_gen_ctx();
3483         preempt_disable_notrace();
3484
3485
3486         tbuffer = get_trace_buf();
3487         if (!tbuffer) {
3488                 len = 0;
3489                 goto out_nobuffer;
3490         }
3491
3492         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3493
3494         size = sizeof(*entry) + len + 1;
3495         ring_buffer_nest_start(buffer);
3496         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3497                                             trace_ctx);
3498         if (!event)
3499                 goto out;
3500         entry = ring_buffer_event_data(event);
3501         entry->ip = ip;
3502
3503         memcpy(&entry->buf, tbuffer, len + 1);
3504         if (!call_filter_check_discard(call, entry, buffer, event)) {
3505                 __buffer_unlock_commit(buffer, event);
3506                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3507         }
3508
3509 out:
3510         ring_buffer_nest_end(buffer);
3511         put_trace_buf();
3512
3513 out_nobuffer:
3514         preempt_enable_notrace();
3515         unpause_graph_tracing();
3516
3517         return len;
3518 }
3519
3520 __printf(3, 0)
3521 int trace_array_vprintk(struct trace_array *tr,
3522                         unsigned long ip, const char *fmt, va_list args)
3523 {
3524         if (tracing_selftest_running && tr == &global_trace)
3525                 return 0;
3526
3527         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3528 }
3529
3530 /**
3531  * trace_array_printk - Print a message to a specific instance
3532  * @tr: The instance trace_array descriptor
3533  * @ip: The instruction pointer that this is called from.
3534  * @fmt: The format to print (printf format)
3535  *
3536  * If a subsystem sets up its own instance, it has the right to
3537  * printk strings into its tracing instance buffer using this
3538  * function. Note, this function will not write into the top level
3539  * buffer (use trace_printk() for that), as the top level buffer
3540  * should only contain events that can be individually disabled.
3541  * trace_printk() is only used for debugging a kernel, and should never
3542  * be incorporated into normal use.
3543  *
3544  * trace_array_printk() can be used, as it will not add noise to the
3545  * top level tracing buffer.
3546  *
3547  * Note, trace_array_init_printk() must be called on @tr before this
3548  * can be used.
3549  */
3550 __printf(3, 0)
3551 int trace_array_printk(struct trace_array *tr,
3552                        unsigned long ip, const char *fmt, ...)
3553 {
3554         int ret;
3555         va_list ap;
3556
3557         if (!tr)
3558                 return -ENOENT;
3559
3560         /* This is only allowed for created instances */
3561         if (tr == &global_trace)
3562                 return 0;
3563
3564         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3565                 return 0;
3566
3567         va_start(ap, fmt);
3568         ret = trace_array_vprintk(tr, ip, fmt, ap);
3569         va_end(ap);
3570         return ret;
3571 }
3572 EXPORT_SYMBOL_GPL(trace_array_printk);
3573
3574 /**
3575  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3576  * @tr: The trace array to initialize the buffers for
3577  *
3578  * As trace_array_printk() only writes into instances, calls to it are OK
3579  * to have in the kernel (unlike trace_printk()). This needs to be called
3580  * before trace_array_printk() can be used on a trace_array.
3581  */
3582 int trace_array_init_printk(struct trace_array *tr)
3583 {
3584         if (!tr)
3585                 return -ENOENT;
3586
3587         /* This is only allowed for created instances */
3588         if (tr == &global_trace)
3589                 return -EINVAL;
3590
3591         return alloc_percpu_trace_buffer();
3592 }
3593 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3594
3595 __printf(3, 4)
3596 int trace_array_printk_buf(struct trace_buffer *buffer,
3597                            unsigned long ip, const char *fmt, ...)
3598 {
3599         int ret;
3600         va_list ap;
3601
3602         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3603                 return 0;
3604
3605         va_start(ap, fmt);
3606         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3607         va_end(ap);
3608         return ret;
3609 }
3610
3611 __printf(2, 0)
3612 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3613 {
3614         return trace_array_vprintk(&global_trace, ip, fmt, args);
3615 }
3616 EXPORT_SYMBOL_GPL(trace_vprintk);
3617
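/*
 * Move the iterator forward: bump the entry index and advance the
 * ring buffer iterator of the current CPU, if one exists.
 */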
3618 static void trace_iterator_increment(struct trace_iterator *iter)
3619 {
3620         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3621
3622         iter->idx++;
3623         if (buf_iter)
3624                 ring_buffer_iter_advance(buf_iter);
3625 }
3626
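/*
 * Peek at the next entry on @cpu without consuming it.  Uses the per-CPU
 * buffer iterator when one exists, otherwise peeks the live ring buffer.
 * The entry size is saved in iter->ent_size and any lost events are
 * reported through @lost_events.
 */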
3627 static struct trace_entry *
3628 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3629                 unsigned long *lost_events)
3630 {
3631         struct ring_buffer_event *event;
3632         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3633
3634         if (buf_iter) {
3635                 event = ring_buffer_iter_peek(buf_iter, ts);
3636                 if (lost_events)
3637                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3638                                 (unsigned long)-1 : 0;
3639         } else {
3640                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3641                                          lost_events);
3642         }
3643
3644         if (event) {
3645                 iter->ent_size = ring_buffer_event_length(event);
3646                 return ring_buffer_event_data(event);
3647         }
3648         iter->ent_size = 0;
3649         return NULL;
3650 }
3651
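/*
 * Find the oldest pending entry (smallest timestamp) across the CPUs this
 * iterator covers.  Optionally reports its CPU, its timestamp and the
 * number of events lost before it.
 */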
3652 static struct trace_entry *
3653 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3654                   unsigned long *missing_events, u64 *ent_ts)
3655 {
3656         struct trace_buffer *buffer = iter->array_buffer->buffer;
3657         struct trace_entry *ent, *next = NULL;
3658         unsigned long lost_events = 0, next_lost = 0;
3659         int cpu_file = iter->cpu_file;
3660         u64 next_ts = 0, ts;
3661         int next_cpu = -1;
3662         int next_size = 0;
3663         int cpu;
3664
3665         /*
3666          * If we are in a per_cpu trace file, don't bother iterating over
3667          * all CPUs; peek at that CPU directly.
3668          */
3669         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3670                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3671                         return NULL;
3672                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3673                 if (ent_cpu)
3674                         *ent_cpu = cpu_file;
3675
3676                 return ent;
3677         }
3678
3679         for_each_tracing_cpu(cpu) {
3680
3681                 if (ring_buffer_empty_cpu(buffer, cpu))
3682                         continue;
3683
3684                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3685
3686                 /*
3687                  * Pick the entry with the smallest timestamp:
3688                  */
3689                 if (ent && (!next || ts < next_ts)) {
3690                         next = ent;
3691                         next_cpu = cpu;
3692                         next_ts = ts;
3693                         next_lost = lost_events;
3694                         next_size = iter->ent_size;
3695                 }
3696         }
3697
3698         iter->ent_size = next_size;
3699
3700         if (ent_cpu)
3701                 *ent_cpu = next_cpu;
3702
3703         if (ent_ts)
3704                 *ent_ts = next_ts;
3705
3706         if (missing_events)
3707                 *missing_events = next_lost;
3708
3709         return next;
3710 }
3711
3712 #define STATIC_FMT_BUF_SIZE     128
3713 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3714
3715 char *trace_iter_expand_format(struct trace_iterator *iter)
3716 {
3717         char *tmp;
3718
3719         /*
3720          * iter->tr is NULL when used with tp_printk, in which case
3721          * this can be called where it is not safe to call krealloc().
3722          */
3723         if (!iter->tr || iter->fmt == static_fmt_buf)
3724                 return NULL;
3725
3726         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3727                        GFP_KERNEL);
3728         if (tmp) {
3729                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3730                 iter->fmt = tmp;
3731         }
3732
3733         return tmp;
3734 }
3735
3736 /* Returns true if the string is safe to dereference from an event */
3737 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3738                            bool star, int len)
3739 {
3740         unsigned long addr = (unsigned long)str;
3741         struct trace_event *trace_event;
3742         struct trace_event_call *event;
3743
3744         /* Ignore strings with no length */
3745         if (star && !len)
3746                 return true;
3747
3748         /* OK if part of the event data */
3749         if ((addr >= (unsigned long)iter->ent) &&
3750             (addr < (unsigned long)iter->ent + iter->ent_size))
3751                 return true;
3752
3753         /* OK if part of the temp seq buffer */
3754         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3755             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3756                 return true;
3757
3758         /* Core rodata can not be freed */
3759         if (is_kernel_rodata(addr))
3760                 return true;
3761
3762         if (trace_is_tracepoint_string(str))
3763                 return true;
3764
3765         /*
3766          * Now this could be a module event, referencing core module
3767          * data, which is OK.
3768          */
3769         if (!iter->ent)
3770                 return false;
3771
3772         trace_event = ftrace_find_event(iter->ent->type);
3773         if (!trace_event)
3774                 return false;
3775
3776         event = container_of(trace_event, struct trace_event_call, event);
3777         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3778                 return false;
3779
3780         /* Would rather have rodata, but this will suffice */
3781         if (within_module_core(addr, event->module))
3782                 return true;
3783
3784         return false;
3785 }
3786
3787 static const char *show_buffer(struct trace_seq *s)
3788 {
3789         struct seq_buf *seq = &s->seq;
3790
3791         seq_buf_terminate(seq);
3792
3793         return seq->buffer;
3794 }
3795
3796 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3797
3798 static int test_can_verify_check(const char *fmt, ...)
3799 {
3800         char buf[16];
3801         va_list ap;
3802         int ret;
3803
3804         /*
3805          * The verifier depends on vsnprintf() modifying the va_list
3806          * passed to it, i.e. on it being passed by reference. Some
3807          * architectures (like x86_32) pass it by value, which means that
3808          * vsnprintf() does not modify the caller's va_list, and the
3809          * verifier would then need to understand every value that
3810          * vsnprintf can consume. If it is passed by value, then the
3811          * verifier is disabled.
3812          */
3813         va_start(ap, fmt);
3814         vsnprintf(buf, 16, "%d", ap);
3815         ret = va_arg(ap, int);
3816         va_end(ap);
3817
3818         return ret;
3819 }
3820
3821 static void test_can_verify(void)
3822 {
3823         if (!test_can_verify_check("%d %d", 0, 1)) {
3824                 pr_info("trace event string verifier disabled\n");
3825                 static_branch_inc(&trace_no_verify);
3826         }
3827 }
3828
3829 /**
3830  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3831  * @iter: The iterator that holds the seq buffer and the event being printed
3832  * @fmt: The format used to print the event
3833  * @ap: The va_list holding the data to print from @fmt.
3834  *
3835  * This writes the data into the @iter->seq buffer using the data from
3836  * @fmt and @ap. If the format has a %s, then the source of the string
3837  * is examined to make sure it is safe to print, otherwise it will
3838  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3839  * pointer.
3840  */
3841 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3842                          va_list ap)
3843 {
3844         const char *p = fmt;
3845         const char *str;
3846         int i, j;
3847
3848         if (WARN_ON_ONCE(!fmt))
3849                 return;
3850
3851         if (static_branch_unlikely(&trace_no_verify))
3852                 goto print;
3853
3854         /* Don't bother checking when doing a ftrace_dump() */
3855         if (iter->fmt == static_fmt_buf)
3856                 goto print;
3857
3858         while (*p) {
3859                 bool star = false;
3860                 int len = 0;
3861
3862                 j = 0;
3863
3864                 /* We only care about %s and variants */
3865                 for (i = 0; p[i]; i++) {
3866                         if (i + 1 >= iter->fmt_size) {
3867                                 /*
3868                                  * If we can't expand the copy buffer,
3869                                  * just print it.
3870                                  */
3871                                 if (!trace_iter_expand_format(iter))
3872                                         goto print;
3873                         }
3874
3875                         if (p[i] == '\\' && p[i+1]) {
3876                                 i++;
3877                                 continue;
3878                         }
3879                         if (p[i] == '%') {
3880                                 /* Need to test cases like %08.*s */
3881                                 for (j = 1; p[i+j]; j++) {
3882                                         if (isdigit(p[i+j]) ||
3883                                             p[i+j] == '.')
3884                                                 continue;
3885                                         if (p[i+j] == '*') {
3886                                                 star = true;
3887                                                 continue;
3888                                         }
3889                                         break;
3890                                 }
3891                                 if (p[i+j] == 's')
3892                                         break;
3893                                 star = false;
3894                         }
3895                         j = 0;
3896                 }
3897                 /* If no %s found then just print normally */
3898                 if (!p[i])
3899                         break;
3900
3901                 /* Copy up to the %s, and print that */
3902                 strncpy(iter->fmt, p, i);
3903                 iter->fmt[i] = '\0';
3904                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3905
3906                 /*
3907                  * If iter->seq is full, the above call no longer guarantees
3908                  * that ap is in sync with fmt processing, and further calls
3909                  * to va_arg() can return wrong positional arguments.
3910                  *
3911                  * Ensure that ap is no longer used in this case.
3912                  */
3913                 if (iter->seq.full) {
3914                         p = "";
3915                         break;
3916                 }
3917
3918                 if (star)
3919                         len = va_arg(ap, int);
3920
3921                 /* The ap now points to the string data of the %s */
3922                 str = va_arg(ap, const char *);
3923
3924                 /*
3925                  * If you hit this warning, it is likely that the
3926                  * trace event in question used %s on a string that
3927                  * was saved at the time of the event, but may not be
3928                  * around when the trace is read. Use __string(),
3929                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3930                  * instead. See samples/trace_events/trace-events-sample.h
3931                  * for reference.
3932                  */
3933                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3934                               "fmt: '%s' current_buffer: '%s'",
3935                               fmt, show_buffer(&iter->seq))) {
3936                         int ret;
3937
3938                         /* Try to safely read the string */
3939                         if (star) {
3940                                 if (len + 1 > iter->fmt_size)
3941                                         len = iter->fmt_size - 1;
3942                                 if (len < 0)
3943                                         len = 0;
3944                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3945                                 iter->fmt[len] = 0;
3946                                 star = false;
3947                         } else {
3948                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3949                                                                   iter->fmt_size);
3950                         }
3951                         if (ret < 0)
3952                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3953                         else
3954                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3955                                                  str, iter->fmt);
3956                         str = "[UNSAFE-MEMORY]";
3957                         strcpy(iter->fmt, "%s");
3958                 } else {
3959                         strncpy(iter->fmt, p + i, j + 1);
3960                         iter->fmt[j+1] = '\0';
3961                 }
3962                 if (star)
3963                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3964                 else
3965                         trace_seq_printf(&iter->seq, iter->fmt, str);
3966
3967                 p += i + j + 1;
3968         }
3969  print:
3970         if (*p)
3971                 trace_seq_vprintf(&iter->seq, p, ap);
3972 }
3973
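/*
 * Return the format string to use for printing an event.  If iter->tr is
 * NULL or pointer hashing is requested (TRACE_ITER_HASH_PTR), @fmt is
 * returned unchanged; otherwise a copy is built in iter->fmt with each
 * plain %p rewritten to %px so that pointers are printed unhashed.
 */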
3974 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3975 {
3976         const char *p, *new_fmt;
3977         char *q;
3978
3979         if (WARN_ON_ONCE(!fmt))
3980                 return fmt;
3981
3982         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3983                 return fmt;
3984
3985         p = fmt;
3986         new_fmt = q = iter->fmt;
3987         while (*p) {
3988                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3989                         if (!trace_iter_expand_format(iter))
3990                                 return fmt;
3991
3992                         q += iter->fmt - new_fmt;
3993                         new_fmt = iter->fmt;
3994                 }
3995
3996                 *q++ = *p++;
3997
3998                 /* Replace %p with %px */
3999                 if (p[-1] == '%') {
4000                         if (p[0] == '%') {
4001                                 *q++ = *p++;
4002                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4003                                 *q++ = *p++;
4004                                 *q++ = 'x';
4005                         }
4006                 }
4007         }
4008         *q = '\0';
4009
4010         return new_fmt;
4011 }
4012
4013 #define STATIC_TEMP_BUF_SIZE    128
4014 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4015
4016 /* Find the next real entry, without updating the iterator itself */
4017 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4018                                           int *ent_cpu, u64 *ent_ts)
4019 {
4020         /* __find_next_entry will reset ent_size */
4021         int ent_size = iter->ent_size;
4022         struct trace_entry *entry;
4023
4024         /*
4025          * If called from ftrace_dump(), then the iter->temp buffer
4026          * will be the static_temp_buf and not created from kmalloc.
4027          * If the entry size is greater than the buffer, we cannot
4028          * save it. Just return NULL in that case. This is only
4029          * used to add markers when two consecutive events' time
4030          * stamps have a large delta. See trace_print_lat_context().
4031          */
4032         if (iter->temp == static_temp_buf &&
4033             STATIC_TEMP_BUF_SIZE < ent_size)
4034                 return NULL;
4035
4036         /*
4037          * __find_next_entry() may call peek_next_entry(), which may call
4038          * ring_buffer_peek(), and that can leave the contents of iter->ent
4039          * undefined. Need to copy iter->ent now.
4040          */
4041         if (iter->ent && iter->ent != iter->temp) {
4042                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4043                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4044                         void *temp;
4045                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4046                         if (!temp)
4047                                 return NULL;
4048                         kfree(iter->temp);
4049                         iter->temp = temp;
4050                         iter->temp_size = iter->ent_size;
4051                 }
4052                 memcpy(iter->temp, iter->ent, iter->ent_size);
4053                 iter->ent = iter->temp;
4054         }
4055         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4056         /* Put back the original ent_size */
4057         iter->ent_size = ent_size;
4058
4059         return entry;
4060 }
4061
4062 /* Find the next real entry, and increment the iterator to the next entry */
4063 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4064 {
4065         iter->ent = __find_next_entry(iter, &iter->cpu,
4066                                       &iter->lost_events, &iter->ts);
4067
4068         if (iter->ent)
4069                 trace_iterator_increment(iter);
4070
4071         return iter->ent ? iter : NULL;
4072 }
4073
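/*
 * Consume (remove) the next entry on iter->cpu, updating iter->ts and
 * iter->lost_events.
 */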
4074 static void trace_consume(struct trace_iterator *iter)
4075 {
4076         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4077                             &iter->lost_events);
4078 }
4079
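/*
 * seq_file ->next() operation: advance the iterator to the entry at the
 * new *pos.  Going backwards is not supported and returns NULL.
 */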
4080 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4081 {
4082         struct trace_iterator *iter = m->private;
4083         int i = (int)*pos;
4084         void *ent;
4085
4086         WARN_ON_ONCE(iter->leftover);
4087
4088         (*pos)++;
4089
4090         /* can't go backwards */
4091         if (iter->idx > i)
4092                 return NULL;
4093
4094         if (iter->idx < 0)
4095                 ent = trace_find_next_entry_inc(iter);
4096         else
4097                 ent = iter;
4098
4099         while (ent && iter->idx < i)
4100                 ent = trace_find_next_entry_inc(iter);
4101
4102         iter->pos = *pos;
4103
4104         return ent;
4105 }
4106
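/*
 * Reset the per-CPU buffer iterator and count (as skipped) any entries
 * whose timestamp predates the buffer's time_start.
 */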
4107 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4108 {
4109         struct ring_buffer_iter *buf_iter;
4110         unsigned long entries = 0;
4111         u64 ts;
4112
4113         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4114
4115         buf_iter = trace_buffer_iter(iter, cpu);
4116         if (!buf_iter)
4117                 return;
4118
4119         ring_buffer_iter_reset(buf_iter);
4120
4121         /*
4122          * With the max latency tracers, a reset may never have taken
4123          * place on a CPU. This is evident by the timestamp being
4124          * before the start of the buffer.
4125          */
4126         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4127                 if (ts >= iter->array_buffer->time_start)
4128                         break;
4129                 entries++;
4130                 ring_buffer_iter_advance(buf_iter);
4131         }
4132
4133         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4134 }
4135
4136 /*
4137  * The current tracer is copied to avoid holding a global lock
4138  * around the whole iteration.
4139  */
4140 static void *s_start(struct seq_file *m, loff_t *pos)
4141 {
4142         struct trace_iterator *iter = m->private;
4143         struct trace_array *tr = iter->tr;
4144         int cpu_file = iter->cpu_file;
4145         void *p = NULL;
4146         loff_t l = 0;
4147         int cpu;
4148
4149         mutex_lock(&trace_types_lock);
4150         if (unlikely(tr->current_trace != iter->trace)) {
4151                 /* Close iter->trace before switching to the new current tracer */
4152                 if (iter->trace->close)
4153                         iter->trace->close(iter);
4154                 iter->trace = tr->current_trace;
4155                 /* Reopen the new current tracer */
4156                 if (iter->trace->open)
4157                         iter->trace->open(iter);
4158         }
4159         mutex_unlock(&trace_types_lock);
4160
4161 #ifdef CONFIG_TRACER_MAX_TRACE
4162         if (iter->snapshot && iter->trace->use_max_tr)
4163                 return ERR_PTR(-EBUSY);
4164 #endif
4165
4166         if (*pos != iter->pos) {
4167                 iter->ent = NULL;
4168                 iter->cpu = 0;
4169                 iter->idx = -1;
4170
4171                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4172                         for_each_tracing_cpu(cpu)
4173                                 tracing_iter_reset(iter, cpu);
4174                 } else
4175                         tracing_iter_reset(iter, cpu_file);
4176
4177                 iter->leftover = 0;
4178                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4179                         ;
4180
4181         } else {
4182                 /*
4183                  * If we overflowed the seq_file before, then we want
4184                  * to just reuse the trace_seq buffer again.
4185                  */
4186                 if (iter->leftover)
4187                         p = iter;
4188                 else {
4189                         l = *pos - 1;
4190                         p = s_next(m, p, &l);
4191                 }
4192         }
4193
4194         trace_event_read_lock();
4195         trace_access_lock(cpu_file);
4196         return p;
4197 }
4198
4199 static void s_stop(struct seq_file *m, void *p)
4200 {
4201         struct trace_iterator *iter = m->private;
4202
4203 #ifdef CONFIG_TRACER_MAX_TRACE
4204         if (iter->snapshot && iter->trace->use_max_tr)
4205                 return;
4206 #endif
4207
4208         trace_access_unlock(iter->cpu_file);
4209         trace_event_read_unlock();
4210 }
4211
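/*
 * Per-CPU entry accounting: @entries is what is currently readable in the
 * buffer (minus any skipped entries) and @total additionally counts
 * overwritten (overrun) events.  When entries were skipped, the buffer
 * holds the whole trace and @total equals @entries.
 */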
4212 static void
4213 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4214                       unsigned long *entries, int cpu)
4215 {
4216         unsigned long count;
4217
4218         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4219         /*
4220          * If this buffer has skipped entries, then we hold all
4221          * entries for the trace and we need to ignore the
4222          * ones before the time stamp.
4223          */
4224         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4225                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4226                 /* total is the same as the entries */
4227                 *total = count;
4228         } else
4229                 *total = count +
4230                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4231         *entries = count;
4232 }
4233
4234 static void
4235 get_total_entries(struct array_buffer *buf,
4236                   unsigned long *total, unsigned long *entries)
4237 {
4238         unsigned long t, e;
4239         int cpu;
4240
4241         *total = 0;
4242         *entries = 0;
4243
4244         for_each_tracing_cpu(cpu) {
4245                 get_total_entries_cpu(buf, &t, &e, cpu);
4246                 *total += t;
4247                 *entries += e;
4248         }
4249 }
4250
4251 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4252 {
4253         unsigned long total, entries;
4254
4255         if (!tr)
4256                 tr = &global_trace;
4257
4258         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4259
4260         return entries;
4261 }
4262
4263 unsigned long trace_total_entries(struct trace_array *tr)
4264 {
4265         unsigned long total, entries;
4266
4267         if (!tr)
4268                 tr = &global_trace;
4269
4270         get_total_entries(&tr->array_buffer, &total, &entries);
4271
4272         return entries;
4273 }
4274
4275 static void print_lat_help_header(struct seq_file *m)
4276 {
4277         seq_puts(m, "#                    _------=> CPU#            \n"
4278                     "#                   / _-----=> irqs-off/BH-disabled\n"
4279                     "#                  | / _----=> need-resched    \n"
4280                     "#                  || / _---=> hardirq/softirq \n"
4281                     "#                  ||| / _--=> preempt-depth   \n"
4282                     "#                  |||| / _-=> migrate-disable \n"
4283                     "#                  ||||| /     delay           \n"
4284                     "#  cmd     pid     |||||| time  |   caller     \n"
4285                     "#     \\   /        ||||||  \\    |    /       \n");
4286 }
4287
4288 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4289 {
4290         unsigned long total;
4291         unsigned long entries;
4292
4293         get_total_entries(buf, &total, &entries);
4294         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4295                    entries, total, num_online_cpus());
4296         seq_puts(m, "#\n");
4297 }
4298
4299 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4300                                    unsigned int flags)
4301 {
4302         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4303
4304         print_event_info(buf, m);
4305
4306         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4307         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4308 }
4309
4310 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4311                                        unsigned int flags)
4312 {
4313         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4314         static const char space[] = "            ";
4315         int prec = tgid ? 12 : 2;
4316
4317         print_event_info(buf, m);
4318
4319         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4320         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4321         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4322         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4323         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4324         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4325         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4326         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4327 }
4328
4329 void
4330 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4331 {
4332         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4333         struct array_buffer *buf = iter->array_buffer;
4334         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4335         struct tracer *type = iter->trace;
4336         unsigned long entries;
4337         unsigned long total;
4338         const char *name = type->name;
4339
4340         get_total_entries(buf, &total, &entries);
4341
4342         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4343                    name, UTS_RELEASE);
4344         seq_puts(m, "# -----------------------------------"
4345                  "---------------------------------\n");
4346         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4347                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4348                    nsecs_to_usecs(data->saved_latency),
4349                    entries,
4350                    total,
4351                    buf->cpu,
4352                    preempt_model_none()      ? "server" :
4353                    preempt_model_voluntary() ? "desktop" :
4354                    preempt_model_full()      ? "preempt" :
4355                    preempt_model_rt()        ? "preempt_rt" :
4356                    "unknown",
4357                    /* These are reserved for later use */
4358                    0, 0, 0, 0);
4359 #ifdef CONFIG_SMP
4360         seq_printf(m, " #P:%d)\n", num_online_cpus());
4361 #else
4362         seq_puts(m, ")\n");
4363 #endif
4364         seq_puts(m, "#    -----------------\n");
4365         seq_printf(m, "#    | task: %.16s-%d "
4366                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4367                    data->comm, data->pid,
4368                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4369                    data->policy, data->rt_priority);
4370         seq_puts(m, "#    -----------------\n");
4371
4372         if (data->critical_start) {
4373                 seq_puts(m, "#  => started at: ");
4374                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4375                 trace_print_seq(m, &iter->seq);
4376                 seq_puts(m, "\n#  => ended at:   ");
4377                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4378                 trace_print_seq(m, &iter->seq);
4379                 seq_puts(m, "\n#\n");
4380         }
4381
4382         seq_puts(m, "#\n");
4383 }
4384
4385 static void test_cpu_buff_start(struct trace_iterator *iter)
4386 {
4387         struct trace_seq *s = &iter->seq;
4388         struct trace_array *tr = iter->tr;
4389
4390         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4391                 return;
4392
4393         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4394                 return;
4395
4396         if (cpumask_available(iter->started) &&
4397             cpumask_test_cpu(iter->cpu, iter->started))
4398                 return;
4399
4400         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4401                 return;
4402
4403         if (cpumask_available(iter->started))
4404                 cpumask_set_cpu(iter->cpu, iter->started);
4405
4406         /* Don't print started cpu buffer for the first entry of the trace */
4407         if (iter->idx > 1)
4408                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4409                                 iter->cpu);
4410 }
4411
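/*
 * Print one entry in the default human-readable format: context info
 * followed by the event's registered trace() output (or its raw fields
 * when TRACE_ITER_FIELDS is set).
 */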
4412 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4413 {
4414         struct trace_array *tr = iter->tr;
4415         struct trace_seq *s = &iter->seq;
4416         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4417         struct trace_entry *entry;
4418         struct trace_event *event;
4419
4420         entry = iter->ent;
4421
4422         test_cpu_buff_start(iter);
4423
4424         event = ftrace_find_event(entry->type);
4425
4426         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4427                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4428                         trace_print_lat_context(iter);
4429                 else
4430                         trace_print_context(iter);
4431         }
4432
4433         if (trace_seq_has_overflowed(s))
4434                 return TRACE_TYPE_PARTIAL_LINE;
4435
4436         if (event) {
4437                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4438                         return print_event_fields(iter, event);
4439                 return event->funcs->trace(iter, sym_flags, event);
4440         }
4441
4442         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4443
4444         return trace_handle_return(s);
4445 }
4446
4447 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4448 {
4449         struct trace_array *tr = iter->tr;
4450         struct trace_seq *s = &iter->seq;
4451         struct trace_entry *entry;
4452         struct trace_event *event;
4453
4454         entry = iter->ent;
4455
4456         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4457                 trace_seq_printf(s, "%d %d %llu ",
4458                                  entry->pid, iter->cpu, iter->ts);
4459
4460         if (trace_seq_has_overflowed(s))
4461                 return TRACE_TYPE_PARTIAL_LINE;
4462
4463         event = ftrace_find_event(entry->type);
4464         if (event)
4465                 return event->funcs->raw(iter, 0, event);
4466
4467         trace_seq_printf(s, "%d ?\n", entry->type);
4468
4469         return trace_handle_return(s);
4470 }
4471
4472 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4473 {
4474         struct trace_array *tr = iter->tr;
4475         struct trace_seq *s = &iter->seq;
4476         unsigned char newline = '\n';
4477         struct trace_entry *entry;
4478         struct trace_event *event;
4479
4480         entry = iter->ent;
4481
4482         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4483                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4484                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4485                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4486                 if (trace_seq_has_overflowed(s))
4487                         return TRACE_TYPE_PARTIAL_LINE;
4488         }
4489
4490         event = ftrace_find_event(entry->type);
4491         if (event) {
4492                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4493                 if (ret != TRACE_TYPE_HANDLED)
4494                         return ret;
4495         }
4496
4497         SEQ_PUT_FIELD(s, newline);
4498
4499         return trace_handle_return(s);
4500 }
4501
4502 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4503 {
4504         struct trace_array *tr = iter->tr;
4505         struct trace_seq *s = &iter->seq;
4506         struct trace_entry *entry;
4507         struct trace_event *event;
4508
4509         entry = iter->ent;
4510
4511         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4512                 SEQ_PUT_FIELD(s, entry->pid);
4513                 SEQ_PUT_FIELD(s, iter->cpu);
4514                 SEQ_PUT_FIELD(s, iter->ts);
4515                 if (trace_seq_has_overflowed(s))
4516                         return TRACE_TYPE_PARTIAL_LINE;
4517         }
4518
4519         event = ftrace_find_event(entry->type);
4520         return event ? event->funcs->binary(iter, 0, event) :
4521                 TRACE_TYPE_HANDLED;
4522 }
4523
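/*
 * Return 1 if the buffer(s) covered by the iterator have no entries left
 * to read, 0 otherwise.
 */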
4524 int trace_empty(struct trace_iterator *iter)
4525 {
4526         struct ring_buffer_iter *buf_iter;
4527         int cpu;
4528
4529         /* If we are looking at one CPU buffer, only check that one */
4530         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4531                 cpu = iter->cpu_file;
4532                 buf_iter = trace_buffer_iter(iter, cpu);
4533                 if (buf_iter) {
4534                         if (!ring_buffer_iter_empty(buf_iter))
4535                                 return 0;
4536                 } else {
4537                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4538                                 return 0;
4539                 }
4540                 return 1;
4541         }
4542
4543         for_each_tracing_cpu(cpu) {
4544                 buf_iter = trace_buffer_iter(iter, cpu);
4545                 if (buf_iter) {
4546                         if (!ring_buffer_iter_empty(buf_iter))
4547                                 return 0;
4548                 } else {
4549                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4550                                 return 0;
4551                 }
4552         }
4553
4554         return 1;
4555 }
4556
4557 /*  Called with trace_event_read_lock() held. */
4558 enum print_line_t print_trace_line(struct trace_iterator *iter)
4559 {
4560         struct trace_array *tr = iter->tr;
4561         unsigned long trace_flags = tr->trace_flags;
4562         enum print_line_t ret;
4563
4564         if (iter->lost_events) {
4565                 if (iter->lost_events == (unsigned long)-1)
4566                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4567                                          iter->cpu);
4568                 else
4569                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4570                                          iter->cpu, iter->lost_events);
4571                 if (trace_seq_has_overflowed(&iter->seq))
4572                         return TRACE_TYPE_PARTIAL_LINE;
4573         }
4574
4575         if (iter->trace && iter->trace->print_line) {
4576                 ret = iter->trace->print_line(iter);
4577                 if (ret != TRACE_TYPE_UNHANDLED)
4578                         return ret;
4579         }
4580
4581         if (iter->ent->type == TRACE_BPUTS &&
4582                         trace_flags & TRACE_ITER_PRINTK &&
4583                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4584                 return trace_print_bputs_msg_only(iter);
4585
4586         if (iter->ent->type == TRACE_BPRINT &&
4587                         trace_flags & TRACE_ITER_PRINTK &&
4588                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4589                 return trace_print_bprintk_msg_only(iter);
4590
4591         if (iter->ent->type == TRACE_PRINT &&
4592                         trace_flags & TRACE_ITER_PRINTK &&
4593                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4594                 return trace_print_printk_msg_only(iter);
4595
4596         if (trace_flags & TRACE_ITER_BIN)
4597                 return print_bin_fmt(iter);
4598
4599         if (trace_flags & TRACE_ITER_HEX)
4600                 return print_hex_fmt(iter);
4601
4602         if (trace_flags & TRACE_ITER_RAW)
4603                 return print_raw_fmt(iter);
4604
4605         return print_trace_fmt(iter);
4606 }
4607
4608 void trace_latency_header(struct seq_file *m)
4609 {
4610         struct trace_iterator *iter = m->private;
4611         struct trace_array *tr = iter->tr;
4612
4613         /* print nothing if the buffers are empty */
4614         if (trace_empty(iter))
4615                 return;
4616
4617         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4618                 print_trace_header(m, iter);
4619
4620         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4621                 print_lat_help_header(m);
4622 }
4623
4624 void trace_default_header(struct seq_file *m)
4625 {
4626         struct trace_iterator *iter = m->private;
4627         struct trace_array *tr = iter->tr;
4628         unsigned long trace_flags = tr->trace_flags;
4629
4630         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4631                 return;
4632
4633         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4634                 /* print nothing if the buffers are empty */
4635                 if (trace_empty(iter))
4636                         return;
4637                 print_trace_header(m, iter);
4638                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4639                         print_lat_help_header(m);
4640         } else {
4641                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4642                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4643                                 print_func_help_header_irq(iter->array_buffer,
4644                                                            m, trace_flags);
4645                         else
4646                                 print_func_help_header(iter->array_buffer, m,
4647                                                        trace_flags);
4648                 }
4649         }
4650 }
4651
4652 static void test_ftrace_alive(struct seq_file *m)
4653 {
4654         if (!ftrace_is_dead())
4655                 return;
4656         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4657                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4658 }
4659
4660 #ifdef CONFIG_TRACER_MAX_TRACE
4661 static void show_snapshot_main_help(struct seq_file *m)
4662 {
4663         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4664                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4665                     "#                      Takes a snapshot of the main buffer.\n"
4666                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4667                     "#                      (Doesn't have to be '2'; works with any number that\n"
4668                     "#                       is not a '0' or '1')\n");
4669 }
4670
4671 static void show_snapshot_percpu_help(struct seq_file *m)
4672 {
4673         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4674 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4675         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4676                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4677 #else
4678         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4679                     "#                     Must use main snapshot file to allocate.\n");
4680 #endif
4681         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4682                     "#                      (Doesn't have to be '2'; works with any number that\n"
4683                     "#                       is not a '0' or '1')\n");
4684 }
4685
4686 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4687 {
4688         if (iter->tr->allocated_snapshot)
4689                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4690         else
4691                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4692
4693         seq_puts(m, "# Snapshot commands:\n");
4694         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4695                 show_snapshot_main_help(m);
4696         else
4697                 show_snapshot_percpu_help(m);
4698 }
4699 #else
4700 /* Should never be called */
4701 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4702 #endif
4703
4704 static int s_show(struct seq_file *m, void *v)
4705 {
4706         struct trace_iterator *iter = v;
4707         int ret;
4708
4709         if (iter->ent == NULL) {
4710                 if (iter->tr) {
4711                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4712                         seq_puts(m, "#\n");
4713                         test_ftrace_alive(m);
4714                 }
4715                 if (iter->snapshot && trace_empty(iter))
4716                         print_snapshot_help(m, iter);
4717                 else if (iter->trace && iter->trace->print_header)
4718                         iter->trace->print_header(m);
4719                 else
4720                         trace_default_header(m);
4721
4722         } else if (iter->leftover) {
4723                 /*
4724                  * If we filled the seq_file buffer earlier, we
4725                  * want to just show it now.
4726                  */
4727                 ret = trace_print_seq(m, &iter->seq);
4728
4729                 /* ret should this time be zero, but you never know */
4730                 iter->leftover = ret;
4731
4732         } else {
4733                 print_trace_line(iter);
4734                 ret = trace_print_seq(m, &iter->seq);
4735                 /*
4736                  * If we overflow the seq_file buffer, then it will
4737                  * ask us for this data again at start up.
4738                  * Use that instead.
4739                  *  ret is 0 if seq_file write succeeded.
4740                  *        -1 otherwise.
4741                  */
4742                 iter->leftover = ret;
4743         }
4744
4745         return 0;
4746 }
4747
4748 /*
4749  * Should be used after trace_array_get(); trace_types_lock
4750  * ensures that i_cdev was already initialized.
4751  */
4752 static inline int tracing_get_cpu(struct inode *inode)
4753 {
4754         if (inode->i_cdev) /* See trace_create_cpu_file() */
4755                 return (long)inode->i_cdev - 1;
4756         return RING_BUFFER_ALL_CPUS;
4757 }
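/*
 * trace_create_cpu_file() stores "cpu + 1" in i_cdev (see that helper),
 * so a NULL i_cdev - i.e. any non per-CPU file - decodes to
 * RING_BUFFER_ALL_CPUS here, and everything else decodes back to the
 * CPU number.
 */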
4758
4759 static const struct seq_operations tracer_seq_ops = {
4760         .start          = s_start,
4761         .next           = s_next,
4762         .stop           = s_stop,
4763         .show           = s_show,
4764 };
4765
4766 /*
4767  * Note, as iter itself can be allocated and freed in different
4768  * ways, this function is only used to free its content, and not
4769  * the iterator itself. The only requirement to all the allocations
4770  * the iterator itself. The only requirement for all the allocations
4771  * is that they must zero all fields (kzalloc), as freeing works with
4772  * either allocated content or NULL.
4773 static void free_trace_iter_content(struct trace_iterator *iter)
4774 {
4775         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4776         if (iter->fmt != static_fmt_buf)
4777                 kfree(iter->fmt);
4778
4779         kfree(iter->temp);
4780         kfree(iter->buffer_iter);
4781         mutex_destroy(&iter->mutex);
4782         free_cpumask_var(iter->started);
4783 }
4784
4785 static struct trace_iterator *
4786 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4787 {
4788         struct trace_array *tr = inode->i_private;
4789         struct trace_iterator *iter;
4790         int cpu;
4791
4792         if (tracing_disabled)
4793                 return ERR_PTR(-ENODEV);
4794
4795         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4796         if (!iter)
4797                 return ERR_PTR(-ENOMEM);
4798
4799         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4800                                     GFP_KERNEL);
4801         if (!iter->buffer_iter)
4802                 goto release;
4803
4804         /*
4805          * trace_find_next_entry() may need to save off iter->ent.
4806          * It will place it into the iter->temp buffer. As most
4807          * events are less than 128 bytes, allocate a buffer of that size.
4808          * If one is greater, then trace_find_next_entry() will
4809          * allocate a new buffer to adjust for the bigger iter->ent.
4810          * It's not critical if it fails to get allocated here.
4811          */
4812         iter->temp = kmalloc(128, GFP_KERNEL);
4813         if (iter->temp)
4814                 iter->temp_size = 128;
4815
4816         /*
4817          * trace_event_printf() may need to modify the given format
4818          * string to replace %p with %px so that it shows the real address
4819          * instead of a hashed value. However, that is only needed for event
4820          * tracing; other tracers may not need it. Defer the allocation
4821          * until it is needed.
4822          */
4823         iter->fmt = NULL;
4824         iter->fmt_size = 0;
4825
4826         mutex_lock(&trace_types_lock);
4827         iter->trace = tr->current_trace;
4828
4829         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4830                 goto fail;
4831
4832         iter->tr = tr;
4833
4834 #ifdef CONFIG_TRACER_MAX_TRACE
4835         /* Currently only the top directory has a snapshot */
4836         if (tr->current_trace->print_max || snapshot)
4837                 iter->array_buffer = &tr->max_buffer;
4838         else
4839 #endif
4840                 iter->array_buffer = &tr->array_buffer;
4841         iter->snapshot = snapshot;
4842         iter->pos = -1;
4843         iter->cpu_file = tracing_get_cpu(inode);
4844         mutex_init(&iter->mutex);
4845
4846         /* Notify the tracer early; before we stop tracing. */
4847         if (iter->trace->open)
4848                 iter->trace->open(iter);
4849
4850         /* Annotate start of buffers if we had overruns */
4851         if (ring_buffer_overruns(iter->array_buffer->buffer))
4852                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4853
4854         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4855         if (trace_clocks[tr->clock_id].in_ns)
4856                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4857
4858         /*
4859          * If pause-on-trace is enabled, then stop the trace while
4860          * dumping, unless this is the "snapshot" file
4861          */
4862         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4863                 tracing_stop_tr(tr);
4864
4865         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4866                 for_each_tracing_cpu(cpu) {
4867                         iter->buffer_iter[cpu] =
4868                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4869                                                          cpu, GFP_KERNEL);
4870                 }
4871                 ring_buffer_read_prepare_sync();
4872                 for_each_tracing_cpu(cpu) {
4873                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4874                         tracing_iter_reset(iter, cpu);
4875                 }
4876         } else {
4877                 cpu = iter->cpu_file;
4878                 iter->buffer_iter[cpu] =
4879                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4880                                                  cpu, GFP_KERNEL);
4881                 ring_buffer_read_prepare_sync();
4882                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4883                 tracing_iter_reset(iter, cpu);
4884         }
4885
4886         mutex_unlock(&trace_types_lock);
4887
4888         return iter;
4889
4890  fail:
4891         mutex_unlock(&trace_types_lock);
4892         free_trace_iter_content(iter);
4893 release:
4894         seq_release_private(inode, file);
4895         return ERR_PTR(-ENOMEM);
4896 }
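/*
 * Readers that get an iterator from __tracing_open() tear it down again
 * through tracing_release() below, which undoes the setup in reverse:
 * ring_buffer_read_finish() per CPU, free_trace_iter_content() and
 * seq_release_private().
 */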
4897
4898 int tracing_open_generic(struct inode *inode, struct file *filp)
4899 {
4900         int ret;
4901
4902         ret = tracing_check_open_get_tr(NULL);
4903         if (ret)
4904                 return ret;
4905
4906         filp->private_data = inode->i_private;
4907         return 0;
4908 }
4909
4910 bool tracing_is_disabled(void)
4911 {
4912         return (tracing_disabled) ? true : false;
4913 }
4914
4915 /*
4916  * Open and update trace_array ref count.
4917  * Must have the current trace_array passed to it.
4918  */
4919 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4920 {
4921         struct trace_array *tr = inode->i_private;
4922         int ret;
4923
4924         ret = tracing_check_open_get_tr(tr);
4925         if (ret)
4926                 return ret;
4927
4928         filp->private_data = inode->i_private;
4929
4930         return 0;
4931 }
4932
4933 /*
4934  * The private pointer of the inode is the trace_event_file.
4935  * Update the tr ref count associated to it.
4936  */
4937 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4938 {
4939         struct trace_event_file *file = inode->i_private;
4940         int ret;
4941
4942         ret = tracing_check_open_get_tr(file->tr);
4943         if (ret)
4944                 return ret;
4945
4946         mutex_lock(&event_mutex);
4947
4948         /* Fail if the file is marked for removal */
4949         if (file->flags & EVENT_FILE_FL_FREED) {
4950                 trace_array_put(file->tr);
4951                 ret = -ENODEV;
4952         } else {
4953                 event_file_get(file);
4954         }
4955
4956         mutex_unlock(&event_mutex);
4957         if (ret)
4958                 return ret;
4959
4960         filp->private_data = inode->i_private;
4961
4962         return 0;
4963 }
4964
4965 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4966 {
4967         struct trace_event_file *file = inode->i_private;
4968
4969         trace_array_put(file->tr);
4970         event_file_put(file);
4971
4972         return 0;
4973 }
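/*
 * Pairs with tracing_open_file_tr() above: drops the trace_array
 * reference taken via tracing_check_open_get_tr() and the event file
 * reference taken with event_file_get().
 */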
4974
4975 static int tracing_mark_open(struct inode *inode, struct file *filp)
4976 {
4977         stream_open(inode, filp);
4978         return tracing_open_generic_tr(inode, filp);
4979 }
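/*
 * stream_open() marks the file as a stream: it is not seekable and
 * reads/writes do not use f_pos, which matches how trace_marker is
 * used - every write is an independent marker record.
 */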
4980
4981 static int tracing_release(struct inode *inode, struct file *file)
4982 {
4983         struct trace_array *tr = inode->i_private;
4984         struct seq_file *m = file->private_data;
4985         struct trace_iterator *iter;
4986         int cpu;
4987
4988         if (!(file->f_mode & FMODE_READ)) {
4989                 trace_array_put(tr);
4990                 return 0;
4991         }
4992
4993         /* Writes do not use seq_file */
4994         iter = m->private;
4995         mutex_lock(&trace_types_lock);
4996
4997         for_each_tracing_cpu(cpu) {
4998                 if (iter->buffer_iter[cpu])
4999                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
5000         }
5001
5002         if (iter->trace && iter->trace->close)
5003                 iter->trace->close(iter);
5004
5005         if (!iter->snapshot && tr->stop_count)
5006                 /* reenable tracing if it was previously enabled */
5007                 tracing_start_tr(tr);
5008
5009         __trace_array_put(tr);
5010
5011         mutex_unlock(&trace_types_lock);
5012
5013         free_trace_iter_content(iter);
5014         seq_release_private(inode, file);
5015
5016         return 0;
5017 }
5018
5019 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5020 {
5021         struct trace_array *tr = inode->i_private;
5022
5023         trace_array_put(tr);
5024         return 0;
5025 }
5026
5027 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5028 {
5029         struct trace_array *tr = inode->i_private;
5030
5031         trace_array_put(tr);
5032
5033         return single_release(inode, file);
5034 }
5035
5036 static int tracing_open(struct inode *inode, struct file *file)
5037 {
5038         struct trace_array *tr = inode->i_private;
5039         struct trace_iterator *iter;
5040         int ret;
5041
5042         ret = tracing_check_open_get_tr(tr);
5043         if (ret)
5044                 return ret;
5045
5046         /* If this file was open for write, then erase contents */
5047         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5048                 int cpu = tracing_get_cpu(inode);
5049                 struct array_buffer *trace_buf = &tr->array_buffer;
5050
5051 #ifdef CONFIG_TRACER_MAX_TRACE
5052                 if (tr->current_trace->print_max)
5053                         trace_buf = &tr->max_buffer;
5054 #endif
5055
5056                 if (cpu == RING_BUFFER_ALL_CPUS)
5057                         tracing_reset_online_cpus(trace_buf);
5058                 else
5059                         tracing_reset_cpu(trace_buf, cpu);
5060         }
5061
5062         if (file->f_mode & FMODE_READ) {
5063                 iter = __tracing_open(inode, file, false);
5064                 if (IS_ERR(iter))
5065                         ret = PTR_ERR(iter);
5066                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5067                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5068         }
5069
5070         if (ret < 0)
5071                 trace_array_put(tr);
5072
5073         return ret;
5074 }
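/*
 * Note that the O_TRUNC branch above is what makes "echo > trace" clear
 * the ring buffer: the shell opens the file write-only with O_TRUNC, so
 * only the reset path runs and no iterator is created.
 */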
5075
5076 /*
5077  * Some tracers are not suitable for instance buffers.
5078  * A tracer is always available for the global array (toplevel)
5079  * or if it explicitly states that it is.
5080  */
5081 static bool
5082 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5083 {
5084         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5085 }
5086
5087 /* Find the next tracer that this trace array may use */
5088 static struct tracer *
5089 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5090 {
5091         while (t && !trace_ok_for_array(t, tr))
5092                 t = t->next;
5093
5094         return t;
5095 }
5096
5097 static void *
5098 t_next(struct seq_file *m, void *v, loff_t *pos)
5099 {
5100         struct trace_array *tr = m->private;
5101         struct tracer *t = v;
5102
5103         (*pos)++;
5104
5105         if (t)
5106                 t = get_tracer_for_array(tr, t->next);
5107
5108         return t;
5109 }
5110
5111 static void *t_start(struct seq_file *m, loff_t *pos)
5112 {
5113         struct trace_array *tr = m->private;
5114         struct tracer *t;
5115         loff_t l = 0;
5116
5117         mutex_lock(&trace_types_lock);
5118
5119         t = get_tracer_for_array(tr, trace_types);
5120         for (; t && l < *pos; t = t_next(m, t, &l))
5121                         ;
5122
5123         return t;
5124 }
5125
5126 static void t_stop(struct seq_file *m, void *p)
5127 {
5128         mutex_unlock(&trace_types_lock);
5129 }
5130
5131 static int t_show(struct seq_file *m, void *v)
5132 {
5133         struct tracer *t = v;
5134
5135         if (!t)
5136                 return 0;
5137
5138         seq_puts(m, t->name);
5139         if (t->next)
5140                 seq_putc(m, ' ');
5141         else
5142                 seq_putc(m, '\n');
5143
5144         return 0;
5145 }
5146
5147 static const struct seq_operations show_traces_seq_ops = {
5148         .start          = t_start,
5149         .next           = t_next,
5150         .stop           = t_stop,
5151         .show           = t_show,
5152 };
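/*
 * These seq operations back the "available_tracers" file. t_show()
 * emits all tracer names on a single space separated line, e.g.
 * (illustrative output; the exact set depends on the configuration):
 *
 *   # cat available_tracers
 *   function_graph function nop
 */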
5153
5154 static int show_traces_open(struct inode *inode, struct file *file)
5155 {
5156         struct trace_array *tr = inode->i_private;
5157         struct seq_file *m;
5158         int ret;
5159
5160         ret = tracing_check_open_get_tr(tr);
5161         if (ret)
5162                 return ret;
5163
5164         ret = seq_open(file, &show_traces_seq_ops);
5165         if (ret) {
5166                 trace_array_put(tr);
5167                 return ret;
5168         }
5169
5170         m = file->private_data;
5171         m->private = tr;
5172
5173         return 0;
5174 }
5175
5176 static int show_traces_release(struct inode *inode, struct file *file)
5177 {
5178         struct trace_array *tr = inode->i_private;
5179
5180         trace_array_put(tr);
5181         return seq_release(inode, file);
5182 }
5183
5184 static ssize_t
5185 tracing_write_stub(struct file *filp, const char __user *ubuf,
5186                    size_t count, loff_t *ppos)
5187 {
5188         return count;
5189 }
5190
5191 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5192 {
5193         int ret;
5194
5195         if (file->f_mode & FMODE_READ)
5196                 ret = seq_lseek(file, offset, whence);
5197         else
5198                 file->f_pos = ret = 0;
5199
5200         return ret;
5201 }
5202
5203 static const struct file_operations tracing_fops = {
5204         .open           = tracing_open,
5205         .read           = seq_read,
5206         .read_iter      = seq_read_iter,
5207         .splice_read    = copy_splice_read,
5208         .write          = tracing_write_stub,
5209         .llseek         = tracing_lseek,
5210         .release        = tracing_release,
5211 };
5212
5213 static const struct file_operations show_traces_fops = {
5214         .open           = show_traces_open,
5215         .read           = seq_read,
5216         .llseek         = seq_lseek,
5217         .release        = show_traces_release,
5218 };
5219
5220 static ssize_t
5221 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5222                      size_t count, loff_t *ppos)
5223 {
5224         struct trace_array *tr = file_inode(filp)->i_private;
5225         char *mask_str;
5226         int len;
5227
5228         len = snprintf(NULL, 0, "%*pb\n",
5229                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5230         mask_str = kmalloc(len, GFP_KERNEL);
5231         if (!mask_str)
5232                 return -ENOMEM;
5233
5234         len = snprintf(mask_str, len, "%*pb\n",
5235                        cpumask_pr_args(tr->tracing_cpumask));
5236         if (len >= count) {
5237                 count = -EINVAL;
5238                 goto out_err;
5239         }
5240         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5241
5242 out_err:
5243         kfree(mask_str);
5244
5245         return count;
5246 }
5247
5248 int tracing_set_cpumask(struct trace_array *tr,
5249                         cpumask_var_t tracing_cpumask_new)
5250 {
5251         int cpu;
5252
5253         if (!tr)
5254                 return -EINVAL;
5255
5256         local_irq_disable();
5257         arch_spin_lock(&tr->max_lock);
5258         for_each_tracing_cpu(cpu) {
5259                 /*
5260                  * Increase/decrease the disabled counter if we are
5261                  * about to flip a bit in the cpumask:
5262                  */
5263                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5264                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5265                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5266                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5267 #ifdef CONFIG_TRACER_MAX_TRACE
5268                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5269 #endif
5270                 }
5271                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5272                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5273                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5274                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5275 #ifdef CONFIG_TRACER_MAX_TRACE
5276                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5277 #endif
5278                 }
5279         }
5280         arch_spin_unlock(&tr->max_lock);
5281         local_irq_enable();
5282
5283         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5284
5285         return 0;
5286 }
5287
5288 static ssize_t
5289 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5290                       size_t count, loff_t *ppos)
5291 {
5292         struct trace_array *tr = file_inode(filp)->i_private;
5293         cpumask_var_t tracing_cpumask_new;
5294         int err;
5295
5296         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5297                 return -ENOMEM;
5298
5299         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5300         if (err)
5301                 goto err_free;
5302
5303         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5304         if (err)
5305                 goto err_free;
5306
5307         free_cpumask_var(tracing_cpumask_new);
5308
5309         return count;
5310
5311 err_free:
5312         free_cpumask_var(tracing_cpumask_new);
5313
5314         return err;
5315 }
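/*
 * Illustrative tracefs usage (assuming the usual /sys/kernel/tracing
 * mount); the value is a hex CPU mask parsed by cpumask_parse_user():
 *
 *   # echo 3 > tracing_cpumask      : trace only CPUs 0 and 1
 *   # echo ff > tracing_cpumask     : trace CPUs 0-7
 */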
5316
5317 static const struct file_operations tracing_cpumask_fops = {
5318         .open           = tracing_open_generic_tr,
5319         .read           = tracing_cpumask_read,
5320         .write          = tracing_cpumask_write,
5321         .release        = tracing_release_generic_tr,
5322         .llseek         = generic_file_llseek,
5323 };
5324
5325 static int tracing_trace_options_show(struct seq_file *m, void *v)
5326 {
5327         struct tracer_opt *trace_opts;
5328         struct trace_array *tr = m->private;
5329         u32 tracer_flags;
5330         int i;
5331
5332         mutex_lock(&trace_types_lock);
5333         tracer_flags = tr->current_trace->flags->val;
5334         trace_opts = tr->current_trace->flags->opts;
5335
5336         for (i = 0; trace_options[i]; i++) {
5337                 if (tr->trace_flags & (1 << i))
5338                         seq_printf(m, "%s\n", trace_options[i]);
5339                 else
5340                         seq_printf(m, "no%s\n", trace_options[i]);
5341         }
5342
5343         for (i = 0; trace_opts[i].name; i++) {
5344                 if (tracer_flags & trace_opts[i].bit)
5345                         seq_printf(m, "%s\n", trace_opts[i].name);
5346                 else
5347                         seq_printf(m, "no%s\n", trace_opts[i].name);
5348         }
5349         mutex_unlock(&trace_types_lock);
5350
5351         return 0;
5352 }
5353
5354 static int __set_tracer_option(struct trace_array *tr,
5355                                struct tracer_flags *tracer_flags,
5356                                struct tracer_opt *opts, int neg)
5357 {
5358         struct tracer *trace = tracer_flags->trace;
5359         int ret;
5360
5361         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5362         if (ret)
5363                 return ret;
5364
5365         if (neg)
5366                 tracer_flags->val &= ~opts->bit;
5367         else
5368                 tracer_flags->val |= opts->bit;
5369         return 0;
5370 }
5371
5372 /* Try to assign a tracer specific option */
5373 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5374 {
5375         struct tracer *trace = tr->current_trace;
5376         struct tracer_flags *tracer_flags = trace->flags;
5377         struct tracer_opt *opts = NULL;
5378         int i;
5379
5380         for (i = 0; tracer_flags->opts[i].name; i++) {
5381                 opts = &tracer_flags->opts[i];
5382
5383                 if (strcmp(cmp, opts->name) == 0)
5384                         return __set_tracer_option(tr, trace->flags, opts, neg);
5385         }
5386
5387         return -EINVAL;
5388 }
5389
5390 /* Some tracers require overwrite to stay enabled */
5391 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5392 {
5393         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5394                 return -1;
5395
5396         return 0;
5397 }
5398
5399 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5400 {
5401         int *map;
5402
5403         if ((mask == TRACE_ITER_RECORD_TGID) ||
5404             (mask == TRACE_ITER_RECORD_CMD))
5405                 lockdep_assert_held(&event_mutex);
5406
5407         /* do nothing if flag is already set */
5408         if (!!(tr->trace_flags & mask) == !!enabled)
5409                 return 0;
5410
5411         /* Give the tracer a chance to approve the change */
5412         if (tr->current_trace->flag_changed)
5413                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5414                         return -EINVAL;
5415
5416         if (enabled)
5417                 tr->trace_flags |= mask;
5418         else
5419                 tr->trace_flags &= ~mask;
5420
5421         if (mask == TRACE_ITER_RECORD_CMD)
5422                 trace_event_enable_cmd_record(enabled);
5423
5424         if (mask == TRACE_ITER_RECORD_TGID) {
5425                 if (!tgid_map) {
5426                         tgid_map_max = pid_max;
5427                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5428                                        GFP_KERNEL);
5429
5430                         /*
5431                          * Pairs with smp_load_acquire() in
5432                          * trace_find_tgid_ptr() to ensure that if it observes
5433                          * the tgid_map we just allocated then it also observes
5434                          * the corresponding tgid_map_max value.
5435                          */
5436                         smp_store_release(&tgid_map, map);
5437                 }
5438                 if (!tgid_map) {
5439                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5440                         return -ENOMEM;
5441                 }
5442
5443                 trace_event_enable_tgid_record(enabled);
5444         }
5445
5446         if (mask == TRACE_ITER_EVENT_FORK)
5447                 trace_event_follow_fork(tr, enabled);
5448
5449         if (mask == TRACE_ITER_FUNC_FORK)
5450                 ftrace_pid_follow_fork(tr, enabled);
5451
5452         if (mask == TRACE_ITER_OVERWRITE) {
5453                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5454 #ifdef CONFIG_TRACER_MAX_TRACE
5455                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5456 #endif
5457         }
5458
5459         if (mask == TRACE_ITER_PRINTK) {
5460                 trace_printk_start_stop_comm(enabled);
5461                 trace_printk_control(enabled);
5462         }
5463
5464         return 0;
5465 }
5466
5467 int trace_set_options(struct trace_array *tr, char *option)
5468 {
5469         char *cmp;
5470         int neg = 0;
5471         int ret;
5472         size_t orig_len = strlen(option);
5473         int len;
5474
5475         cmp = strstrip(option);
5476
5477         len = str_has_prefix(cmp, "no");
5478         if (len)
5479                 neg = 1;
5480
5481         cmp += len;
5482
5483         mutex_lock(&event_mutex);
5484         mutex_lock(&trace_types_lock);
5485
5486         ret = match_string(trace_options, -1, cmp);
5487         /* If no option could be set, test the specific tracer options */
5488         if (ret < 0)
5489                 ret = set_tracer_option(tr, cmp, neg);
5490         else
5491                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5492
5493         mutex_unlock(&trace_types_lock);
5494         mutex_unlock(&event_mutex);
5495
5496         /*
5497          * If the first trailing whitespace is replaced with '\0' by strstrip,
5498          * turn it back into a space.
5499          */
5500         if (orig_len > strlen(option))
5501                 option[strlen(option)] = ' ';
5502
5503         return ret;
5504 }
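/*
 * Illustrative usage: a flag is set by writing its name to the
 * trace_options file and cleared by prefixing it with "no", e.g.
 *
 *   # echo print-parent > trace_options
 *   # echo noprint-parent > trace_options
 *
 * Names that are not global flags fall through to the current tracer's
 * private options via set_tracer_option().
 */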
5505
5506 static void __init apply_trace_boot_options(void)
5507 {
5508         char *buf = trace_boot_options_buf;
5509         char *option;
5510
5511         while (true) {
5512                 option = strsep(&buf, ",");
5513
5514                 if (!option)
5515                         break;
5516
5517                 if (*option)
5518                         trace_set_options(&global_trace, option);
5519
5520                 /* Put back the comma to allow this to be called again */
5521                 if (buf)
5522                         *(buf - 1) = ',';
5523         }
5524 }
5525
5526 static ssize_t
5527 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5528                         size_t cnt, loff_t *ppos)
5529 {
5530         struct seq_file *m = filp->private_data;
5531         struct trace_array *tr = m->private;
5532         char buf[64];
5533         int ret;
5534
5535         if (cnt >= sizeof(buf))
5536                 return -EINVAL;
5537
5538         if (copy_from_user(buf, ubuf, cnt))
5539                 return -EFAULT;
5540
5541         buf[cnt] = 0;
5542
5543         ret = trace_set_options(tr, buf);
5544         if (ret < 0)
5545                 return ret;
5546
5547         *ppos += cnt;
5548
5549         return cnt;
5550 }
5551
5552 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5553 {
5554         struct trace_array *tr = inode->i_private;
5555         int ret;
5556
5557         ret = tracing_check_open_get_tr(tr);
5558         if (ret)
5559                 return ret;
5560
5561         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5562         if (ret < 0)
5563                 trace_array_put(tr);
5564
5565         return ret;
5566 }
5567
5568 static const struct file_operations tracing_iter_fops = {
5569         .open           = tracing_trace_options_open,
5570         .read           = seq_read,
5571         .llseek         = seq_lseek,
5572         .release        = tracing_single_release_tr,
5573         .write          = tracing_trace_options_write,
5574 };
5575
5576 static const char readme_msg[] =
5577         "tracing mini-HOWTO:\n\n"
5578         "# echo 0 > tracing_on : quick way to disable tracing\n"
5579         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5580         " Important files:\n"
5581         "  trace\t\t\t- The static contents of the buffer\n"
5582         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5583         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5584         "  current_tracer\t- function and latency tracers\n"
5585         "  available_tracers\t- list of configured tracers for current_tracer\n"
5586         "  error_log\t- error log for failed commands (that support it)\n"
5587         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5588         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5589         "  trace_clock\t\t- change the clock used to order events\n"
5590         "       local:   Per cpu clock but may not be synced across CPUs\n"
5591         "      global:   Synced across CPUs but slows tracing down.\n"
5592         "     counter:   Not a clock, but just an increment\n"
5593         "      uptime:   Jiffy counter from time of boot\n"
5594         "        perf:   Same clock that perf events use\n"
5595 #ifdef CONFIG_X86_64
5596         "     x86-tsc:   TSC cycle counter\n"
5597 #endif
5598         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5599         "       delta:   Delta difference against a buffer-wide timestamp\n"
5600         "    absolute:   Absolute (standalone) timestamp\n"
5601         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5602         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5603         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5604         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5605         "\t\t\t  Remove sub-buffer with rmdir\n"
5606         "  trace_options\t\t- Set format or modify how tracing happens\n"
5607         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5608         "\t\t\t  option name\n"
5609         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5610 #ifdef CONFIG_DYNAMIC_FTRACE
5611         "\n  available_filter_functions - list of functions that can be filtered on\n"
5612         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5613         "\t\t\t  functions\n"
5614         "\t     accepts: func_full_name or glob-matching-pattern\n"
5615         "\t     modules: Can select a group via module\n"
5616         "\t      Format: :mod:<module-name>\n"
5617         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5618         "\t    triggers: a command to perform when function is hit\n"
5619         "\t      Format: <function>:<trigger>[:count]\n"
5620         "\t     trigger: traceon, traceoff\n"
5621         "\t\t      enable_event:<system>:<event>\n"
5622         "\t\t      disable_event:<system>:<event>\n"
5623 #ifdef CONFIG_STACKTRACE
5624         "\t\t      stacktrace\n"
5625 #endif
5626 #ifdef CONFIG_TRACER_SNAPSHOT
5627         "\t\t      snapshot\n"
5628 #endif
5629         "\t\t      dump\n"
5630         "\t\t      cpudump\n"
5631         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5632         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5633         "\t     The first one will disable tracing every time do_fault is hit\n"
5634         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5635         "\t       The first time do_trap is hit and it disables tracing, the\n"
5636         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5637         "\t       the counter will not decrement. It only decrements when the\n"
5638         "\t       trigger did work\n"
5639         "\t     To remove trigger without count:\n"
5640         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5641         "\t     To remove trigger with a count:\n"
5642         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5643         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5644         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5645         "\t    modules: Can select a group via module command :mod:\n"
5646         "\t    Does not accept triggers\n"
5647 #endif /* CONFIG_DYNAMIC_FTRACE */
5648 #ifdef CONFIG_FUNCTION_TRACER
5649         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5650         "\t\t    (function)\n"
5651         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5652         "\t\t    (function)\n"
5653 #endif
5654 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5655         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5656         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5657         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5658 #endif
5659 #ifdef CONFIG_TRACER_SNAPSHOT
5660         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5661         "\t\t\t  snapshot buffer. Read the contents for more\n"
5662         "\t\t\t  information\n"
5663 #endif
5664 #ifdef CONFIG_STACK_TRACER
5665         "  stack_trace\t\t- Shows the max stack trace when active\n"
5666         "  stack_max_size\t- Shows current max stack size that was traced\n"
5667         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5668         "\t\t\t  new trace)\n"
5669 #ifdef CONFIG_DYNAMIC_FTRACE
5670         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5671         "\t\t\t  traces\n"
5672 #endif
5673 #endif /* CONFIG_STACK_TRACER */
5674 #ifdef CONFIG_DYNAMIC_EVENTS
5675         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5676         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5677 #endif
5678 #ifdef CONFIG_KPROBE_EVENTS
5679         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5680         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5681 #endif
5682 #ifdef CONFIG_UPROBE_EVENTS
5683         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5684         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5685 #endif
5686 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5687     defined(CONFIG_FPROBE_EVENTS)
5688         "\t  accepts: event-definitions (one definition per line)\n"
5689 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5690         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5691         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5692 #endif
5693 #ifdef CONFIG_FPROBE_EVENTS
5694         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5695         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5696 #endif
5697 #ifdef CONFIG_HIST_TRIGGERS
5698         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5699 #endif
5700         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5701         "\t           -:[<group>/][<event>]\n"
5702 #ifdef CONFIG_KPROBE_EVENTS
5703         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5704   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5705 #endif
5706 #ifdef CONFIG_UPROBE_EVENTS
5707   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5708 #endif
5709         "\t     args: <name>=fetcharg[:type]\n"
5710         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5711 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5712 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5713         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5714         "\t           <argname>[->field[->field|.field...]],\n"
5715 #else
5716         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5717 #endif
5718 #else
5719         "\t           $stack<index>, $stack, $retval, $comm,\n"
5720 #endif
5721         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5722         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5723         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5724         "\t           symstr, <type>\\[<array-size>\\]\n"
5725 #ifdef CONFIG_HIST_TRIGGERS
5726         "\t    field: <stype> <name>;\n"
5727         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5728         "\t           [unsigned] char/int/long\n"
5729 #endif
5730         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5731         "\t            of the <attached-group>/<attached-event>.\n"
5732 #endif
5733         "  events/\t\t- Directory containing all trace event subsystems:\n"
5734         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5735         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5736         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5737         "\t\t\t  events\n"
5738         "      filter\t\t- If set, only events passing filter are traced\n"
5739         "  events/<system>/<event>/\t- Directory containing control files for\n"
5740         "\t\t\t  <event>:\n"
5741         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5742         "      filter\t\t- If set, only events passing filter are traced\n"
5743         "      trigger\t\t- If set, a command to perform when event is hit\n"
5744         "\t    Format: <trigger>[:count][if <filter>]\n"
5745         "\t   trigger: traceon, traceoff\n"
5746         "\t            enable_event:<system>:<event>\n"
5747         "\t            disable_event:<system>:<event>\n"
5748 #ifdef CONFIG_HIST_TRIGGERS
5749         "\t            enable_hist:<system>:<event>\n"
5750         "\t            disable_hist:<system>:<event>\n"
5751 #endif
5752 #ifdef CONFIG_STACKTRACE
5753         "\t\t    stacktrace\n"
5754 #endif
5755 #ifdef CONFIG_TRACER_SNAPSHOT
5756         "\t\t    snapshot\n"
5757 #endif
5758 #ifdef CONFIG_HIST_TRIGGERS
5759         "\t\t    hist (see below)\n"
5760 #endif
5761         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5762         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5763         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5764         "\t                  events/block/block_unplug/trigger\n"
5765         "\t   The first disables tracing every time block_unplug is hit.\n"
5766         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5767         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5768         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5769         "\t   Like function triggers, the counter is only decremented if it\n"
5770         "\t    enabled or disabled tracing.\n"
5771         "\t   To remove a trigger without a count:\n"
5772         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5773         "\t   To remove a trigger with a count:\n"
5774         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5775         "\t   Filters can be ignored when removing a trigger.\n"
5776 #ifdef CONFIG_HIST_TRIGGERS
5777         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5778         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5779         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5780         "\t            [:values=<field1[,field2,...]>]\n"
5781         "\t            [:sort=<field1[,field2,...]>]\n"
5782         "\t            [:size=#entries]\n"
5783         "\t            [:pause][:continue][:clear]\n"
5784         "\t            [:name=histname1]\n"
5785         "\t            [:nohitcount]\n"
5786         "\t            [:<handler>.<action>]\n"
5787         "\t            [if <filter>]\n\n"
5788         "\t    Note, special fields can be used as well:\n"
5789         "\t            common_timestamp - to record current timestamp\n"
5790         "\t            common_cpu - to record the CPU the event happened on\n"
5791         "\n"
5792         "\t    A hist trigger variable can be:\n"
5793         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5794         "\t        - a reference to another variable e.g. y=$x,\n"
5795         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5796         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5797         "\n"
5798         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5799         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5800         "\t    variable reference, field or numeric literal.\n"
5801         "\n"
5802         "\t    When a matching event is hit, an entry is added to a hash\n"
5803         "\t    table using the key(s) and value(s) named, and the value of a\n"
5804         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5805         "\t    correspond to fields in the event's format description.  Keys\n"
5806         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5807         "\t    Compound keys consisting of up to two fields can be specified\n"
5808         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5809         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5810         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5811         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5812         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5813         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5814         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5815         "\t    its histogram data will be shared with other triggers of the\n"
5816         "\t    same name, and trigger hits will update this common data.\n\n"
5817         "\t    Reading the 'hist' file for the event will dump the hash\n"
5818         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5819         "\t    triggers attached to an event, there will be a table for each\n"
5820         "\t    trigger in the output.  The table displayed for a named\n"
5821         "\t    trigger will be the same as any other instance having the\n"
5822         "\t    same name.  The default format used to display a given field\n"
5823         "\t    can be modified by appending any of the following modifiers\n"
5824         "\t    to the field name, as applicable:\n\n"
5825         "\t            .hex        display a number as a hex value\n"
5826         "\t            .sym        display an address as a symbol\n"
5827         "\t            .sym-offset display an address as a symbol and offset\n"
5828         "\t            .execname   display a common_pid as a program name\n"
5829         "\t            .syscall    display a syscall id as a syscall name\n"
5830         "\t            .log2       display log2 value rather than raw number\n"
5831         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5832         "\t            .usecs      display a common_timestamp in microseconds\n"
5833         "\t            .percent    display a number as a percentage value\n"
5834         "\t            .graph      display a bar-graph of a value\n\n"
5835         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5836         "\t    trigger or to start a hist trigger but not log any events\n"
5837         "\t    until told to do so.  'continue' can be used to start or\n"
5838         "\t    restart a paused hist trigger.\n\n"
5839         "\t    The 'clear' parameter will clear the contents of a running\n"
5840         "\t    hist trigger and leave its current paused/active state\n"
5841         "\t    unchanged.\n\n"
5842         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5843         "\t    raw hitcount in the histogram.\n\n"
5844         "\t    The enable_hist and disable_hist triggers can be used to\n"
5845         "\t    have one event conditionally start and stop another event's\n"
5846         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5847         "\t    the enable_event and disable_event triggers.\n\n"
5848         "\t    Hist trigger handlers and actions are executed whenever a\n"
5849         "\t    histogram entry is added or updated.  They take the form:\n\n"
5850         "\t        <handler>.<action>\n\n"
5851         "\t    The available handlers are:\n\n"
5852         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5853         "\t        onmax(var)               - invoke if var exceeds current max\n"
5854         "\t        onchange(var)            - invoke action if var changes\n\n"
5855         "\t    The available actions are:\n\n"
5856         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5857         "\t        save(field,...)                      - save current event fields\n"
5858 #ifdef CONFIG_TRACER_SNAPSHOT
5859         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5860 #endif
5861 #ifdef CONFIG_SYNTH_EVENTS
5862         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5863         "\t  Write into this file to define/undefine new synthetic events.\n"
5864         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5865 #endif
5866 #endif
5867 ;
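/*
 * A worked hist trigger example to go with the text above (illustrative
 * only; any event with the referenced fields works the same way):
 *
 *   # echo 'hist:keys=common_pid.execname' > \
 *           events/sched/sched_switch/trigger
 *   # cat events/sched/sched_switch/hist
 *
 * This aggregates sched_switch hits per task, using the .execname
 * modifier described above to print the comm instead of the raw pid.
 */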
5868
5869 static ssize_t
5870 tracing_readme_read(struct file *filp, char __user *ubuf,
5871                        size_t cnt, loff_t *ppos)
5872 {
5873         return simple_read_from_buffer(ubuf, cnt, ppos,
5874                                         readme_msg, strlen(readme_msg));
5875 }
5876
5877 static const struct file_operations tracing_readme_fops = {
5878         .open           = tracing_open_generic,
5879         .read           = tracing_readme_read,
5880         .llseek         = generic_file_llseek,
5881 };
5882
5883 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5884 {
5885         int pid = ++(*pos);
5886
5887         return trace_find_tgid_ptr(pid);
5888 }
5889
5890 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5891 {
5892         int pid = *pos;
5893
5894         return trace_find_tgid_ptr(pid);
5895 }
5896
5897 static void saved_tgids_stop(struct seq_file *m, void *v)
5898 {
5899 }
5900
5901 static int saved_tgids_show(struct seq_file *m, void *v)
5902 {
5903         int *entry = (int *)v;
5904         int pid = entry - tgid_map;
5905         int tgid = *entry;
5906
5907         if (tgid == 0)
5908                 return SEQ_SKIP;
5909
5910         seq_printf(m, "%d %d\n", pid, tgid);
5911         return 0;
5912 }
5913
5914 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5915         .start          = saved_tgids_start,
5916         .stop           = saved_tgids_stop,
5917         .next           = saved_tgids_next,
5918         .show           = saved_tgids_show,
5919 };
5920
5921 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5922 {
5923         int ret;
5924
5925         ret = tracing_check_open_get_tr(NULL);
5926         if (ret)
5927                 return ret;
5928
5929         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5930 }
5931
5932
5933 static const struct file_operations tracing_saved_tgids_fops = {
5934         .open           = tracing_saved_tgids_open,
5935         .read           = seq_read,
5936         .llseek         = seq_lseek,
5937         .release        = seq_release,
5938 };
5939
5940 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5941 {
5942         unsigned int *ptr = v;
5943
5944         if (*pos || m->count)
5945                 ptr++;
5946
5947         (*pos)++;
5948
5949         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5950              ptr++) {
5951                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5952                         continue;
5953
5954                 return ptr;
5955         }
5956
5957         return NULL;
5958 }
5959
5960 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5961 {
5962         void *v;
5963         loff_t l = 0;
5964
5965         preempt_disable();
5966         arch_spin_lock(&trace_cmdline_lock);
5967
5968         v = &savedcmd->map_cmdline_to_pid[0];
5969         while (l <= *pos) {
5970                 v = saved_cmdlines_next(m, v, &l);
5971                 if (!v)
5972                         return NULL;
5973         }
5974
5975         return v;
5976 }
5977
5978 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5979 {
5980         arch_spin_unlock(&trace_cmdline_lock);
5981         preempt_enable();
5982 }
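/*
 * trace_cmdline_lock is a raw arch_spinlock_t, which does not disable
 * preemption by itself; that is why saved_cmdlines_start()/stop() wrap
 * it in preempt_disable()/preempt_enable().
 */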
5983
5984 static int saved_cmdlines_show(struct seq_file *m, void *v)
5985 {
5986         char buf[TASK_COMM_LEN];
5987         unsigned int *pid = v;
5988
5989         __trace_find_cmdline(*pid, buf);
5990         seq_printf(m, "%d %s\n", *pid, buf);
5991         return 0;
5992 }
5993
5994 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5995         .start          = saved_cmdlines_start,
5996         .next           = saved_cmdlines_next,
5997         .stop           = saved_cmdlines_stop,
5998         .show           = saved_cmdlines_show,
5999 };
6000
6001 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6002 {
6003         int ret;
6004
6005         ret = tracing_check_open_get_tr(NULL);
6006         if (ret)
6007                 return ret;
6008
6009         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6010 }
6011
6012 static const struct file_operations tracing_saved_cmdlines_fops = {
6013         .open           = tracing_saved_cmdlines_open,
6014         .read           = seq_read,
6015         .llseek         = seq_lseek,
6016         .release        = seq_release,
6017 };
6018
6019 static ssize_t
6020 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6021                                  size_t cnt, loff_t *ppos)
6022 {
6023         char buf[64];
6024         int r;
6025
6026         preempt_disable();
6027         arch_spin_lock(&trace_cmdline_lock);
6028         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6029         arch_spin_unlock(&trace_cmdline_lock);
6030         preempt_enable();
6031
6032         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6033 }
6034
6035 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6036 {
6037         kfree(s->saved_cmdlines);
6038         kfree(s->map_cmdline_to_pid);
6039         kfree(s);
6040 }
6041
6042 static int tracing_resize_saved_cmdlines(unsigned int val)
6043 {
6044         struct saved_cmdlines_buffer *s, *savedcmd_temp;
6045
6046         s = kmalloc(sizeof(*s), GFP_KERNEL);
6047         if (!s)
6048                 return -ENOMEM;
6049
6050         if (allocate_cmdlines_buffer(val, s) < 0) {
6051                 kfree(s);
6052                 return -ENOMEM;
6053         }
6054
6055         preempt_disable();
6056         arch_spin_lock(&trace_cmdline_lock);
6057         savedcmd_temp = savedcmd;
6058         savedcmd = s;
6059         arch_spin_unlock(&trace_cmdline_lock);
6060         preempt_enable();
6061         free_saved_cmdlines_buffer(savedcmd_temp);
6062
6063         return 0;
6064 }
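/*
 * The old buffer is unpublished under trace_cmdline_lock and only freed
 * after the lock is dropped and preemption is enabled again, which keeps
 * the allocator calls out of the raw-spinlock region.
 */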
6065
6066 static ssize_t
6067 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6068                                   size_t cnt, loff_t *ppos)
6069 {
6070         unsigned long val;
6071         int ret;
6072
6073         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6074         if (ret)
6075                 return ret;
6076
6077         /* must have at least 1 entry and at most PID_MAX_DEFAULT */
6078         if (!val || val > PID_MAX_DEFAULT)
6079                 return -EINVAL;
6080
6081         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6082         if (ret < 0)
6083                 return ret;
6084
6085         *ppos += cnt;
6086
6087         return cnt;
6088 }
6089
6090 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6091         .open           = tracing_open_generic,
6092         .read           = tracing_saved_cmdlines_size_read,
6093         .write          = tracing_saved_cmdlines_size_write,
6094 };
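/*
 * Illustrative usage (the default size shown here is an example):
 *
 *   # cat saved_cmdlines_size
 *   128
 *   # echo 1024 > saved_cmdlines_size
 *
 * Values outside 1..PID_MAX_DEFAULT are rejected with -EINVAL.
 */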
6095
6096 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6097 static union trace_eval_map_item *
6098 update_eval_map(union trace_eval_map_item *ptr)
6099 {
6100         if (!ptr->map.eval_string) {
6101                 if (ptr->tail.next) {
6102                         ptr = ptr->tail.next;
6103                         /* Set ptr to the next real item (skip head) */
6104                         ptr++;
6105                 } else
6106                         return NULL;
6107         }
6108         return ptr;
6109 }
6110
6111 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6112 {
6113         union trace_eval_map_item *ptr = v;
6114
6115         /*
6116          * Paranoid! If ptr points to end, we don't want to increment past it.
6117          * This really should never happen.
6118          */
6119         (*pos)++;
6120         ptr = update_eval_map(ptr);
6121         if (WARN_ON_ONCE(!ptr))
6122                 return NULL;
6123
6124         ptr++;
6125         ptr = update_eval_map(ptr);
6126
6127         return ptr;
6128 }
6129
6130 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6131 {
6132         union trace_eval_map_item *v;
6133         loff_t l = 0;
6134
6135         mutex_lock(&trace_eval_mutex);
6136
6137         v = trace_eval_maps;
6138         if (v)
6139                 v++;
6140
6141         while (v && l < *pos) {
6142                 v = eval_map_next(m, v, &l);
6143         }
6144
6145         return v;
6146 }
6147
6148 static void eval_map_stop(struct seq_file *m, void *v)
6149 {
6150         mutex_unlock(&trace_eval_mutex);
6151 }
6152
6153 static int eval_map_show(struct seq_file *m, void *v)
6154 {
6155         union trace_eval_map_item *ptr = v;
6156
6157         seq_printf(m, "%s %ld (%s)\n",
6158                    ptr->map.eval_string, ptr->map.eval_value,
6159                    ptr->map.system);
6160
6161         return 0;
6162 }
6163
6164 static const struct seq_operations tracing_eval_map_seq_ops = {
6165         .start          = eval_map_start,
6166         .next           = eval_map_next,
6167         .stop           = eval_map_stop,
6168         .show           = eval_map_show,
6169 };
6170
6171 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6172 {
6173         int ret;
6174
6175         ret = tracing_check_open_get_tr(NULL);
6176         if (ret)
6177                 return ret;
6178
6179         return seq_open(filp, &tracing_eval_map_seq_ops);
6180 }
6181
6182 static const struct file_operations tracing_eval_map_fops = {
6183         .open           = tracing_eval_map_open,
6184         .read           = seq_read,
6185         .llseek         = seq_lseek,
6186         .release        = seq_release,
6187 };
6188
6189 static inline union trace_eval_map_item *
6190 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6191 {
6192         /* Return tail of array given the head */
6193         return ptr + ptr->head.length + 1;
6194 }
6195
6196 static void
6197 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6198                            int len)
6199 {
6200         struct trace_eval_map **stop;
6201         struct trace_eval_map **map;
6202         union trace_eval_map_item *map_array;
6203         union trace_eval_map_item *ptr;
6204
6205         stop = start + len;
6206
6207         /*
6208          * The trace_eval_maps contains the map plus a head and tail item,
6209          * where the head holds the module and length of array, and the
6210          * tail holds a pointer to the next list.
6211          */
6212         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6213         if (!map_array) {
6214                 pr_warn("Unable to allocate trace eval mapping\n");
6215                 return;
6216         }
6217
6218         mutex_lock(&trace_eval_mutex);
6219
6220         if (!trace_eval_maps)
6221                 trace_eval_maps = map_array;
6222         else {
6223                 ptr = trace_eval_maps;
6224                 for (;;) {
6225                         ptr = trace_eval_jmp_to_tail(ptr);
6226                         if (!ptr->tail.next)
6227                                 break;
6228                         ptr = ptr->tail.next;
6229
6230                 }
6231                 ptr->tail.next = map_array;
6232         }
6233         map_array->head.mod = mod;
6234         map_array->head.length = len;
6235         map_array++;
6236
6237         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6238                 map_array->map = **map;
6239                 map_array++;
6240         }
6241         memset(map_array, 0, sizeof(*map_array));
6242
6243         mutex_unlock(&trace_eval_mutex);
6244 }
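
/*
 * Editor's note (illustrative, not part of trace.c): the array built above
 * for one module, as it hangs off trace_eval_maps:
 *
 *   [ head: mod, length=len ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *                                                              |
 *                                            next module's array, or NULL
 *
 * trace_eval_jmp_to_tail() walks from the head to the tail entry by
 * stepping head.length + 1 items forward, which is how the chain of
 * per-module arrays is traversed.
 */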
6245
6246 static void trace_create_eval_file(struct dentry *d_tracer)
6247 {
6248         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6249                           NULL, &tracing_eval_map_fops);
6250 }
6251
6252 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6253 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6254 static inline void trace_insert_eval_map_file(struct module *mod,
6255                               struct trace_eval_map **start, int len) { }
6256 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6257
6258 static void trace_insert_eval_map(struct module *mod,
6259                                   struct trace_eval_map **start, int len)
6260 {
6261         struct trace_eval_map **map;
6262
6263         if (len <= 0)
6264                 return;
6265
6266         map = start;
6267
6268         trace_event_eval_update(map, len);
6269
6270         trace_insert_eval_map_file(mod, start, len);
6271 }
6272
6273 static ssize_t
6274 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6275                        size_t cnt, loff_t *ppos)
6276 {
6277         struct trace_array *tr = filp->private_data;
6278         char buf[MAX_TRACER_SIZE+2];
6279         int r;
6280
6281         mutex_lock(&trace_types_lock);
6282         r = sprintf(buf, "%s\n", tr->current_trace->name);
6283         mutex_unlock(&trace_types_lock);
6284
6285         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6286 }
6287
6288 int tracer_init(struct tracer *t, struct trace_array *tr)
6289 {
6290         tracing_reset_online_cpus(&tr->array_buffer);
6291         return t->init(tr);
6292 }
6293
6294 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6295 {
6296         int cpu;
6297
6298         for_each_tracing_cpu(cpu)
6299                 per_cpu_ptr(buf->data, cpu)->entries = val;
6300 }
6301
6302 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6303 {
6304         if (cpu == RING_BUFFER_ALL_CPUS) {
6305                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6306         } else {
6307                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6308         }
6309 }
6310
6311 #ifdef CONFIG_TRACER_MAX_TRACE
6312 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6313 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6314                                         struct array_buffer *size_buf, int cpu_id)
6315 {
6316         int cpu, ret = 0;
6317
6318         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6319                 for_each_tracing_cpu(cpu) {
6320                         ret = ring_buffer_resize(trace_buf->buffer,
6321                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6322                         if (ret < 0)
6323                                 break;
6324                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6325                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6326                 }
6327         } else {
6328                 ret = ring_buffer_resize(trace_buf->buffer,
6329                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6330                 if (ret == 0)
6331                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6332                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6333         }
6334
6335         return ret;
6336 }
6337 #endif /* CONFIG_TRACER_MAX_TRACE */
6338
6339 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6340                                         unsigned long size, int cpu)
6341 {
6342         int ret;
6343
6344         /*
6345          * If kernel or user changes the size of the ring buffer
6346          * we use the size that was given, and we can forget about
6347          * expanding it later.
6348          */
6349         ring_buffer_expanded = true;
6350
6351         /* May be called before buffers are initialized */
6352         if (!tr->array_buffer.buffer)
6353                 return 0;
6354
6355         /* Do not allow tracing while resizing ring buffer */
6356         tracing_stop_tr(tr);
6357
6358         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6359         if (ret < 0)
6360                 goto out_start;
6361
6362 #ifdef CONFIG_TRACER_MAX_TRACE
6363         if (!tr->allocated_snapshot)
6364                 goto out;
6365
6366         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6367         if (ret < 0) {
6368                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6369                                                      &tr->array_buffer, cpu);
6370                 if (r < 0) {
6371                         /*
6372                          * AARGH! We are left with a max buffer of a
6373                          * different size!
6374                          * The max buffer is our "snapshot" buffer.
6375                          * When a tracer needs a snapshot (one of the
6376                          * latency tracers), it swaps the max buffer
6377                          * with the saved snapshot. We succeeded in
6378                          * updating the size of the main buffer, but failed
6379                          * to update the size of the max buffer. And when we
6380                          * tried to reset the main buffer to its original
6381                          * size, that failed too. This is very unlikely to
6382                          * happen, but if it does, warn and kill all
6383                          * tracing.
6384                          */
6385                         WARN_ON(1);
6386                         tracing_disabled = 1;
6387                 }
6388                 goto out_start;
6389         }
6390
6391         update_buffer_entries(&tr->max_buffer, cpu);
6392
6393  out:
6394 #endif /* CONFIG_TRACER_MAX_TRACE */
6395
6396         update_buffer_entries(&tr->array_buffer, cpu);
6397  out_start:
6398         tracing_start_tr(tr);
6399         return ret;
6400 }
6401
6402 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6403                                   unsigned long size, int cpu_id)
6404 {
6405         int ret;
6406
6407         mutex_lock(&trace_types_lock);
6408
6409         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6410                 /* make sure this cpu is enabled in the mask */
6411                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6412                         ret = -EINVAL;
6413                         goto out;
6414                 }
6415         }
6416
6417         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6418         if (ret < 0)
6419                 ret = -ENOMEM;
6420
6421 out:
6422         mutex_unlock(&trace_types_lock);
6423
6424         return ret;
6425 }
6426
6427
6428 /**
6429  * tracing_update_buffers - used by tracing facility to expand ring buffers
6430  *
6431  * To save memory on systems where tracing is configured in but never
6432  * used, the ring buffers are set to a minimum size. Once a user starts
6433  * to use the tracing facility, the buffers need to grow to their
6434  * default size.
6435  *
6436  * This function is to be called when a tracer is about to be used.
6437  */
6438 int tracing_update_buffers(void)
6439 {
6440         int ret = 0;
6441
6442         mutex_lock(&trace_types_lock);
6443         if (!ring_buffer_expanded)
6444                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6445                                                 RING_BUFFER_ALL_CPUS);
6446         mutex_unlock(&trace_types_lock);
6447
6448         return ret;
6449 }
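
/*
 * Editor's sketch (not part of trace.c): a minimal, hypothetical in-kernel
 * caller showing the intended ordering - grow the boot-time minimal ring
 * buffers with tracing_update_buffers() before actually turning tracing on.
 * The helper name below is made up for illustration.
 */
#if 0	/* illustrative only */
static int example_start_using_tracing(struct trace_array *tr)
{
	int ret;

	/* Expand global_trace buffers to their default size if still minimal */
	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	tracer_tracing_on(tr);
	return 0;
}
#endif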
6450
6451 struct trace_option_dentry;
6452
6453 static void
6454 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6455
6456 /*
6457  * Used to clear out the tracer before deletion of an instance.
6458  * Must have trace_types_lock held.
6459  */
6460 static void tracing_set_nop(struct trace_array *tr)
6461 {
6462         if (tr->current_trace == &nop_trace)
6463                 return;
6464
6465         tr->current_trace->enabled--;
6466
6467         if (tr->current_trace->reset)
6468                 tr->current_trace->reset(tr);
6469
6470         tr->current_trace = &nop_trace;
6471 }
6472
6473 static bool tracer_options_updated;
6474
6475 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6476 {
6477         /* Only enable if the directory has been created already. */
6478         if (!tr->dir)
6479                 return;
6480
6481         /* Only create trace option files after update_tracer_options has finished */
6482         if (!tracer_options_updated)
6483                 return;
6484
6485         create_trace_option_files(tr, t);
6486 }
6487
6488 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6489 {
6490         struct tracer *t;
6491 #ifdef CONFIG_TRACER_MAX_TRACE
6492         bool had_max_tr;
6493 #endif
6494         int ret = 0;
6495
6496         mutex_lock(&trace_types_lock);
6497
6498         if (!ring_buffer_expanded) {
6499                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6500                                                 RING_BUFFER_ALL_CPUS);
6501                 if (ret < 0)
6502                         goto out;
6503                 ret = 0;
6504         }
6505
6506         for (t = trace_types; t; t = t->next) {
6507                 if (strcmp(t->name, buf) == 0)
6508                         break;
6509         }
6510         if (!t) {
6511                 ret = -EINVAL;
6512                 goto out;
6513         }
6514         if (t == tr->current_trace)
6515                 goto out;
6516
6517 #ifdef CONFIG_TRACER_SNAPSHOT
6518         if (t->use_max_tr) {
6519                 local_irq_disable();
6520                 arch_spin_lock(&tr->max_lock);
6521                 if (tr->cond_snapshot)
6522                         ret = -EBUSY;
6523                 arch_spin_unlock(&tr->max_lock);
6524                 local_irq_enable();
6525                 if (ret)
6526                         goto out;
6527         }
6528 #endif
6529         /* Some tracers won't work on kernel command line */
6530         if (system_state < SYSTEM_RUNNING && t->noboot) {
6531                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6532                         t->name);
6533                 goto out;
6534         }
6535
6536         /* Some tracers are only allowed for the top level buffer */
6537         if (!trace_ok_for_array(t, tr)) {
6538                 ret = -EINVAL;
6539                 goto out;
6540         }
6541
6542         /* If trace pipe files are being read, we can't change the tracer */
6543         if (tr->trace_ref) {
6544                 ret = -EBUSY;
6545                 goto out;
6546         }
6547
6548         trace_branch_disable();
6549
6550         tr->current_trace->enabled--;
6551
6552         if (tr->current_trace->reset)
6553                 tr->current_trace->reset(tr);
6554
6555 #ifdef CONFIG_TRACER_MAX_TRACE
6556         had_max_tr = tr->current_trace->use_max_tr;
6557
6558         /* Current trace needs to be nop_trace before synchronize_rcu */
6559         tr->current_trace = &nop_trace;
6560
6561         if (had_max_tr && !t->use_max_tr) {
6562                 /*
6563                  * We need to make sure that the update_max_tr sees that
6564                  * current_trace changed to nop_trace to keep it from
6565                  * swapping the buffers after we resize it.
6566                  * The update_max_tr is called with interrupts disabled,
6567                  * so a synchronize_rcu() is sufficient.
6568                  */
6569                 synchronize_rcu();
6570                 free_snapshot(tr);
6571         }
6572
6573         if (t->use_max_tr && !tr->allocated_snapshot) {
6574                 ret = tracing_alloc_snapshot_instance(tr);
6575                 if (ret < 0)
6576                         goto out;
6577         }
6578 #else
6579         tr->current_trace = &nop_trace;
6580 #endif
6581
6582         if (t->init) {
6583                 ret = tracer_init(t, tr);
6584                 if (ret)
6585                         goto out;
6586         }
6587
6588         tr->current_trace = t;
6589         tr->current_trace->enabled++;
6590         trace_branch_enable(tr);
6591  out:
6592         mutex_unlock(&trace_types_lock);
6593
6594         return ret;
6595 }
6596
6597 static ssize_t
6598 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6599                         size_t cnt, loff_t *ppos)
6600 {
6601         struct trace_array *tr = filp->private_data;
6602         char buf[MAX_TRACER_SIZE+1];
6603         char *name;
6604         size_t ret;
6605         int err;
6606
6607         ret = cnt;
6608
6609         if (cnt > MAX_TRACER_SIZE)
6610                 cnt = MAX_TRACER_SIZE;
6611
6612         if (copy_from_user(buf, ubuf, cnt))
6613                 return -EFAULT;
6614
6615         buf[cnt] = 0;
6616
6617         name = strim(buf);
6618
6619         err = tracing_set_tracer(tr, name);
6620         if (err)
6621                 return err;
6622
6623         *ppos += ret;
6624
6625         return ret;
6626 }
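
/*
 * Editor's sketch (not part of trace.c): selecting a tracer from user space
 * through the current_tracer file handled above. Assumes tracefs is mounted
 * at /sys/kernel/tracing and the named tracer is compiled in; errors from
 * tracing_set_tracer() (e.g. -EBUSY while trace_pipe is open) come back as
 * a failed write().
 */
#if 0	/* illustrative user-space example */
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int set_current_tracer(const char *name)	/* e.g. "nop", "function" */
{
	int fd, err = 0;

	fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
	if (fd < 0)
		return -errno;
	/* tracing_set_trace_write() trims the input, so a trailing '\n' is fine */
	if (write(fd, name, strlen(name)) < 0)
		err = -errno;
	close(fd);
	return err;
}
#endif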
6627
6628 static ssize_t
6629 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6630                    size_t cnt, loff_t *ppos)
6631 {
6632         char buf[64];
6633         int r;
6634
6635         r = snprintf(buf, sizeof(buf), "%ld\n",
6636                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6637         if (r > sizeof(buf))
6638                 r = sizeof(buf);
6639         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6640 }
6641
6642 static ssize_t
6643 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6644                     size_t cnt, loff_t *ppos)
6645 {
6646         unsigned long val;
6647         int ret;
6648
6649         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6650         if (ret)
6651                 return ret;
6652
6653         *ptr = val * 1000;
6654
6655         return cnt;
6656 }
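
/*
 * Editor's note (illustrative, not part of trace.c): despite the "nsecs"
 * helper names, these files are exposed in microseconds. A write of "100"
 * stores 100 * 1000 = 100000 ns, and the read path converts back with
 * nsecs_to_usecs(). tracing_thresh and max_latency below both use them.
 */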
6657
6658 static ssize_t
6659 tracing_thresh_read(struct file *filp, char __user *ubuf,
6660                     size_t cnt, loff_t *ppos)
6661 {
6662         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6663 }
6664
6665 static ssize_t
6666 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6667                      size_t cnt, loff_t *ppos)
6668 {
6669         struct trace_array *tr = filp->private_data;
6670         int ret;
6671
6672         mutex_lock(&trace_types_lock);
6673         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6674         if (ret < 0)
6675                 goto out;
6676
6677         if (tr->current_trace->update_thresh) {
6678                 ret = tr->current_trace->update_thresh(tr);
6679                 if (ret < 0)
6680                         goto out;
6681         }
6682
6683         ret = cnt;
6684 out:
6685         mutex_unlock(&trace_types_lock);
6686
6687         return ret;
6688 }
6689
6690 #ifdef CONFIG_TRACER_MAX_TRACE
6691
6692 static ssize_t
6693 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6694                      size_t cnt, loff_t *ppos)
6695 {
6696         struct trace_array *tr = filp->private_data;
6697
6698         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6699 }
6700
6701 static ssize_t
6702 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6703                       size_t cnt, loff_t *ppos)
6704 {
6705         struct trace_array *tr = filp->private_data;
6706
6707         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6708 }
6709
6710 #endif
6711
6712 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6713 {
6714         if (cpu == RING_BUFFER_ALL_CPUS) {
6715                 if (cpumask_empty(tr->pipe_cpumask)) {
6716                         cpumask_setall(tr->pipe_cpumask);
6717                         return 0;
6718                 }
6719         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6720                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6721                 return 0;
6722         }
6723         return -EBUSY;
6724 }
6725
6726 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6727 {
6728         if (cpu == RING_BUFFER_ALL_CPUS) {
6729                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6730                 cpumask_clear(tr->pipe_cpumask);
6731         } else {
6732                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6733                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6734         }
6735 }
6736
6737 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6738 {
6739         struct trace_array *tr = inode->i_private;
6740         struct trace_iterator *iter;
6741         int cpu;
6742         int ret;
6743
6744         ret = tracing_check_open_get_tr(tr);
6745         if (ret)
6746                 return ret;
6747
6748         mutex_lock(&trace_types_lock);
6749         cpu = tracing_get_cpu(inode);
6750         ret = open_pipe_on_cpu(tr, cpu);
6751         if (ret)
6752                 goto fail_pipe_on_cpu;
6753
6754         /* create a buffer to store the information to pass to userspace */
6755         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6756         if (!iter) {
6757                 ret = -ENOMEM;
6758                 goto fail_alloc_iter;
6759         }
6760
6761         trace_seq_init(&iter->seq);
6762         iter->trace = tr->current_trace;
6763
6764         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6765                 ret = -ENOMEM;
6766                 goto fail;
6767         }
6768
6769         /* trace pipe does not show start of buffer */
6770         cpumask_setall(iter->started);
6771
6772         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6773                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6774
6775         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6776         if (trace_clocks[tr->clock_id].in_ns)
6777                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6778
6779         iter->tr = tr;
6780         iter->array_buffer = &tr->array_buffer;
6781         iter->cpu_file = cpu;
6782         mutex_init(&iter->mutex);
6783         filp->private_data = iter;
6784
6785         if (iter->trace->pipe_open)
6786                 iter->trace->pipe_open(iter);
6787
6788         nonseekable_open(inode, filp);
6789
6790         tr->trace_ref++;
6791
6792         mutex_unlock(&trace_types_lock);
6793         return ret;
6794
6795 fail:
6796         kfree(iter);
6797 fail_alloc_iter:
6798         close_pipe_on_cpu(tr, cpu);
6799 fail_pipe_on_cpu:
6800         __trace_array_put(tr);
6801         mutex_unlock(&trace_types_lock);
6802         return ret;
6803 }
6804
6805 static int tracing_release_pipe(struct inode *inode, struct file *file)
6806 {
6807         struct trace_iterator *iter = file->private_data;
6808         struct trace_array *tr = inode->i_private;
6809
6810         mutex_lock(&trace_types_lock);
6811
6812         tr->trace_ref--;
6813
6814         if (iter->trace->pipe_close)
6815                 iter->trace->pipe_close(iter);
6816         close_pipe_on_cpu(tr, iter->cpu_file);
6817         mutex_unlock(&trace_types_lock);
6818
6819         free_trace_iter_content(iter);
6820         kfree(iter);
6821
6822         trace_array_put(tr);
6823
6824         return 0;
6825 }
6826
6827 static __poll_t
6828 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6829 {
6830         struct trace_array *tr = iter->tr;
6831
6832         /* Iterators are static, they should be filled or empty */
6833         if (trace_buffer_iter(iter, iter->cpu_file))
6834                 return EPOLLIN | EPOLLRDNORM;
6835
6836         if (tr->trace_flags & TRACE_ITER_BLOCK)
6837                 /*
6838                  * Always select as readable when in blocking mode
6839                  */
6840                 return EPOLLIN | EPOLLRDNORM;
6841         else
6842                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6843                                              filp, poll_table, iter->tr->buffer_percent);
6844 }
6845
6846 static __poll_t
6847 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6848 {
6849         struct trace_iterator *iter = filp->private_data;
6850
6851         return trace_poll(iter, filp, poll_table);
6852 }
6853
6854 /* Must be called with iter->mutex held. */
6855 static int tracing_wait_pipe(struct file *filp)
6856 {
6857         struct trace_iterator *iter = filp->private_data;
6858         int ret;
6859
6860         while (trace_empty(iter)) {
6861
6862                 if ((filp->f_flags & O_NONBLOCK)) {
6863                         return -EAGAIN;
6864                 }
6865
6866                 /*
6867                  * We block until we read something and tracing is disabled.
6868                  * We still block if tracing is disabled, but we have never
6869                  * read anything. This allows a user to cat this file, and
6870                  * then enable tracing. But after we have read something,
6871                  * we give an EOF when tracing is again disabled.
6872                  *
6873                  * iter->pos will be 0 if we haven't read anything.
6874                  */
6875                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6876                         break;
6877
6878                 mutex_unlock(&iter->mutex);
6879
6880                 ret = wait_on_pipe(iter, 0);
6881
6882                 mutex_lock(&iter->mutex);
6883
6884                 if (ret)
6885                         return ret;
6886         }
6887
6888         return 1;
6889 }
6890
6891 /*
6892  * Consumer reader.
6893  */
6894 static ssize_t
6895 tracing_read_pipe(struct file *filp, char __user *ubuf,
6896                   size_t cnt, loff_t *ppos)
6897 {
6898         struct trace_iterator *iter = filp->private_data;
6899         ssize_t sret;
6900
6901         /*
6902          * Avoid more than one consumer on a single file descriptor.
6903          * This is just a matter of trace coherency; the ring buffer itself
6904          * is protected.
6905          */
6906         mutex_lock(&iter->mutex);
6907
6908         /* return any leftover data */
6909         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6910         if (sret != -EBUSY)
6911                 goto out;
6912
6913         trace_seq_init(&iter->seq);
6914
6915         if (iter->trace->read) {
6916                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6917                 if (sret)
6918                         goto out;
6919         }
6920
6921 waitagain:
6922         sret = tracing_wait_pipe(filp);
6923         if (sret <= 0)
6924                 goto out;
6925
6926         /* stop when tracing is finished */
6927         if (trace_empty(iter)) {
6928                 sret = 0;
6929                 goto out;
6930         }
6931
6932         if (cnt >= PAGE_SIZE)
6933                 cnt = PAGE_SIZE - 1;
6934
6935         /* reset all but tr, trace, and overruns */
6936         trace_iterator_reset(iter);
6937         cpumask_clear(iter->started);
6938         trace_seq_init(&iter->seq);
6939
6940         trace_event_read_lock();
6941         trace_access_lock(iter->cpu_file);
6942         while (trace_find_next_entry_inc(iter) != NULL) {
6943                 enum print_line_t ret;
6944                 int save_len = iter->seq.seq.len;
6945
6946                 ret = print_trace_line(iter);
6947                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6948                         /*
6949                          * If one print_trace_line() fills the entire trace_seq in one shot,
6950                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6951                          * In this case, we need to consume it; otherwise, the loop will peek
6952                          * this event again next time, resulting in an infinite loop.
6953                          */
6954                         if (save_len == 0) {
6955                                 iter->seq.full = 0;
6956                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6957                                 trace_consume(iter);
6958                                 break;
6959                         }
6960
6961                         /* In other cases, don't print partial lines */
6962                         iter->seq.seq.len = save_len;
6963                         break;
6964                 }
6965                 if (ret != TRACE_TYPE_NO_CONSUME)
6966                         trace_consume(iter);
6967
6968                 if (trace_seq_used(&iter->seq) >= cnt)
6969                         break;
6970
6971                 /*
6972                  * Setting the full flag means we reached the trace_seq buffer
6973                  * size and we should have left via the partial-output condition
6974                  * above; one of the trace_seq_* functions is not used properly.
6975                  */
6976                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6977                           iter->ent->type);
6978         }
6979         trace_access_unlock(iter->cpu_file);
6980         trace_event_read_unlock();
6981
6982         /* Now copy what we have to the user */
6983         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6984         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6985                 trace_seq_init(&iter->seq);
6986
6987         /*
6988          * If there was nothing to send to user, in spite of consuming trace
6989          * entries, go back to wait for more entries.
6990          */
6991         if (sret == -EBUSY)
6992                 goto waitagain;
6993
6994 out:
6995         mutex_unlock(&iter->mutex);
6996
6997         return sret;
6998 }
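
/*
 * Editor's sketch (not part of trace.c): a minimal consumer of the
 * trace_pipe file implemented by tracing_read_pipe() above. Reads block
 * until data is available (unless O_NONBLOCK) and consume the events they
 * return; a read of 0 means tracing was disabled after something had
 * already been read.
 */
#if 0	/* illustrative user-space example */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd;

	fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
	if (fd < 0)
		return 1;

	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);

	close(fd);
	return 0;
}
#endif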
6999
7000 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7001                                      unsigned int idx)
7002 {
7003         __free_page(spd->pages[idx]);
7004 }
7005
7006 static size_t
7007 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7008 {
7009         size_t count;
7010         int save_len;
7011         int ret;
7012
7013         /* Seq buffer is page-sized, exactly what we need. */
7014         for (;;) {
7015                 save_len = iter->seq.seq.len;
7016                 ret = print_trace_line(iter);
7017
7018                 if (trace_seq_has_overflowed(&iter->seq)) {
7019                         iter->seq.seq.len = save_len;
7020                         break;
7021                 }
7022
7023                 /*
7024                  * This should not be hit, because it should only
7025                  * be set if the iter->seq overflowed. But check it
7026                  * anyway to be safe.
7027                  */
7028                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
7029                         iter->seq.seq.len = save_len;
7030                         break;
7031                 }
7032
7033                 count = trace_seq_used(&iter->seq) - save_len;
7034                 if (rem < count) {
7035                         rem = 0;
7036                         iter->seq.seq.len = save_len;
7037                         break;
7038                 }
7039
7040                 if (ret != TRACE_TYPE_NO_CONSUME)
7041                         trace_consume(iter);
7042                 rem -= count;
7043                 if (!trace_find_next_entry_inc(iter))   {
7044                         rem = 0;
7045                         iter->ent = NULL;
7046                         break;
7047                 }
7048         }
7049
7050         return rem;
7051 }
7052
7053 static ssize_t tracing_splice_read_pipe(struct file *filp,
7054                                         loff_t *ppos,
7055                                         struct pipe_inode_info *pipe,
7056                                         size_t len,
7057                                         unsigned int flags)
7058 {
7059         struct page *pages_def[PIPE_DEF_BUFFERS];
7060         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7061         struct trace_iterator *iter = filp->private_data;
7062         struct splice_pipe_desc spd = {
7063                 .pages          = pages_def,
7064                 .partial        = partial_def,
7065                 .nr_pages       = 0, /* This gets updated below. */
7066                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7067                 .ops            = &default_pipe_buf_ops,
7068                 .spd_release    = tracing_spd_release_pipe,
7069         };
7070         ssize_t ret;
7071         size_t rem;
7072         unsigned int i;
7073
7074         if (splice_grow_spd(pipe, &spd))
7075                 return -ENOMEM;
7076
7077         mutex_lock(&iter->mutex);
7078
7079         if (iter->trace->splice_read) {
7080                 ret = iter->trace->splice_read(iter, filp,
7081                                                ppos, pipe, len, flags);
7082                 if (ret)
7083                         goto out_err;
7084         }
7085
7086         ret = tracing_wait_pipe(filp);
7087         if (ret <= 0)
7088                 goto out_err;
7089
7090         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7091                 ret = -EFAULT;
7092                 goto out_err;
7093         }
7094
7095         trace_event_read_lock();
7096         trace_access_lock(iter->cpu_file);
7097
7098         /* Fill as many pages as possible. */
7099         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7100                 spd.pages[i] = alloc_page(GFP_KERNEL);
7101                 if (!spd.pages[i])
7102                         break;
7103
7104                 rem = tracing_fill_pipe_page(rem, iter);
7105
7106                 /* Copy the data into the page, so we can start over. */
7107                 ret = trace_seq_to_buffer(&iter->seq,
7108                                           page_address(spd.pages[i]),
7109                                           trace_seq_used(&iter->seq));
7110                 if (ret < 0) {
7111                         __free_page(spd.pages[i]);
7112                         break;
7113                 }
7114                 spd.partial[i].offset = 0;
7115                 spd.partial[i].len = trace_seq_used(&iter->seq);
7116
7117                 trace_seq_init(&iter->seq);
7118         }
7119
7120         trace_access_unlock(iter->cpu_file);
7121         trace_event_read_unlock();
7122         mutex_unlock(&iter->mutex);
7123
7124         spd.nr_pages = i;
7125
7126         if (i)
7127                 ret = splice_to_pipe(pipe, &spd);
7128         else
7129                 ret = 0;
7130 out:
7131         splice_shrink_spd(&spd);
7132         return ret;
7133
7134 out_err:
7135         mutex_unlock(&iter->mutex);
7136         goto out;
7137 }
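
/*
 * Editor's sketch (not part of trace.c): exercising the splice path above
 * by splicing trace_pipe through an anonymous pipe into a file, so the
 * trace text never has to be copied into a user-space buffer.
 */
#if 0	/* illustrative user-space example */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

static int splice_trace_to_file(const char *outpath)
{
	int in, out, pfd[2];
	ssize_t n;

	in = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
	out = open(outpath, O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (in < 0 || out < 0 || pipe(pfd) < 0)
		return -1;

	/* trace_pipe -> pipe -> output file, one page-sized chunk at a time */
	while ((n = splice(in, NULL, pfd[1], NULL, 4096, 0)) > 0)
		if (splice(pfd[0], NULL, out, NULL, n, 0) < 0)
			break;

	close(pfd[0]); close(pfd[1]); close(in); close(out);
	return n < 0 ? -1 : 0;
}
#endif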
7138
7139 static ssize_t
7140 tracing_entries_read(struct file *filp, char __user *ubuf,
7141                      size_t cnt, loff_t *ppos)
7142 {
7143         struct inode *inode = file_inode(filp);
7144         struct trace_array *tr = inode->i_private;
7145         int cpu = tracing_get_cpu(inode);
7146         char buf[64];
7147         int r = 0;
7148         ssize_t ret;
7149
7150         mutex_lock(&trace_types_lock);
7151
7152         if (cpu == RING_BUFFER_ALL_CPUS) {
7153                 int cpu, buf_size_same;
7154                 unsigned long size;
7155
7156                 size = 0;
7157                 buf_size_same = 1;
7158                 /* check if all cpu sizes are the same */
7159                 for_each_tracing_cpu(cpu) {
7160                         /* fill in the size from first enabled cpu */
7161                         if (size == 0)
7162                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7163                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7164                                 buf_size_same = 0;
7165                                 break;
7166                         }
7167                 }
7168
7169                 if (buf_size_same) {
7170                         if (!ring_buffer_expanded)
7171                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7172                                             size >> 10,
7173                                             trace_buf_size >> 10);
7174                         else
7175                                 r = sprintf(buf, "%lu\n", size >> 10);
7176                 } else
7177                         r = sprintf(buf, "X\n");
7178         } else
7179                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7180
7181         mutex_unlock(&trace_types_lock);
7182
7183         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7184         return ret;
7185 }
7186
7187 static ssize_t
7188 tracing_entries_write(struct file *filp, const char __user *ubuf,
7189                       size_t cnt, loff_t *ppos)
7190 {
7191         struct inode *inode = file_inode(filp);
7192         struct trace_array *tr = inode->i_private;
7193         unsigned long val;
7194         int ret;
7195
7196         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7197         if (ret)
7198                 return ret;
7199
7200         /* must have at least 1 entry */
7201         if (!val)
7202                 return -EINVAL;
7203
7204         /* value is in KB */
7205         val <<= 10;
7206         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7207         if (ret < 0)
7208                 return ret;
7209
7210         *ppos += cnt;
7211
7212         return cnt;
7213 }
7214
7215 static ssize_t
7216 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7217                                 size_t cnt, loff_t *ppos)
7218 {
7219         struct trace_array *tr = filp->private_data;
7220         char buf[64];
7221         int r, cpu;
7222         unsigned long size = 0, expanded_size = 0;
7223
7224         mutex_lock(&trace_types_lock);
7225         for_each_tracing_cpu(cpu) {
7226                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7227                 if (!ring_buffer_expanded)
7228                         expanded_size += trace_buf_size >> 10;
7229         }
7230         if (ring_buffer_expanded)
7231                 r = sprintf(buf, "%lu\n", size);
7232         else
7233                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7234         mutex_unlock(&trace_types_lock);
7235
7236         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7237 }
7238
7239 static ssize_t
7240 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7241                           size_t cnt, loff_t *ppos)
7242 {
7243         /*
7244          * There is no need to read what the user has written; this function
7245          * exists just so that using "echo" on the file does not return an error.
7246          */
7247
7248         *ppos += cnt;
7249
7250         return cnt;
7251 }
7252
7253 static int
7254 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7255 {
7256         struct trace_array *tr = inode->i_private;
7257
7258         /* disable tracing ? */
7259         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7260                 tracer_tracing_off(tr);
7261         /* resize the ring buffer to 0 */
7262         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7263
7264         trace_array_put(tr);
7265
7266         return 0;
7267 }
7268
7269 static ssize_t
7270 tracing_mark_write(struct file *filp, const char __user *ubuf,
7271                                         size_t cnt, loff_t *fpos)
7272 {
7273         struct trace_array *tr = filp->private_data;
7274         struct ring_buffer_event *event;
7275         enum event_trigger_type tt = ETT_NONE;
7276         struct trace_buffer *buffer;
7277         struct print_entry *entry;
7278         ssize_t written;
7279         int size;
7280         int len;
7281
7282 /* Used in tracing_mark_raw_write() as well */
7283 #define FAULTED_STR "<faulted>"
7284 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7285
7286         if (tracing_disabled)
7287                 return -EINVAL;
7288
7289         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7290                 return -EINVAL;
7291
7292         if (cnt > TRACE_BUF_SIZE)
7293                 cnt = TRACE_BUF_SIZE;
7294
7295         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7296
7297         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7298
7299         /* If less than "<faulted>", then make sure we can still add that */
7300         if (cnt < FAULTED_SIZE)
7301                 size += FAULTED_SIZE - cnt;
7302
7303         buffer = tr->array_buffer.buffer;
7304         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7305                                             tracing_gen_ctx());
7306         if (unlikely(!event))
7307                 /* Ring buffer disabled, return as if not open for write */
7308                 return -EBADF;
7309
7310         entry = ring_buffer_event_data(event);
7311         entry->ip = _THIS_IP_;
7312
7313         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7314         if (len) {
7315                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7316                 cnt = FAULTED_SIZE;
7317                 written = -EFAULT;
7318         } else
7319                 written = cnt;
7320
7321         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7322                 /* do not add \n before testing triggers, but add \0 */
7323                 entry->buf[cnt] = '\0';
7324                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7325         }
7326
7327         if (entry->buf[cnt - 1] != '\n') {
7328                 entry->buf[cnt] = '\n';
7329                 entry->buf[cnt + 1] = '\0';
7330         } else
7331                 entry->buf[cnt] = '\0';
7332
7333         if (static_branch_unlikely(&trace_marker_exports_enabled))
7334                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7335         __buffer_unlock_commit(buffer, event);
7336
7337         if (tt)
7338                 event_triggers_post_call(tr->trace_marker_file, tt);
7339
7340         return written;
7341 }
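
/*
 * Editor's sketch (not part of trace.c): the canonical use of the
 * trace_marker file handled by tracing_mark_write() above - annotating the
 * trace from user space. One write() becomes one TRACE_PRINT event, and the
 * kernel appends a '\n' if the message does not already end with one.
 */
#if 0	/* illustrative user-space example */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static void trace_marker_note(int marker_fd, const char *msg)
{
	(void)write(marker_fd, msg, strlen(msg));
}

/* Usage: int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY); */
#endif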
7342
7343 /* Limit it for now to 3K (including tag) */
7344 #define RAW_DATA_MAX_SIZE (1024*3)
7345
7346 static ssize_t
7347 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7348                                         size_t cnt, loff_t *fpos)
7349 {
7350         struct trace_array *tr = filp->private_data;
7351         struct ring_buffer_event *event;
7352         struct trace_buffer *buffer;
7353         struct raw_data_entry *entry;
7354         ssize_t written;
7355         int size;
7356         int len;
7357
7358 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7359
7360         if (tracing_disabled)
7361                 return -EINVAL;
7362
7363         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7364                 return -EINVAL;
7365
7366         /* The marker must at least have a tag id */
7367         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7368                 return -EINVAL;
7369
7370         if (cnt > TRACE_BUF_SIZE)
7371                 cnt = TRACE_BUF_SIZE;
7372
7373         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7374
7375         size = sizeof(*entry) + cnt;
7376         if (cnt < FAULT_SIZE_ID)
7377                 size += FAULT_SIZE_ID - cnt;
7378
7379         buffer = tr->array_buffer.buffer;
7380         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7381                                             tracing_gen_ctx());
7382         if (!event)
7383                 /* Ring buffer disabled, return as if not open for write */
7384                 return -EBADF;
7385
7386         entry = ring_buffer_event_data(event);
7387
7388         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7389         if (len) {
7390                 entry->id = -1;
7391                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7392                 written = -EFAULT;
7393         } else
7394                 written = cnt;
7395
7396         __buffer_unlock_commit(buffer, event);
7397
7398         return written;
7399 }
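
/*
 * Editor's sketch (not part of trace.c): writing to trace_marker_raw, which
 * is handled by tracing_mark_raw_write() above. The payload must start with
 * an int tag id; everything past that is opaque to the kernel, so the struct
 * below is a made-up layout that only the decoding tool needs to know.
 */
#if 0	/* illustrative user-space example */
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

struct example_raw_marker {		/* hypothetical user-defined layout */
	int		id;		/* required leading tag id */
	uint64_t	payload;
};

static void emit_raw_marker(int raw_fd, int id, uint64_t payload)
{
	struct example_raw_marker m = { .id = id, .payload = payload };

	/* must be at least sizeof(int) and at most RAW_DATA_MAX_SIZE bytes */
	(void)write(raw_fd, &m, sizeof(m));
}
#endif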
7400
7401 static int tracing_clock_show(struct seq_file *m, void *v)
7402 {
7403         struct trace_array *tr = m->private;
7404         int i;
7405
7406         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7407                 seq_printf(m,
7408                         "%s%s%s%s", i ? " " : "",
7409                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7410                         i == tr->clock_id ? "]" : "");
7411         seq_putc(m, '\n');
7412
7413         return 0;
7414 }
7415
7416 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7417 {
7418         int i;
7419
7420         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7421                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7422                         break;
7423         }
7424         if (i == ARRAY_SIZE(trace_clocks))
7425                 return -EINVAL;
7426
7427         mutex_lock(&trace_types_lock);
7428
7429         tr->clock_id = i;
7430
7431         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7432
7433         /*
7434          * New clock may not be consistent with the previous clock.
7435          * Reset the buffer so that it doesn't have incomparable timestamps.
7436          */
7437         tracing_reset_online_cpus(&tr->array_buffer);
7438
7439 #ifdef CONFIG_TRACER_MAX_TRACE
7440         if (tr->max_buffer.buffer)
7441                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7442         tracing_reset_online_cpus(&tr->max_buffer);
7443 #endif
7444
7445         mutex_unlock(&trace_types_lock);
7446
7447         return 0;
7448 }
7449
7450 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7451                                    size_t cnt, loff_t *fpos)
7452 {
7453         struct seq_file *m = filp->private_data;
7454         struct trace_array *tr = m->private;
7455         char buf[64];
7456         const char *clockstr;
7457         int ret;
7458
7459         if (cnt >= sizeof(buf))
7460                 return -EINVAL;
7461
7462         if (copy_from_user(buf, ubuf, cnt))
7463                 return -EFAULT;
7464
7465         buf[cnt] = 0;
7466
7467         clockstr = strstrip(buf);
7468
7469         ret = tracing_set_clock(tr, clockstr);
7470         if (ret)
7471                 return ret;
7472
7473         *fpos += cnt;
7474
7475         return cnt;
7476 }
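
/*
 * Editor's sketch (not part of trace.c): reading the trace_clock file that
 * tracing_clock_show() above produces. The active clock is the one printed
 * in brackets, e.g. "[local] global counter ...", so finding the current
 * clock is a matter of locating the bracketed token.
 */
#if 0	/* illustrative user-space example */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int get_current_trace_clock(char *name, size_t len)
{
	char buf[256], *l, *r;
	ssize_t n;
	int fd;

	fd = open("/sys/kernel/tracing/trace_clock", O_RDONLY);
	if (fd < 0)
		return -1;
	n = read(fd, buf, sizeof(buf) - 1);
	close(fd);
	if (n <= 0)
		return -1;
	buf[n] = '\0';

	l = strchr(buf, '[');
	r = l ? strchr(l, ']') : NULL;
	if (!l || !r)
		return -1;
	*r = '\0';
	snprintf(name, len, "%s", l + 1);
	return 0;
}
#endif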
7477
7478 static int tracing_clock_open(struct inode *inode, struct file *file)
7479 {
7480         struct trace_array *tr = inode->i_private;
7481         int ret;
7482
7483         ret = tracing_check_open_get_tr(tr);
7484         if (ret)
7485                 return ret;
7486
7487         ret = single_open(file, tracing_clock_show, inode->i_private);
7488         if (ret < 0)
7489                 trace_array_put(tr);
7490
7491         return ret;
7492 }
7493
7494 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7495 {
7496         struct trace_array *tr = m->private;
7497
7498         mutex_lock(&trace_types_lock);
7499
7500         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7501                 seq_puts(m, "delta [absolute]\n");
7502         else
7503                 seq_puts(m, "[delta] absolute\n");
7504
7505         mutex_unlock(&trace_types_lock);
7506
7507         return 0;
7508 }
7509
7510 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7511 {
7512         struct trace_array *tr = inode->i_private;
7513         int ret;
7514
7515         ret = tracing_check_open_get_tr(tr);
7516         if (ret)
7517                 return ret;
7518
7519         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7520         if (ret < 0)
7521                 trace_array_put(tr);
7522
7523         return ret;
7524 }
7525
7526 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7527 {
7528         if (rbe == this_cpu_read(trace_buffered_event))
7529                 return ring_buffer_time_stamp(buffer);
7530
7531         return ring_buffer_event_time_stamp(buffer, rbe);
7532 }
7533
7534 /*
7535  * Set or disable using the per CPU trace_buffered_event when possible.
7536  */
7537 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7538 {
7539         int ret = 0;
7540
7541         mutex_lock(&trace_types_lock);
7542
7543         if (set && tr->no_filter_buffering_ref++)
7544                 goto out;
7545
7546         if (!set) {
7547                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7548                         ret = -EINVAL;
7549                         goto out;
7550                 }
7551
7552                 --tr->no_filter_buffering_ref;
7553         }
7554  out:
7555         mutex_unlock(&trace_types_lock);
7556
7557         return ret;
7558 }
7559
7560 struct ftrace_buffer_info {
7561         struct trace_iterator   iter;
7562         void                    *spare;
7563         unsigned int            spare_cpu;
7564         unsigned int            read;
7565 };
7566
7567 #ifdef CONFIG_TRACER_SNAPSHOT
7568 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7569 {
7570         struct trace_array *tr = inode->i_private;
7571         struct trace_iterator *iter;
7572         struct seq_file *m;
7573         int ret;
7574
7575         ret = tracing_check_open_get_tr(tr);
7576         if (ret)
7577                 return ret;
7578
7579         if (file->f_mode & FMODE_READ) {
7580                 iter = __tracing_open(inode, file, true);
7581                 if (IS_ERR(iter))
7582                         ret = PTR_ERR(iter);
7583         } else {
7584                 /* Writes still need the seq_file to hold the private data */
7585                 ret = -ENOMEM;
7586                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7587                 if (!m)
7588                         goto out;
7589                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7590                 if (!iter) {
7591                         kfree(m);
7592                         goto out;
7593                 }
7594                 ret = 0;
7595
7596                 iter->tr = tr;
7597                 iter->array_buffer = &tr->max_buffer;
7598                 iter->cpu_file = tracing_get_cpu(inode);
7599                 m->private = iter;
7600                 file->private_data = m;
7601         }
7602 out:
7603         if (ret < 0)
7604                 trace_array_put(tr);
7605
7606         return ret;
7607 }
7608
7609 static void tracing_swap_cpu_buffer(void *tr)
7610 {
7611         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7612 }
7613
7614 static ssize_t
7615 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7616                        loff_t *ppos)
7617 {
7618         struct seq_file *m = filp->private_data;
7619         struct trace_iterator *iter = m->private;
7620         struct trace_array *tr = iter->tr;
7621         unsigned long val;
7622         int ret;
7623
7624         ret = tracing_update_buffers();
7625         if (ret < 0)
7626                 return ret;
7627
7628         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7629         if (ret)
7630                 return ret;
7631
7632         mutex_lock(&trace_types_lock);
7633
7634         if (tr->current_trace->use_max_tr) {
7635                 ret = -EBUSY;
7636                 goto out;
7637         }
7638
7639         local_irq_disable();
7640         arch_spin_lock(&tr->max_lock);
7641         if (tr->cond_snapshot)
7642                 ret = -EBUSY;
7643         arch_spin_unlock(&tr->max_lock);
7644         local_irq_enable();
7645         if (ret)
7646                 goto out;
7647
7648         switch (val) {
7649         case 0:
7650                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7651                         ret = -EINVAL;
7652                         break;
7653                 }
7654                 if (tr->allocated_snapshot)
7655                         free_snapshot(tr);
7656                 break;
7657         case 1:
7658 /* Only allow per-cpu swap if the ring buffer supports it */
7659 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7660                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7661                         ret = -EINVAL;
7662                         break;
7663                 }
7664 #endif
7665                 if (tr->allocated_snapshot)
7666                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7667                                         &tr->array_buffer, iter->cpu_file);
7668                 else
7669                         ret = tracing_alloc_snapshot_instance(tr);
7670                 if (ret < 0)
7671                         break;
7672                 /* Now, we're going to swap */
7673                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7674                         local_irq_disable();
7675                         update_max_tr(tr, current, smp_processor_id(), NULL);
7676                         local_irq_enable();
7677                 } else {
7678                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7679                                                  (void *)tr, 1);
7680                 }
7681                 break;
7682         default:
7683                 if (tr->allocated_snapshot) {
7684                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7685                                 tracing_reset_online_cpus(&tr->max_buffer);
7686                         else
7687                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7688                 }
7689                 break;
7690         }
7691
7692         if (ret >= 0) {
7693                 *ppos += cnt;
7694                 ret = cnt;
7695         }
7696 out:
7697         mutex_unlock(&trace_types_lock);
7698         return ret;
7699 }
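
/*
 * Editor's sketch (not part of trace.c): the values understood by the
 * snapshot file's write handler above - "0" frees the snapshot buffer,
 * "1" allocates it if needed and swaps the current buffer into it, and
 * any other number just clears the snapshot buffer.
 */
#if 0	/* illustrative user-space example */
#include <fcntl.h>
#include <unistd.h>

static int take_snapshot(void)
{
	int fd, ret;

	fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
	if (fd < 0)
		return -1;
	ret = (write(fd, "1", 1) == 1) ? 0 : -1;
	close(fd);
	return ret;
}
#endif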
7700
7701 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7702 {
7703         struct seq_file *m = file->private_data;
7704         int ret;
7705
7706         ret = tracing_release(inode, file);
7707
7708         if (file->f_mode & FMODE_READ)
7709                 return ret;
7710
7711         /* If write only, the seq_file is just a stub */
7712         if (m)
7713                 kfree(m->private);
7714         kfree(m);
7715
7716         return 0;
7717 }
7718
7719 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7720 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7721                                     size_t count, loff_t *ppos);
7722 static int tracing_buffers_release(struct inode *inode, struct file *file);
7723 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7724                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7725
7726 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7727 {
7728         struct ftrace_buffer_info *info;
7729         int ret;
7730
7731         /* The following checks for tracefs lockdown */
7732         ret = tracing_buffers_open(inode, filp);
7733         if (ret < 0)
7734                 return ret;
7735
7736         info = filp->private_data;
7737
7738         if (info->iter.trace->use_max_tr) {
7739                 tracing_buffers_release(inode, filp);
7740                 return -EBUSY;
7741         }
7742
7743         info->iter.snapshot = true;
7744         info->iter.array_buffer = &info->iter.tr->max_buffer;
7745
7746         return ret;
7747 }
7748
7749 #endif /* CONFIG_TRACER_SNAPSHOT */
7750
7751
7752 static const struct file_operations tracing_thresh_fops = {
7753         .open           = tracing_open_generic,
7754         .read           = tracing_thresh_read,
7755         .write          = tracing_thresh_write,
7756         .llseek         = generic_file_llseek,
7757 };
7758
7759 #ifdef CONFIG_TRACER_MAX_TRACE
7760 static const struct file_operations tracing_max_lat_fops = {
7761         .open           = tracing_open_generic_tr,
7762         .read           = tracing_max_lat_read,
7763         .write          = tracing_max_lat_write,
7764         .llseek         = generic_file_llseek,
7765         .release        = tracing_release_generic_tr,
7766 };
7767 #endif
7768
7769 static const struct file_operations set_tracer_fops = {
7770         .open           = tracing_open_generic_tr,
7771         .read           = tracing_set_trace_read,
7772         .write          = tracing_set_trace_write,
7773         .llseek         = generic_file_llseek,
7774         .release        = tracing_release_generic_tr,
7775 };
7776
7777 static const struct file_operations tracing_pipe_fops = {
7778         .open           = tracing_open_pipe,
7779         .poll           = tracing_poll_pipe,
7780         .read           = tracing_read_pipe,
7781         .splice_read    = tracing_splice_read_pipe,
7782         .release        = tracing_release_pipe,
7783         .llseek         = no_llseek,
7784 };
7785
7786 static const struct file_operations tracing_entries_fops = {
7787         .open           = tracing_open_generic_tr,
7788         .read           = tracing_entries_read,
7789         .write          = tracing_entries_write,
7790         .llseek         = generic_file_llseek,
7791         .release        = tracing_release_generic_tr,
7792 };
7793
7794 static const struct file_operations tracing_total_entries_fops = {
7795         .open           = tracing_open_generic_tr,
7796         .read           = tracing_total_entries_read,
7797         .llseek         = generic_file_llseek,
7798         .release        = tracing_release_generic_tr,
7799 };
7800
7801 static const struct file_operations tracing_free_buffer_fops = {
7802         .open           = tracing_open_generic_tr,
7803         .write          = tracing_free_buffer_write,
7804         .release        = tracing_free_buffer_release,
7805 };
7806
7807 static const struct file_operations tracing_mark_fops = {
7808         .open           = tracing_mark_open,
7809         .write          = tracing_mark_write,
7810         .release        = tracing_release_generic_tr,
7811 };
7812
7813 static const struct file_operations tracing_mark_raw_fops = {
7814         .open           = tracing_mark_open,
7815         .write          = tracing_mark_raw_write,
7816         .release        = tracing_release_generic_tr,
7817 };
7818
7819 static const struct file_operations trace_clock_fops = {
7820         .open           = tracing_clock_open,
7821         .read           = seq_read,
7822         .llseek         = seq_lseek,
7823         .release        = tracing_single_release_tr,
7824         .write          = tracing_clock_write,
7825 };
7826
7827 static const struct file_operations trace_time_stamp_mode_fops = {
7828         .open           = tracing_time_stamp_mode_open,
7829         .read           = seq_read,
7830         .llseek         = seq_lseek,
7831         .release        = tracing_single_release_tr,
7832 };
7833
7834 #ifdef CONFIG_TRACER_SNAPSHOT
7835 static const struct file_operations snapshot_fops = {
7836         .open           = tracing_snapshot_open,
7837         .read           = seq_read,
7838         .write          = tracing_snapshot_write,
7839         .llseek         = tracing_lseek,
7840         .release        = tracing_snapshot_release,
7841 };
7842
7843 static const struct file_operations snapshot_raw_fops = {
7844         .open           = snapshot_raw_open,
7845         .read           = tracing_buffers_read,
7846         .release        = tracing_buffers_release,
7847         .splice_read    = tracing_buffers_splice_read,
7848         .llseek         = no_llseek,
7849 };
7850
7851 #endif /* CONFIG_TRACER_SNAPSHOT */
7852
7853 /*
7854  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7855  * @filp: The active open file structure
7856  * @ubuf: The userspace provided buffer containing the value to write
7857  * @cnt: The number of bytes being written
7858  * @ppos: The current "file" position
7859  *
7860  * This function implements the write interface for a struct trace_min_max_param.
7861  * The filp->private_data must point to a trace_min_max_param structure that
7862  * defines where to write the value, the min and the max acceptable values,
7863  * and a lock to protect the write.
7864  */
7865 static ssize_t
7866 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7867 {
7868         struct trace_min_max_param *param = filp->private_data;
7869         u64 val;
7870         int err;
7871
7872         if (!param)
7873                 return -EFAULT;
7874
7875         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7876         if (err)
7877                 return err;
7878
7879         if (param->lock)
7880                 mutex_lock(param->lock);
7881
7882         if (param->min && val < *param->min)
7883                 err = -EINVAL;
7884
7885         if (param->max && val > *param->max)
7886                 err = -EINVAL;
7887
7888         if (!err)
7889                 *param->val = val;
7890
7891         if (param->lock)
7892                 mutex_unlock(param->lock);
7893
7894         if (err)
7895                 return err;
7896
7897         return cnt;
7898 }
7899
7900 /*
7901  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7902  * @filp: The active open file structure
7903  * @ubuf: The userspace provided buffer to read value into
7904  * @cnt: The maximum number of bytes to read
7905  * @ppos: The current "file" position
7906  *
7907  * This function implements the read interface for a struct trace_min_max_param.
7908  * The filp->private_data must point to a trace_min_max_param struct with valid
7909  * data.
7910  */
7911 static ssize_t
7912 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7913 {
7914         struct trace_min_max_param *param = filp->private_data;
7915         char buf[U64_STR_SIZE];
7916         int len;
7917         u64 val;
7918
7919         if (!param)
7920                 return -EFAULT;
7921
7922         val = *param->val;
7923
7924         if (cnt > sizeof(buf))
7925                 cnt = sizeof(buf);
7926
7927         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7928
7929         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7930 }
7931
7932 const struct file_operations trace_min_max_fops = {
7933         .open           = tracing_open_generic,
7934         .read           = trace_min_max_read,
7935         .write          = trace_min_max_write,
7936 };
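/*
 * For reference, a hypothetical example of wiring a tunable through
 * trace_min_max_fops (the names below are illustrative, not taken from
 * this file, and assume the lock/val/min/max layout of struct
 * trace_min_max_param declared in trace.h):
 *
 *	static u64 example_val = 50;
 *	static u64 example_min = 1;
 *	static u64 example_max = 100;
 *	static DEFINE_MUTEX(example_lock);
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_lock,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	trace_create_file("example", TRACE_MODE_WRITE, parent,
 *			  &example_param, &trace_min_max_fops);
 *
 * A read of the file then returns "50\n", and a write outside [1, 100]
 * fails with -EINVAL.
 */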
7937
7938 #define TRACING_LOG_ERRS_MAX    8
7939 #define TRACING_LOG_LOC_MAX     128
7940
7941 #define CMD_PREFIX "  Command: "
7942
7943 struct err_info {
7944         const char      **errs; /* ptr to loc-specific array of err strings */
7945         u8              type;   /* index into errs -> specific err string */
7946         u16             pos;    /* caret position */
7947         u64             ts;
7948 };
7949
7950 struct tracing_log_err {
7951         struct list_head        list;
7952         struct err_info         info;
7953         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7954         char                    *cmd;                     /* what caused err */
7955 };
7956
7957 static DEFINE_MUTEX(tracing_err_log_lock);
7958
7959 static struct tracing_log_err *alloc_tracing_log_err(int len)
7960 {
7961         struct tracing_log_err *err;
7962
7963         err = kzalloc(sizeof(*err), GFP_KERNEL);
7964         if (!err)
7965                 return ERR_PTR(-ENOMEM);
7966
7967         err->cmd = kzalloc(len, GFP_KERNEL);
7968         if (!err->cmd) {
7969                 kfree(err);
7970                 return ERR_PTR(-ENOMEM);
7971         }
7972
7973         return err;
7974 }
7975
7976 static void free_tracing_log_err(struct tracing_log_err *err)
7977 {
7978         kfree(err->cmd);
7979         kfree(err);
7980 }
7981
7982 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7983                                                    int len)
7984 {
7985         struct tracing_log_err *err;
7986         char *cmd;
7987
7988         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7989                 err = alloc_tracing_log_err(len);
7990                 if (PTR_ERR(err) != -ENOMEM)
7991                         tr->n_err_log_entries++;
7992
7993                 return err;
7994         }
7995         cmd = kzalloc(len, GFP_KERNEL);
7996         if (!cmd)
7997                 return ERR_PTR(-ENOMEM);
7998         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7999         kfree(err->cmd);
8000         err->cmd = cmd;
8001         list_del(&err->list);
8002
8003         return err;
8004 }
8005
8006 /**
8007  * err_pos - find the position of a string within a command for error careting
8008  * @cmd: The tracing command that caused the error
8009  * @str: The string to position the caret at within @cmd
8010  *
8011  * Finds the position of the first occurrence of @str within @cmd.  The
8012  * return value can be passed to tracing_log_err() for caret placement
8013  * within @cmd.
8014  *
8015  * Returns the index within @cmd of the first occurrence of @str or 0
8016  * if @str was not found.
8017  */
8018 unsigned int err_pos(char *cmd, const char *str)
8019 {
8020         char *found;
8021
8022         if (WARN_ON(!strlen(cmd)))
8023                 return 0;
8024
8025         found = strstr(cmd, str);
8026         if (found)
8027                 return found - cmd;
8028
8029         return 0;
8030 }
8031
8032 /**
8033  * tracing_log_err - write an error to the tracing error log
8034  * @tr: The associated trace array for the error (NULL for top level array)
8035  * @loc: A string describing where the error occurred
8036  * @cmd: The tracing command that caused the error
8037  * @errs: The array of loc-specific static error strings
8038  * @type: The index into errs[], which produces the specific static err string
8039  * @pos: The position the caret should be placed in the cmd
8040  *
8041  * Writes an error into tracing/error_log of the form:
8042  *
8043  * <loc>: error: <text>
8044  *   Command: <cmd>
8045  *              ^
8046  *
8047  * tracing/error_log is a small log file containing the last
8048  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8049  * unless there has been a tracing error; the error log can be cleared,
8050  * and its memory freed, by writing the empty string to it in truncation
8051  * mode, i.e. echo > tracing/error_log.
8052  *
8053  * NOTE: the @errs array along with the @type param are used to
8054  * produce a static error string - this string is not copied and saved
8055  * when the error is logged - only a pointer to it is saved.  See
8056  * existing callers for examples of how static strings are typically
8057  * defined for use with tracing_log_err().
8058  */
8059 void tracing_log_err(struct trace_array *tr,
8060                      const char *loc, const char *cmd,
8061                      const char **errs, u8 type, u16 pos)
8062 {
8063         struct tracing_log_err *err;
8064         int len = 0;
8065
8066         if (!tr)
8067                 tr = &global_trace;
8068
8069         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8070
8071         mutex_lock(&tracing_err_log_lock);
8072         err = get_tracing_log_err(tr, len);
8073         if (PTR_ERR(err) == -ENOMEM) {
8074                 mutex_unlock(&tracing_err_log_lock);
8075                 return;
8076         }
8077
8078         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8079         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8080
8081         err->info.errs = errs;
8082         err->info.type = type;
8083         err->info.pos = pos;
8084         err->info.ts = local_clock();
8085
8086         list_add_tail(&err->list, &tr->err_log);
8087         mutex_unlock(&tracing_err_log_lock);
8088 }
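/*
 * A hypothetical caller sketch (names are illustrative only): the error
 * strings are static and indexed by type, err_pos() supplies the caret
 * position, and tracing_log_err() stores only a pointer to the chosen
 * string, exactly as described in the comment above:
 *
 *	static const char *foo_errs[] = {
 *		"Invalid argument",
 *		"Duplicate name",
 *	};
 *
 *	enum { FOO_ERR_INVAL, FOO_ERR_DUP };
 *
 *	static void foo_report_dup(struct trace_array *tr, char *cmd,
 *				   const char *name)
 *	{
 *		tracing_log_err(tr, "foo", cmd, foo_errs, FOO_ERR_DUP,
 *				err_pos(cmd, name));
 *	}
 *
 * The entry then appears in tracing/error_log in the <loc>: error: <text>
 * form shown in the kernel-doc above, with the caret under @name.
 */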
8089
8090 static void clear_tracing_err_log(struct trace_array *tr)
8091 {
8092         struct tracing_log_err *err, *next;
8093
8094         mutex_lock(&tracing_err_log_lock);
8095         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8096                 list_del(&err->list);
8097                 free_tracing_log_err(err);
8098         }
8099
8100         tr->n_err_log_entries = 0;
8101         mutex_unlock(&tracing_err_log_lock);
8102 }
8103
8104 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8105 {
8106         struct trace_array *tr = m->private;
8107
8108         mutex_lock(&tracing_err_log_lock);
8109
8110         return seq_list_start(&tr->err_log, *pos);
8111 }
8112
8113 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8114 {
8115         struct trace_array *tr = m->private;
8116
8117         return seq_list_next(v, &tr->err_log, pos);
8118 }
8119
8120 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8121 {
8122         mutex_unlock(&tracing_err_log_lock);
8123 }
8124
8125 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8126 {
8127         u16 i;
8128
8129         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8130                 seq_putc(m, ' ');
8131         for (i = 0; i < pos; i++)
8132                 seq_putc(m, ' ');
8133         seq_puts(m, "^\n");
8134 }
8135
8136 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8137 {
8138         struct tracing_log_err *err = v;
8139
8140         if (err) {
8141                 const char *err_text = err->info.errs[err->info.type];
8142                 u64 sec = err->info.ts;
8143                 u32 nsec;
8144
8145                 nsec = do_div(sec, NSEC_PER_SEC);
8146                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8147                            err->loc, err_text);
8148                 seq_printf(m, "%s", err->cmd);
8149                 tracing_err_log_show_pos(m, err->info.pos);
8150         }
8151
8152         return 0;
8153 }
8154
8155 static const struct seq_operations tracing_err_log_seq_ops = {
8156         .start  = tracing_err_log_seq_start,
8157         .next   = tracing_err_log_seq_next,
8158         .stop   = tracing_err_log_seq_stop,
8159         .show   = tracing_err_log_seq_show
8160 };
8161
8162 static int tracing_err_log_open(struct inode *inode, struct file *file)
8163 {
8164         struct trace_array *tr = inode->i_private;
8165         int ret = 0;
8166
8167         ret = tracing_check_open_get_tr(tr);
8168         if (ret)
8169                 return ret;
8170
8171         /* If this file was opened for write, then erase contents */
8172         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8173                 clear_tracing_err_log(tr);
8174
8175         if (file->f_mode & FMODE_READ) {
8176                 ret = seq_open(file, &tracing_err_log_seq_ops);
8177                 if (!ret) {
8178                         struct seq_file *m = file->private_data;
8179                         m->private = tr;
8180                 } else {
8181                         trace_array_put(tr);
8182                 }
8183         }
8184         return ret;
8185 }
8186
8187 static ssize_t tracing_err_log_write(struct file *file,
8188                                      const char __user *buffer,
8189                                      size_t count, loff_t *ppos)
8190 {
8191         return count;
8192 }
8193
8194 static int tracing_err_log_release(struct inode *inode, struct file *file)
8195 {
8196         struct trace_array *tr = inode->i_private;
8197
8198         trace_array_put(tr);
8199
8200         if (file->f_mode & FMODE_READ)
8201                 seq_release(inode, file);
8202
8203         return 0;
8204 }
8205
8206 static const struct file_operations tracing_err_log_fops = {
8207         .open           = tracing_err_log_open,
8208         .write          = tracing_err_log_write,
8209         .read           = seq_read,
8210         .llseek         = tracing_lseek,
8211         .release        = tracing_err_log_release,
8212 };
8213
8214 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8215 {
8216         struct trace_array *tr = inode->i_private;
8217         struct ftrace_buffer_info *info;
8218         int ret;
8219
8220         ret = tracing_check_open_get_tr(tr);
8221         if (ret)
8222                 return ret;
8223
8224         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8225         if (!info) {
8226                 trace_array_put(tr);
8227                 return -ENOMEM;
8228         }
8229
8230         mutex_lock(&trace_types_lock);
8231
8232         info->iter.tr           = tr;
8233         info->iter.cpu_file     = tracing_get_cpu(inode);
8234         info->iter.trace        = tr->current_trace;
8235         info->iter.array_buffer = &tr->array_buffer;
8236         info->spare             = NULL;
8237         /* Force reading ring buffer for first read */
8238         info->read              = (unsigned int)-1;
8239
8240         filp->private_data = info;
8241
8242         tr->trace_ref++;
8243
8244         mutex_unlock(&trace_types_lock);
8245
8246         ret = nonseekable_open(inode, filp);
8247         if (ret < 0)
8248                 trace_array_put(tr);
8249
8250         return ret;
8251 }
8252
8253 static __poll_t
8254 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8255 {
8256         struct ftrace_buffer_info *info = filp->private_data;
8257         struct trace_iterator *iter = &info->iter;
8258
8259         return trace_poll(iter, filp, poll_table);
8260 }
8261
8262 static ssize_t
8263 tracing_buffers_read(struct file *filp, char __user *ubuf,
8264                      size_t count, loff_t *ppos)
8265 {
8266         struct ftrace_buffer_info *info = filp->private_data;
8267         struct trace_iterator *iter = &info->iter;
8268         ssize_t ret = 0;
8269         ssize_t size;
8270
8271         if (!count)
8272                 return 0;
8273
8274 #ifdef CONFIG_TRACER_MAX_TRACE
8275         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8276                 return -EBUSY;
8277 #endif
8278
8279         if (!info->spare) {
8280                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8281                                                           iter->cpu_file);
8282                 if (IS_ERR(info->spare)) {
8283                         ret = PTR_ERR(info->spare);
8284                         info->spare = NULL;
8285                 } else {
8286                         info->spare_cpu = iter->cpu_file;
8287                 }
8288         }
8289         if (!info->spare)
8290                 return ret;
8291
8292         /* Do we have previous read data to read? */
8293         if (info->read < PAGE_SIZE)
8294                 goto read;
8295
8296  again:
8297         trace_access_lock(iter->cpu_file);
8298         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8299                                     &info->spare,
8300                                     count,
8301                                     iter->cpu_file, 0);
8302         trace_access_unlock(iter->cpu_file);
8303
8304         if (ret < 0) {
8305                 if (trace_empty(iter)) {
8306                         if ((filp->f_flags & O_NONBLOCK))
8307                                 return -EAGAIN;
8308
8309                         ret = wait_on_pipe(iter, 0);
8310                         if (ret)
8311                                 return ret;
8312
8313                         goto again;
8314                 }
8315                 return 0;
8316         }
8317
8318         info->read = 0;
8319  read:
8320         size = PAGE_SIZE - info->read;
8321         if (size > count)
8322                 size = count;
8323
8324         ret = copy_to_user(ubuf, info->spare + info->read, size);
8325         if (ret == size)
8326                 return -EFAULT;
8327
8328         size -= ret;
8329
8330         *ppos += size;
8331         info->read += size;
8332
8333         return size;
8334 }
8335
8336 static int tracing_buffers_release(struct inode *inode, struct file *file)
8337 {
8338         struct ftrace_buffer_info *info = file->private_data;
8339         struct trace_iterator *iter = &info->iter;
8340
8341         mutex_lock(&trace_types_lock);
8342
8343         iter->tr->trace_ref--;
8344
8345         __trace_array_put(iter->tr);
8346
8347         iter->wait_index++;
8348         /* Make sure the waiters see the new wait_index */
8349         smp_wmb();
8350
8351         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8352
8353         if (info->spare)
8354                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8355                                            info->spare_cpu, info->spare);
8356         kvfree(info);
8357
8358         mutex_unlock(&trace_types_lock);
8359
8360         return 0;
8361 }
8362
8363 struct buffer_ref {
8364         struct trace_buffer     *buffer;
8365         void                    *page;
8366         int                     cpu;
8367         refcount_t              refcount;
8368 };
8369
8370 static void buffer_ref_release(struct buffer_ref *ref)
8371 {
8372         if (!refcount_dec_and_test(&ref->refcount))
8373                 return;
8374         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8375         kfree(ref);
8376 }
8377
8378 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8379                                     struct pipe_buffer *buf)
8380 {
8381         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8382
8383         buffer_ref_release(ref);
8384         buf->private = 0;
8385 }
8386
8387 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8388                                 struct pipe_buffer *buf)
8389 {
8390         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8391
8392         if (refcount_read(&ref->refcount) > INT_MAX/2)
8393                 return false;
8394
8395         refcount_inc(&ref->refcount);
8396         return true;
8397 }
8398
8399 /* Pipe buffer operations for a buffer. */
8400 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8401         .release                = buffer_pipe_buf_release,
8402         .get                    = buffer_pipe_buf_get,
8403 };
8404
8405 /*
8406  * Callback from splice_to_pipe(); releases any pages still held by the
8407  * spd in case we errored out while filling the pipe.
8408  */
8409 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8410 {
8411         struct buffer_ref *ref =
8412                 (struct buffer_ref *)spd->partial[i].private;
8413
8414         buffer_ref_release(ref);
8415         spd->partial[i].private = 0;
8416 }
8417
8418 static ssize_t
8419 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8420                             struct pipe_inode_info *pipe, size_t len,
8421                             unsigned int flags)
8422 {
8423         struct ftrace_buffer_info *info = file->private_data;
8424         struct trace_iterator *iter = &info->iter;
8425         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8426         struct page *pages_def[PIPE_DEF_BUFFERS];
8427         struct splice_pipe_desc spd = {
8428                 .pages          = pages_def,
8429                 .partial        = partial_def,
8430                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8431                 .ops            = &buffer_pipe_buf_ops,
8432                 .spd_release    = buffer_spd_release,
8433         };
8434         struct buffer_ref *ref;
8435         int entries, i;
8436         ssize_t ret = 0;
8437
8438 #ifdef CONFIG_TRACER_MAX_TRACE
8439         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8440                 return -EBUSY;
8441 #endif
8442
8443         if (*ppos & (PAGE_SIZE - 1))
8444                 return -EINVAL;
8445
8446         if (len & (PAGE_SIZE - 1)) {
8447                 if (len < PAGE_SIZE)
8448                         return -EINVAL;
8449                 len &= PAGE_MASK;
8450         }
8451
8452         if (splice_grow_spd(pipe, &spd))
8453                 return -ENOMEM;
8454
8455  again:
8456         trace_access_lock(iter->cpu_file);
8457         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8458
8459         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8460                 struct page *page;
8461                 int r;
8462
8463                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8464                 if (!ref) {
8465                         ret = -ENOMEM;
8466                         break;
8467                 }
8468
8469                 refcount_set(&ref->refcount, 1);
8470                 ref->buffer = iter->array_buffer->buffer;
8471                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8472                 if (IS_ERR(ref->page)) {
8473                         ret = PTR_ERR(ref->page);
8474                         ref->page = NULL;
8475                         kfree(ref);
8476                         break;
8477                 }
8478                 ref->cpu = iter->cpu_file;
8479
8480                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8481                                           len, iter->cpu_file, 1);
8482                 if (r < 0) {
8483                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8484                                                    ref->page);
8485                         kfree(ref);
8486                         break;
8487                 }
8488
8489                 page = virt_to_page(ref->page);
8490
8491                 spd.pages[i] = page;
8492                 spd.partial[i].len = PAGE_SIZE;
8493                 spd.partial[i].offset = 0;
8494                 spd.partial[i].private = (unsigned long)ref;
8495                 spd.nr_pages++;
8496                 *ppos += PAGE_SIZE;
8497
8498                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8499         }
8500
8501         trace_access_unlock(iter->cpu_file);
8502         spd.nr_pages = i;
8503
8504         /* did we read anything? */
8505         if (!spd.nr_pages) {
8506                 long wait_index;
8507
8508                 if (ret)
8509                         goto out;
8510
8511                 ret = -EAGAIN;
8512                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8513                         goto out;
8514
8515                 wait_index = READ_ONCE(iter->wait_index);
8516
8517                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8518                 if (ret)
8519                         goto out;
8520
8521                 /* No need to wait after waking up when tracing is off */
8522                 if (!tracer_tracing_is_on(iter->tr))
8523                         goto out;
8524
8525                 /* Make sure we see the new wait_index */
8526                 smp_rmb();
8527                 if (wait_index != iter->wait_index)
8528                         goto out;
8529
8530                 goto again;
8531         }
8532
8533         ret = splice_to_pipe(pipe, &spd);
8534 out:
8535         splice_shrink_spd(&spd);
8536
8537         return ret;
8538 }
8539
8540 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8541 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8542 {
8543         struct ftrace_buffer_info *info = file->private_data;
8544         struct trace_iterator *iter = &info->iter;
8545
8546         if (cmd)
8547                 return -ENOIOCTLCMD;
8548
8549         mutex_lock(&trace_types_lock);
8550
8551         iter->wait_index++;
8552         /* Make sure the waiters see the new wait_index */
8553         smp_wmb();
8554
8555         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8556
8557         mutex_unlock(&trace_types_lock);
8558         return 0;
8559 }
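/*
 * Illustrative user-space usage (not from this file): a reader blocked in
 * read() or splice() on per_cpu/cpuN/trace_pipe_raw can be woken up by
 * another thread issuing the zero-command ioctl on the same descriptor:
 *
 *	ioctl(trace_pipe_raw_fd, 0);
 */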
8560
8561 static const struct file_operations tracing_buffers_fops = {
8562         .open           = tracing_buffers_open,
8563         .read           = tracing_buffers_read,
8564         .poll           = tracing_buffers_poll,
8565         .release        = tracing_buffers_release,
8566         .splice_read    = tracing_buffers_splice_read,
8567         .unlocked_ioctl = tracing_buffers_ioctl,
8568         .llseek         = no_llseek,
8569 };
8570
8571 static ssize_t
8572 tracing_stats_read(struct file *filp, char __user *ubuf,
8573                    size_t count, loff_t *ppos)
8574 {
8575         struct inode *inode = file_inode(filp);
8576         struct trace_array *tr = inode->i_private;
8577         struct array_buffer *trace_buf = &tr->array_buffer;
8578         int cpu = tracing_get_cpu(inode);
8579         struct trace_seq *s;
8580         unsigned long cnt;
8581         unsigned long long t;
8582         unsigned long usec_rem;
8583
8584         s = kmalloc(sizeof(*s), GFP_KERNEL);
8585         if (!s)
8586                 return -ENOMEM;
8587
8588         trace_seq_init(s);
8589
8590         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8591         trace_seq_printf(s, "entries: %ld\n", cnt);
8592
8593         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8594         trace_seq_printf(s, "overrun: %ld\n", cnt);
8595
8596         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8597         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8598
8599         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8600         trace_seq_printf(s, "bytes: %ld\n", cnt);
8601
8602         if (trace_clocks[tr->clock_id].in_ns) {
8603                 /* local or global for trace_clock */
8604                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8605                 usec_rem = do_div(t, USEC_PER_SEC);
8606                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8607                                                                 t, usec_rem);
8608
8609                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8610                 usec_rem = do_div(t, USEC_PER_SEC);
8611                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8612         } else {
8613                 /* counter or tsc mode for trace_clock */
8614                 trace_seq_printf(s, "oldest event ts: %llu\n",
8615                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8616
8617                 trace_seq_printf(s, "now ts: %llu\n",
8618                                 ring_buffer_time_stamp(trace_buf->buffer));
8619         }
8620
8621         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8622         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8623
8624         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8625         trace_seq_printf(s, "read events: %ld\n", cnt);
8626
8627         count = simple_read_from_buffer(ubuf, count, ppos,
8628                                         s->buffer, trace_seq_used(s));
8629
8630         kfree(s);
8631
8632         return count;
8633 }
8634
8635 static const struct file_operations tracing_stats_fops = {
8636         .open           = tracing_open_generic_tr,
8637         .read           = tracing_stats_read,
8638         .llseek         = generic_file_llseek,
8639         .release        = tracing_release_generic_tr,
8640 };
8641
8642 #ifdef CONFIG_DYNAMIC_FTRACE
8643
8644 static ssize_t
8645 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8646                   size_t cnt, loff_t *ppos)
8647 {
8648         ssize_t ret;
8649         char *buf;
8650         int r;
8651
8652         /* 256 should be plenty to hold the amount needed */
8653         buf = kmalloc(256, GFP_KERNEL);
8654         if (!buf)
8655                 return -ENOMEM;
8656
8657         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8658                       ftrace_update_tot_cnt,
8659                       ftrace_number_of_pages,
8660                       ftrace_number_of_groups);
8661
8662         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8663         kfree(buf);
8664         return ret;
8665 }
8666
8667 static const struct file_operations tracing_dyn_info_fops = {
8668         .open           = tracing_open_generic,
8669         .read           = tracing_read_dyn_info,
8670         .llseek         = generic_file_llseek,
8671 };
8672 #endif /* CONFIG_DYNAMIC_FTRACE */
8673
8674 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8675 static void
8676 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8677                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8678                 void *data)
8679 {
8680         tracing_snapshot_instance(tr);
8681 }
8682
8683 static void
8684 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8685                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8686                       void *data)
8687 {
8688         struct ftrace_func_mapper *mapper = data;
8689         long *count = NULL;
8690
8691         if (mapper)
8692                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8693
8694         if (count) {
8695
8696                 if (*count <= 0)
8697                         return;
8698
8699                 (*count)--;
8700         }
8701
8702         tracing_snapshot_instance(tr);
8703 }
8704
8705 static int
8706 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8707                       struct ftrace_probe_ops *ops, void *data)
8708 {
8709         struct ftrace_func_mapper *mapper = data;
8710         long *count = NULL;
8711
8712         seq_printf(m, "%ps:", (void *)ip);
8713
8714         seq_puts(m, "snapshot");
8715
8716         if (mapper)
8717                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8718
8719         if (count)
8720                 seq_printf(m, ":count=%ld\n", *count);
8721         else
8722                 seq_puts(m, ":unlimited\n");
8723
8724         return 0;
8725 }
8726
8727 static int
8728 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8729                      unsigned long ip, void *init_data, void **data)
8730 {
8731         struct ftrace_func_mapper *mapper = *data;
8732
8733         if (!mapper) {
8734                 mapper = allocate_ftrace_func_mapper();
8735                 if (!mapper)
8736                         return -ENOMEM;
8737                 *data = mapper;
8738         }
8739
8740         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8741 }
8742
8743 static void
8744 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8745                      unsigned long ip, void *data)
8746 {
8747         struct ftrace_func_mapper *mapper = data;
8748
8749         if (!ip) {
8750                 if (!mapper)
8751                         return;
8752                 free_ftrace_func_mapper(mapper, NULL);
8753                 return;
8754         }
8755
8756         ftrace_func_mapper_remove_ip(mapper, ip);
8757 }
8758
8759 static struct ftrace_probe_ops snapshot_probe_ops = {
8760         .func                   = ftrace_snapshot,
8761         .print                  = ftrace_snapshot_print,
8762 };
8763
8764 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8765         .func                   = ftrace_count_snapshot,
8766         .print                  = ftrace_snapshot_print,
8767         .init                   = ftrace_snapshot_init,
8768         .free                   = ftrace_snapshot_free,
8769 };
8770
8771 static int
8772 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8773                                char *glob, char *cmd, char *param, int enable)
8774 {
8775         struct ftrace_probe_ops *ops;
8776         void *count = (void *)-1;
8777         char *number;
8778         int ret;
8779
8780         if (!tr)
8781                 return -ENODEV;
8782
8783         /* hash funcs only work with set_ftrace_filter */
8784         if (!enable)
8785                 return -EINVAL;
8786
8787         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8788
8789         if (glob[0] == '!')
8790                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8791
8792         if (!param)
8793                 goto out_reg;
8794
8795         number = strsep(&param, ":");
8796
8797         if (!strlen(number))
8798                 goto out_reg;
8799
8800         /*
8801          * We use the callback data field (which is a pointer)
8802          * as our counter.
8803          */
8804         ret = kstrtoul(number, 0, (unsigned long *)&count);
8805         if (ret)
8806                 return ret;
8807
8808  out_reg:
8809         ret = tracing_alloc_snapshot_instance(tr);
8810         if (ret < 0)
8811                 goto out;
8812
8813         ret = register_ftrace_function_probe(glob, tr, ops, count);
8814
8815  out:
8816         return ret < 0 ? ret : 0;
8817 }
8818
8819 static struct ftrace_func_command ftrace_snapshot_cmd = {
8820         .name                   = "snapshot",
8821         .func                   = ftrace_trace_snapshot_callback,
8822 };
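/*
 * Usage sketch for the "snapshot" function command registered below
 * ("some_func" is a placeholder; paths are relative to the tracefs mount):
 *
 *	echo 'some_func:snapshot' > set_ftrace_filter      # snapshot on every hit
 *	echo 'some_func:snapshot:3' > set_ftrace_filter    # only the first 3 hits
 *	echo '!some_func:snapshot' > set_ftrace_filter     # remove the probe
 *
 * Each remaining hit of some_func calls tracing_snapshot_instance() on the
 * trace instance that owns the filter file.
 */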
8823
8824 static __init int register_snapshot_cmd(void)
8825 {
8826         return register_ftrace_command(&ftrace_snapshot_cmd);
8827 }
8828 #else
8829 static inline __init int register_snapshot_cmd(void) { return 0; }
8830 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8831
8832 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8833 {
8834         if (WARN_ON(!tr->dir))
8835                 return ERR_PTR(-ENODEV);
8836
8837         /* Top directory uses NULL as the parent */
8838         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8839                 return NULL;
8840
8841         /* All sub buffers have a descriptor */
8842         return tr->dir;
8843 }
8844
8845 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8846 {
8847         struct dentry *d_tracer;
8848
8849         if (tr->percpu_dir)
8850                 return tr->percpu_dir;
8851
8852         d_tracer = tracing_get_dentry(tr);
8853         if (IS_ERR(d_tracer))
8854                 return NULL;
8855
8856         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8857
8858         MEM_FAIL(!tr->percpu_dir,
8859                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8860
8861         return tr->percpu_dir;
8862 }
8863
8864 static struct dentry *
8865 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8866                       void *data, long cpu, const struct file_operations *fops)
8867 {
8868         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8869
8870         if (ret) /* See tracing_get_cpu() */
8871                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8872         return ret;
8873 }
8874
8875 static void
8876 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8877 {
8878         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8879         struct dentry *d_cpu;
8880         char cpu_dir[30]; /* 30 characters should be more than enough */
8881
8882         if (!d_percpu)
8883                 return;
8884
8885         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8886         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8887         if (!d_cpu) {
8888                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8889                 return;
8890         }
8891
8892         /* per cpu trace_pipe */
8893         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8894                                 tr, cpu, &tracing_pipe_fops);
8895
8896         /* per cpu trace */
8897         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8898                                 tr, cpu, &tracing_fops);
8899
8900         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8901                                 tr, cpu, &tracing_buffers_fops);
8902
8903         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8904                                 tr, cpu, &tracing_stats_fops);
8905
8906         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8907                                 tr, cpu, &tracing_entries_fops);
8908
8909 #ifdef CONFIG_TRACER_SNAPSHOT
8910         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8911                                 tr, cpu, &snapshot_fops);
8912
8913         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8914                                 tr, cpu, &snapshot_raw_fops);
8915 #endif
8916 }
8917
8918 #ifdef CONFIG_FTRACE_SELFTEST
8919 /* Let selftest have access to static functions in this file */
8920 #include "trace_selftest.c"
8921 #endif
8922
8923 static ssize_t
8924 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8925                         loff_t *ppos)
8926 {
8927         struct trace_option_dentry *topt = filp->private_data;
8928         char *buf;
8929
8930         if (topt->flags->val & topt->opt->bit)
8931                 buf = "1\n";
8932         else
8933                 buf = "0\n";
8934
8935         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8936 }
8937
8938 static ssize_t
8939 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8940                          loff_t *ppos)
8941 {
8942         struct trace_option_dentry *topt = filp->private_data;
8943         unsigned long val;
8944         int ret;
8945
8946         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8947         if (ret)
8948                 return ret;
8949
8950         if (val != 0 && val != 1)
8951                 return -EINVAL;
8952
8953         if (!!(topt->flags->val & topt->opt->bit) != val) {
8954                 mutex_lock(&trace_types_lock);
8955                 ret = __set_tracer_option(topt->tr, topt->flags,
8956                                           topt->opt, !val);
8957                 mutex_unlock(&trace_types_lock);
8958                 if (ret)
8959                         return ret;
8960         }
8961
8962         *ppos += cnt;
8963
8964         return cnt;
8965 }
8966
8967 static int tracing_open_options(struct inode *inode, struct file *filp)
8968 {
8969         struct trace_option_dentry *topt = inode->i_private;
8970         int ret;
8971
8972         ret = tracing_check_open_get_tr(topt->tr);
8973         if (ret)
8974                 return ret;
8975
8976         filp->private_data = inode->i_private;
8977         return 0;
8978 }
8979
8980 static int tracing_release_options(struct inode *inode, struct file *file)
8981 {
8982         struct trace_option_dentry *topt = file->private_data;
8983
8984         trace_array_put(topt->tr);
8985         return 0;
8986 }
8987
8988 static const struct file_operations trace_options_fops = {
8989         .open = tracing_open_options,
8990         .read = trace_options_read,
8991         .write = trace_options_write,
8992         .llseek = generic_file_llseek,
8993         .release = tracing_release_options,
8994 };
8995
8996 /*
8997  * In order to pass in both the trace_array descriptor as well as the index
8998  * to the flag that the trace option file represents, the trace_array
8999  * has a character array of trace_flags_index[], which holds the index
9000  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9001  * The address of this character array is passed to the flag option file
9002  * read/write callbacks.
9003  *
9004  * In order to extract both the index and the trace_array descriptor,
9005  * get_tr_index() uses the following algorithm.
9006  *
9007  *   idx = *ptr;
9008  *
9009  * Since the pointer points at an element of the index array, and each
9010  * element holds its own index (remember index[1] == 1), dereferencing
9011  * it yields the flag index.
9012  *
9013  * Then, subtracting that index from the ptr gives the start of the array:
9014  *
9015  *   ptr - idx == &index[0]
9016  *
9017  * Then a simple container_of() from that pointer gets us to the
9018  * trace_array descriptor.
9019  */
9020 static void get_tr_index(void *data, struct trace_array **ptr,
9021                          unsigned int *pindex)
9022 {
9023         *pindex = *(unsigned char *)data;
9024
9025         *ptr = container_of(data - *pindex, struct trace_array,
9026                             trace_flags_index);
9027 }
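/*
 * Worked example with made-up numbers: if trace_flags_index[] of a given
 * trace_array starts at address A, the option file for flag 3 is created
 * with data == &index[3] == A + 3, and index[3] == 3.  Then:
 *
 *	idx  = *(unsigned char *)data;	 ( == 3 )
 *	data - idx			 ( == A == &index[0] )
 *
 * and container_of(A, struct trace_array, trace_flags_index) recovers the
 * owning trace_array.
 */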
9028
9029 static ssize_t
9030 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9031                         loff_t *ppos)
9032 {
9033         void *tr_index = filp->private_data;
9034         struct trace_array *tr;
9035         unsigned int index;
9036         char *buf;
9037
9038         get_tr_index(tr_index, &tr, &index);
9039
9040         if (tr->trace_flags & (1 << index))
9041                 buf = "1\n";
9042         else
9043                 buf = "0\n";
9044
9045         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9046 }
9047
9048 static ssize_t
9049 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9050                          loff_t *ppos)
9051 {
9052         void *tr_index = filp->private_data;
9053         struct trace_array *tr;
9054         unsigned int index;
9055         unsigned long val;
9056         int ret;
9057
9058         get_tr_index(tr_index, &tr, &index);
9059
9060         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9061         if (ret)
9062                 return ret;
9063
9064         if (val != 0 && val != 1)
9065                 return -EINVAL;
9066
9067         mutex_lock(&event_mutex);
9068         mutex_lock(&trace_types_lock);
9069         ret = set_tracer_flag(tr, 1 << index, val);
9070         mutex_unlock(&trace_types_lock);
9071         mutex_unlock(&event_mutex);
9072
9073         if (ret < 0)
9074                 return ret;
9075
9076         *ppos += cnt;
9077
9078         return cnt;
9079 }
9080
9081 static const struct file_operations trace_options_core_fops = {
9082         .open = tracing_open_generic,
9083         .read = trace_options_core_read,
9084         .write = trace_options_core_write,
9085         .llseek = generic_file_llseek,
9086 };
9087
9088 struct dentry *trace_create_file(const char *name,
9089                                  umode_t mode,
9090                                  struct dentry *parent,
9091                                  void *data,
9092                                  const struct file_operations *fops)
9093 {
9094         struct dentry *ret;
9095
9096         ret = tracefs_create_file(name, mode, parent, data, fops);
9097         if (!ret)
9098                 pr_warn("Could not create tracefs '%s' entry\n", name);
9099
9100         return ret;
9101 }
9102
9103
9104 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9105 {
9106         struct dentry *d_tracer;
9107
9108         if (tr->options)
9109                 return tr->options;
9110
9111         d_tracer = tracing_get_dentry(tr);
9112         if (IS_ERR(d_tracer))
9113                 return NULL;
9114
9115         tr->options = tracefs_create_dir("options", d_tracer);
9116         if (!tr->options) {
9117                 pr_warn("Could not create tracefs directory 'options'\n");
9118                 return NULL;
9119         }
9120
9121         return tr->options;
9122 }
9123
9124 static void
9125 create_trace_option_file(struct trace_array *tr,
9126                          struct trace_option_dentry *topt,
9127                          struct tracer_flags *flags,
9128                          struct tracer_opt *opt)
9129 {
9130         struct dentry *t_options;
9131
9132         t_options = trace_options_init_dentry(tr);
9133         if (!t_options)
9134                 return;
9135
9136         topt->flags = flags;
9137         topt->opt = opt;
9138         topt->tr = tr;
9139
9140         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9141                                         t_options, topt, &trace_options_fops);
9142
9143 }
9144
9145 static void
9146 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9147 {
9148         struct trace_option_dentry *topts;
9149         struct trace_options *tr_topts;
9150         struct tracer_flags *flags;
9151         struct tracer_opt *opts;
9152         int cnt;
9153         int i;
9154
9155         if (!tracer)
9156                 return;
9157
9158         flags = tracer->flags;
9159
9160         if (!flags || !flags->opts)
9161                 return;
9162
9163         /*
9164          * If this is an instance, only create flags for tracers
9165          * the instance may have.
9166          */
9167         if (!trace_ok_for_array(tracer, tr))
9168                 return;
9169
9170         for (i = 0; i < tr->nr_topts; i++) {
9171                 /* Make sure there are no duplicate flags. */
9172                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9173                         return;
9174         }
9175
9176         opts = flags->opts;
9177
9178         for (cnt = 0; opts[cnt].name; cnt++)
9179                 ;
9180
9181         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9182         if (!topts)
9183                 return;
9184
9185         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9186                             GFP_KERNEL);
9187         if (!tr_topts) {
9188                 kfree(topts);
9189                 return;
9190         }
9191
9192         tr->topts = tr_topts;
9193         tr->topts[tr->nr_topts].tracer = tracer;
9194         tr->topts[tr->nr_topts].topts = topts;
9195         tr->nr_topts++;
9196
9197         for (cnt = 0; opts[cnt].name; cnt++) {
9198                 create_trace_option_file(tr, &topts[cnt], flags,
9199                                          &opts[cnt]);
9200                 MEM_FAIL(topts[cnt].entry == NULL,
9201                           "Failed to create trace option: %s",
9202                           opts[cnt].name);
9203         }
9204 }
9205
9206 static struct dentry *
9207 create_trace_option_core_file(struct trace_array *tr,
9208                               const char *option, long index)
9209 {
9210         struct dentry *t_options;
9211
9212         t_options = trace_options_init_dentry(tr);
9213         if (!t_options)
9214                 return NULL;
9215
9216         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9217                                  (void *)&tr->trace_flags_index[index],
9218                                  &trace_options_core_fops);
9219 }
9220
9221 static void create_trace_options_dir(struct trace_array *tr)
9222 {
9223         struct dentry *t_options;
9224         bool top_level = tr == &global_trace;
9225         int i;
9226
9227         t_options = trace_options_init_dentry(tr);
9228         if (!t_options)
9229                 return;
9230
9231         for (i = 0; trace_options[i]; i++) {
9232                 if (top_level ||
9233                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9234                         create_trace_option_core_file(tr, trace_options[i], i);
9235         }
9236 }
9237
9238 static ssize_t
9239 rb_simple_read(struct file *filp, char __user *ubuf,
9240                size_t cnt, loff_t *ppos)
9241 {
9242         struct trace_array *tr = filp->private_data;
9243         char buf[64];
9244         int r;
9245
9246         r = tracer_tracing_is_on(tr);
9247         r = sprintf(buf, "%d\n", r);
9248
9249         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9250 }
9251
9252 static ssize_t
9253 rb_simple_write(struct file *filp, const char __user *ubuf,
9254                 size_t cnt, loff_t *ppos)
9255 {
9256         struct trace_array *tr = filp->private_data;
9257         struct trace_buffer *buffer = tr->array_buffer.buffer;
9258         unsigned long val;
9259         int ret;
9260
9261         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9262         if (ret)
9263                 return ret;
9264
9265         if (buffer) {
9266                 mutex_lock(&trace_types_lock);
9267                 if (!!val == tracer_tracing_is_on(tr)) {
9268                         val = 0; /* do nothing */
9269                 } else if (val) {
9270                         tracer_tracing_on(tr);
9271                         if (tr->current_trace->start)
9272                                 tr->current_trace->start(tr);
9273                 } else {
9274                         tracer_tracing_off(tr);
9275                         if (tr->current_trace->stop)
9276                                 tr->current_trace->stop(tr);
9277                         /* Wake up any waiters */
9278                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9279                 }
9280                 mutex_unlock(&trace_types_lock);
9281         }
9282
9283         (*ppos)++;
9284
9285         return cnt;
9286 }
9287
9288 static const struct file_operations rb_simple_fops = {
9289         .open           = tracing_open_generic_tr,
9290         .read           = rb_simple_read,
9291         .write          = rb_simple_write,
9292         .release        = tracing_release_generic_tr,
9293         .llseek         = default_llseek,
9294 };
9295
9296 static ssize_t
9297 buffer_percent_read(struct file *filp, char __user *ubuf,
9298                     size_t cnt, loff_t *ppos)
9299 {
9300         struct trace_array *tr = filp->private_data;
9301         char buf[64];
9302         int r;
9303
9304         r = tr->buffer_percent;
9305         r = sprintf(buf, "%d\n", r);
9306
9307         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9308 }
9309
9310 static ssize_t
9311 buffer_percent_write(struct file *filp, const char __user *ubuf,
9312                      size_t cnt, loff_t *ppos)
9313 {
9314         struct trace_array *tr = filp->private_data;
9315         unsigned long val;
9316         int ret;
9317
9318         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9319         if (ret)
9320                 return ret;
9321
9322         if (val > 100)
9323                 return -EINVAL;
9324
9325         tr->buffer_percent = val;
9326
9327         (*ppos)++;
9328
9329         return cnt;
9330 }
9331
9332 static const struct file_operations buffer_percent_fops = {
9333         .open           = tracing_open_generic_tr,
9334         .read           = buffer_percent_read,
9335         .write          = buffer_percent_write,
9336         .release        = tracing_release_generic_tr,
9337         .llseek         = default_llseek,
9338 };
9339
9340 static struct dentry *trace_instance_dir;
9341
9342 static void
9343 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9344
9345 static int
9346 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9347 {
9348         enum ring_buffer_flags rb_flags;
9349
9350         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9351
9352         buf->tr = tr;
9353
9354         buf->buffer = ring_buffer_alloc(size, rb_flags);
9355         if (!buf->buffer)
9356                 return -ENOMEM;
9357
9358         buf->data = alloc_percpu(struct trace_array_cpu);
9359         if (!buf->data) {
9360                 ring_buffer_free(buf->buffer);
9361                 buf->buffer = NULL;
9362                 return -ENOMEM;
9363         }
9364
9365         /* Allocate the first page for all buffers */
9366         set_buffer_entries(&tr->array_buffer,
9367                            ring_buffer_size(tr->array_buffer.buffer, 0));
9368
9369         return 0;
9370 }
9371
9372 static void free_trace_buffer(struct array_buffer *buf)
9373 {
9374         if (buf->buffer) {
9375                 ring_buffer_free(buf->buffer);
9376                 buf->buffer = NULL;
9377                 free_percpu(buf->data);
9378                 buf->data = NULL;
9379         }
9380 }
9381
9382 static int allocate_trace_buffers(struct trace_array *tr, int size)
9383 {
9384         int ret;
9385
9386         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9387         if (ret)
9388                 return ret;
9389
9390 #ifdef CONFIG_TRACER_MAX_TRACE
9391         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9392                                     allocate_snapshot ? size : 1);
9393         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9394                 free_trace_buffer(&tr->array_buffer);
9395                 return -ENOMEM;
9396         }
9397         tr->allocated_snapshot = allocate_snapshot;
9398
9399         allocate_snapshot = false;
9400 #endif
9401
9402         return 0;
9403 }
9404
9405 static void free_trace_buffers(struct trace_array *tr)
9406 {
9407         if (!tr)
9408                 return;
9409
9410         free_trace_buffer(&tr->array_buffer);
9411
9412 #ifdef CONFIG_TRACER_MAX_TRACE
9413         free_trace_buffer(&tr->max_buffer);
9414 #endif
9415 }
9416
9417 static void init_trace_flags_index(struct trace_array *tr)
9418 {
9419         int i;
9420
9421         /* Used by the trace options files */
9422         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9423                 tr->trace_flags_index[i] = i;
9424 }
9425
9426 static void __update_tracer_options(struct trace_array *tr)
9427 {
9428         struct tracer *t;
9429
9430         for (t = trace_types; t; t = t->next)
9431                 add_tracer_options(tr, t);
9432 }
9433
9434 static void update_tracer_options(struct trace_array *tr)
9435 {
9436         mutex_lock(&trace_types_lock);
9437         tracer_options_updated = true;
9438         __update_tracer_options(tr);
9439         mutex_unlock(&trace_types_lock);
9440 }
9441
9442 /* Must have trace_types_lock held */
9443 struct trace_array *trace_array_find(const char *instance)
9444 {
9445         struct trace_array *tr, *found = NULL;
9446
9447         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9448                 if (tr->name && strcmp(tr->name, instance) == 0) {
9449                         found = tr;
9450                         break;
9451                 }
9452         }
9453
9454         return found;
9455 }
9456
9457 struct trace_array *trace_array_find_get(const char *instance)
9458 {
9459         struct trace_array *tr;
9460
9461         mutex_lock(&trace_types_lock);
9462         tr = trace_array_find(instance);
9463         if (tr)
9464                 tr->ref++;
9465         mutex_unlock(&trace_types_lock);
9466
9467         return tr;
9468 }
9469
9470 static int trace_array_create_dir(struct trace_array *tr)
9471 {
9472         int ret;
9473
9474         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9475         if (!tr->dir)
9476                 return -EINVAL;
9477
9478         ret = event_trace_add_tracer(tr->dir, tr);
9479         if (ret) {
9480                 tracefs_remove(tr->dir);
9481                 return ret;
9482         }
9483
9484         init_tracer_tracefs(tr, tr->dir);
9485         __update_tracer_options(tr);
9486
9487         return ret;
9488 }
9489
9490 static struct trace_array *trace_array_create(const char *name)
9491 {
9492         struct trace_array *tr;
9493         int ret;
9494
9495         ret = -ENOMEM;
9496         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9497         if (!tr)
9498                 return ERR_PTR(ret);
9499
9500         tr->name = kstrdup(name, GFP_KERNEL);
9501         if (!tr->name)
9502                 goto out_free_tr;
9503
9504         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9505                 goto out_free_tr;
9506
9507         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9508                 goto out_free_tr;
9509
9510         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9511
9512         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9513
9514         raw_spin_lock_init(&tr->start_lock);
9515
9516         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9517
9518         tr->current_trace = &nop_trace;
9519
9520         INIT_LIST_HEAD(&tr->systems);
9521         INIT_LIST_HEAD(&tr->events);
9522         INIT_LIST_HEAD(&tr->hist_vars);
9523         INIT_LIST_HEAD(&tr->err_log);
9524
9525         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9526                 goto out_free_tr;
9527
9528         if (ftrace_allocate_ftrace_ops(tr) < 0)
9529                 goto out_free_tr;
9530
9531         ftrace_init_trace_array(tr);
9532
9533         init_trace_flags_index(tr);
9534
9535         if (trace_instance_dir) {
9536                 ret = trace_array_create_dir(tr);
9537                 if (ret)
9538                         goto out_free_tr;
9539         } else
9540                 __trace_early_add_events(tr);
9541
9542         list_add(&tr->list, &ftrace_trace_arrays);
9543
9544         tr->ref++;
9545
9546         return tr;
9547
9548  out_free_tr:
9549         ftrace_free_ftrace_ops(tr);
9550         free_trace_buffers(tr);
9551         free_cpumask_var(tr->pipe_cpumask);
9552         free_cpumask_var(tr->tracing_cpumask);
9553         kfree(tr->name);
9554         kfree(tr);
9555
9556         return ERR_PTR(ret);
9557 }
9558
9559 static int instance_mkdir(const char *name)
9560 {
9561         struct trace_array *tr;
9562         int ret;
9563
9564         mutex_lock(&event_mutex);
9565         mutex_lock(&trace_types_lock);
9566
9567         ret = -EEXIST;
9568         if (trace_array_find(name))
9569                 goto out_unlock;
9570
9571         tr = trace_array_create(name);
9572
9573         ret = PTR_ERR_OR_ZERO(tr);
9574
9575 out_unlock:
9576         mutex_unlock(&trace_types_lock);
9577         mutex_unlock(&event_mutex);
9578         return ret;
9579 }
9580
9581 /**
9582  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9583  * @name: The name of the trace array to be looked up/created.
9584  *
9585  * Returns a pointer to the trace array with the given name, creating it
9586  * if it does not already exist. Returns NULL if it cannot be created.
9587  *
9588  * NOTE: This function increments the reference counter associated with the
9589  * trace array returned. This makes sure it cannot be freed while in use.
9590  * Use trace_array_put() once the trace array is no longer needed.
9591  * If the trace_array is to be freed, trace_array_destroy() needs to
9592  * be called after the trace_array_put(), or simply let user space delete
9593  * it from the tracefs instances directory. But until the
9594  * trace_array_put() is called, user space can not delete it.
9595  *
9596  */
9597 struct trace_array *trace_array_get_by_name(const char *name)
9598 {
9599         struct trace_array *tr;
9600
9601         mutex_lock(&event_mutex);
9602         mutex_lock(&trace_types_lock);
9603
9604         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9605                 if (tr->name && strcmp(tr->name, name) == 0)
9606                         goto out_unlock;
9607         }
9608
9609         tr = trace_array_create(name);
9610
9611         if (IS_ERR(tr))
9612                 tr = NULL;
9613 out_unlock:
9614         if (tr)
9615                 tr->ref++;
9616
9617         mutex_unlock(&trace_types_lock);
9618         mutex_unlock(&event_mutex);
9619         return tr;
9620 }
9621 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
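
/*
 * Illustrative sketch, not part of this file: one way a module could use
 * the lifecycle documented above. The instance name "example" and the
 * function name are hypothetical, error handling is reduced to the
 * essentials, and the sched:sched_switch event is only assumed to exist.
 */
static int example_use_instance(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("example");	/* look up or create */
	if (!tr)
		return -ENOMEM;

	/* Enable one event in this instance only */
	trace_array_set_clr_event(tr, "sched", "sched_switch", true);

	trace_array_printk(tr, _THIS_IP_, "hello from the example instance\n");

	trace_array_put(tr);			/* drop the reference taken above */
	return trace_array_destroy(tr);		/* then remove the instance */
}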
9622
9623 static int __remove_instance(struct trace_array *tr)
9624 {
9625         int i;
9626
9627         /* Reference counter for a newly created trace array = 1. */
9628         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9629                 return -EBUSY;
9630
9631         list_del(&tr->list);
9632
9633         /* Disable all the flags that were enabled coming in */
9634         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9635                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9636                         set_tracer_flag(tr, 1 << i, 0);
9637         }
9638
9639         tracing_set_nop(tr);
9640         clear_ftrace_function_probes(tr);
9641         event_trace_del_tracer(tr);
9642         ftrace_clear_pids(tr);
9643         ftrace_destroy_function_files(tr);
9644         tracefs_remove(tr->dir);
9645         free_percpu(tr->last_func_repeats);
9646         free_trace_buffers(tr);
9647         clear_tracing_err_log(tr);
9648
9649         for (i = 0; i < tr->nr_topts; i++) {
9650                 kfree(tr->topts[i].topts);
9651         }
9652         kfree(tr->topts);
9653
9654         free_cpumask_var(tr->pipe_cpumask);
9655         free_cpumask_var(tr->tracing_cpumask);
9656         kfree(tr->name);
9657         kfree(tr);
9658
9659         return 0;
9660 }
9661
9662 int trace_array_destroy(struct trace_array *this_tr)
9663 {
9664         struct trace_array *tr;
9665         int ret;
9666
9667         if (!this_tr)
9668                 return -EINVAL;
9669
9670         mutex_lock(&event_mutex);
9671         mutex_lock(&trace_types_lock);
9672
9673         ret = -ENODEV;
9674
9675         /* Make sure the trace array exists before destroying it. */
9676         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9677                 if (tr == this_tr) {
9678                         ret = __remove_instance(tr);
9679                         break;
9680                 }
9681         }
9682
9683         mutex_unlock(&trace_types_lock);
9684         mutex_unlock(&event_mutex);
9685
9686         return ret;
9687 }
9688 EXPORT_SYMBOL_GPL(trace_array_destroy);
9689
9690 static int instance_rmdir(const char *name)
9691 {
9692         struct trace_array *tr;
9693         int ret;
9694
9695         mutex_lock(&event_mutex);
9696         mutex_lock(&trace_types_lock);
9697
9698         ret = -ENODEV;
9699         tr = trace_array_find(name);
9700         if (tr)
9701                 ret = __remove_instance(tr);
9702
9703         mutex_unlock(&trace_types_lock);
9704         mutex_unlock(&event_mutex);
9705
9706         return ret;
9707 }
9708
9709 static __init void create_trace_instances(struct dentry *d_tracer)
9710 {
9711         struct trace_array *tr;
9712
9713         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9714                                                          instance_mkdir,
9715                                                          instance_rmdir);
9716         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9717                 return;
9718
9719         mutex_lock(&event_mutex);
9720         mutex_lock(&trace_types_lock);
9721
9722         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9723                 if (!tr->name)
9724                         continue;
9725                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9726                              "Failed to create instance directory\n"))
9727                         break;
9728         }
9729
9730         mutex_unlock(&trace_types_lock);
9731         mutex_unlock(&event_mutex);
9732 }
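
/*
 * Illustrative note, not part of this file: once the "instances" directory
 * exists, user space exercises instance_mkdir()/instance_rmdir() directly,
 * e.g.:
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *	rmdir /sys/kernel/tracing/instances/foo
 *
 * The rmdir fails with -EBUSY while the instance still holds references
 * (see __remove_instance() above).
 */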
9733
9734 static void
9735 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9736 {
9737         struct trace_event_file *file;
9738         int cpu;
9739
9740         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9741                         tr, &show_traces_fops);
9742
9743         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9744                         tr, &set_tracer_fops);
9745
9746         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9747                           tr, &tracing_cpumask_fops);
9748
9749         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9750                           tr, &tracing_iter_fops);
9751
9752         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9753                           tr, &tracing_fops);
9754
9755         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9756                           tr, &tracing_pipe_fops);
9757
9758         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9759                           tr, &tracing_entries_fops);
9760
9761         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9762                           tr, &tracing_total_entries_fops);
9763
9764         trace_create_file("free_buffer", 0200, d_tracer,
9765                           tr, &tracing_free_buffer_fops);
9766
9767         trace_create_file("trace_marker", 0220, d_tracer,
9768                           tr, &tracing_mark_fops);
9769
9770         file = __find_event_file(tr, "ftrace", "print");
9771         if (file && file->ef)
9772                 eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
9773                                   file, &event_trigger_fops);
9774         tr->trace_marker_file = file;
9775
9776         trace_create_file("trace_marker_raw", 0220, d_tracer,
9777                           tr, &tracing_mark_raw_fops);
9778
9779         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9780                           &trace_clock_fops);
9781
9782         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9783                           tr, &rb_simple_fops);
9784
9785         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9786                           &trace_time_stamp_mode_fops);
9787
9788         tr->buffer_percent = 50;
9789
9790         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9791                         tr, &buffer_percent_fops);
9792
9793         create_trace_options_dir(tr);
9794
9795 #ifdef CONFIG_TRACER_MAX_TRACE
9796         trace_create_maxlat_file(tr, d_tracer);
9797 #endif
9798
9799         if (ftrace_create_function_files(tr, d_tracer))
9800                 MEM_FAIL(1, "Could not allocate function filter files");
9801
9802 #ifdef CONFIG_TRACER_SNAPSHOT
9803         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9804                           tr, &snapshot_fops);
9805 #endif
9806
9807         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9808                           tr, &tracing_err_log_fops);
9809
9810         for_each_tracing_cpu(cpu)
9811                 tracing_init_tracefs_percpu(tr, cpu);
9812
9813         ftrace_init_tracefs(tr, d_tracer);
9814 }
9815
9816 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9817 {
9818         struct vfsmount *mnt;
9819         struct file_system_type *type;
9820
9821         /*
9822          * To maintain backward compatibility for tools that mount
9823          * debugfs to get to the tracing facility, tracefs is automatically
9824          * mounted to the debugfs/tracing directory.
9825          */
9826         type = get_fs_type("tracefs");
9827         if (!type)
9828                 return NULL;
9829         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9830         put_filesystem(type);
9831         if (IS_ERR(mnt))
9832                 return NULL;
9833         mntget(mnt);
9834
9835         return mnt;
9836 }
9837
9838 /**
9839  * tracing_init_dentry - initialize top level trace array
9840  *
9841  * This is called when creating files or directories in the tracing
9842  * directory. It is called via fs_initcall() by any of the boot up code
9843  * and expects to return the dentry of the top level tracing directory.
9844  */
9845 int tracing_init_dentry(void)
9846 {
9847         struct trace_array *tr = &global_trace;
9848
9849         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9850                 pr_warn("Tracing disabled due to lockdown\n");
9851                 return -EPERM;
9852         }
9853
9854         /* The top level trace array uses NULL as parent */
9855         if (tr->dir)
9856                 return 0;
9857
9858         if (WARN_ON(!tracefs_initialized()))
9859                 return -ENODEV;
9860
9861         /*
9862          * As there may still be users that expect the tracing
9863          * files to exist in debugfs/tracing, we must automount
9864          * the tracefs file system there, so older tools still
9865          * work with the newer kernel.
9866          */
9867         tr->dir = debugfs_create_automount("tracing", NULL,
9868                                            trace_automount, NULL);
9869
9870         return 0;
9871 }
9872
9873 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9874 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9875
9876 static struct workqueue_struct *eval_map_wq __initdata;
9877 static struct work_struct eval_map_work __initdata;
9878 static struct work_struct tracerfs_init_work __initdata;
9879
9880 static void __init eval_map_work_func(struct work_struct *work)
9881 {
9882         int len;
9883
9884         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9885         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9886 }
9887
9888 static int __init trace_eval_init(void)
9889 {
9890         INIT_WORK(&eval_map_work, eval_map_work_func);
9891
9892         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9893         if (!eval_map_wq) {
9894                 pr_err("Unable to allocate eval_map_wq\n");
9895                 /* Do work here */
9896                 eval_map_work_func(&eval_map_work);
9897                 return -ENOMEM;
9898         }
9899
9900         queue_work(eval_map_wq, &eval_map_work);
9901         return 0;
9902 }
9903
9904 subsys_initcall(trace_eval_init);
9905
9906 static int __init trace_eval_sync(void)
9907 {
9908         /* Make sure the eval map updates are finished */
9909         if (eval_map_wq)
9910                 destroy_workqueue(eval_map_wq);
9911         return 0;
9912 }
9913
9914 late_initcall_sync(trace_eval_sync);
9915
9916
9917 #ifdef CONFIG_MODULES
9918 static void trace_module_add_evals(struct module *mod)
9919 {
9920         if (!mod->num_trace_evals)
9921                 return;
9922
9923         /*
9924          * Modules with bad taint do not have events created, do
9925          * not bother with enums either.
9926          */
9927         if (trace_module_has_bad_taint(mod))
9928                 return;
9929
9930         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9931 }
9932
9933 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9934 static void trace_module_remove_evals(struct module *mod)
9935 {
9936         union trace_eval_map_item *map;
9937         union trace_eval_map_item **last = &trace_eval_maps;
9938
9939         if (!mod->num_trace_evals)
9940                 return;
9941
9942         mutex_lock(&trace_eval_mutex);
9943
9944         map = trace_eval_maps;
9945
9946         while (map) {
9947                 if (map->head.mod == mod)
9948                         break;
9949                 map = trace_eval_jmp_to_tail(map);
9950                 last = &map->tail.next;
9951                 map = map->tail.next;
9952         }
9953         if (!map)
9954                 goto out;
9955
9956         *last = trace_eval_jmp_to_tail(map)->tail.next;
9957         kfree(map);
9958  out:
9959         mutex_unlock(&trace_eval_mutex);
9960 }
9961 #else
9962 static inline void trace_module_remove_evals(struct module *mod) { }
9963 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9964
9965 static int trace_module_notify(struct notifier_block *self,
9966                                unsigned long val, void *data)
9967 {
9968         struct module *mod = data;
9969
9970         switch (val) {
9971         case MODULE_STATE_COMING:
9972                 trace_module_add_evals(mod);
9973                 break;
9974         case MODULE_STATE_GOING:
9975                 trace_module_remove_evals(mod);
9976                 break;
9977         }
9978
9979         return NOTIFY_OK;
9980 }
9981
9982 static struct notifier_block trace_module_nb = {
9983         .notifier_call = trace_module_notify,
9984         .priority = 0,
9985 };
9986 #endif /* CONFIG_MODULES */
9987
9988 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9989 {
9990
9991         event_trace_init();
9992
9993         init_tracer_tracefs(&global_trace, NULL);
9994         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9995
9996         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9997                         &global_trace, &tracing_thresh_fops);
9998
9999         trace_create_file("README", TRACE_MODE_READ, NULL,
10000                         NULL, &tracing_readme_fops);
10001
10002         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10003                         NULL, &tracing_saved_cmdlines_fops);
10004
10005         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10006                           NULL, &tracing_saved_cmdlines_size_fops);
10007
10008         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10009                         NULL, &tracing_saved_tgids_fops);
10010
10011         trace_create_eval_file(NULL);
10012
10013 #ifdef CONFIG_MODULES
10014         register_module_notifier(&trace_module_nb);
10015 #endif
10016
10017 #ifdef CONFIG_DYNAMIC_FTRACE
10018         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10019                         NULL, &tracing_dyn_info_fops);
10020 #endif
10021
10022         create_trace_instances(NULL);
10023
10024         update_tracer_options(&global_trace);
10025 }
10026
10027 static __init int tracer_init_tracefs(void)
10028 {
10029         int ret;
10030
10031         trace_access_lock_init();
10032
10033         ret = tracing_init_dentry();
10034         if (ret)
10035                 return 0;
10036
10037         if (eval_map_wq) {
10038                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10039                 queue_work(eval_map_wq, &tracerfs_init_work);
10040         } else {
10041                 tracer_init_tracefs_work_func(NULL);
10042         }
10043
10044         rv_init_interface();
10045
10046         return 0;
10047 }
10048
10049 fs_initcall(tracer_init_tracefs);
10050
10051 static int trace_die_panic_handler(struct notifier_block *self,
10052                                 unsigned long ev, void *unused);
10053
10054 static struct notifier_block trace_panic_notifier = {
10055         .notifier_call = trace_die_panic_handler,
10056         .priority = INT_MAX - 1,
10057 };
10058
10059 static struct notifier_block trace_die_notifier = {
10060         .notifier_call = trace_die_panic_handler,
10061         .priority = INT_MAX - 1,
10062 };
10063
10064 /*
10065  * The idea is to execute the following die/panic callback early, in order
10066  * to avoid showing irrelevant information in the trace (like other panic
10067  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10068  * warnings get disabled (to prevent potential log flooding).
10069  */
10070 static int trace_die_panic_handler(struct notifier_block *self,
10071                                 unsigned long ev, void *unused)
10072 {
10073         if (!ftrace_dump_on_oops)
10074                 return NOTIFY_DONE;
10075
10076         /* The die notifier requires DIE_OOPS to trigger */
10077         if (self == &trace_die_notifier && ev != DIE_OOPS)
10078                 return NOTIFY_DONE;
10079
10080         ftrace_dump(ftrace_dump_on_oops);
10081
10082         return NOTIFY_DONE;
10083 }
10084
10085 /*
10086  * printk is limited to a max of 1024 bytes; we really don't need it that big.
10087  * Nothing should be printing 1000 characters anyway.
10088  */
10089 #define TRACE_MAX_PRINT         1000
10090
10091 /*
10092  * Define here KERN_TRACE so that we have one place to modify
10093  * it if we decide to change what log level the ftrace dump
10094  * should be at.
10095  */
10096 #define KERN_TRACE              KERN_EMERG
10097
10098 void
10099 trace_printk_seq(struct trace_seq *s)
10100 {
10101         /* Probably should print a warning here. */
10102         if (s->seq.len >= TRACE_MAX_PRINT)
10103                 s->seq.len = TRACE_MAX_PRINT;
10104
10105         /*
10106          * More paranoid code. Although the buffer size is set to
10107          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10108          * an extra layer of protection.
10109          */
10110         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10111                 s->seq.len = s->seq.size - 1;
10112
10113         /* should already be NUL-terminated, but we are paranoid. */
10114         s->buffer[s->seq.len] = 0;
10115
10116         printk(KERN_TRACE "%s", s->buffer);
10117
10118         trace_seq_init(s);
10119 }
10120
10121 void trace_init_global_iter(struct trace_iterator *iter)
10122 {
10123         iter->tr = &global_trace;
10124         iter->trace = iter->tr->current_trace;
10125         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10126         iter->array_buffer = &global_trace.array_buffer;
10127
10128         if (iter->trace && iter->trace->open)
10129                 iter->trace->open(iter);
10130
10131         /* Annotate start of buffers if we had overruns */
10132         if (ring_buffer_overruns(iter->array_buffer->buffer))
10133                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10134
10135         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10136         if (trace_clocks[iter->tr->clock_id].in_ns)
10137                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10138
10139         /* Can not use kmalloc for iter.temp and iter.fmt */
10140         iter->temp = static_temp_buf;
10141         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10142         iter->fmt = static_fmt_buf;
10143         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10144 }
10145
10146 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10147 {
10148         /* use static because iter can be a bit big for the stack */
10149         static struct trace_iterator iter;
10150         static atomic_t dump_running;
10151         struct trace_array *tr = &global_trace;
10152         unsigned int old_userobj;
10153         unsigned long flags;
10154         int cnt = 0, cpu;
10155
10156         /* Only allow one dump user at a time. */
10157         if (atomic_inc_return(&dump_running) != 1) {
10158                 atomic_dec(&dump_running);
10159                 return;
10160         }
10161
10162         /*
10163          * Always turn off tracing when we dump.
10164          * We don't need to show trace output of what happens
10165          * between multiple crashes.
10166          *
10167          * If the user does a sysrq-z, then they can re-enable
10168          * tracing with echo 1 > tracing_on.
10169          */
10170         tracing_off();
10171
10172         local_irq_save(flags);
10173
10174         /* Simulate the iterator */
10175         trace_init_global_iter(&iter);
10176
10177         for_each_tracing_cpu(cpu) {
10178                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10179         }
10180
10181         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10182
10183         /* don't look at user memory in panic mode */
10184         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10185
10186         switch (oops_dump_mode) {
10187         case DUMP_ALL:
10188                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10189                 break;
10190         case DUMP_ORIG:
10191                 iter.cpu_file = raw_smp_processor_id();
10192                 break;
10193         case DUMP_NONE:
10194                 goto out_enable;
10195         default:
10196                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10197                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10198         }
10199
10200         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10201
10202         /* Did function tracer already get disabled? */
10203         if (ftrace_is_dead()) {
10204                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10205                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10206         }
10207
10208         /*
10209          * We need to stop all tracing on all CPUs to read
10210          * the next buffer. This is a bit expensive, but is
10211          * not done often. We read all that we can,
10212          * and then release the locks again.
10213          */
10214
10215         while (!trace_empty(&iter)) {
10216
10217                 if (!cnt)
10218                         printk(KERN_TRACE "---------------------------------\n");
10219
10220                 cnt++;
10221
10222                 trace_iterator_reset(&iter);
10223                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10224
10225                 if (trace_find_next_entry_inc(&iter) != NULL) {
10226                         int ret;
10227
10228                         ret = print_trace_line(&iter);
10229                         if (ret != TRACE_TYPE_NO_CONSUME)
10230                                 trace_consume(&iter);
10231                 }
10232                 touch_nmi_watchdog();
10233
10234                 trace_printk_seq(&iter.seq);
10235         }
10236
10237         if (!cnt)
10238                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10239         else
10240                 printk(KERN_TRACE "---------------------------------\n");
10241
10242  out_enable:
10243         tr->trace_flags |= old_userobj;
10244
10245         for_each_tracing_cpu(cpu) {
10246                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10247         }
10248         atomic_dec(&dump_running);
10249         local_irq_restore(flags);
10250 }
10251 EXPORT_SYMBOL_GPL(ftrace_dump);
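
/*
 * Illustrative sketch, not part of this file: a debugging hook could dump
 * the global ring buffer when it hits an unrecoverable state. DUMP_ORIG
 * restricts the dump to the calling CPU, DUMP_ALL dumps every CPU. The
 * function name is hypothetical.
 */
static void example_emergency_dump(bool all_cpus)
{
	ftrace_dump(all_cpus ? DUMP_ALL : DUMP_ORIG);
}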
10252
10253 #define WRITE_BUFSIZE  4096
10254
10255 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10256                                 size_t count, loff_t *ppos,
10257                                 int (*createfn)(const char *))
10258 {
10259         char *kbuf, *buf, *tmp;
10260         int ret = 0;
10261         size_t done = 0;
10262         size_t size;
10263
10264         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10265         if (!kbuf)
10266                 return -ENOMEM;
10267
10268         while (done < count) {
10269                 size = count - done;
10270
10271                 if (size >= WRITE_BUFSIZE)
10272                         size = WRITE_BUFSIZE - 1;
10273
10274                 if (copy_from_user(kbuf, buffer + done, size)) {
10275                         ret = -EFAULT;
10276                         goto out;
10277                 }
10278                 kbuf[size] = '\0';
10279                 buf = kbuf;
10280                 do {
10281                         tmp = strchr(buf, '\n');
10282                         if (tmp) {
10283                                 *tmp = '\0';
10284                                 size = tmp - buf + 1;
10285                         } else {
10286                                 size = strlen(buf);
10287                                 if (done + size < count) {
10288                                         if (buf != kbuf)
10289                                                 break;
10290                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10291                                         pr_warn("Line length is too long: Should be less than %d\n",
10292                                                 WRITE_BUFSIZE - 2);
10293                                         ret = -EINVAL;
10294                                         goto out;
10295                                 }
10296                         }
10297                         done += size;
10298
10299                         /* Remove comments */
10300                         tmp = strchr(buf, '#');
10301
10302                         if (tmp)
10303                                 *tmp = '\0';
10304
10305                         ret = createfn(buf);
10306                         if (ret)
10307                                 goto out;
10308                         buf += size;
10309
10310                 } while (done < count);
10311         }
10312         ret = done;
10313
10314 out:
10315         kfree(kbuf);
10316
10317         return ret;
10318 }
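
/*
 * Illustrative sketch, not part of this file: this is roughly how dynamic
 * event files (e.g. kprobe_events) hook into trace_parse_run_command()
 * above. The callback sees one newline-delimited, comment-stripped command
 * per invocation; a non-zero return aborts the parse loop. All names below
 * are hypothetical.
 */
static int example_create_cmd(const char *raw_command)
{
	pr_debug("parsed command: %s\n", raw_command);
	return 0;
}

static ssize_t example_events_write(struct file *file, const char __user *buffer,
				    size_t count, loff_t *ppos)
{
	return trace_parse_run_command(file, buffer, count, ppos,
				       example_create_cmd);
}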
10319
10320 #ifdef CONFIG_TRACER_MAX_TRACE
10321 __init static bool tr_needs_alloc_snapshot(const char *name)
10322 {
10323         char *test;
10324         int len = strlen(name);
10325         bool ret;
10326
10327         if (!boot_snapshot_index)
10328                 return false;
10329
10330         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10331             boot_snapshot_info[len] == '\t')
10332                 return true;
10333
10334         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10335         if (!test)
10336                 return false;
10337
10338         sprintf(test, "\t%s\t", name);
10339         ret = strstr(boot_snapshot_info, test) != NULL;
10340         kfree(test);
10341         return ret;
10342 }
10343
10344 __init static void do_allocate_snapshot(const char *name)
10345 {
10346         if (!tr_needs_alloc_snapshot(name))
10347                 return;
10348
10349         /*
10350          * When allocate_snapshot is set, the next call to
10351          * allocate_trace_buffers() (called by trace_array_get_by_name())
10352          * will allocate the snapshot buffer. That will also clear
10353          * this flag.
10354          */
10355         allocate_snapshot = true;
10356 }
10357 #else
10358 static inline void do_allocate_snapshot(const char *name) { }
10359 #endif
10360
10361 __init static void enable_instances(void)
10362 {
10363         struct trace_array *tr;
10364         char *curr_str;
10365         char *str;
10366         char *tok;
10367
10368         /* A tab is always appended */
10369         boot_instance_info[boot_instance_index - 1] = '\0';
10370         str = boot_instance_info;
10371
10372         while ((curr_str = strsep(&str, "\t"))) {
10373
10374                 tok = strsep(&curr_str, ",");
10375
10376                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10377                         do_allocate_snapshot(tok);
10378
10379                 tr = trace_array_get_by_name(tok);
10380                 if (!tr) {
10381                         pr_warn("Failed to create instance buffer %s\n", tok);
10382                         continue;
10383                 }
10384                 /* Allow user space to delete it */
10385                 trace_array_put(tr);
10386
10387                 while ((tok = strsep(&curr_str, ","))) {
10388                         early_enable_events(tr, tok, true);
10389                 }
10390         }
10391 }
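
/*
 * Illustrative note, not part of this file: the string parsed above is
 * normally filled in from a boot parameter (a trace_instance= handler
 * earlier in this file is assumed), one tab-separated entry per instance,
 * where the first comma-separated token is the instance name and the rest
 * are events to enable, e.g.:
 *
 *	trace_instance=foo,sched:sched_switch,irq:irq_handler_entry
 *
 * This creates the "foo" instance and enables both events in it before
 * user space comes up.
 */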
10392
10393 __init static int tracer_alloc_buffers(void)
10394 {
10395         int ring_buf_size;
10396         int ret = -ENOMEM;
10397
10398
10399         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10400                 pr_warn("Tracing disabled due to lockdown\n");
10401                 return -EPERM;
10402         }
10403
10404         /*
10405          * Make sure we don't accidentally add more trace options
10406          * than we have bits for.
10407          */
10408         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10409
10410         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10411                 goto out;
10412
10413         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10414                 goto out_free_buffer_mask;
10415
10416         /* Only allocate trace_printk buffers if a trace_printk exists */
10417         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10418                 /* Must be called before global_trace.buffer is allocated */
10419                 trace_printk_init_buffers();
10420
10421         /* To save memory, keep the ring buffer size to its minimum */
10422         if (ring_buffer_expanded)
10423                 ring_buf_size = trace_buf_size;
10424         else
10425                 ring_buf_size = 1;
10426
10427         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10428         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10429
10430         raw_spin_lock_init(&global_trace.start_lock);
10431
10432         /*
10433          * The prepare callback allocates some memory for the ring buffer. We
10434          * don't free the buffer if the CPU goes down. If we were to free
10435          * the buffer, then the user would lose any trace that was in the
10436          * buffer. The memory will be removed once the "instance" is removed.
10437          */
10438         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10439                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10440                                       NULL);
10441         if (ret < 0)
10442                 goto out_free_cpumask;
10443         /* Used for event triggers */
10444         ret = -ENOMEM;
10445         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10446         if (!temp_buffer)
10447                 goto out_rm_hp_state;
10448
10449         if (trace_create_savedcmd() < 0)
10450                 goto out_free_temp_buffer;
10451
10452         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10453                 goto out_free_savedcmd;
10454
10455         /* TODO: make the number of buffers hot pluggable with CPUs */
10456         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10457                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10458                 goto out_free_pipe_cpumask;
10459         }
10460         if (global_trace.buffer_disabled)
10461                 tracing_off();
10462
10463         if (trace_boot_clock) {
10464                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10465                 if (ret < 0)
10466                         pr_warn("Trace clock %s not defined, going back to default\n",
10467                                 trace_boot_clock);
10468         }
10469
10470         /*
10471          * register_tracer() might reference current_trace, so it
10472          * needs to be set before we register anything. This is
10473          * just a bootstrap of current_trace anyway.
10474          */
10475         global_trace.current_trace = &nop_trace;
10476
10477         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10478
10479         ftrace_init_global_array_ops(&global_trace);
10480
10481         init_trace_flags_index(&global_trace);
10482
10483         register_tracer(&nop_trace);
10484
10485         /* Function tracing may start here (via kernel command line) */
10486         init_function_trace();
10487
10488         /* All seems OK, enable tracing */
10489         tracing_disabled = 0;
10490
10491         atomic_notifier_chain_register(&panic_notifier_list,
10492                                        &trace_panic_notifier);
10493
10494         register_die_notifier(&trace_die_notifier);
10495
10496         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10497
10498         INIT_LIST_HEAD(&global_trace.systems);
10499         INIT_LIST_HEAD(&global_trace.events);
10500         INIT_LIST_HEAD(&global_trace.hist_vars);
10501         INIT_LIST_HEAD(&global_trace.err_log);
10502         list_add(&global_trace.list, &ftrace_trace_arrays);
10503
10504         apply_trace_boot_options();
10505
10506         register_snapshot_cmd();
10507
10508         test_can_verify();
10509
10510         return 0;
10511
10512 out_free_pipe_cpumask:
10513         free_cpumask_var(global_trace.pipe_cpumask);
10514 out_free_savedcmd:
10515         free_saved_cmdlines_buffer(savedcmd);
10516 out_free_temp_buffer:
10517         ring_buffer_free(temp_buffer);
10518 out_rm_hp_state:
10519         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10520 out_free_cpumask:
10521         free_cpumask_var(global_trace.tracing_cpumask);
10522 out_free_buffer_mask:
10523         free_cpumask_var(tracing_buffer_mask);
10524 out:
10525         return ret;
10526 }
10527
10528 void __init ftrace_boot_snapshot(void)
10529 {
10530 #ifdef CONFIG_TRACER_MAX_TRACE
10531         struct trace_array *tr;
10532
10533         if (!snapshot_at_boot)
10534                 return;
10535
10536         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10537                 if (!tr->allocated_snapshot)
10538                         continue;
10539
10540                 tracing_snapshot_instance(tr);
10541                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10542         }
10543 #endif
10544 }
10545
10546 void __init early_trace_init(void)
10547 {
10548         if (tracepoint_printk) {
10549                 tracepoint_print_iter =
10550                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10551                 if (MEM_FAIL(!tracepoint_print_iter,
10552                              "Failed to allocate trace iterator\n"))
10553                         tracepoint_printk = 0;
10554                 else
10555                         static_key_enable(&tracepoint_printk_key.key);
10556         }
10557         tracer_alloc_buffers();
10558
10559         init_events();
10560 }
10561
10562 void __init trace_init(void)
10563 {
10564         trace_event_init();
10565
10566         if (boot_instance_index)
10567                 enable_instances();
10568 }
10569
10570 __init static void clear_boot_tracer(void)
10571 {
10572         /*
10573          * The buffer holding the default bootup tracer's name is in
10574          * an init section. This function runs at late init: if the
10575          * boot tracer was never found and registered, clear the
10576          * pointer to prevent a later registration from accessing a
10577          * buffer that is about to be freed.
10578          */
10579         if (!default_bootup_tracer)
10580                 return;
10581
10582         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10583                default_bootup_tracer);
10584         default_bootup_tracer = NULL;
10585 }
10586
10587 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10588 __init static void tracing_set_default_clock(void)
10589 {
10590         /* sched_clock_stable() is determined in late_initcall */
10591         if (!trace_boot_clock && !sched_clock_stable()) {
10592                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10593                         pr_warn("Can not set tracing clock due to lockdown\n");
10594                         return;
10595                 }
10596
10597                 printk(KERN_WARNING
10598                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10599                        "If you want to keep using the local clock, then add:\n"
10600                        "  \"trace_clock=local\"\n"
10601                        "on the kernel command line\n");
10602                 tracing_set_clock(&global_trace, "global");
10603         }
10604 }
10605 #else
10606 static inline void tracing_set_default_clock(void) { }
10607 #endif
10608
10609 __init static int late_trace_init(void)
10610 {
10611         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10612                 static_key_disable(&tracepoint_printk_key.key);
10613                 tracepoint_printk = 0;
10614         }
10615
10616         tracing_set_default_clock();
10617         clear_boot_tracer();
10618         return 0;
10619 }
10620
10621 late_initcall_sync(late_trace_init);