Merge tag 'objtool_urgent_for_v6.5_rc2' of git://git.kernel.org/pub/scm/linux/kernel...
[platform/kernel/linux-starfive.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 /*
58  * On boot up, the ring buffer is set to the minimum size, so that
59  * we do not waste memory on systems that are not using tracing.
60  */
61 bool ring_buffer_expanded;
62
63 #ifdef CONFIG_FTRACE_STARTUP_TEST
64 /*
65  * We need to change this state when a selftest is running.
66  * A selftest will lurk into the ring-buffer to count the
67  * entries inserted during the selftest although some concurrent
68  * insertions into the ring-buffer such as trace_printk could occurred
69  * at the same time, giving false positive or negative results.
70  */
71 static bool __read_mostly tracing_selftest_running;
72
73 /*
74  * If boot-time tracing including tracers/events via kernel cmdline
75  * is running, we do not want to run SELFTEST.
76  */
77 bool __read_mostly tracing_selftest_disabled;
78
79 void __init disable_tracing_selftest(const char *reason)
80 {
81         if (!tracing_selftest_disabled) {
82                 tracing_selftest_disabled = true;
83                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
84         }
85 }
86 #else
87 #define tracing_selftest_running        0
88 #define tracing_selftest_disabled       0
89 #endif
90
91 /* Pipe tracepoints to printk */
92 static struct trace_iterator *tracepoint_print_iter;
93 int tracepoint_printk;
94 static bool tracepoint_printk_stop_on_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96
97 /* For tracers that don't implement custom flags */
98 static struct tracer_opt dummy_tracer_opt[] = {
99         { }
100 };
101
102 static int
103 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
104 {
105         return 0;
106 }
107
108 /*
109  * To prevent the comm cache from being overwritten when no
110  * tracing is active, only save the comm when a trace event
111  * occurred.
112  */
113 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
114
115 /*
116  * Kill all tracing for good (never come back).
117  * It is initialized to 1 but will turn to zero if the initialization
118  * of the tracer is successful. But that is the only place that sets
119  * this back to zero.
120  */
121 static int tracing_disabled = 1;
122
123 cpumask_var_t __read_mostly     tracing_buffer_mask;
124
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
131  * capturing traces that lead to crashes and outputing it to a
132  * serial console.
133  *
134  * It is default off, but you can enable it with either specifying
135  * "ftrace_dump_on_oops" in the kernel command line, or setting
136  * /proc/sys/kernel/ftrace_dump_on_oops
137  * Set 1 if you want to dump buffers of all CPUs
138  * Set 2 if you want to dump the buffer of the CPU that triggered oops
139  */
140
141 enum ftrace_dump_mode ftrace_dump_on_oops;
142
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
145
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
148 struct trace_eval_map_head {
149         struct module                   *mod;
150         unsigned long                   length;
151 };
152
153 union trace_eval_map_item;
154
155 struct trace_eval_map_tail {
156         /*
157          * "end" is first and points to NULL as it must be different
158          * than "mod" or "eval_string"
159          */
160         union trace_eval_map_item       *next;
161         const char                      *end;   /* points to NULL */
162 };
163
164 static DEFINE_MUTEX(trace_eval_mutex);
165
166 /*
167  * The trace_eval_maps are saved in an array with two extra elements,
168  * one at the beginning, and one at the end. The beginning item contains
169  * the count of the saved maps (head.length), and the module they
170  * belong to if not built in (head.mod). The ending item contains a
171  * pointer to the next array of saved eval_map items.
172  */
173 union trace_eval_map_item {
174         struct trace_eval_map           map;
175         struct trace_eval_map_head      head;
176         struct trace_eval_map_tail      tail;
177 };
178
179 static union trace_eval_map_item *trace_eval_maps;
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
181
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184                                    struct trace_buffer *buffer,
185                                    unsigned int trace_ctx);
186
187 #define MAX_TRACER_SIZE         100
188 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
189 static char *default_bootup_tracer;
190
191 static bool allocate_snapshot;
192 static bool snapshot_at_boot;
193
194 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
195 static int boot_instance_index;
196
197 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
198 static int boot_snapshot_index;
199
200 static int __init set_cmdline_ftrace(char *str)
201 {
202         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
203         default_bootup_tracer = bootup_tracer_buf;
204         /* We are using ftrace early, expand it */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("ftrace=", set_cmdline_ftrace);
209
210 static int __init set_ftrace_dump_on_oops(char *str)
211 {
212         if (*str++ != '=' || !*str || !strcmp("1", str)) {
213                 ftrace_dump_on_oops = DUMP_ALL;
214                 return 1;
215         }
216
217         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
218                 ftrace_dump_on_oops = DUMP_ORIG;
219                 return 1;
220         }
221
222         return 0;
223 }
224 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
225
226 static int __init stop_trace_on_warning(char *str)
227 {
228         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
229                 __disable_trace_on_warning = 1;
230         return 1;
231 }
232 __setup("traceoff_on_warning", stop_trace_on_warning);
233
234 static int __init boot_alloc_snapshot(char *str)
235 {
236         char *slot = boot_snapshot_info + boot_snapshot_index;
237         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
238         int ret;
239
240         if (str[0] == '=') {
241                 str++;
242                 if (strlen(str) >= left)
243                         return -1;
244
245                 ret = snprintf(slot, left, "%s\t", str);
246                 boot_snapshot_index += ret;
247         } else {
248                 allocate_snapshot = true;
249                 /* We also need the main ring buffer expanded */
250                 ring_buffer_expanded = true;
251         }
252         return 1;
253 }
254 __setup("alloc_snapshot", boot_alloc_snapshot);
255
256
257 static int __init boot_snapshot(char *str)
258 {
259         snapshot_at_boot = true;
260         boot_alloc_snapshot(str);
261         return 1;
262 }
263 __setup("ftrace_boot_snapshot", boot_snapshot);
264
265
266 static int __init boot_instance(char *str)
267 {
268         char *slot = boot_instance_info + boot_instance_index;
269         int left = sizeof(boot_instance_info) - boot_instance_index;
270         int ret;
271
272         if (strlen(str) >= left)
273                 return -1;
274
275         ret = snprintf(slot, left, "%s\t", str);
276         boot_instance_index += ret;
277
278         return 1;
279 }
280 __setup("trace_instance=", boot_instance);
281
282
283 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
284
285 static int __init set_trace_boot_options(char *str)
286 {
287         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
288         return 1;
289 }
290 __setup("trace_options=", set_trace_boot_options);
291
292 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
293 static char *trace_boot_clock __initdata;
294
295 static int __init set_trace_boot_clock(char *str)
296 {
297         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
298         trace_boot_clock = trace_boot_clock_buf;
299         return 1;
300 }
301 __setup("trace_clock=", set_trace_boot_clock);
302
303 static int __init set_tracepoint_printk(char *str)
304 {
305         /* Ignore the "tp_printk_stop_on_boot" param */
306         if (*str == '_')
307                 return 0;
308
309         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
310                 tracepoint_printk = 1;
311         return 1;
312 }
313 __setup("tp_printk", set_tracepoint_printk);
314
315 static int __init set_tracepoint_printk_stop(char *str)
316 {
317         tracepoint_printk_stop_on_boot = true;
318         return 1;
319 }
320 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
321
322 unsigned long long ns2usecs(u64 nsec)
323 {
324         nsec += 500;
325         do_div(nsec, 1000);
326         return nsec;
327 }
328
329 static void
330 trace_process_export(struct trace_export *export,
331                struct ring_buffer_event *event, int flag)
332 {
333         struct trace_entry *entry;
334         unsigned int size = 0;
335
336         if (export->flags & flag) {
337                 entry = ring_buffer_event_data(event);
338                 size = ring_buffer_event_length(event);
339                 export->write(export, entry, size);
340         }
341 }
342
343 static DEFINE_MUTEX(ftrace_export_lock);
344
345 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
346
347 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
348 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
350
351 static inline void ftrace_exports_enable(struct trace_export *export)
352 {
353         if (export->flags & TRACE_EXPORT_FUNCTION)
354                 static_branch_inc(&trace_function_exports_enabled);
355
356         if (export->flags & TRACE_EXPORT_EVENT)
357                 static_branch_inc(&trace_event_exports_enabled);
358
359         if (export->flags & TRACE_EXPORT_MARKER)
360                 static_branch_inc(&trace_marker_exports_enabled);
361 }
362
363 static inline void ftrace_exports_disable(struct trace_export *export)
364 {
365         if (export->flags & TRACE_EXPORT_FUNCTION)
366                 static_branch_dec(&trace_function_exports_enabled);
367
368         if (export->flags & TRACE_EXPORT_EVENT)
369                 static_branch_dec(&trace_event_exports_enabled);
370
371         if (export->flags & TRACE_EXPORT_MARKER)
372                 static_branch_dec(&trace_marker_exports_enabled);
373 }
374
375 static void ftrace_exports(struct ring_buffer_event *event, int flag)
376 {
377         struct trace_export *export;
378
379         preempt_disable_notrace();
380
381         export = rcu_dereference_raw_check(ftrace_exports_list);
382         while (export) {
383                 trace_process_export(export, event, flag);
384                 export = rcu_dereference_raw_check(export->next);
385         }
386
387         preempt_enable_notrace();
388 }
389
390 static inline void
391 add_trace_export(struct trace_export **list, struct trace_export *export)
392 {
393         rcu_assign_pointer(export->next, *list);
394         /*
395          * We are entering export into the list but another
396          * CPU might be walking that list. We need to make sure
397          * the export->next pointer is valid before another CPU sees
398          * the export pointer included into the list.
399          */
400         rcu_assign_pointer(*list, export);
401 }
402
403 static inline int
404 rm_trace_export(struct trace_export **list, struct trace_export *export)
405 {
406         struct trace_export **p;
407
408         for (p = list; *p != NULL; p = &(*p)->next)
409                 if (*p == export)
410                         break;
411
412         if (*p != export)
413                 return -1;
414
415         rcu_assign_pointer(*p, (*p)->next);
416
417         return 0;
418 }
419
420 static inline void
421 add_ftrace_export(struct trace_export **list, struct trace_export *export)
422 {
423         ftrace_exports_enable(export);
424
425         add_trace_export(list, export);
426 }
427
428 static inline int
429 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
430 {
431         int ret;
432
433         ret = rm_trace_export(list, export);
434         ftrace_exports_disable(export);
435
436         return ret;
437 }
438
439 int register_ftrace_export(struct trace_export *export)
440 {
441         if (WARN_ON_ONCE(!export->write))
442                 return -1;
443
444         mutex_lock(&ftrace_export_lock);
445
446         add_ftrace_export(&ftrace_exports_list, export);
447
448         mutex_unlock(&ftrace_export_lock);
449
450         return 0;
451 }
452 EXPORT_SYMBOL_GPL(register_ftrace_export);
453
454 int unregister_ftrace_export(struct trace_export *export)
455 {
456         int ret;
457
458         mutex_lock(&ftrace_export_lock);
459
460         ret = rm_ftrace_export(&ftrace_exports_list, export);
461
462         mutex_unlock(&ftrace_export_lock);
463
464         return ret;
465 }
466 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
467
468 /* trace_flags holds trace_options default values */
469 #define TRACE_DEFAULT_FLAGS                                             \
470         (FUNCTION_DEFAULT_FLAGS |                                       \
471          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
472          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
473          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
474          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
475          TRACE_ITER_HASH_PTR)
476
477 /* trace_options that are only supported by global_trace */
478 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
479                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
480
481 /* trace_flags that are default zero for instances */
482 #define ZEROED_TRACE_FLAGS \
483         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
484
485 /*
486  * The global_trace is the descriptor that holds the top-level tracing
487  * buffers for the live tracing.
488  */
489 static struct trace_array global_trace = {
490         .trace_flags = TRACE_DEFAULT_FLAGS,
491 };
492
493 LIST_HEAD(ftrace_trace_arrays);
494
495 int trace_array_get(struct trace_array *this_tr)
496 {
497         struct trace_array *tr;
498         int ret = -ENODEV;
499
500         mutex_lock(&trace_types_lock);
501         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
502                 if (tr == this_tr) {
503                         tr->ref++;
504                         ret = 0;
505                         break;
506                 }
507         }
508         mutex_unlock(&trace_types_lock);
509
510         return ret;
511 }
512
513 static void __trace_array_put(struct trace_array *this_tr)
514 {
515         WARN_ON(!this_tr->ref);
516         this_tr->ref--;
517 }
518
519 /**
520  * trace_array_put - Decrement the reference counter for this trace array.
521  * @this_tr : pointer to the trace array
522  *
523  * NOTE: Use this when we no longer need the trace array returned by
524  * trace_array_get_by_name(). This ensures the trace array can be later
525  * destroyed.
526  *
527  */
528 void trace_array_put(struct trace_array *this_tr)
529 {
530         if (!this_tr)
531                 return;
532
533         mutex_lock(&trace_types_lock);
534         __trace_array_put(this_tr);
535         mutex_unlock(&trace_types_lock);
536 }
537 EXPORT_SYMBOL_GPL(trace_array_put);
538
539 int tracing_check_open_get_tr(struct trace_array *tr)
540 {
541         int ret;
542
543         ret = security_locked_down(LOCKDOWN_TRACEFS);
544         if (ret)
545                 return ret;
546
547         if (tracing_disabled)
548                 return -ENODEV;
549
550         if (tr && trace_array_get(tr) < 0)
551                 return -ENODEV;
552
553         return 0;
554 }
555
556 int call_filter_check_discard(struct trace_event_call *call, void *rec,
557                               struct trace_buffer *buffer,
558                               struct ring_buffer_event *event)
559 {
560         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
561             !filter_match_preds(call->filter, rec)) {
562                 __trace_event_discard_commit(buffer, event);
563                 return 1;
564         }
565
566         return 0;
567 }
568
569 /**
570  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
571  * @filtered_pids: The list of pids to check
572  * @search_pid: The PID to find in @filtered_pids
573  *
574  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
575  */
576 bool
577 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
578 {
579         return trace_pid_list_is_set(filtered_pids, search_pid);
580 }
581
582 /**
583  * trace_ignore_this_task - should a task be ignored for tracing
584  * @filtered_pids: The list of pids to check
585  * @filtered_no_pids: The list of pids not to be traced
586  * @task: The task that should be ignored if not filtered
587  *
588  * Checks if @task should be traced or not from @filtered_pids.
589  * Returns true if @task should *NOT* be traced.
590  * Returns false if @task should be traced.
591  */
592 bool
593 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
594                        struct trace_pid_list *filtered_no_pids,
595                        struct task_struct *task)
596 {
597         /*
598          * If filtered_no_pids is not empty, and the task's pid is listed
599          * in filtered_no_pids, then return true.
600          * Otherwise, if filtered_pids is empty, that means we can
601          * trace all tasks. If it has content, then only trace pids
602          * within filtered_pids.
603          */
604
605         return (filtered_pids &&
606                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
607                 (filtered_no_pids &&
608                  trace_find_filtered_pid(filtered_no_pids, task->pid));
609 }
610
611 /**
612  * trace_filter_add_remove_task - Add or remove a task from a pid_list
613  * @pid_list: The list to modify
614  * @self: The current task for fork or NULL for exit
615  * @task: The task to add or remove
616  *
617  * If adding a task, if @self is defined, the task is only added if @self
618  * is also included in @pid_list. This happens on fork and tasks should
619  * only be added when the parent is listed. If @self is NULL, then the
620  * @task pid will be removed from the list, which would happen on exit
621  * of a task.
622  */
623 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
624                                   struct task_struct *self,
625                                   struct task_struct *task)
626 {
627         if (!pid_list)
628                 return;
629
630         /* For forks, we only add if the forking task is listed */
631         if (self) {
632                 if (!trace_find_filtered_pid(pid_list, self->pid))
633                         return;
634         }
635
636         /* "self" is set for forks, and NULL for exits */
637         if (self)
638                 trace_pid_list_set(pid_list, task->pid);
639         else
640                 trace_pid_list_clear(pid_list, task->pid);
641 }
642
643 /**
644  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
645  * @pid_list: The pid list to show
646  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
647  * @pos: The position of the file
648  *
649  * This is used by the seq_file "next" operation to iterate the pids
650  * listed in a trace_pid_list structure.
651  *
652  * Returns the pid+1 as we want to display pid of zero, but NULL would
653  * stop the iteration.
654  */
655 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
656 {
657         long pid = (unsigned long)v;
658         unsigned int next;
659
660         (*pos)++;
661
662         /* pid already is +1 of the actual previous bit */
663         if (trace_pid_list_next(pid_list, pid, &next) < 0)
664                 return NULL;
665
666         pid = next;
667
668         /* Return pid + 1 to allow zero to be represented */
669         return (void *)(pid + 1);
670 }
671
672 /**
673  * trace_pid_start - Used for seq_file to start reading pid lists
674  * @pid_list: The pid list to show
675  * @pos: The position of the file
676  *
677  * This is used by seq_file "start" operation to start the iteration
678  * of listing pids.
679  *
680  * Returns the pid+1 as we want to display pid of zero, but NULL would
681  * stop the iteration.
682  */
683 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
684 {
685         unsigned long pid;
686         unsigned int first;
687         loff_t l = 0;
688
689         if (trace_pid_list_first(pid_list, &first) < 0)
690                 return NULL;
691
692         pid = first;
693
694         /* Return pid + 1 so that zero can be the exit value */
695         for (pid++; pid && l < *pos;
696              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
697                 ;
698         return (void *)pid;
699 }
700
701 /**
702  * trace_pid_show - show the current pid in seq_file processing
703  * @m: The seq_file structure to write into
704  * @v: A void pointer of the pid (+1) value to display
705  *
706  * Can be directly used by seq_file operations to display the current
707  * pid value.
708  */
709 int trace_pid_show(struct seq_file *m, void *v)
710 {
711         unsigned long pid = (unsigned long)v - 1;
712
713         seq_printf(m, "%lu\n", pid);
714         return 0;
715 }
716
717 /* 128 should be much more than enough */
718 #define PID_BUF_SIZE            127
719
720 int trace_pid_write(struct trace_pid_list *filtered_pids,
721                     struct trace_pid_list **new_pid_list,
722                     const char __user *ubuf, size_t cnt)
723 {
724         struct trace_pid_list *pid_list;
725         struct trace_parser parser;
726         unsigned long val;
727         int nr_pids = 0;
728         ssize_t read = 0;
729         ssize_t ret;
730         loff_t pos;
731         pid_t pid;
732
733         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
734                 return -ENOMEM;
735
736         /*
737          * Always recreate a new array. The write is an all or nothing
738          * operation. Always create a new array when adding new pids by
739          * the user. If the operation fails, then the current list is
740          * not modified.
741          */
742         pid_list = trace_pid_list_alloc();
743         if (!pid_list) {
744                 trace_parser_put(&parser);
745                 return -ENOMEM;
746         }
747
748         if (filtered_pids) {
749                 /* copy the current bits to the new max */
750                 ret = trace_pid_list_first(filtered_pids, &pid);
751                 while (!ret) {
752                         trace_pid_list_set(pid_list, pid);
753                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
754                         nr_pids++;
755                 }
756         }
757
758         ret = 0;
759         while (cnt > 0) {
760
761                 pos = 0;
762
763                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
764                 if (ret < 0)
765                         break;
766
767                 read += ret;
768                 ubuf += ret;
769                 cnt -= ret;
770
771                 if (!trace_parser_loaded(&parser))
772                         break;
773
774                 ret = -EINVAL;
775                 if (kstrtoul(parser.buffer, 0, &val))
776                         break;
777
778                 pid = (pid_t)val;
779
780                 if (trace_pid_list_set(pid_list, pid) < 0) {
781                         ret = -1;
782                         break;
783                 }
784                 nr_pids++;
785
786                 trace_parser_clear(&parser);
787                 ret = 0;
788         }
789         trace_parser_put(&parser);
790
791         if (ret < 0) {
792                 trace_pid_list_free(pid_list);
793                 return ret;
794         }
795
796         if (!nr_pids) {
797                 /* Cleared the list of pids */
798                 trace_pid_list_free(pid_list);
799                 pid_list = NULL;
800         }
801
802         *new_pid_list = pid_list;
803
804         return read;
805 }
806
807 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
808 {
809         u64 ts;
810
811         /* Early boot up does not have a buffer yet */
812         if (!buf->buffer)
813                 return trace_clock_local();
814
815         ts = ring_buffer_time_stamp(buf->buffer);
816         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
817
818         return ts;
819 }
820
821 u64 ftrace_now(int cpu)
822 {
823         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
824 }
825
826 /**
827  * tracing_is_enabled - Show if global_trace has been enabled
828  *
829  * Shows if the global trace has been enabled or not. It uses the
830  * mirror flag "buffer_disabled" to be used in fast paths such as for
831  * the irqsoff tracer. But it may be inaccurate due to races. If you
832  * need to know the accurate state, use tracing_is_on() which is a little
833  * slower, but accurate.
834  */
835 int tracing_is_enabled(void)
836 {
837         /*
838          * For quick access (irqsoff uses this in fast path), just
839          * return the mirror variable of the state of the ring buffer.
840          * It's a little racy, but we don't really care.
841          */
842         smp_rmb();
843         return !global_trace.buffer_disabled;
844 }
845
846 /*
847  * trace_buf_size is the size in bytes that is allocated
848  * for a buffer. Note, the number of bytes is always rounded
849  * to page size.
850  *
851  * This number is purposely set to a low number of 16384.
852  * If the dump on oops happens, it will be much appreciated
853  * to not have to wait for all that output. Anyway this can be
854  * boot time and run time configurable.
855  */
856 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
857
858 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
859
860 /* trace_types holds a link list of available tracers. */
861 static struct tracer            *trace_types __read_mostly;
862
863 /*
864  * trace_types_lock is used to protect the trace_types list.
865  */
866 DEFINE_MUTEX(trace_types_lock);
867
868 /*
869  * serialize the access of the ring buffer
870  *
871  * ring buffer serializes readers, but it is low level protection.
872  * The validity of the events (which returns by ring_buffer_peek() ..etc)
873  * are not protected by ring buffer.
874  *
875  * The content of events may become garbage if we allow other process consumes
876  * these events concurrently:
877  *   A) the page of the consumed events may become a normal page
878  *      (not reader page) in ring buffer, and this page will be rewritten
879  *      by events producer.
880  *   B) The page of the consumed events may become a page for splice_read,
881  *      and this page will be returned to system.
882  *
883  * These primitives allow multi process access to different cpu ring buffer
884  * concurrently.
885  *
886  * These primitives don't distinguish read-only and read-consume access.
887  * Multi read-only access are also serialized.
888  */
889
890 #ifdef CONFIG_SMP
891 static DECLARE_RWSEM(all_cpu_access_lock);
892 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
893
894 static inline void trace_access_lock(int cpu)
895 {
896         if (cpu == RING_BUFFER_ALL_CPUS) {
897                 /* gain it for accessing the whole ring buffer. */
898                 down_write(&all_cpu_access_lock);
899         } else {
900                 /* gain it for accessing a cpu ring buffer. */
901
902                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
903                 down_read(&all_cpu_access_lock);
904
905                 /* Secondly block other access to this @cpu ring buffer. */
906                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
907         }
908 }
909
910 static inline void trace_access_unlock(int cpu)
911 {
912         if (cpu == RING_BUFFER_ALL_CPUS) {
913                 up_write(&all_cpu_access_lock);
914         } else {
915                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
916                 up_read(&all_cpu_access_lock);
917         }
918 }
919
920 static inline void trace_access_lock_init(void)
921 {
922         int cpu;
923
924         for_each_possible_cpu(cpu)
925                 mutex_init(&per_cpu(cpu_access_lock, cpu));
926 }
927
928 #else
929
930 static DEFINE_MUTEX(access_lock);
931
932 static inline void trace_access_lock(int cpu)
933 {
934         (void)cpu;
935         mutex_lock(&access_lock);
936 }
937
938 static inline void trace_access_unlock(int cpu)
939 {
940         (void)cpu;
941         mutex_unlock(&access_lock);
942 }
943
944 static inline void trace_access_lock_init(void)
945 {
946 }
947
948 #endif
949
950 #ifdef CONFIG_STACKTRACE
951 static void __ftrace_trace_stack(struct trace_buffer *buffer,
952                                  unsigned int trace_ctx,
953                                  int skip, struct pt_regs *regs);
954 static inline void ftrace_trace_stack(struct trace_array *tr,
955                                       struct trace_buffer *buffer,
956                                       unsigned int trace_ctx,
957                                       int skip, struct pt_regs *regs);
958
959 #else
960 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
961                                         unsigned int trace_ctx,
962                                         int skip, struct pt_regs *regs)
963 {
964 }
965 static inline void ftrace_trace_stack(struct trace_array *tr,
966                                       struct trace_buffer *buffer,
967                                       unsigned long trace_ctx,
968                                       int skip, struct pt_regs *regs)
969 {
970 }
971
972 #endif
973
974 static __always_inline void
975 trace_event_setup(struct ring_buffer_event *event,
976                   int type, unsigned int trace_ctx)
977 {
978         struct trace_entry *ent = ring_buffer_event_data(event);
979
980         tracing_generic_entry_update(ent, type, trace_ctx);
981 }
982
983 static __always_inline struct ring_buffer_event *
984 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
985                           int type,
986                           unsigned long len,
987                           unsigned int trace_ctx)
988 {
989         struct ring_buffer_event *event;
990
991         event = ring_buffer_lock_reserve(buffer, len);
992         if (event != NULL)
993                 trace_event_setup(event, type, trace_ctx);
994
995         return event;
996 }
997
998 void tracer_tracing_on(struct trace_array *tr)
999 {
1000         if (tr->array_buffer.buffer)
1001                 ring_buffer_record_on(tr->array_buffer.buffer);
1002         /*
1003          * This flag is looked at when buffers haven't been allocated
1004          * yet, or by some tracers (like irqsoff), that just want to
1005          * know if the ring buffer has been disabled, but it can handle
1006          * races of where it gets disabled but we still do a record.
1007          * As the check is in the fast path of the tracers, it is more
1008          * important to be fast than accurate.
1009          */
1010         tr->buffer_disabled = 0;
1011         /* Make the flag seen by readers */
1012         smp_wmb();
1013 }
1014
1015 /**
1016  * tracing_on - enable tracing buffers
1017  *
1018  * This function enables tracing buffers that may have been
1019  * disabled with tracing_off.
1020  */
1021 void tracing_on(void)
1022 {
1023         tracer_tracing_on(&global_trace);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_on);
1026
1027
1028 static __always_inline void
1029 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1030 {
1031         __this_cpu_write(trace_taskinfo_save, true);
1032
1033         /* If this is the temp buffer, we need to commit fully */
1034         if (this_cpu_read(trace_buffered_event) == event) {
1035                 /* Length is in event->array[0] */
1036                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1037                 /* Release the temp buffer */
1038                 this_cpu_dec(trace_buffered_event_cnt);
1039                 /* ring_buffer_unlock_commit() enables preemption */
1040                 preempt_enable_notrace();
1041         } else
1042                 ring_buffer_unlock_commit(buffer);
1043 }
1044
1045 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1046                        const char *str, int size)
1047 {
1048         struct ring_buffer_event *event;
1049         struct trace_buffer *buffer;
1050         struct print_entry *entry;
1051         unsigned int trace_ctx;
1052         int alloc;
1053
1054         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1055                 return 0;
1056
1057         if (unlikely(tracing_selftest_running && tr == &global_trace))
1058                 return 0;
1059
1060         if (unlikely(tracing_disabled))
1061                 return 0;
1062
1063         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1064
1065         trace_ctx = tracing_gen_ctx();
1066         buffer = tr->array_buffer.buffer;
1067         ring_buffer_nest_start(buffer);
1068         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1069                                             trace_ctx);
1070         if (!event) {
1071                 size = 0;
1072                 goto out;
1073         }
1074
1075         entry = ring_buffer_event_data(event);
1076         entry->ip = ip;
1077
1078         memcpy(&entry->buf, str, size);
1079
1080         /* Add a newline if necessary */
1081         if (entry->buf[size - 1] != '\n') {
1082                 entry->buf[size] = '\n';
1083                 entry->buf[size + 1] = '\0';
1084         } else
1085                 entry->buf[size] = '\0';
1086
1087         __buffer_unlock_commit(buffer, event);
1088         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1089  out:
1090         ring_buffer_nest_end(buffer);
1091         return size;
1092 }
1093 EXPORT_SYMBOL_GPL(__trace_array_puts);
1094
1095 /**
1096  * __trace_puts - write a constant string into the trace buffer.
1097  * @ip:    The address of the caller
1098  * @str:   The constant string to write
1099  * @size:  The size of the string.
1100  */
1101 int __trace_puts(unsigned long ip, const char *str, int size)
1102 {
1103         return __trace_array_puts(&global_trace, ip, str, size);
1104 }
1105 EXPORT_SYMBOL_GPL(__trace_puts);
1106
1107 /**
1108  * __trace_bputs - write the pointer to a constant string into trace buffer
1109  * @ip:    The address of the caller
1110  * @str:   The constant string to write to the buffer to
1111  */
1112 int __trace_bputs(unsigned long ip, const char *str)
1113 {
1114         struct ring_buffer_event *event;
1115         struct trace_buffer *buffer;
1116         struct bputs_entry *entry;
1117         unsigned int trace_ctx;
1118         int size = sizeof(struct bputs_entry);
1119         int ret = 0;
1120
1121         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1122                 return 0;
1123
1124         if (unlikely(tracing_selftest_running || tracing_disabled))
1125                 return 0;
1126
1127         trace_ctx = tracing_gen_ctx();
1128         buffer = global_trace.array_buffer.buffer;
1129
1130         ring_buffer_nest_start(buffer);
1131         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1132                                             trace_ctx);
1133         if (!event)
1134                 goto out;
1135
1136         entry = ring_buffer_event_data(event);
1137         entry->ip                       = ip;
1138         entry->str                      = str;
1139
1140         __buffer_unlock_commit(buffer, event);
1141         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1142
1143         ret = 1;
1144  out:
1145         ring_buffer_nest_end(buffer);
1146         return ret;
1147 }
1148 EXPORT_SYMBOL_GPL(__trace_bputs);
1149
1150 #ifdef CONFIG_TRACER_SNAPSHOT
1151 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1152                                            void *cond_data)
1153 {
1154         struct tracer *tracer = tr->current_trace;
1155         unsigned long flags;
1156
1157         if (in_nmi()) {
1158                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1159                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1160                 return;
1161         }
1162
1163         if (!tr->allocated_snapshot) {
1164                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1165                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1166                 tracer_tracing_off(tr);
1167                 return;
1168         }
1169
1170         /* Note, snapshot can not be used when the tracer uses it */
1171         if (tracer->use_max_tr) {
1172                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1173                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1174                 return;
1175         }
1176
1177         local_irq_save(flags);
1178         update_max_tr(tr, current, smp_processor_id(), cond_data);
1179         local_irq_restore(flags);
1180 }
1181
1182 void tracing_snapshot_instance(struct trace_array *tr)
1183 {
1184         tracing_snapshot_instance_cond(tr, NULL);
1185 }
1186
1187 /**
1188  * tracing_snapshot - take a snapshot of the current buffer.
1189  *
1190  * This causes a swap between the snapshot buffer and the current live
1191  * tracing buffer. You can use this to take snapshots of the live
1192  * trace when some condition is triggered, but continue to trace.
1193  *
1194  * Note, make sure to allocate the snapshot with either
1195  * a tracing_snapshot_alloc(), or by doing it manually
1196  * with: echo 1 > /sys/kernel/tracing/snapshot
1197  *
1198  * If the snapshot buffer is not allocated, it will stop tracing.
1199  * Basically making a permanent snapshot.
1200  */
1201 void tracing_snapshot(void)
1202 {
1203         struct trace_array *tr = &global_trace;
1204
1205         tracing_snapshot_instance(tr);
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot);
1208
1209 /**
1210  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1211  * @tr:         The tracing instance to snapshot
1212  * @cond_data:  The data to be tested conditionally, and possibly saved
1213  *
1214  * This is the same as tracing_snapshot() except that the snapshot is
1215  * conditional - the snapshot will only happen if the
1216  * cond_snapshot.update() implementation receiving the cond_data
1217  * returns true, which means that the trace array's cond_snapshot
1218  * update() operation used the cond_data to determine whether the
1219  * snapshot should be taken, and if it was, presumably saved it along
1220  * with the snapshot.
1221  */
1222 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1223 {
1224         tracing_snapshot_instance_cond(tr, cond_data);
1225 }
1226 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1227
1228 /**
1229  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1230  * @tr:         The tracing instance
1231  *
1232  * When the user enables a conditional snapshot using
1233  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1234  * with the snapshot.  This accessor is used to retrieve it.
1235  *
1236  * Should not be called from cond_snapshot.update(), since it takes
1237  * the tr->max_lock lock, which the code calling
1238  * cond_snapshot.update() has already done.
1239  *
1240  * Returns the cond_data associated with the trace array's snapshot.
1241  */
1242 void *tracing_cond_snapshot_data(struct trace_array *tr)
1243 {
1244         void *cond_data = NULL;
1245
1246         local_irq_disable();
1247         arch_spin_lock(&tr->max_lock);
1248
1249         if (tr->cond_snapshot)
1250                 cond_data = tr->cond_snapshot->cond_data;
1251
1252         arch_spin_unlock(&tr->max_lock);
1253         local_irq_enable();
1254
1255         return cond_data;
1256 }
1257 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1258
1259 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1260                                         struct array_buffer *size_buf, int cpu_id);
1261 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1262
1263 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1264 {
1265         int ret;
1266
1267         if (!tr->allocated_snapshot) {
1268
1269                 /* allocate spare buffer */
1270                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1271                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1272                 if (ret < 0)
1273                         return ret;
1274
1275                 tr->allocated_snapshot = true;
1276         }
1277
1278         return 0;
1279 }
1280
1281 static void free_snapshot(struct trace_array *tr)
1282 {
1283         /*
1284          * We don't free the ring buffer. instead, resize it because
1285          * The max_tr ring buffer has some state (e.g. ring->clock) and
1286          * we want preserve it.
1287          */
1288         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1289         set_buffer_entries(&tr->max_buffer, 1);
1290         tracing_reset_online_cpus(&tr->max_buffer);
1291         tr->allocated_snapshot = false;
1292 }
1293
1294 /**
1295  * tracing_alloc_snapshot - allocate snapshot buffer.
1296  *
1297  * This only allocates the snapshot buffer if it isn't already
1298  * allocated - it doesn't also take a snapshot.
1299  *
1300  * This is meant to be used in cases where the snapshot buffer needs
1301  * to be set up for events that can't sleep but need to be able to
1302  * trigger a snapshot.
1303  */
1304 int tracing_alloc_snapshot(void)
1305 {
1306         struct trace_array *tr = &global_trace;
1307         int ret;
1308
1309         ret = tracing_alloc_snapshot_instance(tr);
1310         WARN_ON(ret < 0);
1311
1312         return ret;
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1315
1316 /**
1317  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1318  *
1319  * This is similar to tracing_snapshot(), but it will allocate the
1320  * snapshot buffer if it isn't already allocated. Use this only
1321  * where it is safe to sleep, as the allocation may sleep.
1322  *
1323  * This causes a swap between the snapshot buffer and the current live
1324  * tracing buffer. You can use this to take snapshots of the live
1325  * trace when some condition is triggered, but continue to trace.
1326  */
1327 void tracing_snapshot_alloc(void)
1328 {
1329         int ret;
1330
1331         ret = tracing_alloc_snapshot();
1332         if (ret < 0)
1333                 return;
1334
1335         tracing_snapshot();
1336 }
1337 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1338
1339 /**
1340  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1341  * @tr:         The tracing instance
1342  * @cond_data:  User data to associate with the snapshot
1343  * @update:     Implementation of the cond_snapshot update function
1344  *
1345  * Check whether the conditional snapshot for the given instance has
1346  * already been enabled, or if the current tracer is already using a
1347  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1348  * save the cond_data and update function inside.
1349  *
1350  * Returns 0 if successful, error otherwise.
1351  */
1352 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1353                                  cond_update_fn_t update)
1354 {
1355         struct cond_snapshot *cond_snapshot;
1356         int ret = 0;
1357
1358         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1359         if (!cond_snapshot)
1360                 return -ENOMEM;
1361
1362         cond_snapshot->cond_data = cond_data;
1363         cond_snapshot->update = update;
1364
1365         mutex_lock(&trace_types_lock);
1366
1367         ret = tracing_alloc_snapshot_instance(tr);
1368         if (ret)
1369                 goto fail_unlock;
1370
1371         if (tr->current_trace->use_max_tr) {
1372                 ret = -EBUSY;
1373                 goto fail_unlock;
1374         }
1375
1376         /*
1377          * The cond_snapshot can only change to NULL without the
1378          * trace_types_lock. We don't care if we race with it going
1379          * to NULL, but we want to make sure that it's not set to
1380          * something other than NULL when we get here, which we can
1381          * do safely with only holding the trace_types_lock and not
1382          * having to take the max_lock.
1383          */
1384         if (tr->cond_snapshot) {
1385                 ret = -EBUSY;
1386                 goto fail_unlock;
1387         }
1388
1389         local_irq_disable();
1390         arch_spin_lock(&tr->max_lock);
1391         tr->cond_snapshot = cond_snapshot;
1392         arch_spin_unlock(&tr->max_lock);
1393         local_irq_enable();
1394
1395         mutex_unlock(&trace_types_lock);
1396
1397         return ret;
1398
1399  fail_unlock:
1400         mutex_unlock(&trace_types_lock);
1401         kfree(cond_snapshot);
1402         return ret;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1405
1406 /**
1407  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1408  * @tr:         The tracing instance
1409  *
1410  * Check whether the conditional snapshot for the given instance is
1411  * enabled; if so, free the cond_snapshot associated with it,
1412  * otherwise return -EINVAL.
1413  *
1414  * Returns 0 if successful, error otherwise.
1415  */
1416 int tracing_snapshot_cond_disable(struct trace_array *tr)
1417 {
1418         int ret = 0;
1419
1420         local_irq_disable();
1421         arch_spin_lock(&tr->max_lock);
1422
1423         if (!tr->cond_snapshot)
1424                 ret = -EINVAL;
1425         else {
1426                 kfree(tr->cond_snapshot);
1427                 tr->cond_snapshot = NULL;
1428         }
1429
1430         arch_spin_unlock(&tr->max_lock);
1431         local_irq_enable();
1432
1433         return ret;
1434 }
1435 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1436 #else
1437 void tracing_snapshot(void)
1438 {
1439         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1440 }
1441 EXPORT_SYMBOL_GPL(tracing_snapshot);
1442 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1443 {
1444         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1447 int tracing_alloc_snapshot(void)
1448 {
1449         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1450         return -ENODEV;
1451 }
1452 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1453 void tracing_snapshot_alloc(void)
1454 {
1455         /* Give warning */
1456         tracing_snapshot();
1457 }
1458 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1459 void *tracing_cond_snapshot_data(struct trace_array *tr)
1460 {
1461         return NULL;
1462 }
1463 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1464 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1465 {
1466         return -ENODEV;
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1469 int tracing_snapshot_cond_disable(struct trace_array *tr)
1470 {
1471         return false;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1474 #define free_snapshot(tr)       do { } while (0)
1475 #endif /* CONFIG_TRACER_SNAPSHOT */
1476
1477 void tracer_tracing_off(struct trace_array *tr)
1478 {
1479         if (tr->array_buffer.buffer)
1480                 ring_buffer_record_off(tr->array_buffer.buffer);
1481         /*
1482          * This flag is looked at when buffers haven't been allocated
1483          * yet, or by some tracers (like irqsoff), that just want to
1484          * know if the ring buffer has been disabled, but it can handle
1485          * races of where it gets disabled but we still do a record.
1486          * As the check is in the fast path of the tracers, it is more
1487          * important to be fast than accurate.
1488          */
1489         tr->buffer_disabled = 1;
1490         /* Make the flag seen by readers */
1491         smp_wmb();
1492 }
1493
1494 /**
1495  * tracing_off - turn off tracing buffers
1496  *
1497  * This function stops the tracing buffers from recording data.
1498  * It does not disable any overhead the tracers themselves may
1499  * be causing. This function simply causes all recording to
1500  * the ring buffers to fail.
1501  */
1502 void tracing_off(void)
1503 {
1504         tracer_tracing_off(&global_trace);
1505 }
1506 EXPORT_SYMBOL_GPL(tracing_off);
1507
1508 void disable_trace_on_warning(void)
1509 {
1510         if (__disable_trace_on_warning) {
1511                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1512                         "Disabling tracing due to warning\n");
1513                 tracing_off();
1514         }
1515 }
1516
1517 /**
1518  * tracer_tracing_is_on - show real state of ring buffer enabled
1519  * @tr : the trace array to know if ring buffer is enabled
1520  *
1521  * Shows real state of the ring buffer if it is enabled or not.
1522  */
1523 bool tracer_tracing_is_on(struct trace_array *tr)
1524 {
1525         if (tr->array_buffer.buffer)
1526                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1527         return !tr->buffer_disabled;
1528 }
1529
1530 /**
1531  * tracing_is_on - show state of ring buffers enabled
1532  */
1533 int tracing_is_on(void)
1534 {
1535         return tracer_tracing_is_on(&global_trace);
1536 }
1537 EXPORT_SYMBOL_GPL(tracing_is_on);
1538
1539 static int __init set_buf_size(char *str)
1540 {
1541         unsigned long buf_size;
1542
1543         if (!str)
1544                 return 0;
1545         buf_size = memparse(str, &str);
1546         /*
1547          * nr_entries can not be zero and the startup
1548          * tests require some buffer space. Therefore
1549          * ensure we have at least 4096 bytes of buffer.
1550          */
1551         trace_buf_size = max(4096UL, buf_size);
1552         return 1;
1553 }
1554 __setup("trace_buf_size=", set_buf_size);
1555
1556 static int __init set_tracing_thresh(char *str)
1557 {
1558         unsigned long threshold;
1559         int ret;
1560
1561         if (!str)
1562                 return 0;
1563         ret = kstrtoul(str, 0, &threshold);
1564         if (ret < 0)
1565                 return 0;
1566         tracing_thresh = threshold * 1000;
1567         return 1;
1568 }
1569 __setup("tracing_thresh=", set_tracing_thresh);
1570
1571 unsigned long nsecs_to_usecs(unsigned long nsecs)
1572 {
1573         return nsecs / 1000;
1574 }
1575
1576 /*
1577  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1578  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1579  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1580  * of strings in the order that the evals (enum) were defined.
1581  */
1582 #undef C
1583 #define C(a, b) b
1584
1585 /* These must match the bit positions in trace_iterator_flags */
1586 static const char *trace_options[] = {
1587         TRACE_FLAGS
1588         NULL
1589 };
1590
1591 static struct {
1592         u64 (*func)(void);
1593         const char *name;
1594         int in_ns;              /* is this clock in nanoseconds? */
1595 } trace_clocks[] = {
1596         { trace_clock_local,            "local",        1 },
1597         { trace_clock_global,           "global",       1 },
1598         { trace_clock_counter,          "counter",      0 },
1599         { trace_clock_jiffies,          "uptime",       0 },
1600         { trace_clock,                  "perf",         1 },
1601         { ktime_get_mono_fast_ns,       "mono",         1 },
1602         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1603         { ktime_get_boot_fast_ns,       "boot",         1 },
1604         { ktime_get_tai_fast_ns,        "tai",          1 },
1605         ARCH_TRACE_CLOCKS
1606 };
1607
1608 bool trace_clock_in_ns(struct trace_array *tr)
1609 {
1610         if (trace_clocks[tr->clock_id].in_ns)
1611                 return true;
1612
1613         return false;
1614 }
1615
1616 /*
1617  * trace_parser_get_init - gets the buffer for trace parser
1618  */
1619 int trace_parser_get_init(struct trace_parser *parser, int size)
1620 {
1621         memset(parser, 0, sizeof(*parser));
1622
1623         parser->buffer = kmalloc(size, GFP_KERNEL);
1624         if (!parser->buffer)
1625                 return 1;
1626
1627         parser->size = size;
1628         return 0;
1629 }
1630
1631 /*
1632  * trace_parser_put - frees the buffer for trace parser
1633  */
1634 void trace_parser_put(struct trace_parser *parser)
1635 {
1636         kfree(parser->buffer);
1637         parser->buffer = NULL;
1638 }
1639
1640 /*
1641  * trace_get_user - reads the user input string separated by  space
1642  * (matched by isspace(ch))
1643  *
1644  * For each string found the 'struct trace_parser' is updated,
1645  * and the function returns.
1646  *
1647  * Returns number of bytes read.
1648  *
1649  * See kernel/trace/trace.h for 'struct trace_parser' details.
1650  */
1651 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1652         size_t cnt, loff_t *ppos)
1653 {
1654         char ch;
1655         size_t read = 0;
1656         ssize_t ret;
1657
1658         if (!*ppos)
1659                 trace_parser_clear(parser);
1660
1661         ret = get_user(ch, ubuf++);
1662         if (ret)
1663                 goto out;
1664
1665         read++;
1666         cnt--;
1667
1668         /*
1669          * The parser is not finished with the last write,
1670          * continue reading the user input without skipping spaces.
1671          */
1672         if (!parser->cont) {
1673                 /* skip white space */
1674                 while (cnt && isspace(ch)) {
1675                         ret = get_user(ch, ubuf++);
1676                         if (ret)
1677                                 goto out;
1678                         read++;
1679                         cnt--;
1680                 }
1681
1682                 parser->idx = 0;
1683
1684                 /* only spaces were written */
1685                 if (isspace(ch) || !ch) {
1686                         *ppos += read;
1687                         ret = read;
1688                         goto out;
1689                 }
1690         }
1691
1692         /* read the non-space input */
1693         while (cnt && !isspace(ch) && ch) {
1694                 if (parser->idx < parser->size - 1)
1695                         parser->buffer[parser->idx++] = ch;
1696                 else {
1697                         ret = -EINVAL;
1698                         goto out;
1699                 }
1700                 ret = get_user(ch, ubuf++);
1701                 if (ret)
1702                         goto out;
1703                 read++;
1704                 cnt--;
1705         }
1706
1707         /* We either got finished input or we have to wait for another call. */
1708         if (isspace(ch) || !ch) {
1709                 parser->buffer[parser->idx] = 0;
1710                 parser->cont = false;
1711         } else if (parser->idx < parser->size - 1) {
1712                 parser->cont = true;
1713                 parser->buffer[parser->idx++] = ch;
1714                 /* Make sure the parsed string always terminates with '\0'. */
1715                 parser->buffer[parser->idx] = 0;
1716         } else {
1717                 ret = -EINVAL;
1718                 goto out;
1719         }
1720
1721         *ppos += read;
1722         ret = read;
1723
1724 out:
1725         return ret;
1726 }
1727
1728 /* TODO add a seq_buf_to_buffer() */
1729 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1730 {
1731         int len;
1732
1733         if (trace_seq_used(s) <= s->seq.readpos)
1734                 return -EBUSY;
1735
1736         len = trace_seq_used(s) - s->seq.readpos;
1737         if (cnt > len)
1738                 cnt = len;
1739         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1740
1741         s->seq.readpos += cnt;
1742         return cnt;
1743 }
1744
1745 unsigned long __read_mostly     tracing_thresh;
1746
1747 #ifdef CONFIG_TRACER_MAX_TRACE
1748 static const struct file_operations tracing_max_lat_fops;
1749
1750 #ifdef LATENCY_FS_NOTIFY
1751
1752 static struct workqueue_struct *fsnotify_wq;
1753
1754 static void latency_fsnotify_workfn(struct work_struct *work)
1755 {
1756         struct trace_array *tr = container_of(work, struct trace_array,
1757                                               fsnotify_work);
1758         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1759 }
1760
1761 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1762 {
1763         struct trace_array *tr = container_of(iwork, struct trace_array,
1764                                               fsnotify_irqwork);
1765         queue_work(fsnotify_wq, &tr->fsnotify_work);
1766 }
1767
1768 static void trace_create_maxlat_file(struct trace_array *tr,
1769                                      struct dentry *d_tracer)
1770 {
1771         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1772         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1773         tr->d_max_latency = trace_create_file("tracing_max_latency",
1774                                               TRACE_MODE_WRITE,
1775                                               d_tracer, &tr->max_latency,
1776                                               &tracing_max_lat_fops);
1777 }
1778
1779 __init static int latency_fsnotify_init(void)
1780 {
1781         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1782                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1783         if (!fsnotify_wq) {
1784                 pr_err("Unable to allocate tr_max_lat_wq\n");
1785                 return -ENOMEM;
1786         }
1787         return 0;
1788 }
1789
1790 late_initcall_sync(latency_fsnotify_init);
1791
1792 void latency_fsnotify(struct trace_array *tr)
1793 {
1794         if (!fsnotify_wq)
1795                 return;
1796         /*
1797          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1798          * possible that we are called from __schedule() or do_idle(), which
1799          * could cause a deadlock.
1800          */
1801         irq_work_queue(&tr->fsnotify_irqwork);
1802 }
1803
1804 #else /* !LATENCY_FS_NOTIFY */
1805
1806 #define trace_create_maxlat_file(tr, d_tracer)                          \
1807         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1808                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1809
1810 #endif
1811
1812 /*
1813  * Copy the new maximum trace into the separate maximum-trace
1814  * structure. (this way the maximum trace is permanently saved,
1815  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1816  */
1817 static void
1818 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1819 {
1820         struct array_buffer *trace_buf = &tr->array_buffer;
1821         struct array_buffer *max_buf = &tr->max_buffer;
1822         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1823         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1824
1825         max_buf->cpu = cpu;
1826         max_buf->time_start = data->preempt_timestamp;
1827
1828         max_data->saved_latency = tr->max_latency;
1829         max_data->critical_start = data->critical_start;
1830         max_data->critical_end = data->critical_end;
1831
1832         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1833         max_data->pid = tsk->pid;
1834         /*
1835          * If tsk == current, then use current_uid(), as that does not use
1836          * RCU. The irq tracer can be called out of RCU scope.
1837          */
1838         if (tsk == current)
1839                 max_data->uid = current_uid();
1840         else
1841                 max_data->uid = task_uid(tsk);
1842
1843         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1844         max_data->policy = tsk->policy;
1845         max_data->rt_priority = tsk->rt_priority;
1846
1847         /* record this tasks comm */
1848         tracing_record_cmdline(tsk);
1849         latency_fsnotify(tr);
1850 }
1851
1852 /**
1853  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1854  * @tr: tracer
1855  * @tsk: the task with the latency
1856  * @cpu: The cpu that initiated the trace.
1857  * @cond_data: User data associated with a conditional snapshot
1858  *
1859  * Flip the buffers between the @tr and the max_tr and record information
1860  * about which task was the cause of this latency.
1861  */
1862 void
1863 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1864               void *cond_data)
1865 {
1866         if (tr->stop_count)
1867                 return;
1868
1869         WARN_ON_ONCE(!irqs_disabled());
1870
1871         if (!tr->allocated_snapshot) {
1872                 /* Only the nop tracer should hit this when disabling */
1873                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1874                 return;
1875         }
1876
1877         arch_spin_lock(&tr->max_lock);
1878
1879         /* Inherit the recordable setting from array_buffer */
1880         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1881                 ring_buffer_record_on(tr->max_buffer.buffer);
1882         else
1883                 ring_buffer_record_off(tr->max_buffer.buffer);
1884
1885 #ifdef CONFIG_TRACER_SNAPSHOT
1886         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1887                 arch_spin_unlock(&tr->max_lock);
1888                 return;
1889         }
1890 #endif
1891         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1892
1893         __update_max_tr(tr, tsk, cpu);
1894
1895         arch_spin_unlock(&tr->max_lock);
1896 }
1897
1898 /**
1899  * update_max_tr_single - only copy one trace over, and reset the rest
1900  * @tr: tracer
1901  * @tsk: task with the latency
1902  * @cpu: the cpu of the buffer to copy.
1903  *
1904  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1905  */
1906 void
1907 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1908 {
1909         int ret;
1910
1911         if (tr->stop_count)
1912                 return;
1913
1914         WARN_ON_ONCE(!irqs_disabled());
1915         if (!tr->allocated_snapshot) {
1916                 /* Only the nop tracer should hit this when disabling */
1917                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1918                 return;
1919         }
1920
1921         arch_spin_lock(&tr->max_lock);
1922
1923         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1924
1925         if (ret == -EBUSY) {
1926                 /*
1927                  * We failed to swap the buffer due to a commit taking
1928                  * place on this CPU. We fail to record, but we reset
1929                  * the max trace buffer (no one writes directly to it)
1930                  * and flag that it failed.
1931                  */
1932                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1933                         "Failed to swap buffers due to commit in progress\n");
1934         }
1935
1936         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1937
1938         __update_max_tr(tr, tsk, cpu);
1939         arch_spin_unlock(&tr->max_lock);
1940 }
1941
1942 #endif /* CONFIG_TRACER_MAX_TRACE */
1943
1944 static int wait_on_pipe(struct trace_iterator *iter, int full)
1945 {
1946         /* Iterators are static, they should be filled or empty */
1947         if (trace_buffer_iter(iter, iter->cpu_file))
1948                 return 0;
1949
1950         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1951                                 full);
1952 }
1953
1954 #ifdef CONFIG_FTRACE_STARTUP_TEST
1955 static bool selftests_can_run;
1956
1957 struct trace_selftests {
1958         struct list_head                list;
1959         struct tracer                   *type;
1960 };
1961
1962 static LIST_HEAD(postponed_selftests);
1963
1964 static int save_selftest(struct tracer *type)
1965 {
1966         struct trace_selftests *selftest;
1967
1968         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1969         if (!selftest)
1970                 return -ENOMEM;
1971
1972         selftest->type = type;
1973         list_add(&selftest->list, &postponed_selftests);
1974         return 0;
1975 }
1976
1977 static int run_tracer_selftest(struct tracer *type)
1978 {
1979         struct trace_array *tr = &global_trace;
1980         struct tracer *saved_tracer = tr->current_trace;
1981         int ret;
1982
1983         if (!type->selftest || tracing_selftest_disabled)
1984                 return 0;
1985
1986         /*
1987          * If a tracer registers early in boot up (before scheduling is
1988          * initialized and such), then do not run its selftests yet.
1989          * Instead, run it a little later in the boot process.
1990          */
1991         if (!selftests_can_run)
1992                 return save_selftest(type);
1993
1994         if (!tracing_is_on()) {
1995                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1996                         type->name);
1997                 return 0;
1998         }
1999
2000         /*
2001          * Run a selftest on this tracer.
2002          * Here we reset the trace buffer, and set the current
2003          * tracer to be this tracer. The tracer can then run some
2004          * internal tracing to verify that everything is in order.
2005          * If we fail, we do not register this tracer.
2006          */
2007         tracing_reset_online_cpus(&tr->array_buffer);
2008
2009         tr->current_trace = type;
2010
2011 #ifdef CONFIG_TRACER_MAX_TRACE
2012         if (type->use_max_tr) {
2013                 /* If we expanded the buffers, make sure the max is expanded too */
2014                 if (ring_buffer_expanded)
2015                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2016                                            RING_BUFFER_ALL_CPUS);
2017                 tr->allocated_snapshot = true;
2018         }
2019 #endif
2020
2021         /* the test is responsible for initializing and enabling */
2022         pr_info("Testing tracer %s: ", type->name);
2023         ret = type->selftest(type, tr);
2024         /* the test is responsible for resetting too */
2025         tr->current_trace = saved_tracer;
2026         if (ret) {
2027                 printk(KERN_CONT "FAILED!\n");
2028                 /* Add the warning after printing 'FAILED' */
2029                 WARN_ON(1);
2030                 return -1;
2031         }
2032         /* Only reset on passing, to avoid touching corrupted buffers */
2033         tracing_reset_online_cpus(&tr->array_buffer);
2034
2035 #ifdef CONFIG_TRACER_MAX_TRACE
2036         if (type->use_max_tr) {
2037                 tr->allocated_snapshot = false;
2038
2039                 /* Shrink the max buffer again */
2040                 if (ring_buffer_expanded)
2041                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2042                                            RING_BUFFER_ALL_CPUS);
2043         }
2044 #endif
2045
2046         printk(KERN_CONT "PASSED\n");
2047         return 0;
2048 }
2049
2050 static int do_run_tracer_selftest(struct tracer *type)
2051 {
2052         int ret;
2053
2054         /*
2055          * Tests can take a long time, especially if they are run one after the
2056          * other, as does happen during bootup when all the tracers are
2057          * registered. This could cause the soft lockup watchdog to trigger.
2058          */
2059         cond_resched();
2060
2061         tracing_selftest_running = true;
2062         ret = run_tracer_selftest(type);
2063         tracing_selftest_running = false;
2064
2065         return ret;
2066 }
2067
2068 static __init int init_trace_selftests(void)
2069 {
2070         struct trace_selftests *p, *n;
2071         struct tracer *t, **last;
2072         int ret;
2073
2074         selftests_can_run = true;
2075
2076         mutex_lock(&trace_types_lock);
2077
2078         if (list_empty(&postponed_selftests))
2079                 goto out;
2080
2081         pr_info("Running postponed tracer tests:\n");
2082
2083         tracing_selftest_running = true;
2084         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2085                 /* This loop can take minutes when sanitizers are enabled, so
2086                  * lets make sure we allow RCU processing.
2087                  */
2088                 cond_resched();
2089                 ret = run_tracer_selftest(p->type);
2090                 /* If the test fails, then warn and remove from available_tracers */
2091                 if (ret < 0) {
2092                         WARN(1, "tracer: %s failed selftest, disabling\n",
2093                              p->type->name);
2094                         last = &trace_types;
2095                         for (t = trace_types; t; t = t->next) {
2096                                 if (t == p->type) {
2097                                         *last = t->next;
2098                                         break;
2099                                 }
2100                                 last = &t->next;
2101                         }
2102                 }
2103                 list_del(&p->list);
2104                 kfree(p);
2105         }
2106         tracing_selftest_running = false;
2107
2108  out:
2109         mutex_unlock(&trace_types_lock);
2110
2111         return 0;
2112 }
2113 core_initcall(init_trace_selftests);
2114 #else
2115 static inline int run_tracer_selftest(struct tracer *type)
2116 {
2117         return 0;
2118 }
2119 static inline int do_run_tracer_selftest(struct tracer *type)
2120 {
2121         return 0;
2122 }
2123 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2124
2125 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2126
2127 static void __init apply_trace_boot_options(void);
2128
2129 /**
2130  * register_tracer - register a tracer with the ftrace system.
2131  * @type: the plugin for the tracer
2132  *
2133  * Register a new plugin tracer.
2134  */
2135 int __init register_tracer(struct tracer *type)
2136 {
2137         struct tracer *t;
2138         int ret = 0;
2139
2140         if (!type->name) {
2141                 pr_info("Tracer must have a name\n");
2142                 return -1;
2143         }
2144
2145         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2146                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2147                 return -1;
2148         }
2149
2150         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2151                 pr_warn("Can not register tracer %s due to lockdown\n",
2152                            type->name);
2153                 return -EPERM;
2154         }
2155
2156         mutex_lock(&trace_types_lock);
2157
2158         for (t = trace_types; t; t = t->next) {
2159                 if (strcmp(type->name, t->name) == 0) {
2160                         /* already found */
2161                         pr_info("Tracer %s already registered\n",
2162                                 type->name);
2163                         ret = -1;
2164                         goto out;
2165                 }
2166         }
2167
2168         if (!type->set_flag)
2169                 type->set_flag = &dummy_set_flag;
2170         if (!type->flags) {
2171                 /*allocate a dummy tracer_flags*/
2172                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2173                 if (!type->flags) {
2174                         ret = -ENOMEM;
2175                         goto out;
2176                 }
2177                 type->flags->val = 0;
2178                 type->flags->opts = dummy_tracer_opt;
2179         } else
2180                 if (!type->flags->opts)
2181                         type->flags->opts = dummy_tracer_opt;
2182
2183         /* store the tracer for __set_tracer_option */
2184         type->flags->trace = type;
2185
2186         ret = do_run_tracer_selftest(type);
2187         if (ret < 0)
2188                 goto out;
2189
2190         type->next = trace_types;
2191         trace_types = type;
2192         add_tracer_options(&global_trace, type);
2193
2194  out:
2195         mutex_unlock(&trace_types_lock);
2196
2197         if (ret || !default_bootup_tracer)
2198                 goto out_unlock;
2199
2200         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2201                 goto out_unlock;
2202
2203         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2204         /* Do we want this tracer to start on bootup? */
2205         tracing_set_tracer(&global_trace, type->name);
2206         default_bootup_tracer = NULL;
2207
2208         apply_trace_boot_options();
2209
2210         /* disable other selftests, since this will break it. */
2211         disable_tracing_selftest("running a tracer");
2212
2213  out_unlock:
2214         return ret;
2215 }
2216
2217 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2218 {
2219         struct trace_buffer *buffer = buf->buffer;
2220
2221         if (!buffer)
2222                 return;
2223
2224         ring_buffer_record_disable(buffer);
2225
2226         /* Make sure all commits have finished */
2227         synchronize_rcu();
2228         ring_buffer_reset_cpu(buffer, cpu);
2229
2230         ring_buffer_record_enable(buffer);
2231 }
2232
2233 void tracing_reset_online_cpus(struct array_buffer *buf)
2234 {
2235         struct trace_buffer *buffer = buf->buffer;
2236
2237         if (!buffer)
2238                 return;
2239
2240         ring_buffer_record_disable(buffer);
2241
2242         /* Make sure all commits have finished */
2243         synchronize_rcu();
2244
2245         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2246
2247         ring_buffer_reset_online_cpus(buffer);
2248
2249         ring_buffer_record_enable(buffer);
2250 }
2251
2252 /* Must have trace_types_lock held */
2253 void tracing_reset_all_online_cpus_unlocked(void)
2254 {
2255         struct trace_array *tr;
2256
2257         lockdep_assert_held(&trace_types_lock);
2258
2259         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2260                 if (!tr->clear_trace)
2261                         continue;
2262                 tr->clear_trace = false;
2263                 tracing_reset_online_cpus(&tr->array_buffer);
2264 #ifdef CONFIG_TRACER_MAX_TRACE
2265                 tracing_reset_online_cpus(&tr->max_buffer);
2266 #endif
2267         }
2268 }
2269
2270 void tracing_reset_all_online_cpus(void)
2271 {
2272         mutex_lock(&trace_types_lock);
2273         tracing_reset_all_online_cpus_unlocked();
2274         mutex_unlock(&trace_types_lock);
2275 }
2276
2277 /*
2278  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2279  * is the tgid last observed corresponding to pid=i.
2280  */
2281 static int *tgid_map;
2282
2283 /* The maximum valid index into tgid_map. */
2284 static size_t tgid_map_max;
2285
2286 #define SAVED_CMDLINES_DEFAULT 128
2287 #define NO_CMDLINE_MAP UINT_MAX
2288 /*
2289  * Preemption must be disabled before acquiring trace_cmdline_lock.
2290  * The various trace_arrays' max_lock must be acquired in a context
2291  * where interrupt is disabled.
2292  */
2293 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2294 struct saved_cmdlines_buffer {
2295         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2296         unsigned *map_cmdline_to_pid;
2297         unsigned cmdline_num;
2298         int cmdline_idx;
2299         char *saved_cmdlines;
2300 };
2301 static struct saved_cmdlines_buffer *savedcmd;
2302
2303 static inline char *get_saved_cmdlines(int idx)
2304 {
2305         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2306 }
2307
2308 static inline void set_cmdline(int idx, const char *cmdline)
2309 {
2310         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2311 }
2312
2313 static int allocate_cmdlines_buffer(unsigned int val,
2314                                     struct saved_cmdlines_buffer *s)
2315 {
2316         s->map_cmdline_to_pid = kmalloc_array(val,
2317                                               sizeof(*s->map_cmdline_to_pid),
2318                                               GFP_KERNEL);
2319         if (!s->map_cmdline_to_pid)
2320                 return -ENOMEM;
2321
2322         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2323         if (!s->saved_cmdlines) {
2324                 kfree(s->map_cmdline_to_pid);
2325                 return -ENOMEM;
2326         }
2327
2328         s->cmdline_idx = 0;
2329         s->cmdline_num = val;
2330         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2331                sizeof(s->map_pid_to_cmdline));
2332         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2333                val * sizeof(*s->map_cmdline_to_pid));
2334
2335         return 0;
2336 }
2337
2338 static int trace_create_savedcmd(void)
2339 {
2340         int ret;
2341
2342         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2343         if (!savedcmd)
2344                 return -ENOMEM;
2345
2346         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2347         if (ret < 0) {
2348                 kfree(savedcmd);
2349                 savedcmd = NULL;
2350                 return -ENOMEM;
2351         }
2352
2353         return 0;
2354 }
2355
2356 int is_tracing_stopped(void)
2357 {
2358         return global_trace.stop_count;
2359 }
2360
2361 /**
2362  * tracing_start - quick start of the tracer
2363  *
2364  * If tracing is enabled but was stopped by tracing_stop,
2365  * this will start the tracer back up.
2366  */
2367 void tracing_start(void)
2368 {
2369         struct trace_buffer *buffer;
2370         unsigned long flags;
2371
2372         if (tracing_disabled)
2373                 return;
2374
2375         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2376         if (--global_trace.stop_count) {
2377                 if (global_trace.stop_count < 0) {
2378                         /* Someone screwed up their debugging */
2379                         WARN_ON_ONCE(1);
2380                         global_trace.stop_count = 0;
2381                 }
2382                 goto out;
2383         }
2384
2385         /* Prevent the buffers from switching */
2386         arch_spin_lock(&global_trace.max_lock);
2387
2388         buffer = global_trace.array_buffer.buffer;
2389         if (buffer)
2390                 ring_buffer_record_enable(buffer);
2391
2392 #ifdef CONFIG_TRACER_MAX_TRACE
2393         buffer = global_trace.max_buffer.buffer;
2394         if (buffer)
2395                 ring_buffer_record_enable(buffer);
2396 #endif
2397
2398         arch_spin_unlock(&global_trace.max_lock);
2399
2400  out:
2401         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2402 }
2403
2404 static void tracing_start_tr(struct trace_array *tr)
2405 {
2406         struct trace_buffer *buffer;
2407         unsigned long flags;
2408
2409         if (tracing_disabled)
2410                 return;
2411
2412         /* If global, we need to also start the max tracer */
2413         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2414                 return tracing_start();
2415
2416         raw_spin_lock_irqsave(&tr->start_lock, flags);
2417
2418         if (--tr->stop_count) {
2419                 if (tr->stop_count < 0) {
2420                         /* Someone screwed up their debugging */
2421                         WARN_ON_ONCE(1);
2422                         tr->stop_count = 0;
2423                 }
2424                 goto out;
2425         }
2426
2427         buffer = tr->array_buffer.buffer;
2428         if (buffer)
2429                 ring_buffer_record_enable(buffer);
2430
2431  out:
2432         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2433 }
2434
2435 /**
2436  * tracing_stop - quick stop of the tracer
2437  *
2438  * Light weight way to stop tracing. Use in conjunction with
2439  * tracing_start.
2440  */
2441 void tracing_stop(void)
2442 {
2443         struct trace_buffer *buffer;
2444         unsigned long flags;
2445
2446         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2447         if (global_trace.stop_count++)
2448                 goto out;
2449
2450         /* Prevent the buffers from switching */
2451         arch_spin_lock(&global_trace.max_lock);
2452
2453         buffer = global_trace.array_buffer.buffer;
2454         if (buffer)
2455                 ring_buffer_record_disable(buffer);
2456
2457 #ifdef CONFIG_TRACER_MAX_TRACE
2458         buffer = global_trace.max_buffer.buffer;
2459         if (buffer)
2460                 ring_buffer_record_disable(buffer);
2461 #endif
2462
2463         arch_spin_unlock(&global_trace.max_lock);
2464
2465  out:
2466         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2467 }
2468
2469 static void tracing_stop_tr(struct trace_array *tr)
2470 {
2471         struct trace_buffer *buffer;
2472         unsigned long flags;
2473
2474         /* If global, we need to also stop the max tracer */
2475         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2476                 return tracing_stop();
2477
2478         raw_spin_lock_irqsave(&tr->start_lock, flags);
2479         if (tr->stop_count++)
2480                 goto out;
2481
2482         buffer = tr->array_buffer.buffer;
2483         if (buffer)
2484                 ring_buffer_record_disable(buffer);
2485
2486  out:
2487         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2488 }
2489
2490 static int trace_save_cmdline(struct task_struct *tsk)
2491 {
2492         unsigned tpid, idx;
2493
2494         /* treat recording of idle task as a success */
2495         if (!tsk->pid)
2496                 return 1;
2497
2498         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2499
2500         /*
2501          * It's not the end of the world if we don't get
2502          * the lock, but we also don't want to spin
2503          * nor do we want to disable interrupts,
2504          * so if we miss here, then better luck next time.
2505          *
2506          * This is called within the scheduler and wake up, so interrupts
2507          * had better been disabled and run queue lock been held.
2508          */
2509         lockdep_assert_preemption_disabled();
2510         if (!arch_spin_trylock(&trace_cmdline_lock))
2511                 return 0;
2512
2513         idx = savedcmd->map_pid_to_cmdline[tpid];
2514         if (idx == NO_CMDLINE_MAP) {
2515                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2516
2517                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2518                 savedcmd->cmdline_idx = idx;
2519         }
2520
2521         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2522         set_cmdline(idx, tsk->comm);
2523
2524         arch_spin_unlock(&trace_cmdline_lock);
2525
2526         return 1;
2527 }
2528
2529 static void __trace_find_cmdline(int pid, char comm[])
2530 {
2531         unsigned map;
2532         int tpid;
2533
2534         if (!pid) {
2535                 strcpy(comm, "<idle>");
2536                 return;
2537         }
2538
2539         if (WARN_ON_ONCE(pid < 0)) {
2540                 strcpy(comm, "<XXX>");
2541                 return;
2542         }
2543
2544         tpid = pid & (PID_MAX_DEFAULT - 1);
2545         map = savedcmd->map_pid_to_cmdline[tpid];
2546         if (map != NO_CMDLINE_MAP) {
2547                 tpid = savedcmd->map_cmdline_to_pid[map];
2548                 if (tpid == pid) {
2549                         strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2550                         return;
2551                 }
2552         }
2553         strcpy(comm, "<...>");
2554 }
2555
2556 void trace_find_cmdline(int pid, char comm[])
2557 {
2558         preempt_disable();
2559         arch_spin_lock(&trace_cmdline_lock);
2560
2561         __trace_find_cmdline(pid, comm);
2562
2563         arch_spin_unlock(&trace_cmdline_lock);
2564         preempt_enable();
2565 }
2566
2567 static int *trace_find_tgid_ptr(int pid)
2568 {
2569         /*
2570          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2571          * if we observe a non-NULL tgid_map then we also observe the correct
2572          * tgid_map_max.
2573          */
2574         int *map = smp_load_acquire(&tgid_map);
2575
2576         if (unlikely(!map || pid > tgid_map_max))
2577                 return NULL;
2578
2579         return &map[pid];
2580 }
2581
2582 int trace_find_tgid(int pid)
2583 {
2584         int *ptr = trace_find_tgid_ptr(pid);
2585
2586         return ptr ? *ptr : 0;
2587 }
2588
2589 static int trace_save_tgid(struct task_struct *tsk)
2590 {
2591         int *ptr;
2592
2593         /* treat recording of idle task as a success */
2594         if (!tsk->pid)
2595                 return 1;
2596
2597         ptr = trace_find_tgid_ptr(tsk->pid);
2598         if (!ptr)
2599                 return 0;
2600
2601         *ptr = tsk->tgid;
2602         return 1;
2603 }
2604
2605 static bool tracing_record_taskinfo_skip(int flags)
2606 {
2607         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2608                 return true;
2609         if (!__this_cpu_read(trace_taskinfo_save))
2610                 return true;
2611         return false;
2612 }
2613
2614 /**
2615  * tracing_record_taskinfo - record the task info of a task
2616  *
2617  * @task:  task to record
2618  * @flags: TRACE_RECORD_CMDLINE for recording comm
2619  *         TRACE_RECORD_TGID for recording tgid
2620  */
2621 void tracing_record_taskinfo(struct task_struct *task, int flags)
2622 {
2623         bool done;
2624
2625         if (tracing_record_taskinfo_skip(flags))
2626                 return;
2627
2628         /*
2629          * Record as much task information as possible. If some fail, continue
2630          * to try to record the others.
2631          */
2632         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2633         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2634
2635         /* If recording any information failed, retry again soon. */
2636         if (!done)
2637                 return;
2638
2639         __this_cpu_write(trace_taskinfo_save, false);
2640 }
2641
2642 /**
2643  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2644  *
2645  * @prev: previous task during sched_switch
2646  * @next: next task during sched_switch
2647  * @flags: TRACE_RECORD_CMDLINE for recording comm
2648  *         TRACE_RECORD_TGID for recording tgid
2649  */
2650 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2651                                           struct task_struct *next, int flags)
2652 {
2653         bool done;
2654
2655         if (tracing_record_taskinfo_skip(flags))
2656                 return;
2657
2658         /*
2659          * Record as much task information as possible. If some fail, continue
2660          * to try to record the others.
2661          */
2662         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2663         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2664         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2665         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2666
2667         /* If recording any information failed, retry again soon. */
2668         if (!done)
2669                 return;
2670
2671         __this_cpu_write(trace_taskinfo_save, false);
2672 }
2673
2674 /* Helpers to record a specific task information */
2675 void tracing_record_cmdline(struct task_struct *task)
2676 {
2677         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2678 }
2679
2680 void tracing_record_tgid(struct task_struct *task)
2681 {
2682         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2683 }
2684
2685 /*
2686  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2687  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2688  * simplifies those functions and keeps them in sync.
2689  */
2690 enum print_line_t trace_handle_return(struct trace_seq *s)
2691 {
2692         return trace_seq_has_overflowed(s) ?
2693                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2694 }
2695 EXPORT_SYMBOL_GPL(trace_handle_return);
2696
2697 static unsigned short migration_disable_value(void)
2698 {
2699 #if defined(CONFIG_SMP)
2700         return current->migration_disabled;
2701 #else
2702         return 0;
2703 #endif
2704 }
2705
2706 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2707 {
2708         unsigned int trace_flags = irqs_status;
2709         unsigned int pc;
2710
2711         pc = preempt_count();
2712
2713         if (pc & NMI_MASK)
2714                 trace_flags |= TRACE_FLAG_NMI;
2715         if (pc & HARDIRQ_MASK)
2716                 trace_flags |= TRACE_FLAG_HARDIRQ;
2717         if (in_serving_softirq())
2718                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2719         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2720                 trace_flags |= TRACE_FLAG_BH_OFF;
2721
2722         if (tif_need_resched())
2723                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2724         if (test_preempt_need_resched())
2725                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2726         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2727                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2728 }
2729
2730 struct ring_buffer_event *
2731 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2732                           int type,
2733                           unsigned long len,
2734                           unsigned int trace_ctx)
2735 {
2736         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2737 }
2738
2739 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2740 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2741 static int trace_buffered_event_ref;
2742
2743 /**
2744  * trace_buffered_event_enable - enable buffering events
2745  *
2746  * When events are being filtered, it is quicker to use a temporary
2747  * buffer to write the event data into if there's a likely chance
2748  * that it will not be committed. The discard of the ring buffer
2749  * is not as fast as committing, and is much slower than copying
2750  * a commit.
2751  *
2752  * When an event is to be filtered, allocate per cpu buffers to
2753  * write the event data into, and if the event is filtered and discarded
2754  * it is simply dropped, otherwise, the entire data is to be committed
2755  * in one shot.
2756  */
2757 void trace_buffered_event_enable(void)
2758 {
2759         struct ring_buffer_event *event;
2760         struct page *page;
2761         int cpu;
2762
2763         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2764
2765         if (trace_buffered_event_ref++)
2766                 return;
2767
2768         for_each_tracing_cpu(cpu) {
2769                 page = alloc_pages_node(cpu_to_node(cpu),
2770                                         GFP_KERNEL | __GFP_NORETRY, 0);
2771                 if (!page)
2772                         goto failed;
2773
2774                 event = page_address(page);
2775                 memset(event, 0, sizeof(*event));
2776
2777                 per_cpu(trace_buffered_event, cpu) = event;
2778
2779                 preempt_disable();
2780                 if (cpu == smp_processor_id() &&
2781                     __this_cpu_read(trace_buffered_event) !=
2782                     per_cpu(trace_buffered_event, cpu))
2783                         WARN_ON_ONCE(1);
2784                 preempt_enable();
2785         }
2786
2787         return;
2788  failed:
2789         trace_buffered_event_disable();
2790 }
2791
2792 static void enable_trace_buffered_event(void *data)
2793 {
2794         /* Probably not needed, but do it anyway */
2795         smp_rmb();
2796         this_cpu_dec(trace_buffered_event_cnt);
2797 }
2798
2799 static void disable_trace_buffered_event(void *data)
2800 {
2801         this_cpu_inc(trace_buffered_event_cnt);
2802 }
2803
2804 /**
2805  * trace_buffered_event_disable - disable buffering events
2806  *
2807  * When a filter is removed, it is faster to not use the buffered
2808  * events, and to commit directly into the ring buffer. Free up
2809  * the temp buffers when there are no more users. This requires
2810  * special synchronization with current events.
2811  */
2812 void trace_buffered_event_disable(void)
2813 {
2814         int cpu;
2815
2816         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2817
2818         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2819                 return;
2820
2821         if (--trace_buffered_event_ref)
2822                 return;
2823
2824         preempt_disable();
2825         /* For each CPU, set the buffer as used. */
2826         smp_call_function_many(tracing_buffer_mask,
2827                                disable_trace_buffered_event, NULL, 1);
2828         preempt_enable();
2829
2830         /* Wait for all current users to finish */
2831         synchronize_rcu();
2832
2833         for_each_tracing_cpu(cpu) {
2834                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2835                 per_cpu(trace_buffered_event, cpu) = NULL;
2836         }
2837         /*
2838          * Make sure trace_buffered_event is NULL before clearing
2839          * trace_buffered_event_cnt.
2840          */
2841         smp_wmb();
2842
2843         preempt_disable();
2844         /* Do the work on each cpu */
2845         smp_call_function_many(tracing_buffer_mask,
2846                                enable_trace_buffered_event, NULL, 1);
2847         preempt_enable();
2848 }
2849
2850 static struct trace_buffer *temp_buffer;
2851
2852 struct ring_buffer_event *
2853 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2854                           struct trace_event_file *trace_file,
2855                           int type, unsigned long len,
2856                           unsigned int trace_ctx)
2857 {
2858         struct ring_buffer_event *entry;
2859         struct trace_array *tr = trace_file->tr;
2860         int val;
2861
2862         *current_rb = tr->array_buffer.buffer;
2863
2864         if (!tr->no_filter_buffering_ref &&
2865             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2866                 preempt_disable_notrace();
2867                 /*
2868                  * Filtering is on, so try to use the per cpu buffer first.
2869                  * This buffer will simulate a ring_buffer_event,
2870                  * where the type_len is zero and the array[0] will
2871                  * hold the full length.
2872                  * (see include/linux/ring-buffer.h for details on
2873                  *  how the ring_buffer_event is structured).
2874                  *
2875                  * Using a temp buffer during filtering and copying it
2876                  * on a matched filter is quicker than writing directly
2877                  * into the ring buffer and then discarding it when
2878                  * it doesn't match. That is because the discard
2879                  * requires several atomic operations to get right.
2880                  * Copying on match and doing nothing on a failed match
2881                  * is still quicker than no copy on match, but having
2882                  * to discard out of the ring buffer on a failed match.
2883                  */
2884                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2885                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2886
2887                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2888
2889                         /*
2890                          * Preemption is disabled, but interrupts and NMIs
2891                          * can still come in now. If that happens after
2892                          * the above increment, then it will have to go
2893                          * back to the old method of allocating the event
2894                          * on the ring buffer, and if the filter fails, it
2895                          * will have to call ring_buffer_discard_commit()
2896                          * to remove it.
2897                          *
2898                          * Need to also check the unlikely case that the
2899                          * length is bigger than the temp buffer size.
2900                          * If that happens, then the reserve is pretty much
2901                          * guaranteed to fail, as the ring buffer currently
2902                          * only allows events less than a page. But that may
2903                          * change in the future, so let the ring buffer reserve
2904                          * handle the failure in that case.
2905                          */
2906                         if (val == 1 && likely(len <= max_len)) {
2907                                 trace_event_setup(entry, type, trace_ctx);
2908                                 entry->array[0] = len;
2909                                 /* Return with preemption disabled */
2910                                 return entry;
2911                         }
2912                         this_cpu_dec(trace_buffered_event_cnt);
2913                 }
2914                 /* __trace_buffer_lock_reserve() disables preemption */
2915                 preempt_enable_notrace();
2916         }
2917
2918         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2919                                             trace_ctx);
2920         /*
2921          * If tracing is off, but we have triggers enabled
2922          * we still need to look at the event data. Use the temp_buffer
2923          * to store the trace event for the trigger to use. It's recursive
2924          * safe and will not be recorded anywhere.
2925          */
2926         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2927                 *current_rb = temp_buffer;
2928                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2929                                                     trace_ctx);
2930         }
2931         return entry;
2932 }
2933 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2934
2935 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2936 static DEFINE_MUTEX(tracepoint_printk_mutex);
2937
2938 static void output_printk(struct trace_event_buffer *fbuffer)
2939 {
2940         struct trace_event_call *event_call;
2941         struct trace_event_file *file;
2942         struct trace_event *event;
2943         unsigned long flags;
2944         struct trace_iterator *iter = tracepoint_print_iter;
2945
2946         /* We should never get here if iter is NULL */
2947         if (WARN_ON_ONCE(!iter))
2948                 return;
2949
2950         event_call = fbuffer->trace_file->event_call;
2951         if (!event_call || !event_call->event.funcs ||
2952             !event_call->event.funcs->trace)
2953                 return;
2954
2955         file = fbuffer->trace_file;
2956         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2957             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2958              !filter_match_preds(file->filter, fbuffer->entry)))
2959                 return;
2960
2961         event = &fbuffer->trace_file->event_call->event;
2962
2963         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2964         trace_seq_init(&iter->seq);
2965         iter->ent = fbuffer->entry;
2966         event_call->event.funcs->trace(iter, 0, event);
2967         trace_seq_putc(&iter->seq, 0);
2968         printk("%s", iter->seq.buffer);
2969
2970         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2971 }
2972
2973 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2974                              void *buffer, size_t *lenp,
2975                              loff_t *ppos)
2976 {
2977         int save_tracepoint_printk;
2978         int ret;
2979
2980         mutex_lock(&tracepoint_printk_mutex);
2981         save_tracepoint_printk = tracepoint_printk;
2982
2983         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2984
2985         /*
2986          * This will force exiting early, as tracepoint_printk
2987          * is always zero when tracepoint_printk_iter is not allocated
2988          */
2989         if (!tracepoint_print_iter)
2990                 tracepoint_printk = 0;
2991
2992         if (save_tracepoint_printk == tracepoint_printk)
2993                 goto out;
2994
2995         if (tracepoint_printk)
2996                 static_key_enable(&tracepoint_printk_key.key);
2997         else
2998                 static_key_disable(&tracepoint_printk_key.key);
2999
3000  out:
3001         mutex_unlock(&tracepoint_printk_mutex);
3002
3003         return ret;
3004 }
3005
3006 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3007 {
3008         enum event_trigger_type tt = ETT_NONE;
3009         struct trace_event_file *file = fbuffer->trace_file;
3010
3011         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
3012                         fbuffer->entry, &tt))
3013                 goto discard;
3014
3015         if (static_key_false(&tracepoint_printk_key.key))
3016                 output_printk(fbuffer);
3017
3018         if (static_branch_unlikely(&trace_event_exports_enabled))
3019                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3020
3021         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3022                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3023
3024 discard:
3025         if (tt)
3026                 event_triggers_post_call(file, tt);
3027
3028 }
3029 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3030
3031 /*
3032  * Skip 3:
3033  *
3034  *   trace_buffer_unlock_commit_regs()
3035  *   trace_event_buffer_commit()
3036  *   trace_event_raw_event_xxx()
3037  */
3038 # define STACK_SKIP 3
3039
3040 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3041                                      struct trace_buffer *buffer,
3042                                      struct ring_buffer_event *event,
3043                                      unsigned int trace_ctx,
3044                                      struct pt_regs *regs)
3045 {
3046         __buffer_unlock_commit(buffer, event);
3047
3048         /*
3049          * If regs is not set, then skip the necessary functions.
3050          * Note, we can still get here via blktrace, wakeup tracer
3051          * and mmiotrace, but that's ok if they lose a function or
3052          * two. They are not that meaningful.
3053          */
3054         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3055         ftrace_trace_userstack(tr, buffer, trace_ctx);
3056 }
3057
3058 /*
3059  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3060  */
3061 void
3062 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3063                                    struct ring_buffer_event *event)
3064 {
3065         __buffer_unlock_commit(buffer, event);
3066 }
3067
3068 void
3069 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3070                parent_ip, unsigned int trace_ctx)
3071 {
3072         struct trace_event_call *call = &event_function;
3073         struct trace_buffer *buffer = tr->array_buffer.buffer;
3074         struct ring_buffer_event *event;
3075         struct ftrace_entry *entry;
3076
3077         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3078                                             trace_ctx);
3079         if (!event)
3080                 return;
3081         entry   = ring_buffer_event_data(event);
3082         entry->ip                       = ip;
3083         entry->parent_ip                = parent_ip;
3084
3085         if (!call_filter_check_discard(call, entry, buffer, event)) {
3086                 if (static_branch_unlikely(&trace_function_exports_enabled))
3087                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3088                 __buffer_unlock_commit(buffer, event);
3089         }
3090 }
3091
3092 #ifdef CONFIG_STACKTRACE
3093
3094 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3095 #define FTRACE_KSTACK_NESTING   4
3096
3097 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3098
3099 struct ftrace_stack {
3100         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3101 };
3102
3103
3104 struct ftrace_stacks {
3105         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3106 };
3107
3108 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3109 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3110
3111 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3112                                  unsigned int trace_ctx,
3113                                  int skip, struct pt_regs *regs)
3114 {
3115         struct trace_event_call *call = &event_kernel_stack;
3116         struct ring_buffer_event *event;
3117         unsigned int size, nr_entries;
3118         struct ftrace_stack *fstack;
3119         struct stack_entry *entry;
3120         int stackidx;
3121         void *ptr;
3122
3123         /*
3124          * Add one, for this function and the call to save_stack_trace()
3125          * If regs is set, then these functions will not be in the way.
3126          */
3127 #ifndef CONFIG_UNWINDER_ORC
3128         if (!regs)
3129                 skip++;
3130 #endif
3131
3132         preempt_disable_notrace();
3133
3134         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3135
3136         /* This should never happen. If it does, yell once and skip */
3137         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3138                 goto out;
3139
3140         /*
3141          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3142          * interrupt will either see the value pre increment or post
3143          * increment. If the interrupt happens pre increment it will have
3144          * restored the counter when it returns.  We just need a barrier to
3145          * keep gcc from moving things around.
3146          */
3147         barrier();
3148
3149         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3150         size = ARRAY_SIZE(fstack->calls);
3151
3152         if (regs) {
3153                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3154                                                    size, skip);
3155         } else {
3156                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3157         }
3158
3159         size = nr_entries * sizeof(unsigned long);
3160         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3161                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3162                                     trace_ctx);
3163         if (!event)
3164                 goto out;
3165         ptr = ring_buffer_event_data(event);
3166         entry = ptr;
3167
3168         /*
3169          * For backward compatibility reasons, the entry->caller is an
3170          * array of 8 slots to store the stack. This is also exported
3171          * to user space. The amount allocated on the ring buffer actually
3172          * holds enough for the stack specified by nr_entries. This will
3173          * go into the location of entry->caller. Due to string fortifiers
3174          * checking the size of the destination of memcpy() it triggers
3175          * when it detects that size is greater than 8. To hide this from
3176          * the fortifiers, we use "ptr" and pointer arithmetic to assign caller.
3177          *
3178          * The below is really just:
3179          *   memcpy(&entry->caller, fstack->calls, size);
3180          */
3181         ptr += offsetof(typeof(*entry), caller);
3182         memcpy(ptr, fstack->calls, size);
3183
3184         entry->size = nr_entries;
3185
3186         if (!call_filter_check_discard(call, entry, buffer, event))
3187                 __buffer_unlock_commit(buffer, event);
3188
3189  out:
3190         /* Again, don't let gcc optimize things here */
3191         barrier();
3192         __this_cpu_dec(ftrace_stack_reserve);
3193         preempt_enable_notrace();
3194
3195 }
3196
3197 static inline void ftrace_trace_stack(struct trace_array *tr,
3198                                       struct trace_buffer *buffer,
3199                                       unsigned int trace_ctx,
3200                                       int skip, struct pt_regs *regs)
3201 {
3202         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3203                 return;
3204
3205         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3206 }
3207
3208 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3209                    int skip)
3210 {
3211         struct trace_buffer *buffer = tr->array_buffer.buffer;
3212
3213         if (rcu_is_watching()) {
3214                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3215                 return;
3216         }
3217
3218         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3219                 return;
3220
3221         /*
3222          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3223          * but if the above rcu_is_watching() failed, then the NMI
3224          * triggered someplace critical, and ct_irq_enter() should
3225          * not be called from NMI.
3226          */
3227         if (unlikely(in_nmi()))
3228                 return;
3229
3230         ct_irq_enter_irqson();
3231         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3232         ct_irq_exit_irqson();
3233 }
3234
3235 /**
3236  * trace_dump_stack - record a stack back trace in the trace buffer
3237  * @skip: Number of functions to skip (helper handlers)
3238  */
3239 void trace_dump_stack(int skip)
3240 {
3241         if (tracing_disabled || tracing_selftest_running)
3242                 return;
3243
3244 #ifndef CONFIG_UNWINDER_ORC
3245         /* Skip 1 to skip this function. */
3246         skip++;
3247 #endif
3248         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3249                              tracing_gen_ctx(), skip, NULL);
3250 }
3251 EXPORT_SYMBOL_GPL(trace_dump_stack);
3252
3253 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3254 static DEFINE_PER_CPU(int, user_stack_count);
3255
3256 static void
3257 ftrace_trace_userstack(struct trace_array *tr,
3258                        struct trace_buffer *buffer, unsigned int trace_ctx)
3259 {
3260         struct trace_event_call *call = &event_user_stack;
3261         struct ring_buffer_event *event;
3262         struct userstack_entry *entry;
3263
3264         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3265                 return;
3266
3267         /*
3268          * NMIs can not handle page faults, even with fix ups.
3269          * The save user stack can (and often does) fault.
3270          */
3271         if (unlikely(in_nmi()))
3272                 return;
3273
3274         /*
3275          * prevent recursion, since the user stack tracing may
3276          * trigger other kernel events.
3277          */
3278         preempt_disable();
3279         if (__this_cpu_read(user_stack_count))
3280                 goto out;
3281
3282         __this_cpu_inc(user_stack_count);
3283
3284         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3285                                             sizeof(*entry), trace_ctx);
3286         if (!event)
3287                 goto out_drop_count;
3288         entry   = ring_buffer_event_data(event);
3289
3290         entry->tgid             = current->tgid;
3291         memset(&entry->caller, 0, sizeof(entry->caller));
3292
3293         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3294         if (!call_filter_check_discard(call, entry, buffer, event))
3295                 __buffer_unlock_commit(buffer, event);
3296
3297  out_drop_count:
3298         __this_cpu_dec(user_stack_count);
3299  out:
3300         preempt_enable();
3301 }
3302 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3303 static void ftrace_trace_userstack(struct trace_array *tr,
3304                                    struct trace_buffer *buffer,
3305                                    unsigned int trace_ctx)
3306 {
3307 }
3308 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3309
3310 #endif /* CONFIG_STACKTRACE */
3311
3312 static inline void
3313 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3314                           unsigned long long delta)
3315 {
3316         entry->bottom_delta_ts = delta & U32_MAX;
3317         entry->top_delta_ts = (delta >> 32);
3318 }
3319
3320 void trace_last_func_repeats(struct trace_array *tr,
3321                              struct trace_func_repeats *last_info,
3322                              unsigned int trace_ctx)
3323 {
3324         struct trace_buffer *buffer = tr->array_buffer.buffer;
3325         struct func_repeats_entry *entry;
3326         struct ring_buffer_event *event;
3327         u64 delta;
3328
3329         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3330                                             sizeof(*entry), trace_ctx);
3331         if (!event)
3332                 return;
3333
3334         delta = ring_buffer_event_time_stamp(buffer, event) -
3335                 last_info->ts_last_call;
3336
3337         entry = ring_buffer_event_data(event);
3338         entry->ip = last_info->ip;
3339         entry->parent_ip = last_info->parent_ip;
3340         entry->count = last_info->count;
3341         func_repeats_set_delta_ts(entry, delta);
3342
3343         __buffer_unlock_commit(buffer, event);
3344 }
3345
3346 /* created for use with alloc_percpu */
3347 struct trace_buffer_struct {
3348         int nesting;
3349         char buffer[4][TRACE_BUF_SIZE];
3350 };
3351
3352 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3353
3354 /*
3355  * This allows for lockless recording.  If we're nested too deeply, then
3356  * this returns NULL.
3357  */
3358 static char *get_trace_buf(void)
3359 {
3360         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3361
3362         if (!trace_percpu_buffer || buffer->nesting >= 4)
3363                 return NULL;
3364
3365         buffer->nesting++;
3366
3367         /* Interrupts must see nesting incremented before we use the buffer */
3368         barrier();
3369         return &buffer->buffer[buffer->nesting - 1][0];
3370 }
3371
3372 static void put_trace_buf(void)
3373 {
3374         /* Don't let the decrement of nesting leak before this */
3375         barrier();
3376         this_cpu_dec(trace_percpu_buffer->nesting);
3377 }
3378
3379 static int alloc_percpu_trace_buffer(void)
3380 {
3381         struct trace_buffer_struct __percpu *buffers;
3382
3383         if (trace_percpu_buffer)
3384                 return 0;
3385
3386         buffers = alloc_percpu(struct trace_buffer_struct);
3387         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3388                 return -ENOMEM;
3389
3390         trace_percpu_buffer = buffers;
3391         return 0;
3392 }
3393
3394 static int buffers_allocated;
3395
3396 void trace_printk_init_buffers(void)
3397 {
3398         if (buffers_allocated)
3399                 return;
3400
3401         if (alloc_percpu_trace_buffer())
3402                 return;
3403
3404         /* trace_printk() is for debug use only. Don't use it in production. */
3405
3406         pr_warn("\n");
3407         pr_warn("**********************************************************\n");
3408         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3409         pr_warn("**                                                      **\n");
3410         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3411         pr_warn("**                                                      **\n");
3412         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3413         pr_warn("** unsafe for production use.                           **\n");
3414         pr_warn("**                                                      **\n");
3415         pr_warn("** If you see this message and you are not debugging    **\n");
3416         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3417         pr_warn("**                                                      **\n");
3418         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3419         pr_warn("**********************************************************\n");
3420
3421         /* Expand the buffers to set size */
3422         tracing_update_buffers();
3423
3424         buffers_allocated = 1;
3425
3426         /*
3427          * trace_printk_init_buffers() can be called by modules.
3428          * If that happens, then we need to start cmdline recording
3429          * directly here. If the global_trace.buffer is already
3430          * allocated here, then this was called by module code.
3431          */
3432         if (global_trace.array_buffer.buffer)
3433                 tracing_start_cmdline_record();
3434 }
3435 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3436
3437 void trace_printk_start_comm(void)
3438 {
3439         /* Start tracing comms if trace printk is set */
3440         if (!buffers_allocated)
3441                 return;
3442         tracing_start_cmdline_record();
3443 }
3444
3445 static void trace_printk_start_stop_comm(int enabled)
3446 {
3447         if (!buffers_allocated)
3448                 return;
3449
3450         if (enabled)
3451                 tracing_start_cmdline_record();
3452         else
3453                 tracing_stop_cmdline_record();
3454 }
3455
3456 /**
3457  * trace_vbprintk - write binary msg to tracing buffer
3458  * @ip:    The address of the caller
3459  * @fmt:   The string format to write to the buffer
3460  * @args:  Arguments for @fmt
3461  */
3462 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3463 {
3464         struct trace_event_call *call = &event_bprint;
3465         struct ring_buffer_event *event;
3466         struct trace_buffer *buffer;
3467         struct trace_array *tr = &global_trace;
3468         struct bprint_entry *entry;
3469         unsigned int trace_ctx;
3470         char *tbuffer;
3471         int len = 0, size;
3472
3473         if (unlikely(tracing_selftest_running || tracing_disabled))
3474                 return 0;
3475
3476         /* Don't pollute graph traces with trace_vprintk internals */
3477         pause_graph_tracing();
3478
3479         trace_ctx = tracing_gen_ctx();
3480         preempt_disable_notrace();
3481
3482         tbuffer = get_trace_buf();
3483         if (!tbuffer) {
3484                 len = 0;
3485                 goto out_nobuffer;
3486         }
3487
3488         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3489
3490         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3491                 goto out_put;
3492
3493         size = sizeof(*entry) + sizeof(u32) * len;
3494         buffer = tr->array_buffer.buffer;
3495         ring_buffer_nest_start(buffer);
3496         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3497                                             trace_ctx);
3498         if (!event)
3499                 goto out;
3500         entry = ring_buffer_event_data(event);
3501         entry->ip                       = ip;
3502         entry->fmt                      = fmt;
3503
3504         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3505         if (!call_filter_check_discard(call, entry, buffer, event)) {
3506                 __buffer_unlock_commit(buffer, event);
3507                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3508         }
3509
3510 out:
3511         ring_buffer_nest_end(buffer);
3512 out_put:
3513         put_trace_buf();
3514
3515 out_nobuffer:
3516         preempt_enable_notrace();
3517         unpause_graph_tracing();
3518
3519         return len;
3520 }
3521 EXPORT_SYMBOL_GPL(trace_vbprintk);
3522
3523 __printf(3, 0)
3524 static int
3525 __trace_array_vprintk(struct trace_buffer *buffer,
3526                       unsigned long ip, const char *fmt, va_list args)
3527 {
3528         struct trace_event_call *call = &event_print;
3529         struct ring_buffer_event *event;
3530         int len = 0, size;
3531         struct print_entry *entry;
3532         unsigned int trace_ctx;
3533         char *tbuffer;
3534
3535         if (tracing_disabled)
3536                 return 0;
3537
3538         /* Don't pollute graph traces with trace_vprintk internals */
3539         pause_graph_tracing();
3540
3541         trace_ctx = tracing_gen_ctx();
3542         preempt_disable_notrace();
3543
3544
3545         tbuffer = get_trace_buf();
3546         if (!tbuffer) {
3547                 len = 0;
3548                 goto out_nobuffer;
3549         }
3550
3551         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3552
3553         size = sizeof(*entry) + len + 1;
3554         ring_buffer_nest_start(buffer);
3555         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3556                                             trace_ctx);
3557         if (!event)
3558                 goto out;
3559         entry = ring_buffer_event_data(event);
3560         entry->ip = ip;
3561
3562         memcpy(&entry->buf, tbuffer, len + 1);
3563         if (!call_filter_check_discard(call, entry, buffer, event)) {
3564                 __buffer_unlock_commit(buffer, event);
3565                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3566         }
3567
3568 out:
3569         ring_buffer_nest_end(buffer);
3570         put_trace_buf();
3571
3572 out_nobuffer:
3573         preempt_enable_notrace();
3574         unpause_graph_tracing();
3575
3576         return len;
3577 }
3578
3579 __printf(3, 0)
3580 int trace_array_vprintk(struct trace_array *tr,
3581                         unsigned long ip, const char *fmt, va_list args)
3582 {
3583         if (tracing_selftest_running && tr == &global_trace)
3584                 return 0;
3585
3586         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3587 }
3588
3589 /**
3590  * trace_array_printk - Print a message to a specific instance
3591  * @tr: The instance trace_array descriptor
3592  * @ip: The instruction pointer that this is called from.
3593  * @fmt: The format to print (printf format)
3594  *
3595  * If a subsystem sets up its own instance, they have the right to
3596  * printk strings into their tracing instance buffer using this
3597  * function. Note, this function will not write into the top level
3598  * buffer (use trace_printk() for that), as writing into the top level
3599  * buffer should only have events that can be individually disabled.
3600  * trace_printk() is only used for debugging a kernel, and should not
3601  * be ever incorporated in normal use.
3602  *
3603  * trace_array_printk() can be used, as it will not add noise to the
3604  * top level tracing buffer.
3605  *
3606  * Note, trace_array_init_printk() must be called on @tr before this
3607  * can be used.
3608  */
3609 __printf(3, 0)
3610 int trace_array_printk(struct trace_array *tr,
3611                        unsigned long ip, const char *fmt, ...)
3612 {
3613         int ret;
3614         va_list ap;
3615
3616         if (!tr)
3617                 return -ENOENT;
3618
3619         /* This is only allowed for created instances */
3620         if (tr == &global_trace)
3621                 return 0;
3622
3623         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3624                 return 0;
3625
3626         va_start(ap, fmt);
3627         ret = trace_array_vprintk(tr, ip, fmt, ap);
3628         va_end(ap);
3629         return ret;
3630 }
3631 EXPORT_SYMBOL_GPL(trace_array_printk);
3632
3633 /**
3634  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3635  * @tr: The trace array to initialize the buffers for
3636  *
3637  * As trace_array_printk() only writes into instances, they are OK to
3638  * have in the kernel (unlike trace_printk()). This needs to be called
3639  * before trace_array_printk() can be used on a trace_array.
3640  */
3641 int trace_array_init_printk(struct trace_array *tr)
3642 {
3643         if (!tr)
3644                 return -ENOENT;
3645
3646         /* This is only allowed for created instances */
3647         if (tr == &global_trace)
3648                 return -EINVAL;
3649
3650         return alloc_percpu_trace_buffer();
3651 }
3652 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3653
3654 __printf(3, 4)
3655 int trace_array_printk_buf(struct trace_buffer *buffer,
3656                            unsigned long ip, const char *fmt, ...)
3657 {
3658         int ret;
3659         va_list ap;
3660
3661         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3662                 return 0;
3663
3664         va_start(ap, fmt);
3665         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3666         va_end(ap);
3667         return ret;
3668 }
3669
3670 __printf(2, 0)
3671 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3672 {
3673         return trace_array_vprintk(&global_trace, ip, fmt, args);
3674 }
3675 EXPORT_SYMBOL_GPL(trace_vprintk);
3676
3677 static void trace_iterator_increment(struct trace_iterator *iter)
3678 {
3679         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3680
3681         iter->idx++;
3682         if (buf_iter)
3683                 ring_buffer_iter_advance(buf_iter);
3684 }
3685
3686 static struct trace_entry *
3687 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3688                 unsigned long *lost_events)
3689 {
3690         struct ring_buffer_event *event;
3691         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3692
3693         if (buf_iter) {
3694                 event = ring_buffer_iter_peek(buf_iter, ts);
3695                 if (lost_events)
3696                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3697                                 (unsigned long)-1 : 0;
3698         } else {
3699                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3700                                          lost_events);
3701         }
3702
3703         if (event) {
3704                 iter->ent_size = ring_buffer_event_length(event);
3705                 return ring_buffer_event_data(event);
3706         }
3707         iter->ent_size = 0;
3708         return NULL;
3709 }
3710
3711 static struct trace_entry *
3712 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3713                   unsigned long *missing_events, u64 *ent_ts)
3714 {
3715         struct trace_buffer *buffer = iter->array_buffer->buffer;
3716         struct trace_entry *ent, *next = NULL;
3717         unsigned long lost_events = 0, next_lost = 0;
3718         int cpu_file = iter->cpu_file;
3719         u64 next_ts = 0, ts;
3720         int next_cpu = -1;
3721         int next_size = 0;
3722         int cpu;
3723
3724         /*
3725          * If we are in a per_cpu trace file, don't bother by iterating over
3726          * all cpu and peek directly.
3727          */
3728         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3729                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3730                         return NULL;
3731                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3732                 if (ent_cpu)
3733                         *ent_cpu = cpu_file;
3734
3735                 return ent;
3736         }
3737
3738         for_each_tracing_cpu(cpu) {
3739
3740                 if (ring_buffer_empty_cpu(buffer, cpu))
3741                         continue;
3742
3743                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3744
3745                 /*
3746                  * Pick the entry with the smallest timestamp:
3747                  */
3748                 if (ent && (!next || ts < next_ts)) {
3749                         next = ent;
3750                         next_cpu = cpu;
3751                         next_ts = ts;
3752                         next_lost = lost_events;
3753                         next_size = iter->ent_size;
3754                 }
3755         }
3756
3757         iter->ent_size = next_size;
3758
3759         if (ent_cpu)
3760                 *ent_cpu = next_cpu;
3761
3762         if (ent_ts)
3763                 *ent_ts = next_ts;
3764
3765         if (missing_events)
3766                 *missing_events = next_lost;
3767
3768         return next;
3769 }
3770
3771 #define STATIC_FMT_BUF_SIZE     128
3772 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3773
3774 char *trace_iter_expand_format(struct trace_iterator *iter)
3775 {
3776         char *tmp;
3777
3778         /*
3779          * iter->tr is NULL when used with tp_printk, which makes
3780          * this get called where it is not safe to call krealloc().
3781          */
3782         if (!iter->tr || iter->fmt == static_fmt_buf)
3783                 return NULL;
3784
3785         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3786                        GFP_KERNEL);
3787         if (tmp) {
3788                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3789                 iter->fmt = tmp;
3790         }
3791
3792         return tmp;
3793 }
3794
3795 /* Returns true if the string is safe to dereference from an event */
3796 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3797                            bool star, int len)
3798 {
3799         unsigned long addr = (unsigned long)str;
3800         struct trace_event *trace_event;
3801         struct trace_event_call *event;
3802
3803         /* Ignore strings with no length */
3804         if (star && !len)
3805                 return true;
3806
3807         /* OK if part of the event data */
3808         if ((addr >= (unsigned long)iter->ent) &&
3809             (addr < (unsigned long)iter->ent + iter->ent_size))
3810                 return true;
3811
3812         /* OK if part of the temp seq buffer */
3813         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3814             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3815                 return true;
3816
3817         /* Core rodata can not be freed */
3818         if (is_kernel_rodata(addr))
3819                 return true;
3820
3821         if (trace_is_tracepoint_string(str))
3822                 return true;
3823
3824         /*
3825          * Now this could be a module event, referencing core module
3826          * data, which is OK.
3827          */
3828         if (!iter->ent)
3829                 return false;
3830
3831         trace_event = ftrace_find_event(iter->ent->type);
3832         if (!trace_event)
3833                 return false;
3834
3835         event = container_of(trace_event, struct trace_event_call, event);
3836         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3837                 return false;
3838
3839         /* Would rather have rodata, but this will suffice */
3840         if (within_module_core(addr, event->module))
3841                 return true;
3842
3843         return false;
3844 }
3845
3846 static const char *show_buffer(struct trace_seq *s)
3847 {
3848         struct seq_buf *seq = &s->seq;
3849
3850         seq_buf_terminate(seq);
3851
3852         return seq->buffer;
3853 }
3854
3855 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3856
3857 static int test_can_verify_check(const char *fmt, ...)
3858 {
3859         char buf[16];
3860         va_list ap;
3861         int ret;
3862
3863         /*
3864          * The verifier is dependent on vsnprintf() modifies the va_list
3865          * passed to it, where it is sent as a reference. Some architectures
3866          * (like x86_32) passes it by value, which means that vsnprintf()
3867          * does not modify the va_list passed to it, and the verifier
3868          * would then need to be able to understand all the values that
3869          * vsnprintf can use. If it is passed by value, then the verifier
3870          * is disabled.
3871          */
3872         va_start(ap, fmt);
3873         vsnprintf(buf, 16, "%d", ap);
3874         ret = va_arg(ap, int);
3875         va_end(ap);
3876
3877         return ret;
3878 }
3879
3880 static void test_can_verify(void)
3881 {
3882         if (!test_can_verify_check("%d %d", 0, 1)) {
3883                 pr_info("trace event string verifier disabled\n");
3884                 static_branch_inc(&trace_no_verify);
3885         }
3886 }
3887
3888 /**
3889  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3890  * @iter: The iterator that holds the seq buffer and the event being printed
3891  * @fmt: The format used to print the event
3892  * @ap: The va_list holding the data to print from @fmt.
3893  *
3894  * This writes the data into the @iter->seq buffer using the data from
3895  * @fmt and @ap. If the format has a %s, then the source of the string
3896  * is examined to make sure it is safe to print, otherwise it will
3897  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3898  * pointer.
3899  */
3900 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3901                          va_list ap)
3902 {
3903         const char *p = fmt;
3904         const char *str;
3905         int i, j;
3906
3907         if (WARN_ON_ONCE(!fmt))
3908                 return;
3909
3910         if (static_branch_unlikely(&trace_no_verify))
3911                 goto print;
3912
3913         /* Don't bother checking when doing a ftrace_dump() */
3914         if (iter->fmt == static_fmt_buf)
3915                 goto print;
3916
3917         while (*p) {
3918                 bool star = false;
3919                 int len = 0;
3920
3921                 j = 0;
3922
3923                 /* We only care about %s and variants */
3924                 for (i = 0; p[i]; i++) {
3925                         if (i + 1 >= iter->fmt_size) {
3926                                 /*
3927                                  * If we can't expand the copy buffer,
3928                                  * just print it.
3929                                  */
3930                                 if (!trace_iter_expand_format(iter))
3931                                         goto print;
3932                         }
3933
3934                         if (p[i] == '\\' && p[i+1]) {
3935                                 i++;
3936                                 continue;
3937                         }
3938                         if (p[i] == '%') {
3939                                 /* Need to test cases like %08.*s */
3940                                 for (j = 1; p[i+j]; j++) {
3941                                         if (isdigit(p[i+j]) ||
3942                                             p[i+j] == '.')
3943                                                 continue;
3944                                         if (p[i+j] == '*') {
3945                                                 star = true;
3946                                                 continue;
3947                                         }
3948                                         break;
3949                                 }
3950                                 if (p[i+j] == 's')
3951                                         break;
3952                                 star = false;
3953                         }
3954                         j = 0;
3955                 }
3956                 /* If no %s found then just print normally */
3957                 if (!p[i])
3958                         break;
3959
3960                 /* Copy up to the %s, and print that */
3961                 strncpy(iter->fmt, p, i);
3962                 iter->fmt[i] = '\0';
3963                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3964
3965                 /*
3966                  * If iter->seq is full, the above call no longer guarantees
3967                  * that ap is in sync with fmt processing, and further calls
3968                  * to va_arg() can return wrong positional arguments.
3969                  *
3970                  * Ensure that ap is no longer used in this case.
3971                  */
3972                 if (iter->seq.full) {
3973                         p = "";
3974                         break;
3975                 }
3976
3977                 if (star)
3978                         len = va_arg(ap, int);
3979
3980                 /* The ap now points to the string data of the %s */
3981                 str = va_arg(ap, const char *);
3982
3983                 /*
3984                  * If you hit this warning, it is likely that the
3985                  * trace event in question used %s on a string that
3986                  * was saved at the time of the event, but may not be
3987                  * around when the trace is read. Use __string(),
3988                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3989                  * instead. See samples/trace_events/trace-events-sample.h
3990                  * for reference.
3991                  */
3992                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3993                               "fmt: '%s' current_buffer: '%s'",
3994                               fmt, show_buffer(&iter->seq))) {
3995                         int ret;
3996
3997                         /* Try to safely read the string */
3998                         if (star) {
3999                                 if (len + 1 > iter->fmt_size)
4000                                         len = iter->fmt_size - 1;
4001                                 if (len < 0)
4002                                         len = 0;
4003                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
4004                                 iter->fmt[len] = 0;
4005                                 star = false;
4006                         } else {
4007                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
4008                                                                   iter->fmt_size);
4009                         }
4010                         if (ret < 0)
4011                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
4012                         else
4013                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
4014                                                  str, iter->fmt);
4015                         str = "[UNSAFE-MEMORY]";
4016                         strcpy(iter->fmt, "%s");
4017                 } else {
4018                         strncpy(iter->fmt, p + i, j + 1);
4019                         iter->fmt[j+1] = '\0';
4020                 }
4021                 if (star)
4022                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
4023                 else
4024                         trace_seq_printf(&iter->seq, iter->fmt, str);
4025
4026                 p += i + j + 1;
4027         }
4028  print:
4029         if (*p)
4030                 trace_seq_vprintf(&iter->seq, p, ap);
4031 }
4032
4033 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4034 {
4035         const char *p, *new_fmt;
4036         char *q;
4037
4038         if (WARN_ON_ONCE(!fmt))
4039                 return fmt;
4040
4041         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4042                 return fmt;
4043
4044         p = fmt;
4045         new_fmt = q = iter->fmt;
4046         while (*p) {
4047                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4048                         if (!trace_iter_expand_format(iter))
4049                                 return fmt;
4050
4051                         q += iter->fmt - new_fmt;
4052                         new_fmt = iter->fmt;
4053                 }
4054
4055                 *q++ = *p++;
4056
4057                 /* Replace %p with %px */
4058                 if (p[-1] == '%') {
4059                         if (p[0] == '%') {
4060                                 *q++ = *p++;
4061                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4062                                 *q++ = *p++;
4063                                 *q++ = 'x';
4064                         }
4065                 }
4066         }
4067         *q = '\0';
4068
4069         return new_fmt;
4070 }
4071
4072 #define STATIC_TEMP_BUF_SIZE    128
4073 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4074
4075 /* Find the next real entry, without updating the iterator itself */
4076 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4077                                           int *ent_cpu, u64 *ent_ts)
4078 {
4079         /* __find_next_entry will reset ent_size */
4080         int ent_size = iter->ent_size;
4081         struct trace_entry *entry;
4082
4083         /*
4084          * If called from ftrace_dump(), then the iter->temp buffer
4085          * will be the static_temp_buf and not created from kmalloc.
4086          * If the entry size is greater than the buffer, we can
4087          * not save it. Just return NULL in that case. This is only
4088          * used to add markers when two consecutive events' time
4089          * stamps have a large delta. See trace_print_lat_context()
4090          */
4091         if (iter->temp == static_temp_buf &&
4092             STATIC_TEMP_BUF_SIZE < ent_size)
4093                 return NULL;
4094
4095         /*
4096          * The __find_next_entry() may call peek_next_entry(), which may
4097          * call ring_buffer_peek() that may make the contents of iter->ent
4098          * undefined. Need to copy iter->ent now.
4099          */
4100         if (iter->ent && iter->ent != iter->temp) {
4101                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4102                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4103                         void *temp;
4104                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4105                         if (!temp)
4106                                 return NULL;
4107                         kfree(iter->temp);
4108                         iter->temp = temp;
4109                         iter->temp_size = iter->ent_size;
4110                 }
4111                 memcpy(iter->temp, iter->ent, iter->ent_size);
4112                 iter->ent = iter->temp;
4113         }
4114         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4115         /* Put back the original ent_size */
4116         iter->ent_size = ent_size;
4117
4118         return entry;
4119 }
4120
4121 /* Find the next real entry, and increment the iterator to the next entry */
4122 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4123 {
4124         iter->ent = __find_next_entry(iter, &iter->cpu,
4125                                       &iter->lost_events, &iter->ts);
4126
4127         if (iter->ent)
4128                 trace_iterator_increment(iter);
4129
4130         return iter->ent ? iter : NULL;
4131 }
4132
4133 static void trace_consume(struct trace_iterator *iter)
4134 {
4135         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4136                             &iter->lost_events);
4137 }
4138
4139 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4140 {
4141         struct trace_iterator *iter = m->private;
4142         int i = (int)*pos;
4143         void *ent;
4144
4145         WARN_ON_ONCE(iter->leftover);
4146
4147         (*pos)++;
4148
4149         /* can't go backwards */
4150         if (iter->idx > i)
4151                 return NULL;
4152
4153         if (iter->idx < 0)
4154                 ent = trace_find_next_entry_inc(iter);
4155         else
4156                 ent = iter;
4157
4158         while (ent && iter->idx < i)
4159                 ent = trace_find_next_entry_inc(iter);
4160
4161         iter->pos = *pos;
4162
4163         return ent;
4164 }
4165
4166 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4167 {
4168         struct ring_buffer_iter *buf_iter;
4169         unsigned long entries = 0;
4170         u64 ts;
4171
4172         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4173
4174         buf_iter = trace_buffer_iter(iter, cpu);
4175         if (!buf_iter)
4176                 return;
4177
4178         ring_buffer_iter_reset(buf_iter);
4179
4180         /*
4181          * We could have the case with the max latency tracers
4182          * that a reset never took place on a cpu. This is evident
4183          * by the timestamp being before the start of the buffer.
4184          */
4185         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4186                 if (ts >= iter->array_buffer->time_start)
4187                         break;
4188                 entries++;
4189                 ring_buffer_iter_advance(buf_iter);
4190         }
4191
4192         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4193 }
4194
4195 /*
4196  * The current tracer is copied to avoid a global locking
4197  * all around.
4198  */
4199 static void *s_start(struct seq_file *m, loff_t *pos)
4200 {
4201         struct trace_iterator *iter = m->private;
4202         struct trace_array *tr = iter->tr;
4203         int cpu_file = iter->cpu_file;
4204         void *p = NULL;
4205         loff_t l = 0;
4206         int cpu;
4207
4208         /*
4209          * copy the tracer to avoid using a global lock all around.
4210          * iter->trace is a copy of current_trace, the pointer to the
4211          * name may be used instead of a strcmp(), as iter->trace->name
4212          * will point to the same string as current_trace->name.
4213          */
4214         mutex_lock(&trace_types_lock);
4215         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4216                 *iter->trace = *tr->current_trace;
4217         mutex_unlock(&trace_types_lock);
4218
4219 #ifdef CONFIG_TRACER_MAX_TRACE
4220         if (iter->snapshot && iter->trace->use_max_tr)
4221                 return ERR_PTR(-EBUSY);
4222 #endif
4223
4224         if (*pos != iter->pos) {
4225                 iter->ent = NULL;
4226                 iter->cpu = 0;
4227                 iter->idx = -1;
4228
4229                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4230                         for_each_tracing_cpu(cpu)
4231                                 tracing_iter_reset(iter, cpu);
4232                 } else
4233                         tracing_iter_reset(iter, cpu_file);
4234
4235                 iter->leftover = 0;
4236                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4237                         ;
4238
4239         } else {
4240                 /*
4241                  * If we overflowed the seq_file before, then we want
4242                  * to just reuse the trace_seq buffer again.
4243                  */
4244                 if (iter->leftover)
4245                         p = iter;
4246                 else {
4247                         l = *pos - 1;
4248                         p = s_next(m, p, &l);
4249                 }
4250         }
4251
4252         trace_event_read_lock();
4253         trace_access_lock(cpu_file);
4254         return p;
4255 }
4256
4257 static void s_stop(struct seq_file *m, void *p)
4258 {
4259         struct trace_iterator *iter = m->private;
4260
4261 #ifdef CONFIG_TRACER_MAX_TRACE
4262         if (iter->snapshot && iter->trace->use_max_tr)
4263                 return;
4264 #endif
4265
4266         trace_access_unlock(iter->cpu_file);
4267         trace_event_read_unlock();
4268 }
4269
4270 static void
4271 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4272                       unsigned long *entries, int cpu)
4273 {
4274         unsigned long count;
4275
4276         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4277         /*
4278          * If this buffer has skipped entries, then we hold all
4279          * entries for the trace and we need to ignore the
4280          * ones before the time stamp.
4281          */
4282         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4283                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4284                 /* total is the same as the entries */
4285                 *total = count;
4286         } else
4287                 *total = count +
4288                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4289         *entries = count;
4290 }
4291
4292 static void
4293 get_total_entries(struct array_buffer *buf,
4294                   unsigned long *total, unsigned long *entries)
4295 {
4296         unsigned long t, e;
4297         int cpu;
4298
4299         *total = 0;
4300         *entries = 0;
4301
4302         for_each_tracing_cpu(cpu) {
4303                 get_total_entries_cpu(buf, &t, &e, cpu);
4304                 *total += t;
4305                 *entries += e;
4306         }
4307 }
4308
4309 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4310 {
4311         unsigned long total, entries;
4312
4313         if (!tr)
4314                 tr = &global_trace;
4315
4316         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4317
4318         return entries;
4319 }
4320
4321 unsigned long trace_total_entries(struct trace_array *tr)
4322 {
4323         unsigned long total, entries;
4324
4325         if (!tr)
4326                 tr = &global_trace;
4327
4328         get_total_entries(&tr->array_buffer, &total, &entries);
4329
4330         return entries;
4331 }
4332
4333 static void print_lat_help_header(struct seq_file *m)
4334 {
4335         seq_puts(m, "#                    _------=> CPU#            \n"
4336                     "#                   / _-----=> irqs-off/BH-disabled\n"
4337                     "#                  | / _----=> need-resched    \n"
4338                     "#                  || / _---=> hardirq/softirq \n"
4339                     "#                  ||| / _--=> preempt-depth   \n"
4340                     "#                  |||| / _-=> migrate-disable \n"
4341                     "#                  ||||| /     delay           \n"
4342                     "#  cmd     pid     |||||| time  |   caller     \n"
4343                     "#     \\   /        ||||||  \\    |    /       \n");
4344 }
4345
4346 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4347 {
4348         unsigned long total;
4349         unsigned long entries;
4350
4351         get_total_entries(buf, &total, &entries);
4352         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4353                    entries, total, num_online_cpus());
4354         seq_puts(m, "#\n");
4355 }
4356
4357 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4358                                    unsigned int flags)
4359 {
4360         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4361
4362         print_event_info(buf, m);
4363
4364         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4365         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4366 }
4367
4368 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4369                                        unsigned int flags)
4370 {
4371         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4372         static const char space[] = "            ";
4373         int prec = tgid ? 12 : 2;
4374
4375         print_event_info(buf, m);
4376
4377         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4378         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4379         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4380         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4381         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4382         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4383         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4384         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4385 }
4386
4387 void
4388 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4389 {
4390         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4391         struct array_buffer *buf = iter->array_buffer;
4392         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4393         struct tracer *type = iter->trace;
4394         unsigned long entries;
4395         unsigned long total;
4396         const char *name = type->name;
4397
4398         get_total_entries(buf, &total, &entries);
4399
4400         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4401                    name, UTS_RELEASE);
4402         seq_puts(m, "# -----------------------------------"
4403                  "---------------------------------\n");
4404         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4405                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4406                    nsecs_to_usecs(data->saved_latency),
4407                    entries,
4408                    total,
4409                    buf->cpu,
4410                    preempt_model_none()      ? "server" :
4411                    preempt_model_voluntary() ? "desktop" :
4412                    preempt_model_full()      ? "preempt" :
4413                    preempt_model_rt()        ? "preempt_rt" :
4414                    "unknown",
4415                    /* These are reserved for later use */
4416                    0, 0, 0, 0);
4417 #ifdef CONFIG_SMP
4418         seq_printf(m, " #P:%d)\n", num_online_cpus());
4419 #else
4420         seq_puts(m, ")\n");
4421 #endif
4422         seq_puts(m, "#    -----------------\n");
4423         seq_printf(m, "#    | task: %.16s-%d "
4424                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4425                    data->comm, data->pid,
4426                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4427                    data->policy, data->rt_priority);
4428         seq_puts(m, "#    -----------------\n");
4429
4430         if (data->critical_start) {
4431                 seq_puts(m, "#  => started at: ");
4432                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4433                 trace_print_seq(m, &iter->seq);
4434                 seq_puts(m, "\n#  => ended at:   ");
4435                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4436                 trace_print_seq(m, &iter->seq);
4437                 seq_puts(m, "\n#\n");
4438         }
4439
4440         seq_puts(m, "#\n");
4441 }
4442
4443 static void test_cpu_buff_start(struct trace_iterator *iter)
4444 {
4445         struct trace_seq *s = &iter->seq;
4446         struct trace_array *tr = iter->tr;
4447
4448         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4449                 return;
4450
4451         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4452                 return;
4453
4454         if (cpumask_available(iter->started) &&
4455             cpumask_test_cpu(iter->cpu, iter->started))
4456                 return;
4457
4458         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4459                 return;
4460
4461         if (cpumask_available(iter->started))
4462                 cpumask_set_cpu(iter->cpu, iter->started);
4463
4464         /* Don't print started cpu buffer for the first entry of the trace */
4465         if (iter->idx > 1)
4466                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4467                                 iter->cpu);
4468 }
4469
4470 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4471 {
4472         struct trace_array *tr = iter->tr;
4473         struct trace_seq *s = &iter->seq;
4474         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4475         struct trace_entry *entry;
4476         struct trace_event *event;
4477
4478         entry = iter->ent;
4479
4480         test_cpu_buff_start(iter);
4481
4482         event = ftrace_find_event(entry->type);
4483
4484         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4485                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4486                         trace_print_lat_context(iter);
4487                 else
4488                         trace_print_context(iter);
4489         }
4490
4491         if (trace_seq_has_overflowed(s))
4492                 return TRACE_TYPE_PARTIAL_LINE;
4493
4494         if (event) {
4495                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4496                         return print_event_fields(iter, event);
4497                 return event->funcs->trace(iter, sym_flags, event);
4498         }
4499
4500         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4501
4502         return trace_handle_return(s);
4503 }
4504
4505 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4506 {
4507         struct trace_array *tr = iter->tr;
4508         struct trace_seq *s = &iter->seq;
4509         struct trace_entry *entry;
4510         struct trace_event *event;
4511
4512         entry = iter->ent;
4513
4514         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4515                 trace_seq_printf(s, "%d %d %llu ",
4516                                  entry->pid, iter->cpu, iter->ts);
4517
4518         if (trace_seq_has_overflowed(s))
4519                 return TRACE_TYPE_PARTIAL_LINE;
4520
4521         event = ftrace_find_event(entry->type);
4522         if (event)
4523                 return event->funcs->raw(iter, 0, event);
4524
4525         trace_seq_printf(s, "%d ?\n", entry->type);
4526
4527         return trace_handle_return(s);
4528 }
4529
4530 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4531 {
4532         struct trace_array *tr = iter->tr;
4533         struct trace_seq *s = &iter->seq;
4534         unsigned char newline = '\n';
4535         struct trace_entry *entry;
4536         struct trace_event *event;
4537
4538         entry = iter->ent;
4539
4540         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4541                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4542                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4543                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4544                 if (trace_seq_has_overflowed(s))
4545                         return TRACE_TYPE_PARTIAL_LINE;
4546         }
4547
4548         event = ftrace_find_event(entry->type);
4549         if (event) {
4550                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4551                 if (ret != TRACE_TYPE_HANDLED)
4552                         return ret;
4553         }
4554
4555         SEQ_PUT_FIELD(s, newline);
4556
4557         return trace_handle_return(s);
4558 }
4559
4560 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4561 {
4562         struct trace_array *tr = iter->tr;
4563         struct trace_seq *s = &iter->seq;
4564         struct trace_entry *entry;
4565         struct trace_event *event;
4566
4567         entry = iter->ent;
4568
4569         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4570                 SEQ_PUT_FIELD(s, entry->pid);
4571                 SEQ_PUT_FIELD(s, iter->cpu);
4572                 SEQ_PUT_FIELD(s, iter->ts);
4573                 if (trace_seq_has_overflowed(s))
4574                         return TRACE_TYPE_PARTIAL_LINE;
4575         }
4576
4577         event = ftrace_find_event(entry->type);
4578         return event ? event->funcs->binary(iter, 0, event) :
4579                 TRACE_TYPE_HANDLED;
4580 }
4581
4582 int trace_empty(struct trace_iterator *iter)
4583 {
4584         struct ring_buffer_iter *buf_iter;
4585         int cpu;
4586
4587         /* If we are looking at one CPU buffer, only check that one */
4588         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4589                 cpu = iter->cpu_file;
4590                 buf_iter = trace_buffer_iter(iter, cpu);
4591                 if (buf_iter) {
4592                         if (!ring_buffer_iter_empty(buf_iter))
4593                                 return 0;
4594                 } else {
4595                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4596                                 return 0;
4597                 }
4598                 return 1;
4599         }
4600
4601         for_each_tracing_cpu(cpu) {
4602                 buf_iter = trace_buffer_iter(iter, cpu);
4603                 if (buf_iter) {
4604                         if (!ring_buffer_iter_empty(buf_iter))
4605                                 return 0;
4606                 } else {
4607                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4608                                 return 0;
4609                 }
4610         }
4611
4612         return 1;
4613 }
4614
4615 /*  Called with trace_event_read_lock() held. */
4616 enum print_line_t print_trace_line(struct trace_iterator *iter)
4617 {
4618         struct trace_array *tr = iter->tr;
4619         unsigned long trace_flags = tr->trace_flags;
4620         enum print_line_t ret;
4621
4622         if (iter->lost_events) {
4623                 if (iter->lost_events == (unsigned long)-1)
4624                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4625                                          iter->cpu);
4626                 else
4627                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4628                                          iter->cpu, iter->lost_events);
4629                 if (trace_seq_has_overflowed(&iter->seq))
4630                         return TRACE_TYPE_PARTIAL_LINE;
4631         }
4632
4633         if (iter->trace && iter->trace->print_line) {
4634                 ret = iter->trace->print_line(iter);
4635                 if (ret != TRACE_TYPE_UNHANDLED)
4636                         return ret;
4637         }
4638
4639         if (iter->ent->type == TRACE_BPUTS &&
4640                         trace_flags & TRACE_ITER_PRINTK &&
4641                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4642                 return trace_print_bputs_msg_only(iter);
4643
4644         if (iter->ent->type == TRACE_BPRINT &&
4645                         trace_flags & TRACE_ITER_PRINTK &&
4646                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4647                 return trace_print_bprintk_msg_only(iter);
4648
4649         if (iter->ent->type == TRACE_PRINT &&
4650                         trace_flags & TRACE_ITER_PRINTK &&
4651                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4652                 return trace_print_printk_msg_only(iter);
4653
4654         if (trace_flags & TRACE_ITER_BIN)
4655                 return print_bin_fmt(iter);
4656
4657         if (trace_flags & TRACE_ITER_HEX)
4658                 return print_hex_fmt(iter);
4659
4660         if (trace_flags & TRACE_ITER_RAW)
4661                 return print_raw_fmt(iter);
4662
4663         return print_trace_fmt(iter);
4664 }
4665
4666 void trace_latency_header(struct seq_file *m)
4667 {
4668         struct trace_iterator *iter = m->private;
4669         struct trace_array *tr = iter->tr;
4670
4671         /* print nothing if the buffers are empty */
4672         if (trace_empty(iter))
4673                 return;
4674
4675         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4676                 print_trace_header(m, iter);
4677
4678         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4679                 print_lat_help_header(m);
4680 }
4681
4682 void trace_default_header(struct seq_file *m)
4683 {
4684         struct trace_iterator *iter = m->private;
4685         struct trace_array *tr = iter->tr;
4686         unsigned long trace_flags = tr->trace_flags;
4687
4688         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4689                 return;
4690
4691         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4692                 /* print nothing if the buffers are empty */
4693                 if (trace_empty(iter))
4694                         return;
4695                 print_trace_header(m, iter);
4696                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4697                         print_lat_help_header(m);
4698         } else {
4699                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4700                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4701                                 print_func_help_header_irq(iter->array_buffer,
4702                                                            m, trace_flags);
4703                         else
4704                                 print_func_help_header(iter->array_buffer, m,
4705                                                        trace_flags);
4706                 }
4707         }
4708 }
4709
4710 static void test_ftrace_alive(struct seq_file *m)
4711 {
4712         if (!ftrace_is_dead())
4713                 return;
4714         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4715                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4716 }
4717
4718 #ifdef CONFIG_TRACER_MAX_TRACE
4719 static void show_snapshot_main_help(struct seq_file *m)
4720 {
4721         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4722                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4723                     "#                      Takes a snapshot of the main buffer.\n"
4724                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4725                     "#                      (Doesn't have to be '2' works with any number that\n"
4726                     "#                       is not a '0' or '1')\n");
4727 }
4728
4729 static void show_snapshot_percpu_help(struct seq_file *m)
4730 {
4731         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4732 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4733         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4734                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4735 #else
4736         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4737                     "#                     Must use main snapshot file to allocate.\n");
4738 #endif
4739         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4740                     "#                      (Doesn't have to be '2' works with any number that\n"
4741                     "#                       is not a '0' or '1')\n");
4742 }
4743
4744 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4745 {
4746         if (iter->tr->allocated_snapshot)
4747                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4748         else
4749                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4750
4751         seq_puts(m, "# Snapshot commands:\n");
4752         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4753                 show_snapshot_main_help(m);
4754         else
4755                 show_snapshot_percpu_help(m);
4756 }
4757 #else
4758 /* Should never be called */
4759 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4760 #endif
4761
4762 static int s_show(struct seq_file *m, void *v)
4763 {
4764         struct trace_iterator *iter = v;
4765         int ret;
4766
4767         if (iter->ent == NULL) {
4768                 if (iter->tr) {
4769                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4770                         seq_puts(m, "#\n");
4771                         test_ftrace_alive(m);
4772                 }
4773                 if (iter->snapshot && trace_empty(iter))
4774                         print_snapshot_help(m, iter);
4775                 else if (iter->trace && iter->trace->print_header)
4776                         iter->trace->print_header(m);
4777                 else
4778                         trace_default_header(m);
4779
4780         } else if (iter->leftover) {
4781                 /*
4782                  * If we filled the seq_file buffer earlier, we
4783                  * want to just show it now.
4784                  */
4785                 ret = trace_print_seq(m, &iter->seq);
4786
4787                 /* ret should this time be zero, but you never know */
4788                 iter->leftover = ret;
4789
4790         } else {
4791                 print_trace_line(iter);
4792                 ret = trace_print_seq(m, &iter->seq);
4793                 /*
4794                  * If we overflow the seq_file buffer, then it will
4795                  * ask us for this data again at start up.
4796                  * Use that instead.
4797                  *  ret is 0 if seq_file write succeeded.
4798                  *        -1 otherwise.
4799                  */
4800                 iter->leftover = ret;
4801         }
4802
4803         return 0;
4804 }
4805
4806 /*
4807  * Should be used after trace_array_get(), trace_types_lock
4808  * ensures that i_cdev was already initialized.
4809  */
4810 static inline int tracing_get_cpu(struct inode *inode)
4811 {
4812         if (inode->i_cdev) /* See trace_create_cpu_file() */
4813                 return (long)inode->i_cdev - 1;
4814         return RING_BUFFER_ALL_CPUS;
4815 }
4816
4817 static const struct seq_operations tracer_seq_ops = {
4818         .start          = s_start,
4819         .next           = s_next,
4820         .stop           = s_stop,
4821         .show           = s_show,
4822 };
4823
4824 static struct trace_iterator *
4825 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4826 {
4827         struct trace_array *tr = inode->i_private;
4828         struct trace_iterator *iter;
4829         int cpu;
4830
4831         if (tracing_disabled)
4832                 return ERR_PTR(-ENODEV);
4833
4834         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4835         if (!iter)
4836                 return ERR_PTR(-ENOMEM);
4837
4838         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4839                                     GFP_KERNEL);
4840         if (!iter->buffer_iter)
4841                 goto release;
4842
4843         /*
4844          * trace_find_next_entry() may need to save off iter->ent.
4845          * It will place it into the iter->temp buffer. As most
4846          * events are less than 128, allocate a buffer of that size.
4847          * If one is greater, then trace_find_next_entry() will
4848          * allocate a new buffer to adjust for the bigger iter->ent.
4849          * It's not critical if it fails to get allocated here.
4850          */
4851         iter->temp = kmalloc(128, GFP_KERNEL);
4852         if (iter->temp)
4853                 iter->temp_size = 128;
4854
4855         /*
4856          * trace_event_printf() may need to modify given format
4857          * string to replace %p with %px so that it shows real address
4858          * instead of hash value. However, that is only for the event
4859          * tracing, other tracer may not need. Defer the allocation
4860          * until it is needed.
4861          */
4862         iter->fmt = NULL;
4863         iter->fmt_size = 0;
4864
4865         /*
4866          * We make a copy of the current tracer to avoid concurrent
4867          * changes on it while we are reading.
4868          */
4869         mutex_lock(&trace_types_lock);
4870         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4871         if (!iter->trace)
4872                 goto fail;
4873
4874         *iter->trace = *tr->current_trace;
4875
4876         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4877                 goto fail;
4878
4879         iter->tr = tr;
4880
4881 #ifdef CONFIG_TRACER_MAX_TRACE
4882         /* Currently only the top directory has a snapshot */
4883         if (tr->current_trace->print_max || snapshot)
4884                 iter->array_buffer = &tr->max_buffer;
4885         else
4886 #endif
4887                 iter->array_buffer = &tr->array_buffer;
4888         iter->snapshot = snapshot;
4889         iter->pos = -1;
4890         iter->cpu_file = tracing_get_cpu(inode);
4891         mutex_init(&iter->mutex);
4892
4893         /* Notify the tracer early; before we stop tracing. */
4894         if (iter->trace->open)
4895                 iter->trace->open(iter);
4896
4897         /* Annotate start of buffers if we had overruns */
4898         if (ring_buffer_overruns(iter->array_buffer->buffer))
4899                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4900
4901         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4902         if (trace_clocks[tr->clock_id].in_ns)
4903                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4904
4905         /*
4906          * If pause-on-trace is enabled, then stop the trace while
4907          * dumping, unless this is the "snapshot" file
4908          */
4909         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4910                 tracing_stop_tr(tr);
4911
4912         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4913                 for_each_tracing_cpu(cpu) {
4914                         iter->buffer_iter[cpu] =
4915                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4916                                                          cpu, GFP_KERNEL);
4917                 }
4918                 ring_buffer_read_prepare_sync();
4919                 for_each_tracing_cpu(cpu) {
4920                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4921                         tracing_iter_reset(iter, cpu);
4922                 }
4923         } else {
4924                 cpu = iter->cpu_file;
4925                 iter->buffer_iter[cpu] =
4926                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4927                                                  cpu, GFP_KERNEL);
4928                 ring_buffer_read_prepare_sync();
4929                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4930                 tracing_iter_reset(iter, cpu);
4931         }
4932
4933         mutex_unlock(&trace_types_lock);
4934
4935         return iter;
4936
4937  fail:
4938         mutex_unlock(&trace_types_lock);
4939         kfree(iter->trace);
4940         kfree(iter->temp);
4941         kfree(iter->buffer_iter);
4942 release:
4943         seq_release_private(inode, file);
4944         return ERR_PTR(-ENOMEM);
4945 }
4946
4947 int tracing_open_generic(struct inode *inode, struct file *filp)
4948 {
4949         int ret;
4950
4951         ret = tracing_check_open_get_tr(NULL);
4952         if (ret)
4953                 return ret;
4954
4955         filp->private_data = inode->i_private;
4956         return 0;
4957 }
4958
4959 bool tracing_is_disabled(void)
4960 {
4961         return (tracing_disabled) ? true: false;
4962 }
4963
4964 /*
4965  * Open and update trace_array ref count.
4966  * Must have the current trace_array passed to it.
4967  */
4968 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4969 {
4970         struct trace_array *tr = inode->i_private;
4971         int ret;
4972
4973         ret = tracing_check_open_get_tr(tr);
4974         if (ret)
4975                 return ret;
4976
4977         filp->private_data = inode->i_private;
4978
4979         return 0;
4980 }
4981
4982 static int tracing_mark_open(struct inode *inode, struct file *filp)
4983 {
4984         stream_open(inode, filp);
4985         return tracing_open_generic_tr(inode, filp);
4986 }
4987
4988 static int tracing_release(struct inode *inode, struct file *file)
4989 {
4990         struct trace_array *tr = inode->i_private;
4991         struct seq_file *m = file->private_data;
4992         struct trace_iterator *iter;
4993         int cpu;
4994
4995         if (!(file->f_mode & FMODE_READ)) {
4996                 trace_array_put(tr);
4997                 return 0;
4998         }
4999
5000         /* Writes do not use seq_file */
5001         iter = m->private;
5002         mutex_lock(&trace_types_lock);
5003
5004         for_each_tracing_cpu(cpu) {
5005                 if (iter->buffer_iter[cpu])
5006                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
5007         }
5008
5009         if (iter->trace && iter->trace->close)
5010                 iter->trace->close(iter);
5011
5012         if (!iter->snapshot && tr->stop_count)
5013                 /* reenable tracing if it was previously enabled */
5014                 tracing_start_tr(tr);
5015
5016         __trace_array_put(tr);
5017
5018         mutex_unlock(&trace_types_lock);
5019
5020         mutex_destroy(&iter->mutex);
5021         free_cpumask_var(iter->started);
5022         kfree(iter->fmt);
5023         kfree(iter->temp);
5024         kfree(iter->trace);
5025         kfree(iter->buffer_iter);
5026         seq_release_private(inode, file);
5027
5028         return 0;
5029 }
5030
5031 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5032 {
5033         struct trace_array *tr = inode->i_private;
5034
5035         trace_array_put(tr);
5036         return 0;
5037 }
5038
5039 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5040 {
5041         struct trace_array *tr = inode->i_private;
5042
5043         trace_array_put(tr);
5044
5045         return single_release(inode, file);
5046 }
5047
5048 static int tracing_open(struct inode *inode, struct file *file)
5049 {
5050         struct trace_array *tr = inode->i_private;
5051         struct trace_iterator *iter;
5052         int ret;
5053
5054         ret = tracing_check_open_get_tr(tr);
5055         if (ret)
5056                 return ret;
5057
5058         /* If this file was open for write, then erase contents */
5059         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5060                 int cpu = tracing_get_cpu(inode);
5061                 struct array_buffer *trace_buf = &tr->array_buffer;
5062
5063 #ifdef CONFIG_TRACER_MAX_TRACE
5064                 if (tr->current_trace->print_max)
5065                         trace_buf = &tr->max_buffer;
5066 #endif
5067
5068                 if (cpu == RING_BUFFER_ALL_CPUS)
5069                         tracing_reset_online_cpus(trace_buf);
5070                 else
5071                         tracing_reset_cpu(trace_buf, cpu);
5072         }
5073
5074         if (file->f_mode & FMODE_READ) {
5075                 iter = __tracing_open(inode, file, false);
5076                 if (IS_ERR(iter))
5077                         ret = PTR_ERR(iter);
5078                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5079                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5080         }
5081
5082         if (ret < 0)
5083                 trace_array_put(tr);
5084
5085         return ret;
5086 }
5087
5088 /*
5089  * Some tracers are not suitable for instance buffers.
5090  * A tracer is always available for the global array (toplevel)
5091  * or if it explicitly states that it is.
5092  */
5093 static bool
5094 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5095 {
5096         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5097 }
5098
5099 /* Find the next tracer that this trace array may use */
5100 static struct tracer *
5101 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5102 {
5103         while (t && !trace_ok_for_array(t, tr))
5104                 t = t->next;
5105
5106         return t;
5107 }
5108
5109 static void *
5110 t_next(struct seq_file *m, void *v, loff_t *pos)
5111 {
5112         struct trace_array *tr = m->private;
5113         struct tracer *t = v;
5114
5115         (*pos)++;
5116
5117         if (t)
5118                 t = get_tracer_for_array(tr, t->next);
5119
5120         return t;
5121 }
5122
5123 static void *t_start(struct seq_file *m, loff_t *pos)
5124 {
5125         struct trace_array *tr = m->private;
5126         struct tracer *t;
5127         loff_t l = 0;
5128
5129         mutex_lock(&trace_types_lock);
5130
5131         t = get_tracer_for_array(tr, trace_types);
5132         for (; t && l < *pos; t = t_next(m, t, &l))
5133                         ;
5134
5135         return t;
5136 }
5137
5138 static void t_stop(struct seq_file *m, void *p)
5139 {
5140         mutex_unlock(&trace_types_lock);
5141 }
5142
5143 static int t_show(struct seq_file *m, void *v)
5144 {
5145         struct tracer *t = v;
5146
5147         if (!t)
5148                 return 0;
5149
5150         seq_puts(m, t->name);
5151         if (t->next)
5152                 seq_putc(m, ' ');
5153         else
5154                 seq_putc(m, '\n');
5155
5156         return 0;
5157 }
5158
5159 static const struct seq_operations show_traces_seq_ops = {
5160         .start          = t_start,
5161         .next           = t_next,
5162         .stop           = t_stop,
5163         .show           = t_show,
5164 };
5165
5166 static int show_traces_open(struct inode *inode, struct file *file)
5167 {
5168         struct trace_array *tr = inode->i_private;
5169         struct seq_file *m;
5170         int ret;
5171
5172         ret = tracing_check_open_get_tr(tr);
5173         if (ret)
5174                 return ret;
5175
5176         ret = seq_open(file, &show_traces_seq_ops);
5177         if (ret) {
5178                 trace_array_put(tr);
5179                 return ret;
5180         }
5181
5182         m = file->private_data;
5183         m->private = tr;
5184
5185         return 0;
5186 }
5187
5188 static int show_traces_release(struct inode *inode, struct file *file)
5189 {
5190         struct trace_array *tr = inode->i_private;
5191
5192         trace_array_put(tr);
5193         return seq_release(inode, file);
5194 }
5195
5196 static ssize_t
5197 tracing_write_stub(struct file *filp, const char __user *ubuf,
5198                    size_t count, loff_t *ppos)
5199 {
5200         return count;
5201 }
5202
5203 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5204 {
5205         int ret;
5206
5207         if (file->f_mode & FMODE_READ)
5208                 ret = seq_lseek(file, offset, whence);
5209         else
5210                 file->f_pos = ret = 0;
5211
5212         return ret;
5213 }
5214
5215 static const struct file_operations tracing_fops = {
5216         .open           = tracing_open,
5217         .read           = seq_read,
5218         .read_iter      = seq_read_iter,
5219         .splice_read    = copy_splice_read,
5220         .write          = tracing_write_stub,
5221         .llseek         = tracing_lseek,
5222         .release        = tracing_release,
5223 };
5224
5225 static const struct file_operations show_traces_fops = {
5226         .open           = show_traces_open,
5227         .read           = seq_read,
5228         .llseek         = seq_lseek,
5229         .release        = show_traces_release,
5230 };
5231
5232 static ssize_t
5233 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5234                      size_t count, loff_t *ppos)
5235 {
5236         struct trace_array *tr = file_inode(filp)->i_private;
5237         char *mask_str;
5238         int len;
5239
5240         len = snprintf(NULL, 0, "%*pb\n",
5241                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5242         mask_str = kmalloc(len, GFP_KERNEL);
5243         if (!mask_str)
5244                 return -ENOMEM;
5245
5246         len = snprintf(mask_str, len, "%*pb\n",
5247                        cpumask_pr_args(tr->tracing_cpumask));
5248         if (len >= count) {
5249                 count = -EINVAL;
5250                 goto out_err;
5251         }
5252         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5253
5254 out_err:
5255         kfree(mask_str);
5256
5257         return count;
5258 }
5259
5260 int tracing_set_cpumask(struct trace_array *tr,
5261                         cpumask_var_t tracing_cpumask_new)
5262 {
5263         int cpu;
5264
5265         if (!tr)
5266                 return -EINVAL;
5267
5268         local_irq_disable();
5269         arch_spin_lock(&tr->max_lock);
5270         for_each_tracing_cpu(cpu) {
5271                 /*
5272                  * Increase/decrease the disabled counter if we are
5273                  * about to flip a bit in the cpumask:
5274                  */
5275                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5276                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5277                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5278                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5279                 }
5280                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5281                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5282                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5283                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5284                 }
5285         }
5286         arch_spin_unlock(&tr->max_lock);
5287         local_irq_enable();
5288
5289         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5290
5291         return 0;
5292 }
5293
5294 static ssize_t
5295 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5296                       size_t count, loff_t *ppos)
5297 {
5298         struct trace_array *tr = file_inode(filp)->i_private;
5299         cpumask_var_t tracing_cpumask_new;
5300         int err;
5301
5302         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5303                 return -ENOMEM;
5304
5305         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5306         if (err)
5307                 goto err_free;
5308
5309         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5310         if (err)
5311                 goto err_free;
5312
5313         free_cpumask_var(tracing_cpumask_new);
5314
5315         return count;
5316
5317 err_free:
5318         free_cpumask_var(tracing_cpumask_new);
5319
5320         return err;
5321 }
5322
5323 static const struct file_operations tracing_cpumask_fops = {
5324         .open           = tracing_open_generic_tr,
5325         .read           = tracing_cpumask_read,
5326         .write          = tracing_cpumask_write,
5327         .release        = tracing_release_generic_tr,
5328         .llseek         = generic_file_llseek,
5329 };
5330
5331 static int tracing_trace_options_show(struct seq_file *m, void *v)
5332 {
5333         struct tracer_opt *trace_opts;
5334         struct trace_array *tr = m->private;
5335         u32 tracer_flags;
5336         int i;
5337
5338         mutex_lock(&trace_types_lock);
5339         tracer_flags = tr->current_trace->flags->val;
5340         trace_opts = tr->current_trace->flags->opts;
5341
5342         for (i = 0; trace_options[i]; i++) {
5343                 if (tr->trace_flags & (1 << i))
5344                         seq_printf(m, "%s\n", trace_options[i]);
5345                 else
5346                         seq_printf(m, "no%s\n", trace_options[i]);
5347         }
5348
5349         for (i = 0; trace_opts[i].name; i++) {
5350                 if (tracer_flags & trace_opts[i].bit)
5351                         seq_printf(m, "%s\n", trace_opts[i].name);
5352                 else
5353                         seq_printf(m, "no%s\n", trace_opts[i].name);
5354         }
5355         mutex_unlock(&trace_types_lock);
5356
5357         return 0;
5358 }
5359
5360 static int __set_tracer_option(struct trace_array *tr,
5361                                struct tracer_flags *tracer_flags,
5362                                struct tracer_opt *opts, int neg)
5363 {
5364         struct tracer *trace = tracer_flags->trace;
5365         int ret;
5366
5367         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5368         if (ret)
5369                 return ret;
5370
5371         if (neg)
5372                 tracer_flags->val &= ~opts->bit;
5373         else
5374                 tracer_flags->val |= opts->bit;
5375         return 0;
5376 }
5377
5378 /* Try to assign a tracer specific option */
5379 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5380 {
5381         struct tracer *trace = tr->current_trace;
5382         struct tracer_flags *tracer_flags = trace->flags;
5383         struct tracer_opt *opts = NULL;
5384         int i;
5385
5386         for (i = 0; tracer_flags->opts[i].name; i++) {
5387                 opts = &tracer_flags->opts[i];
5388
5389                 if (strcmp(cmp, opts->name) == 0)
5390                         return __set_tracer_option(tr, trace->flags, opts, neg);
5391         }
5392
5393         return -EINVAL;
5394 }
5395
5396 /* Some tracers require overwrite to stay enabled */
5397 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5398 {
5399         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5400                 return -1;
5401
5402         return 0;
5403 }
5404
5405 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5406 {
5407         int *map;
5408
5409         if ((mask == TRACE_ITER_RECORD_TGID) ||
5410             (mask == TRACE_ITER_RECORD_CMD))
5411                 lockdep_assert_held(&event_mutex);
5412
5413         /* do nothing if flag is already set */
5414         if (!!(tr->trace_flags & mask) == !!enabled)
5415                 return 0;
5416
5417         /* Give the tracer a chance to approve the change */
5418         if (tr->current_trace->flag_changed)
5419                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5420                         return -EINVAL;
5421
5422         if (enabled)
5423                 tr->trace_flags |= mask;
5424         else
5425                 tr->trace_flags &= ~mask;
5426
5427         if (mask == TRACE_ITER_RECORD_CMD)
5428                 trace_event_enable_cmd_record(enabled);
5429
5430         if (mask == TRACE_ITER_RECORD_TGID) {
5431                 if (!tgid_map) {
5432                         tgid_map_max = pid_max;
5433                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5434                                        GFP_KERNEL);
5435
5436                         /*
5437                          * Pairs with smp_load_acquire() in
5438                          * trace_find_tgid_ptr() to ensure that if it observes
5439                          * the tgid_map we just allocated then it also observes
5440                          * the corresponding tgid_map_max value.
5441                          */
5442                         smp_store_release(&tgid_map, map);
5443                 }
5444                 if (!tgid_map) {
5445                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5446                         return -ENOMEM;
5447                 }
5448
5449                 trace_event_enable_tgid_record(enabled);
5450         }
5451
5452         if (mask == TRACE_ITER_EVENT_FORK)
5453                 trace_event_follow_fork(tr, enabled);
5454
5455         if (mask == TRACE_ITER_FUNC_FORK)
5456                 ftrace_pid_follow_fork(tr, enabled);
5457
5458         if (mask == TRACE_ITER_OVERWRITE) {
5459                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5460 #ifdef CONFIG_TRACER_MAX_TRACE
5461                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5462 #endif
5463         }
5464
5465         if (mask == TRACE_ITER_PRINTK) {
5466                 trace_printk_start_stop_comm(enabled);
5467                 trace_printk_control(enabled);
5468         }
5469
5470         return 0;
5471 }
5472
5473 int trace_set_options(struct trace_array *tr, char *option)
5474 {
5475         char *cmp;
5476         int neg = 0;
5477         int ret;
5478         size_t orig_len = strlen(option);
5479         int len;
5480
5481         cmp = strstrip(option);
5482
5483         len = str_has_prefix(cmp, "no");
5484         if (len)
5485                 neg = 1;
5486
5487         cmp += len;
5488
5489         mutex_lock(&event_mutex);
5490         mutex_lock(&trace_types_lock);
5491
5492         ret = match_string(trace_options, -1, cmp);
5493         /* If no option could be set, test the specific tracer options */
5494         if (ret < 0)
5495                 ret = set_tracer_option(tr, cmp, neg);
5496         else
5497                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5498
5499         mutex_unlock(&trace_types_lock);
5500         mutex_unlock(&event_mutex);
5501
5502         /*
5503          * If the first trailing whitespace is replaced with '\0' by strstrip,
5504          * turn it back into a space.
5505          */
5506         if (orig_len > strlen(option))
5507                 option[strlen(option)] = ' ';
5508
5509         return ret;
5510 }
5511
5512 static void __init apply_trace_boot_options(void)
5513 {
5514         char *buf = trace_boot_options_buf;
5515         char *option;
5516
5517         while (true) {
5518                 option = strsep(&buf, ",");
5519
5520                 if (!option)
5521                         break;
5522
5523                 if (*option)
5524                         trace_set_options(&global_trace, option);
5525
5526                 /* Put back the comma to allow this to be called again */
5527                 if (buf)
5528                         *(buf - 1) = ',';
5529         }
5530 }
5531
5532 static ssize_t
5533 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5534                         size_t cnt, loff_t *ppos)
5535 {
5536         struct seq_file *m = filp->private_data;
5537         struct trace_array *tr = m->private;
5538         char buf[64];
5539         int ret;
5540
5541         if (cnt >= sizeof(buf))
5542                 return -EINVAL;
5543
5544         if (copy_from_user(buf, ubuf, cnt))
5545                 return -EFAULT;
5546
5547         buf[cnt] = 0;
5548
5549         ret = trace_set_options(tr, buf);
5550         if (ret < 0)
5551                 return ret;
5552
5553         *ppos += cnt;
5554
5555         return cnt;
5556 }
5557
5558 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5559 {
5560         struct trace_array *tr = inode->i_private;
5561         int ret;
5562
5563         ret = tracing_check_open_get_tr(tr);
5564         if (ret)
5565                 return ret;
5566
5567         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5568         if (ret < 0)
5569                 trace_array_put(tr);
5570
5571         return ret;
5572 }
5573
5574 static const struct file_operations tracing_iter_fops = {
5575         .open           = tracing_trace_options_open,
5576         .read           = seq_read,
5577         .llseek         = seq_lseek,
5578         .release        = tracing_single_release_tr,
5579         .write          = tracing_trace_options_write,
5580 };
5581
5582 static const char readme_msg[] =
5583         "tracing mini-HOWTO:\n\n"
5584         "# echo 0 > tracing_on : quick way to disable tracing\n"
5585         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5586         " Important files:\n"
5587         "  trace\t\t\t- The static contents of the buffer\n"
5588         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5589         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5590         "  current_tracer\t- function and latency tracers\n"
5591         "  available_tracers\t- list of configured tracers for current_tracer\n"
5592         "  error_log\t- error log for failed commands (that support it)\n"
5593         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5594         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5595         "  trace_clock\t\t- change the clock used to order events\n"
5596         "       local:   Per cpu clock but may not be synced across CPUs\n"
5597         "      global:   Synced across CPUs but slows tracing down.\n"
5598         "     counter:   Not a clock, but just an increment\n"
5599         "      uptime:   Jiffy counter from time of boot\n"
5600         "        perf:   Same clock that perf events use\n"
5601 #ifdef CONFIG_X86_64
5602         "     x86-tsc:   TSC cycle counter\n"
5603 #endif
5604         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5605         "       delta:   Delta difference against a buffer-wide timestamp\n"
5606         "    absolute:   Absolute (standalone) timestamp\n"
5607         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5608         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5609         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5610         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5611         "\t\t\t  Remove sub-buffer with rmdir\n"
5612         "  trace_options\t\t- Set format or modify how tracing happens\n"
5613         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5614         "\t\t\t  option name\n"
5615         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5616 #ifdef CONFIG_DYNAMIC_FTRACE
5617         "\n  available_filter_functions - list of functions that can be filtered on\n"
5618         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5619         "\t\t\t  functions\n"
5620         "\t     accepts: func_full_name or glob-matching-pattern\n"
5621         "\t     modules: Can select a group via module\n"
5622         "\t      Format: :mod:<module-name>\n"
5623         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5624         "\t    triggers: a command to perform when function is hit\n"
5625         "\t      Format: <function>:<trigger>[:count]\n"
5626         "\t     trigger: traceon, traceoff\n"
5627         "\t\t      enable_event:<system>:<event>\n"
5628         "\t\t      disable_event:<system>:<event>\n"
5629 #ifdef CONFIG_STACKTRACE
5630         "\t\t      stacktrace\n"
5631 #endif
5632 #ifdef CONFIG_TRACER_SNAPSHOT
5633         "\t\t      snapshot\n"
5634 #endif
5635         "\t\t      dump\n"
5636         "\t\t      cpudump\n"
5637         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5638         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5639         "\t     The first one will disable tracing every time do_fault is hit\n"
5640         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5641         "\t       The first time do trap is hit and it disables tracing, the\n"
5642         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5643         "\t       the counter will not decrement. It only decrements when the\n"
5644         "\t       trigger did work\n"
5645         "\t     To remove trigger without count:\n"
5646         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5647         "\t     To remove trigger with a count:\n"
5648         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5649         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5650         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5651         "\t    modules: Can select a group via module command :mod:\n"
5652         "\t    Does not accept triggers\n"
5653 #endif /* CONFIG_DYNAMIC_FTRACE */
5654 #ifdef CONFIG_FUNCTION_TRACER
5655         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5656         "\t\t    (function)\n"
5657         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5658         "\t\t    (function)\n"
5659 #endif
5660 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5661         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5662         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5663         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5664 #endif
5665 #ifdef CONFIG_TRACER_SNAPSHOT
5666         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5667         "\t\t\t  snapshot buffer. Read the contents for more\n"
5668         "\t\t\t  information\n"
5669 #endif
5670 #ifdef CONFIG_STACK_TRACER
5671         "  stack_trace\t\t- Shows the max stack trace when active\n"
5672         "  stack_max_size\t- Shows current max stack size that was traced\n"
5673         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5674         "\t\t\t  new trace)\n"
5675 #ifdef CONFIG_DYNAMIC_FTRACE
5676         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5677         "\t\t\t  traces\n"
5678 #endif
5679 #endif /* CONFIG_STACK_TRACER */
5680 #ifdef CONFIG_DYNAMIC_EVENTS
5681         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5682         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5683 #endif
5684 #ifdef CONFIG_KPROBE_EVENTS
5685         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5686         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5687 #endif
5688 #ifdef CONFIG_UPROBE_EVENTS
5689         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5690         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5691 #endif
5692 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5693     defined(CONFIG_FPROBE_EVENTS)
5694         "\t  accepts: event-definitions (one definition per line)\n"
5695 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5696         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5697         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5698 #endif
5699 #ifdef CONFIG_FPROBE_EVENTS
5700         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5701         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5702 #endif
5703 #ifdef CONFIG_HIST_TRIGGERS
5704         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5705 #endif
5706         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5707         "\t           -:[<group>/][<event>]\n"
5708 #ifdef CONFIG_KPROBE_EVENTS
5709         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5710   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5711 #endif
5712 #ifdef CONFIG_UPROBE_EVENTS
5713   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5714 #endif
5715         "\t     args: <name>=fetcharg[:type]\n"
5716         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5717 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5718 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5719         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>, <argname>\n"
5720 #else
5721         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5722 #endif
5723 #else
5724         "\t           $stack<index>, $stack, $retval, $comm,\n"
5725 #endif
5726         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5727         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5728         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5729         "\t           symstr, <type>\\[<array-size>\\]\n"
5730 #ifdef CONFIG_HIST_TRIGGERS
5731         "\t    field: <stype> <name>;\n"
5732         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5733         "\t           [unsigned] char/int/long\n"
5734 #endif
5735         "\t    efield: For event probes ('e' types), the field is on of the fields\n"
5736         "\t            of the <attached-group>/<attached-event>.\n"
5737 #endif
5738         "  events/\t\t- Directory containing all trace event subsystems:\n"
5739         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5740         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5741         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5742         "\t\t\t  events\n"
5743         "      filter\t\t- If set, only events passing filter are traced\n"
5744         "  events/<system>/<event>/\t- Directory containing control files for\n"
5745         "\t\t\t  <event>:\n"
5746         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5747         "      filter\t\t- If set, only events passing filter are traced\n"
5748         "      trigger\t\t- If set, a command to perform when event is hit\n"
5749         "\t    Format: <trigger>[:count][if <filter>]\n"
5750         "\t   trigger: traceon, traceoff\n"
5751         "\t            enable_event:<system>:<event>\n"
5752         "\t            disable_event:<system>:<event>\n"
5753 #ifdef CONFIG_HIST_TRIGGERS
5754         "\t            enable_hist:<system>:<event>\n"
5755         "\t            disable_hist:<system>:<event>\n"
5756 #endif
5757 #ifdef CONFIG_STACKTRACE
5758         "\t\t    stacktrace\n"
5759 #endif
5760 #ifdef CONFIG_TRACER_SNAPSHOT
5761         "\t\t    snapshot\n"
5762 #endif
5763 #ifdef CONFIG_HIST_TRIGGERS
5764         "\t\t    hist (see below)\n"
5765 #endif
5766         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5767         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5768         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5769         "\t                  events/block/block_unplug/trigger\n"
5770         "\t   The first disables tracing every time block_unplug is hit.\n"
5771         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5772         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5773         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5774         "\t   Like function triggers, the counter is only decremented if it\n"
5775         "\t    enabled or disabled tracing.\n"
5776         "\t   To remove a trigger without a count:\n"
5777         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5778         "\t   To remove a trigger with a count:\n"
5779         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5780         "\t   Filters can be ignored when removing a trigger.\n"
5781 #ifdef CONFIG_HIST_TRIGGERS
5782         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5783         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5784         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5785         "\t            [:values=<field1[,field2,...]>]\n"
5786         "\t            [:sort=<field1[,field2,...]>]\n"
5787         "\t            [:size=#entries]\n"
5788         "\t            [:pause][:continue][:clear]\n"
5789         "\t            [:name=histname1]\n"
5790         "\t            [:nohitcount]\n"
5791         "\t            [:<handler>.<action>]\n"
5792         "\t            [if <filter>]\n\n"
5793         "\t    Note, special fields can be used as well:\n"
5794         "\t            common_timestamp - to record current timestamp\n"
5795         "\t            common_cpu - to record the CPU the event happened on\n"
5796         "\n"
5797         "\t    A hist trigger variable can be:\n"
5798         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5799         "\t        - a reference to another variable e.g. y=$x,\n"
5800         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5801         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5802         "\n"
5803         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5804         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5805         "\t    variable reference, field or numeric literal.\n"
5806         "\n"
5807         "\t    When a matching event is hit, an entry is added to a hash\n"
5808         "\t    table using the key(s) and value(s) named, and the value of a\n"
5809         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5810         "\t    correspond to fields in the event's format description.  Keys\n"
5811         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5812         "\t    Compound keys consisting of up to two fields can be specified\n"
5813         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5814         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5815         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5816         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5817         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5818         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5819         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5820         "\t    its histogram data will be shared with other triggers of the\n"
5821         "\t    same name, and trigger hits will update this common data.\n\n"
5822         "\t    Reading the 'hist' file for the event will dump the hash\n"
5823         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5824         "\t    triggers attached to an event, there will be a table for each\n"
5825         "\t    trigger in the output.  The table displayed for a named\n"
5826         "\t    trigger will be the same as any other instance having the\n"
5827         "\t    same name.  The default format used to display a given field\n"
5828         "\t    can be modified by appending any of the following modifiers\n"
5829         "\t    to the field name, as applicable:\n\n"
5830         "\t            .hex        display a number as a hex value\n"
5831         "\t            .sym        display an address as a symbol\n"
5832         "\t            .sym-offset display an address as a symbol and offset\n"
5833         "\t            .execname   display a common_pid as a program name\n"
5834         "\t            .syscall    display a syscall id as a syscall name\n"
5835         "\t            .log2       display log2 value rather than raw number\n"
5836         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5837         "\t            .usecs      display a common_timestamp in microseconds\n"
5838         "\t            .percent    display a number of percentage value\n"
5839         "\t            .graph      display a bar-graph of a value\n\n"
5840         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5841         "\t    trigger or to start a hist trigger but not log any events\n"
5842         "\t    until told to do so.  'continue' can be used to start or\n"
5843         "\t    restart a paused hist trigger.\n\n"
5844         "\t    The 'clear' parameter will clear the contents of a running\n"
5845         "\t    hist trigger and leave its current paused/active state\n"
5846         "\t    unchanged.\n\n"
5847         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5848         "\t    raw hitcount in the histogram.\n\n"
5849         "\t    The enable_hist and disable_hist triggers can be used to\n"
5850         "\t    have one event conditionally start and stop another event's\n"
5851         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5852         "\t    the enable_event and disable_event triggers.\n\n"
5853         "\t    Hist trigger handlers and actions are executed whenever a\n"
5854         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5855         "\t        <handler>.<action>\n\n"
5856         "\t    The available handlers are:\n\n"
5857         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5858         "\t        onmax(var)               - invoke if var exceeds current max\n"
5859         "\t        onchange(var)            - invoke action if var changes\n\n"
5860         "\t    The available actions are:\n\n"
5861         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5862         "\t        save(field,...)                      - save current event fields\n"
5863 #ifdef CONFIG_TRACER_SNAPSHOT
5864         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5865 #endif
5866 #ifdef CONFIG_SYNTH_EVENTS
5867         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5868         "\t  Write into this file to define/undefine new synthetic events.\n"
5869         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5870 #endif
5871 #endif
5872 ;
5873
5874 static ssize_t
5875 tracing_readme_read(struct file *filp, char __user *ubuf,
5876                        size_t cnt, loff_t *ppos)
5877 {
5878         return simple_read_from_buffer(ubuf, cnt, ppos,
5879                                         readme_msg, strlen(readme_msg));
5880 }
5881
5882 static const struct file_operations tracing_readme_fops = {
5883         .open           = tracing_open_generic,
5884         .read           = tracing_readme_read,
5885         .llseek         = generic_file_llseek,
5886 };
5887
5888 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5889 {
5890         int pid = ++(*pos);
5891
5892         return trace_find_tgid_ptr(pid);
5893 }
5894
5895 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5896 {
5897         int pid = *pos;
5898
5899         return trace_find_tgid_ptr(pid);
5900 }
5901
5902 static void saved_tgids_stop(struct seq_file *m, void *v)
5903 {
5904 }
5905
5906 static int saved_tgids_show(struct seq_file *m, void *v)
5907 {
5908         int *entry = (int *)v;
5909         int pid = entry - tgid_map;
5910         int tgid = *entry;
5911
5912         if (tgid == 0)
5913                 return SEQ_SKIP;
5914
5915         seq_printf(m, "%d %d\n", pid, tgid);
5916         return 0;
5917 }
5918
5919 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5920         .start          = saved_tgids_start,
5921         .stop           = saved_tgids_stop,
5922         .next           = saved_tgids_next,
5923         .show           = saved_tgids_show,
5924 };
5925
5926 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5927 {
5928         int ret;
5929
5930         ret = tracing_check_open_get_tr(NULL);
5931         if (ret)
5932                 return ret;
5933
5934         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5935 }
5936
5937
5938 static const struct file_operations tracing_saved_tgids_fops = {
5939         .open           = tracing_saved_tgids_open,
5940         .read           = seq_read,
5941         .llseek         = seq_lseek,
5942         .release        = seq_release,
5943 };
5944
5945 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5946 {
5947         unsigned int *ptr = v;
5948
5949         if (*pos || m->count)
5950                 ptr++;
5951
5952         (*pos)++;
5953
5954         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5955              ptr++) {
5956                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5957                         continue;
5958
5959                 return ptr;
5960         }
5961
5962         return NULL;
5963 }
5964
5965 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5966 {
5967         void *v;
5968         loff_t l = 0;
5969
5970         preempt_disable();
5971         arch_spin_lock(&trace_cmdline_lock);
5972
5973         v = &savedcmd->map_cmdline_to_pid[0];
5974         while (l <= *pos) {
5975                 v = saved_cmdlines_next(m, v, &l);
5976                 if (!v)
5977                         return NULL;
5978         }
5979
5980         return v;
5981 }
5982
5983 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5984 {
5985         arch_spin_unlock(&trace_cmdline_lock);
5986         preempt_enable();
5987 }
5988
5989 static int saved_cmdlines_show(struct seq_file *m, void *v)
5990 {
5991         char buf[TASK_COMM_LEN];
5992         unsigned int *pid = v;
5993
5994         __trace_find_cmdline(*pid, buf);
5995         seq_printf(m, "%d %s\n", *pid, buf);
5996         return 0;
5997 }
5998
5999 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6000         .start          = saved_cmdlines_start,
6001         .next           = saved_cmdlines_next,
6002         .stop           = saved_cmdlines_stop,
6003         .show           = saved_cmdlines_show,
6004 };
6005
6006 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6007 {
6008         int ret;
6009
6010         ret = tracing_check_open_get_tr(NULL);
6011         if (ret)
6012                 return ret;
6013
6014         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6015 }
6016
6017 static const struct file_operations tracing_saved_cmdlines_fops = {
6018         .open           = tracing_saved_cmdlines_open,
6019         .read           = seq_read,
6020         .llseek         = seq_lseek,
6021         .release        = seq_release,
6022 };
6023
6024 static ssize_t
6025 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6026                                  size_t cnt, loff_t *ppos)
6027 {
6028         char buf[64];
6029         int r;
6030
6031         preempt_disable();
6032         arch_spin_lock(&trace_cmdline_lock);
6033         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6034         arch_spin_unlock(&trace_cmdline_lock);
6035         preempt_enable();
6036
6037         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6038 }
6039
6040 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6041 {
6042         kfree(s->saved_cmdlines);
6043         kfree(s->map_cmdline_to_pid);
6044         kfree(s);
6045 }
6046
6047 static int tracing_resize_saved_cmdlines(unsigned int val)
6048 {
6049         struct saved_cmdlines_buffer *s, *savedcmd_temp;
6050
6051         s = kmalloc(sizeof(*s), GFP_KERNEL);
6052         if (!s)
6053                 return -ENOMEM;
6054
6055         if (allocate_cmdlines_buffer(val, s) < 0) {
6056                 kfree(s);
6057                 return -ENOMEM;
6058         }
6059
6060         preempt_disable();
6061         arch_spin_lock(&trace_cmdline_lock);
6062         savedcmd_temp = savedcmd;
6063         savedcmd = s;
6064         arch_spin_unlock(&trace_cmdline_lock);
6065         preempt_enable();
6066         free_saved_cmdlines_buffer(savedcmd_temp);
6067
6068         return 0;
6069 }
6070
6071 static ssize_t
6072 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6073                                   size_t cnt, loff_t *ppos)
6074 {
6075         unsigned long val;
6076         int ret;
6077
6078         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6079         if (ret)
6080                 return ret;
6081
6082         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
6083         if (!val || val > PID_MAX_DEFAULT)
6084                 return -EINVAL;
6085
6086         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6087         if (ret < 0)
6088                 return ret;
6089
6090         *ppos += cnt;
6091
6092         return cnt;
6093 }
6094
6095 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6096         .open           = tracing_open_generic,
6097         .read           = tracing_saved_cmdlines_size_read,
6098         .write          = tracing_saved_cmdlines_size_write,
6099 };
6100
6101 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6102 static union trace_eval_map_item *
6103 update_eval_map(union trace_eval_map_item *ptr)
6104 {
6105         if (!ptr->map.eval_string) {
6106                 if (ptr->tail.next) {
6107                         ptr = ptr->tail.next;
6108                         /* Set ptr to the next real item (skip head) */
6109                         ptr++;
6110                 } else
6111                         return NULL;
6112         }
6113         return ptr;
6114 }
6115
6116 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6117 {
6118         union trace_eval_map_item *ptr = v;
6119
6120         /*
6121          * Paranoid! If ptr points to end, we don't want to increment past it.
6122          * This really should never happen.
6123          */
6124         (*pos)++;
6125         ptr = update_eval_map(ptr);
6126         if (WARN_ON_ONCE(!ptr))
6127                 return NULL;
6128
6129         ptr++;
6130         ptr = update_eval_map(ptr);
6131
6132         return ptr;
6133 }
6134
6135 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6136 {
6137         union trace_eval_map_item *v;
6138         loff_t l = 0;
6139
6140         mutex_lock(&trace_eval_mutex);
6141
6142         v = trace_eval_maps;
6143         if (v)
6144                 v++;
6145
6146         while (v && l < *pos) {
6147                 v = eval_map_next(m, v, &l);
6148         }
6149
6150         return v;
6151 }
6152
6153 static void eval_map_stop(struct seq_file *m, void *v)
6154 {
6155         mutex_unlock(&trace_eval_mutex);
6156 }
6157
6158 static int eval_map_show(struct seq_file *m, void *v)
6159 {
6160         union trace_eval_map_item *ptr = v;
6161
6162         seq_printf(m, "%s %ld (%s)\n",
6163                    ptr->map.eval_string, ptr->map.eval_value,
6164                    ptr->map.system);
6165
6166         return 0;
6167 }
6168
6169 static const struct seq_operations tracing_eval_map_seq_ops = {
6170         .start          = eval_map_start,
6171         .next           = eval_map_next,
6172         .stop           = eval_map_stop,
6173         .show           = eval_map_show,
6174 };
6175
6176 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6177 {
6178         int ret;
6179
6180         ret = tracing_check_open_get_tr(NULL);
6181         if (ret)
6182                 return ret;
6183
6184         return seq_open(filp, &tracing_eval_map_seq_ops);
6185 }
6186
6187 static const struct file_operations tracing_eval_map_fops = {
6188         .open           = tracing_eval_map_open,
6189         .read           = seq_read,
6190         .llseek         = seq_lseek,
6191         .release        = seq_release,
6192 };
6193
6194 static inline union trace_eval_map_item *
6195 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6196 {
6197         /* Return tail of array given the head */
6198         return ptr + ptr->head.length + 1;
6199 }
6200
6201 static void
6202 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6203                            int len)
6204 {
6205         struct trace_eval_map **stop;
6206         struct trace_eval_map **map;
6207         union trace_eval_map_item *map_array;
6208         union trace_eval_map_item *ptr;
6209
6210         stop = start + len;
6211
6212         /*
6213          * The trace_eval_maps contains the map plus a head and tail item,
6214          * where the head holds the module and length of array, and the
6215          * tail holds a pointer to the next list.
6216          */
6217         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6218         if (!map_array) {
6219                 pr_warn("Unable to allocate trace eval mapping\n");
6220                 return;
6221         }
6222
6223         mutex_lock(&trace_eval_mutex);
6224
6225         if (!trace_eval_maps)
6226                 trace_eval_maps = map_array;
6227         else {
6228                 ptr = trace_eval_maps;
6229                 for (;;) {
6230                         ptr = trace_eval_jmp_to_tail(ptr);
6231                         if (!ptr->tail.next)
6232                                 break;
6233                         ptr = ptr->tail.next;
6234
6235                 }
6236                 ptr->tail.next = map_array;
6237         }
6238         map_array->head.mod = mod;
6239         map_array->head.length = len;
6240         map_array++;
6241
6242         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6243                 map_array->map = **map;
6244                 map_array++;
6245         }
6246         memset(map_array, 0, sizeof(*map_array));
6247
6248         mutex_unlock(&trace_eval_mutex);
6249 }
6250
6251 static void trace_create_eval_file(struct dentry *d_tracer)
6252 {
6253         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6254                           NULL, &tracing_eval_map_fops);
6255 }
6256
6257 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6258 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6259 static inline void trace_insert_eval_map_file(struct module *mod,
6260                               struct trace_eval_map **start, int len) { }
6261 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6262
6263 static void trace_insert_eval_map(struct module *mod,
6264                                   struct trace_eval_map **start, int len)
6265 {
6266         struct trace_eval_map **map;
6267
6268         if (len <= 0)
6269                 return;
6270
6271         map = start;
6272
6273         trace_event_eval_update(map, len);
6274
6275         trace_insert_eval_map_file(mod, start, len);
6276 }
6277
6278 static ssize_t
6279 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6280                        size_t cnt, loff_t *ppos)
6281 {
6282         struct trace_array *tr = filp->private_data;
6283         char buf[MAX_TRACER_SIZE+2];
6284         int r;
6285
6286         mutex_lock(&trace_types_lock);
6287         r = sprintf(buf, "%s\n", tr->current_trace->name);
6288         mutex_unlock(&trace_types_lock);
6289
6290         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6291 }
6292
6293 int tracer_init(struct tracer *t, struct trace_array *tr)
6294 {
6295         tracing_reset_online_cpus(&tr->array_buffer);
6296         return t->init(tr);
6297 }
6298
6299 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6300 {
6301         int cpu;
6302
6303         for_each_tracing_cpu(cpu)
6304                 per_cpu_ptr(buf->data, cpu)->entries = val;
6305 }
6306
6307 #ifdef CONFIG_TRACER_MAX_TRACE
6308 /* resize @tr's buffer to the size of @size_tr's entries */
6309 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6310                                         struct array_buffer *size_buf, int cpu_id)
6311 {
6312         int cpu, ret = 0;
6313
6314         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6315                 for_each_tracing_cpu(cpu) {
6316                         ret = ring_buffer_resize(trace_buf->buffer,
6317                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6318                         if (ret < 0)
6319                                 break;
6320                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6321                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6322                 }
6323         } else {
6324                 ret = ring_buffer_resize(trace_buf->buffer,
6325                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6326                 if (ret == 0)
6327                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6328                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6329         }
6330
6331         return ret;
6332 }
6333 #endif /* CONFIG_TRACER_MAX_TRACE */
6334
6335 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6336                                         unsigned long size, int cpu)
6337 {
6338         int ret;
6339
6340         /*
6341          * If kernel or user changes the size of the ring buffer
6342          * we use the size that was given, and we can forget about
6343          * expanding it later.
6344          */
6345         ring_buffer_expanded = true;
6346
6347         /* May be called before buffers are initialized */
6348         if (!tr->array_buffer.buffer)
6349                 return 0;
6350
6351         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6352         if (ret < 0)
6353                 return ret;
6354
6355 #ifdef CONFIG_TRACER_MAX_TRACE
6356         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6357             !tr->current_trace->use_max_tr)
6358                 goto out;
6359
6360         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6361         if (ret < 0) {
6362                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6363                                                      &tr->array_buffer, cpu);
6364                 if (r < 0) {
6365                         /*
6366                          * AARGH! We are left with different
6367                          * size max buffer!!!!
6368                          * The max buffer is our "snapshot" buffer.
6369                          * When a tracer needs a snapshot (one of the
6370                          * latency tracers), it swaps the max buffer
6371                          * with the saved snap shot. We succeeded to
6372                          * update the size of the main buffer, but failed to
6373                          * update the size of the max buffer. But when we tried
6374                          * to reset the main buffer to the original size, we
6375                          * failed there too. This is very unlikely to
6376                          * happen, but if it does, warn and kill all
6377                          * tracing.
6378                          */
6379                         WARN_ON(1);
6380                         tracing_disabled = 1;
6381                 }
6382                 return ret;
6383         }
6384
6385         if (cpu == RING_BUFFER_ALL_CPUS)
6386                 set_buffer_entries(&tr->max_buffer, size);
6387         else
6388                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6389
6390  out:
6391 #endif /* CONFIG_TRACER_MAX_TRACE */
6392
6393         if (cpu == RING_BUFFER_ALL_CPUS)
6394                 set_buffer_entries(&tr->array_buffer, size);
6395         else
6396                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6397
6398         return ret;
6399 }
6400
6401 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6402                                   unsigned long size, int cpu_id)
6403 {
6404         int ret;
6405
6406         mutex_lock(&trace_types_lock);
6407
6408         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6409                 /* make sure, this cpu is enabled in the mask */
6410                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6411                         ret = -EINVAL;
6412                         goto out;
6413                 }
6414         }
6415
6416         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6417         if (ret < 0)
6418                 ret = -ENOMEM;
6419
6420 out:
6421         mutex_unlock(&trace_types_lock);
6422
6423         return ret;
6424 }
6425
6426
6427 /**
6428  * tracing_update_buffers - used by tracing facility to expand ring buffers
6429  *
6430  * To save on memory when the tracing is never used on a system with it
6431  * configured in. The ring buffers are set to a minimum size. But once
6432  * a user starts to use the tracing facility, then they need to grow
6433  * to their default size.
6434  *
6435  * This function is to be called when a tracer is about to be used.
6436  */
6437 int tracing_update_buffers(void)
6438 {
6439         int ret = 0;
6440
6441         mutex_lock(&trace_types_lock);
6442         if (!ring_buffer_expanded)
6443                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6444                                                 RING_BUFFER_ALL_CPUS);
6445         mutex_unlock(&trace_types_lock);
6446
6447         return ret;
6448 }
6449
6450 struct trace_option_dentry;
6451
6452 static void
6453 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6454
6455 /*
6456  * Used to clear out the tracer before deletion of an instance.
6457  * Must have trace_types_lock held.
6458  */
6459 static void tracing_set_nop(struct trace_array *tr)
6460 {
6461         if (tr->current_trace == &nop_trace)
6462                 return;
6463         
6464         tr->current_trace->enabled--;
6465
6466         if (tr->current_trace->reset)
6467                 tr->current_trace->reset(tr);
6468
6469         tr->current_trace = &nop_trace;
6470 }
6471
6472 static bool tracer_options_updated;
6473
6474 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6475 {
6476         /* Only enable if the directory has been created already. */
6477         if (!tr->dir)
6478                 return;
6479
6480         /* Only create trace option files after update_tracer_options finish */
6481         if (!tracer_options_updated)
6482                 return;
6483
6484         create_trace_option_files(tr, t);
6485 }
6486
6487 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6488 {
6489         struct tracer *t;
6490 #ifdef CONFIG_TRACER_MAX_TRACE
6491         bool had_max_tr;
6492 #endif
6493         int ret = 0;
6494
6495         mutex_lock(&trace_types_lock);
6496
6497         if (!ring_buffer_expanded) {
6498                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6499                                                 RING_BUFFER_ALL_CPUS);
6500                 if (ret < 0)
6501                         goto out;
6502                 ret = 0;
6503         }
6504
6505         for (t = trace_types; t; t = t->next) {
6506                 if (strcmp(t->name, buf) == 0)
6507                         break;
6508         }
6509         if (!t) {
6510                 ret = -EINVAL;
6511                 goto out;
6512         }
6513         if (t == tr->current_trace)
6514                 goto out;
6515
6516 #ifdef CONFIG_TRACER_SNAPSHOT
6517         if (t->use_max_tr) {
6518                 local_irq_disable();
6519                 arch_spin_lock(&tr->max_lock);
6520                 if (tr->cond_snapshot)
6521                         ret = -EBUSY;
6522                 arch_spin_unlock(&tr->max_lock);
6523                 local_irq_enable();
6524                 if (ret)
6525                         goto out;
6526         }
6527 #endif
6528         /* Some tracers won't work on kernel command line */
6529         if (system_state < SYSTEM_RUNNING && t->noboot) {
6530                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6531                         t->name);
6532                 goto out;
6533         }
6534
6535         /* Some tracers are only allowed for the top level buffer */
6536         if (!trace_ok_for_array(t, tr)) {
6537                 ret = -EINVAL;
6538                 goto out;
6539         }
6540
6541         /* If trace pipe files are being read, we can't change the tracer */
6542         if (tr->trace_ref) {
6543                 ret = -EBUSY;
6544                 goto out;
6545         }
6546
6547         trace_branch_disable();
6548
6549         tr->current_trace->enabled--;
6550
6551         if (tr->current_trace->reset)
6552                 tr->current_trace->reset(tr);
6553
6554 #ifdef CONFIG_TRACER_MAX_TRACE
6555         had_max_tr = tr->current_trace->use_max_tr;
6556
6557         /* Current trace needs to be nop_trace before synchronize_rcu */
6558         tr->current_trace = &nop_trace;
6559
6560         if (had_max_tr && !t->use_max_tr) {
6561                 /*
6562                  * We need to make sure that the update_max_tr sees that
6563                  * current_trace changed to nop_trace to keep it from
6564                  * swapping the buffers after we resize it.
6565                  * The update_max_tr is called from interrupts disabled
6566                  * so a synchronized_sched() is sufficient.
6567                  */
6568                 synchronize_rcu();
6569                 free_snapshot(tr);
6570         }
6571
6572         if (t->use_max_tr && !tr->allocated_snapshot) {
6573                 ret = tracing_alloc_snapshot_instance(tr);
6574                 if (ret < 0)
6575                         goto out;
6576         }
6577 #else
6578         tr->current_trace = &nop_trace;
6579 #endif
6580
6581         if (t->init) {
6582                 ret = tracer_init(t, tr);
6583                 if (ret)
6584                         goto out;
6585         }
6586
6587         tr->current_trace = t;
6588         tr->current_trace->enabled++;
6589         trace_branch_enable(tr);
6590  out:
6591         mutex_unlock(&trace_types_lock);
6592
6593         return ret;
6594 }
6595
6596 static ssize_t
6597 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6598                         size_t cnt, loff_t *ppos)
6599 {
6600         struct trace_array *tr = filp->private_data;
6601         char buf[MAX_TRACER_SIZE+1];
6602         char *name;
6603         size_t ret;
6604         int err;
6605
6606         ret = cnt;
6607
6608         if (cnt > MAX_TRACER_SIZE)
6609                 cnt = MAX_TRACER_SIZE;
6610
6611         if (copy_from_user(buf, ubuf, cnt))
6612                 return -EFAULT;
6613
6614         buf[cnt] = 0;
6615
6616         name = strim(buf);
6617
6618         err = tracing_set_tracer(tr, name);
6619         if (err)
6620                 return err;
6621
6622         *ppos += ret;
6623
6624         return ret;
6625 }
6626
6627 static ssize_t
6628 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6629                    size_t cnt, loff_t *ppos)
6630 {
6631         char buf[64];
6632         int r;
6633
6634         r = snprintf(buf, sizeof(buf), "%ld\n",
6635                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6636         if (r > sizeof(buf))
6637                 r = sizeof(buf);
6638         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6639 }
6640
6641 static ssize_t
6642 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6643                     size_t cnt, loff_t *ppos)
6644 {
6645         unsigned long val;
6646         int ret;
6647
6648         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6649         if (ret)
6650                 return ret;
6651
6652         *ptr = val * 1000;
6653
6654         return cnt;
6655 }
6656
6657 static ssize_t
6658 tracing_thresh_read(struct file *filp, char __user *ubuf,
6659                     size_t cnt, loff_t *ppos)
6660 {
6661         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6662 }
6663
6664 static ssize_t
6665 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6666                      size_t cnt, loff_t *ppos)
6667 {
6668         struct trace_array *tr = filp->private_data;
6669         int ret;
6670
6671         mutex_lock(&trace_types_lock);
6672         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6673         if (ret < 0)
6674                 goto out;
6675
6676         if (tr->current_trace->update_thresh) {
6677                 ret = tr->current_trace->update_thresh(tr);
6678                 if (ret < 0)
6679                         goto out;
6680         }
6681
6682         ret = cnt;
6683 out:
6684         mutex_unlock(&trace_types_lock);
6685
6686         return ret;
6687 }
6688
6689 #ifdef CONFIG_TRACER_MAX_TRACE
6690
6691 static ssize_t
6692 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6693                      size_t cnt, loff_t *ppos)
6694 {
6695         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6696 }
6697
6698 static ssize_t
6699 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6700                       size_t cnt, loff_t *ppos)
6701 {
6702         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6703 }
6704
6705 #endif
6706
6707 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6708 {
6709         struct trace_array *tr = inode->i_private;
6710         struct trace_iterator *iter;
6711         int ret;
6712
6713         ret = tracing_check_open_get_tr(tr);
6714         if (ret)
6715                 return ret;
6716
6717         mutex_lock(&trace_types_lock);
6718
6719         /* create a buffer to store the information to pass to userspace */
6720         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6721         if (!iter) {
6722                 ret = -ENOMEM;
6723                 __trace_array_put(tr);
6724                 goto out;
6725         }
6726
6727         trace_seq_init(&iter->seq);
6728         iter->trace = tr->current_trace;
6729
6730         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6731                 ret = -ENOMEM;
6732                 goto fail;
6733         }
6734
6735         /* trace pipe does not show start of buffer */
6736         cpumask_setall(iter->started);
6737
6738         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6739                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6740
6741         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6742         if (trace_clocks[tr->clock_id].in_ns)
6743                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6744
6745         iter->tr = tr;
6746         iter->array_buffer = &tr->array_buffer;
6747         iter->cpu_file = tracing_get_cpu(inode);
6748         mutex_init(&iter->mutex);
6749         filp->private_data = iter;
6750
6751         if (iter->trace->pipe_open)
6752                 iter->trace->pipe_open(iter);
6753
6754         nonseekable_open(inode, filp);
6755
6756         tr->trace_ref++;
6757 out:
6758         mutex_unlock(&trace_types_lock);
6759         return ret;
6760
6761 fail:
6762         kfree(iter);
6763         __trace_array_put(tr);
6764         mutex_unlock(&trace_types_lock);
6765         return ret;
6766 }
6767
6768 static int tracing_release_pipe(struct inode *inode, struct file *file)
6769 {
6770         struct trace_iterator *iter = file->private_data;
6771         struct trace_array *tr = inode->i_private;
6772
6773         mutex_lock(&trace_types_lock);
6774
6775         tr->trace_ref--;
6776
6777         if (iter->trace->pipe_close)
6778                 iter->trace->pipe_close(iter);
6779
6780         mutex_unlock(&trace_types_lock);
6781
6782         free_cpumask_var(iter->started);
6783         kfree(iter->fmt);
6784         kfree(iter->temp);
6785         mutex_destroy(&iter->mutex);
6786         kfree(iter);
6787
6788         trace_array_put(tr);
6789
6790         return 0;
6791 }
6792
6793 static __poll_t
6794 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6795 {
6796         struct trace_array *tr = iter->tr;
6797
6798         /* Iterators are static, they should be filled or empty */
6799         if (trace_buffer_iter(iter, iter->cpu_file))
6800                 return EPOLLIN | EPOLLRDNORM;
6801
6802         if (tr->trace_flags & TRACE_ITER_BLOCK)
6803                 /*
6804                  * Always select as readable when in blocking mode
6805                  */
6806                 return EPOLLIN | EPOLLRDNORM;
6807         else
6808                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6809                                              filp, poll_table, iter->tr->buffer_percent);
6810 }
6811
6812 static __poll_t
6813 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6814 {
6815         struct trace_iterator *iter = filp->private_data;
6816
6817         return trace_poll(iter, filp, poll_table);
6818 }
6819
6820 /* Must be called with iter->mutex held. */
6821 static int tracing_wait_pipe(struct file *filp)
6822 {
6823         struct trace_iterator *iter = filp->private_data;
6824         int ret;
6825
6826         while (trace_empty(iter)) {
6827
6828                 if ((filp->f_flags & O_NONBLOCK)) {
6829                         return -EAGAIN;
6830                 }
6831
6832                 /*
6833                  * We block until we read something and tracing is disabled.
6834                  * We still block if tracing is disabled, but we have never
6835                  * read anything. This allows a user to cat this file, and
6836                  * then enable tracing. But after we have read something,
6837                  * we give an EOF when tracing is again disabled.
6838                  *
6839                  * iter->pos will be 0 if we haven't read anything.
6840                  */
6841                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6842                         break;
6843
6844                 mutex_unlock(&iter->mutex);
6845
6846                 ret = wait_on_pipe(iter, 0);
6847
6848                 mutex_lock(&iter->mutex);
6849
6850                 if (ret)
6851                         return ret;
6852         }
6853
6854         return 1;
6855 }
6856
6857 /*
6858  * Consumer reader.
6859  */
6860 static ssize_t
6861 tracing_read_pipe(struct file *filp, char __user *ubuf,
6862                   size_t cnt, loff_t *ppos)
6863 {
6864         struct trace_iterator *iter = filp->private_data;
6865         ssize_t sret;
6866
6867         /*
6868          * Avoid more than one consumer on a single file descriptor
6869          * This is just a matter of traces coherency, the ring buffer itself
6870          * is protected.
6871          */
6872         mutex_lock(&iter->mutex);
6873
6874         /* return any leftover data */
6875         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6876         if (sret != -EBUSY)
6877                 goto out;
6878
6879         trace_seq_init(&iter->seq);
6880
6881         if (iter->trace->read) {
6882                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6883                 if (sret)
6884                         goto out;
6885         }
6886
6887 waitagain:
6888         sret = tracing_wait_pipe(filp);
6889         if (sret <= 0)
6890                 goto out;
6891
6892         /* stop when tracing is finished */
6893         if (trace_empty(iter)) {
6894                 sret = 0;
6895                 goto out;
6896         }
6897
6898         if (cnt >= PAGE_SIZE)
6899                 cnt = PAGE_SIZE - 1;
6900
6901         /* reset all but tr, trace, and overruns */
6902         trace_iterator_reset(iter);
6903         cpumask_clear(iter->started);
6904         trace_seq_init(&iter->seq);
6905
6906         trace_event_read_lock();
6907         trace_access_lock(iter->cpu_file);
6908         while (trace_find_next_entry_inc(iter) != NULL) {
6909                 enum print_line_t ret;
6910                 int save_len = iter->seq.seq.len;
6911
6912                 ret = print_trace_line(iter);
6913                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6914                         /*
6915                          * If one print_trace_line() fills entire trace_seq in one shot,
6916                          * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6917                          * In this case, we need to consume it, otherwise, loop will peek
6918                          * this event next time, resulting in an infinite loop.
6919                          */
6920                         if (save_len == 0) {
6921                                 iter->seq.full = 0;
6922                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6923                                 trace_consume(iter);
6924                                 break;
6925                         }
6926
6927                         /* In other cases, don't print partial lines */
6928                         iter->seq.seq.len = save_len;
6929                         break;
6930                 }
6931                 if (ret != TRACE_TYPE_NO_CONSUME)
6932                         trace_consume(iter);
6933
6934                 if (trace_seq_used(&iter->seq) >= cnt)
6935                         break;
6936
6937                 /*
6938                  * Setting the full flag means we reached the trace_seq buffer
6939                  * size and we should leave by partial output condition above.
6940                  * One of the trace_seq_* functions is not used properly.
6941                  */
6942                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6943                           iter->ent->type);
6944         }
6945         trace_access_unlock(iter->cpu_file);
6946         trace_event_read_unlock();
6947
6948         /* Now copy what we have to the user */
6949         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6950         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6951                 trace_seq_init(&iter->seq);
6952
6953         /*
6954          * If there was nothing to send to user, in spite of consuming trace
6955          * entries, go back to wait for more entries.
6956          */
6957         if (sret == -EBUSY)
6958                 goto waitagain;
6959
6960 out:
6961         mutex_unlock(&iter->mutex);
6962
6963         return sret;
6964 }
6965
6966 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6967                                      unsigned int idx)
6968 {
6969         __free_page(spd->pages[idx]);
6970 }
6971
6972 static size_t
6973 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6974 {
6975         size_t count;
6976         int save_len;
6977         int ret;
6978
6979         /* Seq buffer is page-sized, exactly what we need. */
6980         for (;;) {
6981                 save_len = iter->seq.seq.len;
6982                 ret = print_trace_line(iter);
6983
6984                 if (trace_seq_has_overflowed(&iter->seq)) {
6985                         iter->seq.seq.len = save_len;
6986                         break;
6987                 }
6988
6989                 /*
6990                  * This should not be hit, because it should only
6991                  * be set if the iter->seq overflowed. But check it
6992                  * anyway to be safe.
6993                  */
6994                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6995                         iter->seq.seq.len = save_len;
6996                         break;
6997                 }
6998
6999                 count = trace_seq_used(&iter->seq) - save_len;
7000                 if (rem < count) {
7001                         rem = 0;
7002                         iter->seq.seq.len = save_len;
7003                         break;
7004                 }
7005
7006                 if (ret != TRACE_TYPE_NO_CONSUME)
7007                         trace_consume(iter);
7008                 rem -= count;
7009                 if (!trace_find_next_entry_inc(iter))   {
7010                         rem = 0;
7011                         iter->ent = NULL;
7012                         break;
7013                 }
7014         }
7015
7016         return rem;
7017 }
7018
7019 static ssize_t tracing_splice_read_pipe(struct file *filp,
7020                                         loff_t *ppos,
7021                                         struct pipe_inode_info *pipe,
7022                                         size_t len,
7023                                         unsigned int flags)
7024 {
7025         struct page *pages_def[PIPE_DEF_BUFFERS];
7026         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7027         struct trace_iterator *iter = filp->private_data;
7028         struct splice_pipe_desc spd = {
7029                 .pages          = pages_def,
7030                 .partial        = partial_def,
7031                 .nr_pages       = 0, /* This gets updated below. */
7032                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7033                 .ops            = &default_pipe_buf_ops,
7034                 .spd_release    = tracing_spd_release_pipe,
7035         };
7036         ssize_t ret;
7037         size_t rem;
7038         unsigned int i;
7039
7040         if (splice_grow_spd(pipe, &spd))
7041                 return -ENOMEM;
7042
7043         mutex_lock(&iter->mutex);
7044
7045         if (iter->trace->splice_read) {
7046                 ret = iter->trace->splice_read(iter, filp,
7047                                                ppos, pipe, len, flags);
7048                 if (ret)
7049                         goto out_err;
7050         }
7051
7052         ret = tracing_wait_pipe(filp);
7053         if (ret <= 0)
7054                 goto out_err;
7055
7056         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7057                 ret = -EFAULT;
7058                 goto out_err;
7059         }
7060
7061         trace_event_read_lock();
7062         trace_access_lock(iter->cpu_file);
7063
7064         /* Fill as many pages as possible. */
7065         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7066                 spd.pages[i] = alloc_page(GFP_KERNEL);
7067                 if (!spd.pages[i])
7068                         break;
7069
7070                 rem = tracing_fill_pipe_page(rem, iter);
7071
7072                 /* Copy the data into the page, so we can start over. */
7073                 ret = trace_seq_to_buffer(&iter->seq,
7074                                           page_address(spd.pages[i]),
7075                                           trace_seq_used(&iter->seq));
7076                 if (ret < 0) {
7077                         __free_page(spd.pages[i]);
7078                         break;
7079                 }
7080                 spd.partial[i].offset = 0;
7081                 spd.partial[i].len = trace_seq_used(&iter->seq);
7082
7083                 trace_seq_init(&iter->seq);
7084         }
7085
7086         trace_access_unlock(iter->cpu_file);
7087         trace_event_read_unlock();
7088         mutex_unlock(&iter->mutex);
7089
7090         spd.nr_pages = i;
7091
7092         if (i)
7093                 ret = splice_to_pipe(pipe, &spd);
7094         else
7095                 ret = 0;
7096 out:
7097         splice_shrink_spd(&spd);
7098         return ret;
7099
7100 out_err:
7101         mutex_unlock(&iter->mutex);
7102         goto out;
7103 }
7104
7105 static ssize_t
7106 tracing_entries_read(struct file *filp, char __user *ubuf,
7107                      size_t cnt, loff_t *ppos)
7108 {
7109         struct inode *inode = file_inode(filp);
7110         struct trace_array *tr = inode->i_private;
7111         int cpu = tracing_get_cpu(inode);
7112         char buf[64];
7113         int r = 0;
7114         ssize_t ret;
7115
7116         mutex_lock(&trace_types_lock);
7117
7118         if (cpu == RING_BUFFER_ALL_CPUS) {
7119                 int cpu, buf_size_same;
7120                 unsigned long size;
7121
7122                 size = 0;
7123                 buf_size_same = 1;
7124                 /* check if all cpu sizes are same */
7125                 for_each_tracing_cpu(cpu) {
7126                         /* fill in the size from first enabled cpu */
7127                         if (size == 0)
7128                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7129                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7130                                 buf_size_same = 0;
7131                                 break;
7132                         }
7133                 }
7134
7135                 if (buf_size_same) {
7136                         if (!ring_buffer_expanded)
7137                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7138                                             size >> 10,
7139                                             trace_buf_size >> 10);
7140                         else
7141                                 r = sprintf(buf, "%lu\n", size >> 10);
7142                 } else
7143                         r = sprintf(buf, "X\n");
7144         } else
7145                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7146
7147         mutex_unlock(&trace_types_lock);
7148
7149         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7150         return ret;
7151 }
7152
7153 static ssize_t
7154 tracing_entries_write(struct file *filp, const char __user *ubuf,
7155                       size_t cnt, loff_t *ppos)
7156 {
7157         struct inode *inode = file_inode(filp);
7158         struct trace_array *tr = inode->i_private;
7159         unsigned long val;
7160         int ret;
7161
7162         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7163         if (ret)
7164                 return ret;
7165
7166         /* must have at least 1 entry */
7167         if (!val)
7168                 return -EINVAL;
7169
7170         /* value is in KB */
7171         val <<= 10;
7172         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7173         if (ret < 0)
7174                 return ret;
7175
7176         *ppos += cnt;
7177
7178         return cnt;
7179 }
7180
7181 static ssize_t
7182 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7183                                 size_t cnt, loff_t *ppos)
7184 {
7185         struct trace_array *tr = filp->private_data;
7186         char buf[64];
7187         int r, cpu;
7188         unsigned long size = 0, expanded_size = 0;
7189
7190         mutex_lock(&trace_types_lock);
7191         for_each_tracing_cpu(cpu) {
7192                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7193                 if (!ring_buffer_expanded)
7194                         expanded_size += trace_buf_size >> 10;
7195         }
7196         if (ring_buffer_expanded)
7197                 r = sprintf(buf, "%lu\n", size);
7198         else
7199                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7200         mutex_unlock(&trace_types_lock);
7201
7202         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7203 }
7204
7205 static ssize_t
7206 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7207                           size_t cnt, loff_t *ppos)
7208 {
7209         /*
7210          * There is no need to read what the user has written, this function
7211          * is just to make sure that there is no error when "echo" is used
7212          */
7213
7214         *ppos += cnt;
7215
7216         return cnt;
7217 }
7218
7219 static int
7220 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7221 {
7222         struct trace_array *tr = inode->i_private;
7223
7224         /* disable tracing ? */
7225         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7226                 tracer_tracing_off(tr);
7227         /* resize the ring buffer to 0 */
7228         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7229
7230         trace_array_put(tr);
7231
7232         return 0;
7233 }
7234
7235 static ssize_t
7236 tracing_mark_write(struct file *filp, const char __user *ubuf,
7237                                         size_t cnt, loff_t *fpos)
7238 {
7239         struct trace_array *tr = filp->private_data;
7240         struct ring_buffer_event *event;
7241         enum event_trigger_type tt = ETT_NONE;
7242         struct trace_buffer *buffer;
7243         struct print_entry *entry;
7244         ssize_t written;
7245         int size;
7246         int len;
7247
7248 /* Used in tracing_mark_raw_write() as well */
7249 #define FAULTED_STR "<faulted>"
7250 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7251
7252         if (tracing_disabled)
7253                 return -EINVAL;
7254
7255         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7256                 return -EINVAL;
7257
7258         if (cnt > TRACE_BUF_SIZE)
7259                 cnt = TRACE_BUF_SIZE;
7260
7261         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7262
7263         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7264
7265         /* If less than "<faulted>", then make sure we can still add that */
7266         if (cnt < FAULTED_SIZE)
7267                 size += FAULTED_SIZE - cnt;
7268
7269         buffer = tr->array_buffer.buffer;
7270         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7271                                             tracing_gen_ctx());
7272         if (unlikely(!event))
7273                 /* Ring buffer disabled, return as if not open for write */
7274                 return -EBADF;
7275
7276         entry = ring_buffer_event_data(event);
7277         entry->ip = _THIS_IP_;
7278
7279         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7280         if (len) {
7281                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7282                 cnt = FAULTED_SIZE;
7283                 written = -EFAULT;
7284         } else
7285                 written = cnt;
7286
7287         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7288                 /* do not add \n before testing triggers, but add \0 */
7289                 entry->buf[cnt] = '\0';
7290                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7291         }
7292
7293         if (entry->buf[cnt - 1] != '\n') {
7294                 entry->buf[cnt] = '\n';
7295                 entry->buf[cnt + 1] = '\0';
7296         } else
7297                 entry->buf[cnt] = '\0';
7298
7299         if (static_branch_unlikely(&trace_marker_exports_enabled))
7300                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7301         __buffer_unlock_commit(buffer, event);
7302
7303         if (tt)
7304                 event_triggers_post_call(tr->trace_marker_file, tt);
7305
7306         return written;
7307 }
7308
7309 /* Limit it for now to 3K (including tag) */
7310 #define RAW_DATA_MAX_SIZE (1024*3)
7311
7312 static ssize_t
7313 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7314                                         size_t cnt, loff_t *fpos)
7315 {
7316         struct trace_array *tr = filp->private_data;
7317         struct ring_buffer_event *event;
7318         struct trace_buffer *buffer;
7319         struct raw_data_entry *entry;
7320         ssize_t written;
7321         int size;
7322         int len;
7323
7324 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7325
7326         if (tracing_disabled)
7327                 return -EINVAL;
7328
7329         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7330                 return -EINVAL;
7331
7332         /* The marker must at least have a tag id */
7333         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7334                 return -EINVAL;
7335
7336         if (cnt > TRACE_BUF_SIZE)
7337                 cnt = TRACE_BUF_SIZE;
7338
7339         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7340
7341         size = sizeof(*entry) + cnt;
7342         if (cnt < FAULT_SIZE_ID)
7343                 size += FAULT_SIZE_ID - cnt;
7344
7345         buffer = tr->array_buffer.buffer;
7346         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7347                                             tracing_gen_ctx());
7348         if (!event)
7349                 /* Ring buffer disabled, return as if not open for write */
7350                 return -EBADF;
7351
7352         entry = ring_buffer_event_data(event);
7353
7354         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7355         if (len) {
7356                 entry->id = -1;
7357                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7358                 written = -EFAULT;
7359         } else
7360                 written = cnt;
7361
7362         __buffer_unlock_commit(buffer, event);
7363
7364         return written;
7365 }
7366
7367 static int tracing_clock_show(struct seq_file *m, void *v)
7368 {
7369         struct trace_array *tr = m->private;
7370         int i;
7371
7372         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7373                 seq_printf(m,
7374                         "%s%s%s%s", i ? " " : "",
7375                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7376                         i == tr->clock_id ? "]" : "");
7377         seq_putc(m, '\n');
7378
7379         return 0;
7380 }
7381
7382 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7383 {
7384         int i;
7385
7386         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7387                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7388                         break;
7389         }
7390         if (i == ARRAY_SIZE(trace_clocks))
7391                 return -EINVAL;
7392
7393         mutex_lock(&trace_types_lock);
7394
7395         tr->clock_id = i;
7396
7397         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7398
7399         /*
7400          * New clock may not be consistent with the previous clock.
7401          * Reset the buffer so that it doesn't have incomparable timestamps.
7402          */
7403         tracing_reset_online_cpus(&tr->array_buffer);
7404
7405 #ifdef CONFIG_TRACER_MAX_TRACE
7406         if (tr->max_buffer.buffer)
7407                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7408         tracing_reset_online_cpus(&tr->max_buffer);
7409 #endif
7410
7411         mutex_unlock(&trace_types_lock);
7412
7413         return 0;
7414 }
7415
7416 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7417                                    size_t cnt, loff_t *fpos)
7418 {
7419         struct seq_file *m = filp->private_data;
7420         struct trace_array *tr = m->private;
7421         char buf[64];
7422         const char *clockstr;
7423         int ret;
7424
7425         if (cnt >= sizeof(buf))
7426                 return -EINVAL;
7427
7428         if (copy_from_user(buf, ubuf, cnt))
7429                 return -EFAULT;
7430
7431         buf[cnt] = 0;
7432
7433         clockstr = strstrip(buf);
7434
7435         ret = tracing_set_clock(tr, clockstr);
7436         if (ret)
7437                 return ret;
7438
7439         *fpos += cnt;
7440
7441         return cnt;
7442 }
7443
7444 static int tracing_clock_open(struct inode *inode, struct file *file)
7445 {
7446         struct trace_array *tr = inode->i_private;
7447         int ret;
7448
7449         ret = tracing_check_open_get_tr(tr);
7450         if (ret)
7451                 return ret;
7452
7453         ret = single_open(file, tracing_clock_show, inode->i_private);
7454         if (ret < 0)
7455                 trace_array_put(tr);
7456
7457         return ret;
7458 }
7459
7460 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7461 {
7462         struct trace_array *tr = m->private;
7463
7464         mutex_lock(&trace_types_lock);
7465
7466         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7467                 seq_puts(m, "delta [absolute]\n");
7468         else
7469                 seq_puts(m, "[delta] absolute\n");
7470
7471         mutex_unlock(&trace_types_lock);
7472
7473         return 0;
7474 }
7475
7476 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7477 {
7478         struct trace_array *tr = inode->i_private;
7479         int ret;
7480
7481         ret = tracing_check_open_get_tr(tr);
7482         if (ret)
7483                 return ret;
7484
7485         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7486         if (ret < 0)
7487                 trace_array_put(tr);
7488
7489         return ret;
7490 }
7491
7492 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7493 {
7494         if (rbe == this_cpu_read(trace_buffered_event))
7495                 return ring_buffer_time_stamp(buffer);
7496
7497         return ring_buffer_event_time_stamp(buffer, rbe);
7498 }
7499
7500 /*
7501  * Set or disable using the per CPU trace_buffer_event when possible.
7502  */
7503 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7504 {
7505         int ret = 0;
7506
7507         mutex_lock(&trace_types_lock);
7508
7509         if (set && tr->no_filter_buffering_ref++)
7510                 goto out;
7511
7512         if (!set) {
7513                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7514                         ret = -EINVAL;
7515                         goto out;
7516                 }
7517
7518                 --tr->no_filter_buffering_ref;
7519         }
7520  out:
7521         mutex_unlock(&trace_types_lock);
7522
7523         return ret;
7524 }
7525
7526 struct ftrace_buffer_info {
7527         struct trace_iterator   iter;
7528         void                    *spare;
7529         unsigned int            spare_cpu;
7530         unsigned int            read;
7531 };
7532
7533 #ifdef CONFIG_TRACER_SNAPSHOT
7534 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7535 {
7536         struct trace_array *tr = inode->i_private;
7537         struct trace_iterator *iter;
7538         struct seq_file *m;
7539         int ret;
7540
7541         ret = tracing_check_open_get_tr(tr);
7542         if (ret)
7543                 return ret;
7544
7545         if (file->f_mode & FMODE_READ) {
7546                 iter = __tracing_open(inode, file, true);
7547                 if (IS_ERR(iter))
7548                         ret = PTR_ERR(iter);
7549         } else {
7550                 /* Writes still need the seq_file to hold the private data */
7551                 ret = -ENOMEM;
7552                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7553                 if (!m)
7554                         goto out;
7555                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7556                 if (!iter) {
7557                         kfree(m);
7558                         goto out;
7559                 }
7560                 ret = 0;
7561
7562                 iter->tr = tr;
7563                 iter->array_buffer = &tr->max_buffer;
7564                 iter->cpu_file = tracing_get_cpu(inode);
7565                 m->private = iter;
7566                 file->private_data = m;
7567         }
7568 out:
7569         if (ret < 0)
7570                 trace_array_put(tr);
7571
7572         return ret;
7573 }
7574
7575 static ssize_t
7576 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7577                        loff_t *ppos)
7578 {
7579         struct seq_file *m = filp->private_data;
7580         struct trace_iterator *iter = m->private;
7581         struct trace_array *tr = iter->tr;
7582         unsigned long val;
7583         int ret;
7584
7585         ret = tracing_update_buffers();
7586         if (ret < 0)
7587                 return ret;
7588
7589         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7590         if (ret)
7591                 return ret;
7592
7593         mutex_lock(&trace_types_lock);
7594
7595         if (tr->current_trace->use_max_tr) {
7596                 ret = -EBUSY;
7597                 goto out;
7598         }
7599
7600         local_irq_disable();
7601         arch_spin_lock(&tr->max_lock);
7602         if (tr->cond_snapshot)
7603                 ret = -EBUSY;
7604         arch_spin_unlock(&tr->max_lock);
7605         local_irq_enable();
7606         if (ret)
7607                 goto out;
7608
7609         switch (val) {
7610         case 0:
7611                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7612                         ret = -EINVAL;
7613                         break;
7614                 }
7615                 if (tr->allocated_snapshot)
7616                         free_snapshot(tr);
7617                 break;
7618         case 1:
7619 /* Only allow per-cpu swap if the ring buffer supports it */
7620 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7621                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7622                         ret = -EINVAL;
7623                         break;
7624                 }
7625 #endif
7626                 if (tr->allocated_snapshot)
7627                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7628                                         &tr->array_buffer, iter->cpu_file);
7629                 else
7630                         ret = tracing_alloc_snapshot_instance(tr);
7631                 if (ret < 0)
7632                         break;
7633                 local_irq_disable();
7634                 /* Now, we're going to swap */
7635                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7636                         update_max_tr(tr, current, smp_processor_id(), NULL);
7637                 else
7638                         update_max_tr_single(tr, current, iter->cpu_file);
7639                 local_irq_enable();
7640                 break;
7641         default:
7642                 if (tr->allocated_snapshot) {
7643                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7644                                 tracing_reset_online_cpus(&tr->max_buffer);
7645                         else
7646                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7647                 }
7648                 break;
7649         }
7650
7651         if (ret >= 0) {
7652                 *ppos += cnt;
7653                 ret = cnt;
7654         }
7655 out:
7656         mutex_unlock(&trace_types_lock);
7657         return ret;
7658 }
7659
7660 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7661 {
7662         struct seq_file *m = file->private_data;
7663         int ret;
7664
7665         ret = tracing_release(inode, file);
7666
7667         if (file->f_mode & FMODE_READ)
7668                 return ret;
7669
7670         /* If write only, the seq_file is just a stub */
7671         if (m)
7672                 kfree(m->private);
7673         kfree(m);
7674
7675         return 0;
7676 }
7677
7678 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7679 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7680                                     size_t count, loff_t *ppos);
7681 static int tracing_buffers_release(struct inode *inode, struct file *file);
7682 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7683                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7684
7685 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7686 {
7687         struct ftrace_buffer_info *info;
7688         int ret;
7689
7690         /* The following checks for tracefs lockdown */
7691         ret = tracing_buffers_open(inode, filp);
7692         if (ret < 0)
7693                 return ret;
7694
7695         info = filp->private_data;
7696
7697         if (info->iter.trace->use_max_tr) {
7698                 tracing_buffers_release(inode, filp);
7699                 return -EBUSY;
7700         }
7701
7702         info->iter.snapshot = true;
7703         info->iter.array_buffer = &info->iter.tr->max_buffer;
7704
7705         return ret;
7706 }
7707
7708 #endif /* CONFIG_TRACER_SNAPSHOT */
7709
7710
7711 static const struct file_operations tracing_thresh_fops = {
7712         .open           = tracing_open_generic,
7713         .read           = tracing_thresh_read,
7714         .write          = tracing_thresh_write,
7715         .llseek         = generic_file_llseek,
7716 };
7717
7718 #ifdef CONFIG_TRACER_MAX_TRACE
7719 static const struct file_operations tracing_max_lat_fops = {
7720         .open           = tracing_open_generic,
7721         .read           = tracing_max_lat_read,
7722         .write          = tracing_max_lat_write,
7723         .llseek         = generic_file_llseek,
7724 };
7725 #endif
7726
7727 static const struct file_operations set_tracer_fops = {
7728         .open           = tracing_open_generic,
7729         .read           = tracing_set_trace_read,
7730         .write          = tracing_set_trace_write,
7731         .llseek         = generic_file_llseek,
7732 };
7733
7734 static const struct file_operations tracing_pipe_fops = {
7735         .open           = tracing_open_pipe,
7736         .poll           = tracing_poll_pipe,
7737         .read           = tracing_read_pipe,
7738         .splice_read    = tracing_splice_read_pipe,
7739         .release        = tracing_release_pipe,
7740         .llseek         = no_llseek,
7741 };
7742
7743 static const struct file_operations tracing_entries_fops = {
7744         .open           = tracing_open_generic_tr,
7745         .read           = tracing_entries_read,
7746         .write          = tracing_entries_write,
7747         .llseek         = generic_file_llseek,
7748         .release        = tracing_release_generic_tr,
7749 };
7750
7751 static const struct file_operations tracing_total_entries_fops = {
7752         .open           = tracing_open_generic_tr,
7753         .read           = tracing_total_entries_read,
7754         .llseek         = generic_file_llseek,
7755         .release        = tracing_release_generic_tr,
7756 };
7757
7758 static const struct file_operations tracing_free_buffer_fops = {
7759         .open           = tracing_open_generic_tr,
7760         .write          = tracing_free_buffer_write,
7761         .release        = tracing_free_buffer_release,
7762 };
7763
7764 static const struct file_operations tracing_mark_fops = {
7765         .open           = tracing_mark_open,
7766         .write          = tracing_mark_write,
7767         .release        = tracing_release_generic_tr,
7768 };
7769
7770 static const struct file_operations tracing_mark_raw_fops = {
7771         .open           = tracing_mark_open,
7772         .write          = tracing_mark_raw_write,
7773         .release        = tracing_release_generic_tr,
7774 };
7775
7776 static const struct file_operations trace_clock_fops = {
7777         .open           = tracing_clock_open,
7778         .read           = seq_read,
7779         .llseek         = seq_lseek,
7780         .release        = tracing_single_release_tr,
7781         .write          = tracing_clock_write,
7782 };
7783
7784 static const struct file_operations trace_time_stamp_mode_fops = {
7785         .open           = tracing_time_stamp_mode_open,
7786         .read           = seq_read,
7787         .llseek         = seq_lseek,
7788         .release        = tracing_single_release_tr,
7789 };
7790
7791 #ifdef CONFIG_TRACER_SNAPSHOT
7792 static const struct file_operations snapshot_fops = {
7793         .open           = tracing_snapshot_open,
7794         .read           = seq_read,
7795         .write          = tracing_snapshot_write,
7796         .llseek         = tracing_lseek,
7797         .release        = tracing_snapshot_release,
7798 };
7799
7800 static const struct file_operations snapshot_raw_fops = {
7801         .open           = snapshot_raw_open,
7802         .read           = tracing_buffers_read,
7803         .release        = tracing_buffers_release,
7804         .splice_read    = tracing_buffers_splice_read,
7805         .llseek         = no_llseek,
7806 };
7807
7808 #endif /* CONFIG_TRACER_SNAPSHOT */
7809
7810 /*
7811  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7812  * @filp: The active open file structure
7813  * @ubuf: The userspace provided buffer to read value into
7814  * @cnt: The maximum number of bytes to read
7815  * @ppos: The current "file" position
7816  *
7817  * This function implements the write interface for a struct trace_min_max_param.
7818  * The filp->private_data must point to a trace_min_max_param structure that
7819  * defines where to write the value, the min and the max acceptable values,
7820  * and a lock to protect the write.
7821  */
7822 static ssize_t
7823 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7824 {
7825         struct trace_min_max_param *param = filp->private_data;
7826         u64 val;
7827         int err;
7828
7829         if (!param)
7830                 return -EFAULT;
7831
7832         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7833         if (err)
7834                 return err;
7835
7836         if (param->lock)
7837                 mutex_lock(param->lock);
7838
7839         if (param->min && val < *param->min)
7840                 err = -EINVAL;
7841
7842         if (param->max && val > *param->max)
7843                 err = -EINVAL;
7844
7845         if (!err)
7846                 *param->val = val;
7847
7848         if (param->lock)
7849                 mutex_unlock(param->lock);
7850
7851         if (err)
7852                 return err;
7853
7854         return cnt;
7855 }
7856
7857 /*
7858  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7859  * @filp: The active open file structure
7860  * @ubuf: The userspace provided buffer to read value into
7861  * @cnt: The maximum number of bytes to read
7862  * @ppos: The current "file" position
7863  *
7864  * This function implements the read interface for a struct trace_min_max_param.
7865  * The filp->private_data must point to a trace_min_max_param struct with valid
7866  * data.
7867  */
7868 static ssize_t
7869 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7870 {
7871         struct trace_min_max_param *param = filp->private_data;
7872         char buf[U64_STR_SIZE];
7873         int len;
7874         u64 val;
7875
7876         if (!param)
7877                 return -EFAULT;
7878
7879         val = *param->val;
7880
7881         if (cnt > sizeof(buf))
7882                 cnt = sizeof(buf);
7883
7884         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7885
7886         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7887 }
7888
7889 const struct file_operations trace_min_max_fops = {
7890         .open           = tracing_open_generic,
7891         .read           = trace_min_max_read,
7892         .write          = trace_min_max_write,
7893 };
7894
7895 #define TRACING_LOG_ERRS_MAX    8
7896 #define TRACING_LOG_LOC_MAX     128
7897
7898 #define CMD_PREFIX "  Command: "
7899
7900 struct err_info {
7901         const char      **errs; /* ptr to loc-specific array of err strings */
7902         u8              type;   /* index into errs -> specific err string */
7903         u16             pos;    /* caret position */
7904         u64             ts;
7905 };
7906
7907 struct tracing_log_err {
7908         struct list_head        list;
7909         struct err_info         info;
7910         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7911         char                    *cmd;                     /* what caused err */
7912 };
7913
7914 static DEFINE_MUTEX(tracing_err_log_lock);
7915
7916 static struct tracing_log_err *alloc_tracing_log_err(int len)
7917 {
7918         struct tracing_log_err *err;
7919
7920         err = kzalloc(sizeof(*err), GFP_KERNEL);
7921         if (!err)
7922                 return ERR_PTR(-ENOMEM);
7923
7924         err->cmd = kzalloc(len, GFP_KERNEL);
7925         if (!err->cmd) {
7926                 kfree(err);
7927                 return ERR_PTR(-ENOMEM);
7928         }
7929
7930         return err;
7931 }
7932
7933 static void free_tracing_log_err(struct tracing_log_err *err)
7934 {
7935         kfree(err->cmd);
7936         kfree(err);
7937 }
7938
7939 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7940                                                    int len)
7941 {
7942         struct tracing_log_err *err;
7943         char *cmd;
7944
7945         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7946                 err = alloc_tracing_log_err(len);
7947                 if (PTR_ERR(err) != -ENOMEM)
7948                         tr->n_err_log_entries++;
7949
7950                 return err;
7951         }
7952         cmd = kzalloc(len, GFP_KERNEL);
7953         if (!cmd)
7954                 return ERR_PTR(-ENOMEM);
7955         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7956         kfree(err->cmd);
7957         err->cmd = cmd;
7958         list_del(&err->list);
7959
7960         return err;
7961 }
7962
7963 /**
7964  * err_pos - find the position of a string within a command for error careting
7965  * @cmd: The tracing command that caused the error
7966  * @str: The string to position the caret at within @cmd
7967  *
7968  * Finds the position of the first occurrence of @str within @cmd.  The
7969  * return value can be passed to tracing_log_err() for caret placement
7970  * within @cmd.
7971  *
7972  * Returns the index within @cmd of the first occurrence of @str or 0
7973  * if @str was not found.
7974  */
7975 unsigned int err_pos(char *cmd, const char *str)
7976 {
7977         char *found;
7978
7979         if (WARN_ON(!strlen(cmd)))
7980                 return 0;
7981
7982         found = strstr(cmd, str);
7983         if (found)
7984                 return found - cmd;
7985
7986         return 0;
7987 }
7988
7989 /**
7990  * tracing_log_err - write an error to the tracing error log
7991  * @tr: The associated trace array for the error (NULL for top level array)
7992  * @loc: A string describing where the error occurred
7993  * @cmd: The tracing command that caused the error
7994  * @errs: The array of loc-specific static error strings
7995  * @type: The index into errs[], which produces the specific static err string
7996  * @pos: The position the caret should be placed in the cmd
7997  *
7998  * Writes an error into tracing/error_log of the form:
7999  *
8000  * <loc>: error: <text>
8001  *   Command: <cmd>
8002  *              ^
8003  *
8004  * tracing/error_log is a small log file containing the last
8005  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8006  * unless there has been a tracing error, and the error log can be
8007  * cleared and have its memory freed by writing the empty string in
8008  * truncation mode to it i.e. echo > tracing/error_log.
8009  *
8010  * NOTE: the @errs array along with the @type param are used to
8011  * produce a static error string - this string is not copied and saved
8012  * when the error is logged - only a pointer to it is saved.  See
8013  * existing callers for examples of how static strings are typically
8014  * defined for use with tracing_log_err().
8015  */
8016 void tracing_log_err(struct trace_array *tr,
8017                      const char *loc, const char *cmd,
8018                      const char **errs, u8 type, u16 pos)
8019 {
8020         struct tracing_log_err *err;
8021         int len = 0;
8022
8023         if (!tr)
8024                 tr = &global_trace;
8025
8026         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8027
8028         mutex_lock(&tracing_err_log_lock);
8029         err = get_tracing_log_err(tr, len);
8030         if (PTR_ERR(err) == -ENOMEM) {
8031                 mutex_unlock(&tracing_err_log_lock);
8032                 return;
8033         }
8034
8035         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8036         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8037
8038         err->info.errs = errs;
8039         err->info.type = type;
8040         err->info.pos = pos;
8041         err->info.ts = local_clock();
8042
8043         list_add_tail(&err->list, &tr->err_log);
8044         mutex_unlock(&tracing_err_log_lock);
8045 }
8046
8047 static void clear_tracing_err_log(struct trace_array *tr)
8048 {
8049         struct tracing_log_err *err, *next;
8050
8051         mutex_lock(&tracing_err_log_lock);
8052         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8053                 list_del(&err->list);
8054                 free_tracing_log_err(err);
8055         }
8056
8057         tr->n_err_log_entries = 0;
8058         mutex_unlock(&tracing_err_log_lock);
8059 }
8060
8061 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8062 {
8063         struct trace_array *tr = m->private;
8064
8065         mutex_lock(&tracing_err_log_lock);
8066
8067         return seq_list_start(&tr->err_log, *pos);
8068 }
8069
8070 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8071 {
8072         struct trace_array *tr = m->private;
8073
8074         return seq_list_next(v, &tr->err_log, pos);
8075 }
8076
8077 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8078 {
8079         mutex_unlock(&tracing_err_log_lock);
8080 }
8081
8082 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8083 {
8084         u16 i;
8085
8086         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8087                 seq_putc(m, ' ');
8088         for (i = 0; i < pos; i++)
8089                 seq_putc(m, ' ');
8090         seq_puts(m, "^\n");
8091 }
8092
8093 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8094 {
8095         struct tracing_log_err *err = v;
8096
8097         if (err) {
8098                 const char *err_text = err->info.errs[err->info.type];
8099                 u64 sec = err->info.ts;
8100                 u32 nsec;
8101
8102                 nsec = do_div(sec, NSEC_PER_SEC);
8103                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8104                            err->loc, err_text);
8105                 seq_printf(m, "%s", err->cmd);
8106                 tracing_err_log_show_pos(m, err->info.pos);
8107         }
8108
8109         return 0;
8110 }
8111
8112 static const struct seq_operations tracing_err_log_seq_ops = {
8113         .start  = tracing_err_log_seq_start,
8114         .next   = tracing_err_log_seq_next,
8115         .stop   = tracing_err_log_seq_stop,
8116         .show   = tracing_err_log_seq_show
8117 };
8118
8119 static int tracing_err_log_open(struct inode *inode, struct file *file)
8120 {
8121         struct trace_array *tr = inode->i_private;
8122         int ret = 0;
8123
8124         ret = tracing_check_open_get_tr(tr);
8125         if (ret)
8126                 return ret;
8127
8128         /* If this file was opened for write, then erase contents */
8129         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8130                 clear_tracing_err_log(tr);
8131
8132         if (file->f_mode & FMODE_READ) {
8133                 ret = seq_open(file, &tracing_err_log_seq_ops);
8134                 if (!ret) {
8135                         struct seq_file *m = file->private_data;
8136                         m->private = tr;
8137                 } else {
8138                         trace_array_put(tr);
8139                 }
8140         }
8141         return ret;
8142 }
8143
8144 static ssize_t tracing_err_log_write(struct file *file,
8145                                      const char __user *buffer,
8146                                      size_t count, loff_t *ppos)
8147 {
8148         return count;
8149 }
8150
8151 static int tracing_err_log_release(struct inode *inode, struct file *file)
8152 {
8153         struct trace_array *tr = inode->i_private;
8154
8155         trace_array_put(tr);
8156
8157         if (file->f_mode & FMODE_READ)
8158                 seq_release(inode, file);
8159
8160         return 0;
8161 }
8162
8163 static const struct file_operations tracing_err_log_fops = {
8164         .open           = tracing_err_log_open,
8165         .write          = tracing_err_log_write,
8166         .read           = seq_read,
8167         .llseek         = tracing_lseek,
8168         .release        = tracing_err_log_release,
8169 };
8170
8171 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8172 {
8173         struct trace_array *tr = inode->i_private;
8174         struct ftrace_buffer_info *info;
8175         int ret;
8176
8177         ret = tracing_check_open_get_tr(tr);
8178         if (ret)
8179                 return ret;
8180
8181         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8182         if (!info) {
8183                 trace_array_put(tr);
8184                 return -ENOMEM;
8185         }
8186
8187         mutex_lock(&trace_types_lock);
8188
8189         info->iter.tr           = tr;
8190         info->iter.cpu_file     = tracing_get_cpu(inode);
8191         info->iter.trace        = tr->current_trace;
8192         info->iter.array_buffer = &tr->array_buffer;
8193         info->spare             = NULL;
8194         /* Force reading ring buffer for first read */
8195         info->read              = (unsigned int)-1;
8196
8197         filp->private_data = info;
8198
8199         tr->trace_ref++;
8200
8201         mutex_unlock(&trace_types_lock);
8202
8203         ret = nonseekable_open(inode, filp);
8204         if (ret < 0)
8205                 trace_array_put(tr);
8206
8207         return ret;
8208 }
8209
8210 static __poll_t
8211 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8212 {
8213         struct ftrace_buffer_info *info = filp->private_data;
8214         struct trace_iterator *iter = &info->iter;
8215
8216         return trace_poll(iter, filp, poll_table);
8217 }
8218
8219 static ssize_t
8220 tracing_buffers_read(struct file *filp, char __user *ubuf,
8221                      size_t count, loff_t *ppos)
8222 {
8223         struct ftrace_buffer_info *info = filp->private_data;
8224         struct trace_iterator *iter = &info->iter;
8225         ssize_t ret = 0;
8226         ssize_t size;
8227
8228         if (!count)
8229                 return 0;
8230
8231 #ifdef CONFIG_TRACER_MAX_TRACE
8232         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8233                 return -EBUSY;
8234 #endif
8235
8236         if (!info->spare) {
8237                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8238                                                           iter->cpu_file);
8239                 if (IS_ERR(info->spare)) {
8240                         ret = PTR_ERR(info->spare);
8241                         info->spare = NULL;
8242                 } else {
8243                         info->spare_cpu = iter->cpu_file;
8244                 }
8245         }
8246         if (!info->spare)
8247                 return ret;
8248
8249         /* Do we have previous read data to read? */
8250         if (info->read < PAGE_SIZE)
8251                 goto read;
8252
8253  again:
8254         trace_access_lock(iter->cpu_file);
8255         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8256                                     &info->spare,
8257                                     count,
8258                                     iter->cpu_file, 0);
8259         trace_access_unlock(iter->cpu_file);
8260
8261         if (ret < 0) {
8262                 if (trace_empty(iter)) {
8263                         if ((filp->f_flags & O_NONBLOCK))
8264                                 return -EAGAIN;
8265
8266                         ret = wait_on_pipe(iter, 0);
8267                         if (ret)
8268                                 return ret;
8269
8270                         goto again;
8271                 }
8272                 return 0;
8273         }
8274
8275         info->read = 0;
8276  read:
8277         size = PAGE_SIZE - info->read;
8278         if (size > count)
8279                 size = count;
8280
8281         ret = copy_to_user(ubuf, info->spare + info->read, size);
8282         if (ret == size)
8283                 return -EFAULT;
8284
8285         size -= ret;
8286
8287         *ppos += size;
8288         info->read += size;
8289
8290         return size;
8291 }
8292
8293 static int tracing_buffers_release(struct inode *inode, struct file *file)
8294 {
8295         struct ftrace_buffer_info *info = file->private_data;
8296         struct trace_iterator *iter = &info->iter;
8297
8298         mutex_lock(&trace_types_lock);
8299
8300         iter->tr->trace_ref--;
8301
8302         __trace_array_put(iter->tr);
8303
8304         iter->wait_index++;
8305         /* Make sure the waiters see the new wait_index */
8306         smp_wmb();
8307
8308         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8309
8310         if (info->spare)
8311                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8312                                            info->spare_cpu, info->spare);
8313         kvfree(info);
8314
8315         mutex_unlock(&trace_types_lock);
8316
8317         return 0;
8318 }
8319
8320 struct buffer_ref {
8321         struct trace_buffer     *buffer;
8322         void                    *page;
8323         int                     cpu;
8324         refcount_t              refcount;
8325 };
8326
8327 static void buffer_ref_release(struct buffer_ref *ref)
8328 {
8329         if (!refcount_dec_and_test(&ref->refcount))
8330                 return;
8331         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8332         kfree(ref);
8333 }
8334
8335 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8336                                     struct pipe_buffer *buf)
8337 {
8338         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8339
8340         buffer_ref_release(ref);
8341         buf->private = 0;
8342 }
8343
8344 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8345                                 struct pipe_buffer *buf)
8346 {
8347         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8348
8349         if (refcount_read(&ref->refcount) > INT_MAX/2)
8350                 return false;
8351
8352         refcount_inc(&ref->refcount);
8353         return true;
8354 }
8355
8356 /* Pipe buffer operations for a buffer. */
8357 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8358         .release                = buffer_pipe_buf_release,
8359         .get                    = buffer_pipe_buf_get,
8360 };
8361
8362 /*
8363  * Callback from splice_to_pipe(), if we need to release some pages
8364  * at the end of the spd in case we error'ed out in filling the pipe.
8365  */
8366 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8367 {
8368         struct buffer_ref *ref =
8369                 (struct buffer_ref *)spd->partial[i].private;
8370
8371         buffer_ref_release(ref);
8372         spd->partial[i].private = 0;
8373 }
8374
8375 static ssize_t
8376 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8377                             struct pipe_inode_info *pipe, size_t len,
8378                             unsigned int flags)
8379 {
8380         struct ftrace_buffer_info *info = file->private_data;
8381         struct trace_iterator *iter = &info->iter;
8382         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8383         struct page *pages_def[PIPE_DEF_BUFFERS];
8384         struct splice_pipe_desc spd = {
8385                 .pages          = pages_def,
8386                 .partial        = partial_def,
8387                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8388                 .ops            = &buffer_pipe_buf_ops,
8389                 .spd_release    = buffer_spd_release,
8390         };
8391         struct buffer_ref *ref;
8392         int entries, i;
8393         ssize_t ret = 0;
8394
8395 #ifdef CONFIG_TRACER_MAX_TRACE
8396         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8397                 return -EBUSY;
8398 #endif
8399
8400         if (*ppos & (PAGE_SIZE - 1))
8401                 return -EINVAL;
8402
8403         if (len & (PAGE_SIZE - 1)) {
8404                 if (len < PAGE_SIZE)
8405                         return -EINVAL;
8406                 len &= PAGE_MASK;
8407         }
8408
8409         if (splice_grow_spd(pipe, &spd))
8410                 return -ENOMEM;
8411
8412  again:
8413         trace_access_lock(iter->cpu_file);
8414         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8415
8416         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8417                 struct page *page;
8418                 int r;
8419
8420                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8421                 if (!ref) {
8422                         ret = -ENOMEM;
8423                         break;
8424                 }
8425
8426                 refcount_set(&ref->refcount, 1);
8427                 ref->buffer = iter->array_buffer->buffer;
8428                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8429                 if (IS_ERR(ref->page)) {
8430                         ret = PTR_ERR(ref->page);
8431                         ref->page = NULL;
8432                         kfree(ref);
8433                         break;
8434                 }
8435                 ref->cpu = iter->cpu_file;
8436
8437                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8438                                           len, iter->cpu_file, 1);
8439                 if (r < 0) {
8440                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8441                                                    ref->page);
8442                         kfree(ref);
8443                         break;
8444                 }
8445
8446                 page = virt_to_page(ref->page);
8447
8448                 spd.pages[i] = page;
8449                 spd.partial[i].len = PAGE_SIZE;
8450                 spd.partial[i].offset = 0;
8451                 spd.partial[i].private = (unsigned long)ref;
8452                 spd.nr_pages++;
8453                 *ppos += PAGE_SIZE;
8454
8455                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8456         }
8457
8458         trace_access_unlock(iter->cpu_file);
8459         spd.nr_pages = i;
8460
8461         /* did we read anything? */
8462         if (!spd.nr_pages) {
8463                 long wait_index;
8464
8465                 if (ret)
8466                         goto out;
8467
8468                 ret = -EAGAIN;
8469                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8470                         goto out;
8471
8472                 wait_index = READ_ONCE(iter->wait_index);
8473
8474                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8475                 if (ret)
8476                         goto out;
8477
8478                 /* No need to wait after waking up when tracing is off */
8479                 if (!tracer_tracing_is_on(iter->tr))
8480                         goto out;
8481
8482                 /* Make sure we see the new wait_index */
8483                 smp_rmb();
8484                 if (wait_index != iter->wait_index)
8485                         goto out;
8486
8487                 goto again;
8488         }
8489
8490         ret = splice_to_pipe(pipe, &spd);
8491 out:
8492         splice_shrink_spd(&spd);
8493
8494         return ret;
8495 }
8496
8497 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8498 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8499 {
8500         struct ftrace_buffer_info *info = file->private_data;
8501         struct trace_iterator *iter = &info->iter;
8502
8503         if (cmd)
8504                 return -ENOIOCTLCMD;
8505
8506         mutex_lock(&trace_types_lock);
8507
8508         iter->wait_index++;
8509         /* Make sure the waiters see the new wait_index */
8510         smp_wmb();
8511
8512         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8513
8514         mutex_unlock(&trace_types_lock);
8515         return 0;
8516 }
8517
8518 static const struct file_operations tracing_buffers_fops = {
8519         .open           = tracing_buffers_open,
8520         .read           = tracing_buffers_read,
8521         .poll           = tracing_buffers_poll,
8522         .release        = tracing_buffers_release,
8523         .splice_read    = tracing_buffers_splice_read,
8524         .unlocked_ioctl = tracing_buffers_ioctl,
8525         .llseek         = no_llseek,
8526 };
8527
8528 static ssize_t
8529 tracing_stats_read(struct file *filp, char __user *ubuf,
8530                    size_t count, loff_t *ppos)
8531 {
8532         struct inode *inode = file_inode(filp);
8533         struct trace_array *tr = inode->i_private;
8534         struct array_buffer *trace_buf = &tr->array_buffer;
8535         int cpu = tracing_get_cpu(inode);
8536         struct trace_seq *s;
8537         unsigned long cnt;
8538         unsigned long long t;
8539         unsigned long usec_rem;
8540
8541         s = kmalloc(sizeof(*s), GFP_KERNEL);
8542         if (!s)
8543                 return -ENOMEM;
8544
8545         trace_seq_init(s);
8546
8547         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8548         trace_seq_printf(s, "entries: %ld\n", cnt);
8549
8550         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8551         trace_seq_printf(s, "overrun: %ld\n", cnt);
8552
8553         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8554         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8555
8556         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8557         trace_seq_printf(s, "bytes: %ld\n", cnt);
8558
8559         if (trace_clocks[tr->clock_id].in_ns) {
8560                 /* local or global for trace_clock */
8561                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8562                 usec_rem = do_div(t, USEC_PER_SEC);
8563                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8564                                                                 t, usec_rem);
8565
8566                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8567                 usec_rem = do_div(t, USEC_PER_SEC);
8568                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8569         } else {
8570                 /* counter or tsc mode for trace_clock */
8571                 trace_seq_printf(s, "oldest event ts: %llu\n",
8572                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8573
8574                 trace_seq_printf(s, "now ts: %llu\n",
8575                                 ring_buffer_time_stamp(trace_buf->buffer));
8576         }
8577
8578         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8579         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8580
8581         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8582         trace_seq_printf(s, "read events: %ld\n", cnt);
8583
8584         count = simple_read_from_buffer(ubuf, count, ppos,
8585                                         s->buffer, trace_seq_used(s));
8586
8587         kfree(s);
8588
8589         return count;
8590 }
8591
8592 static const struct file_operations tracing_stats_fops = {
8593         .open           = tracing_open_generic_tr,
8594         .read           = tracing_stats_read,
8595         .llseek         = generic_file_llseek,
8596         .release        = tracing_release_generic_tr,
8597 };
8598
8599 #ifdef CONFIG_DYNAMIC_FTRACE
8600
8601 static ssize_t
8602 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8603                   size_t cnt, loff_t *ppos)
8604 {
8605         ssize_t ret;
8606         char *buf;
8607         int r;
8608
8609         /* 256 should be plenty to hold the amount needed */
8610         buf = kmalloc(256, GFP_KERNEL);
8611         if (!buf)
8612                 return -ENOMEM;
8613
8614         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8615                       ftrace_update_tot_cnt,
8616                       ftrace_number_of_pages,
8617                       ftrace_number_of_groups);
8618
8619         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8620         kfree(buf);
8621         return ret;
8622 }
8623
8624 static const struct file_operations tracing_dyn_info_fops = {
8625         .open           = tracing_open_generic,
8626         .read           = tracing_read_dyn_info,
8627         .llseek         = generic_file_llseek,
8628 };
8629 #endif /* CONFIG_DYNAMIC_FTRACE */
8630
8631 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8632 static void
8633 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8634                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8635                 void *data)
8636 {
8637         tracing_snapshot_instance(tr);
8638 }
8639
8640 static void
8641 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8642                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8643                       void *data)
8644 {
8645         struct ftrace_func_mapper *mapper = data;
8646         long *count = NULL;
8647
8648         if (mapper)
8649                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8650
8651         if (count) {
8652
8653                 if (*count <= 0)
8654                         return;
8655
8656                 (*count)--;
8657         }
8658
8659         tracing_snapshot_instance(tr);
8660 }
8661
8662 static int
8663 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8664                       struct ftrace_probe_ops *ops, void *data)
8665 {
8666         struct ftrace_func_mapper *mapper = data;
8667         long *count = NULL;
8668
8669         seq_printf(m, "%ps:", (void *)ip);
8670
8671         seq_puts(m, "snapshot");
8672
8673         if (mapper)
8674                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8675
8676         if (count)
8677                 seq_printf(m, ":count=%ld\n", *count);
8678         else
8679                 seq_puts(m, ":unlimited\n");
8680
8681         return 0;
8682 }
8683
8684 static int
8685 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8686                      unsigned long ip, void *init_data, void **data)
8687 {
8688         struct ftrace_func_mapper *mapper = *data;
8689
8690         if (!mapper) {
8691                 mapper = allocate_ftrace_func_mapper();
8692                 if (!mapper)
8693                         return -ENOMEM;
8694                 *data = mapper;
8695         }
8696
8697         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8698 }
8699
8700 static void
8701 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8702                      unsigned long ip, void *data)
8703 {
8704         struct ftrace_func_mapper *mapper = data;
8705
8706         if (!ip) {
8707                 if (!mapper)
8708                         return;
8709                 free_ftrace_func_mapper(mapper, NULL);
8710                 return;
8711         }
8712
8713         ftrace_func_mapper_remove_ip(mapper, ip);
8714 }
8715
8716 static struct ftrace_probe_ops snapshot_probe_ops = {
8717         .func                   = ftrace_snapshot,
8718         .print                  = ftrace_snapshot_print,
8719 };
8720
8721 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8722         .func                   = ftrace_count_snapshot,
8723         .print                  = ftrace_snapshot_print,
8724         .init                   = ftrace_snapshot_init,
8725         .free                   = ftrace_snapshot_free,
8726 };
8727
8728 static int
8729 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8730                                char *glob, char *cmd, char *param, int enable)
8731 {
8732         struct ftrace_probe_ops *ops;
8733         void *count = (void *)-1;
8734         char *number;
8735         int ret;
8736
8737         if (!tr)
8738                 return -ENODEV;
8739
8740         /* hash funcs only work with set_ftrace_filter */
8741         if (!enable)
8742                 return -EINVAL;
8743
8744         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8745
8746         if (glob[0] == '!')
8747                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8748
8749         if (!param)
8750                 goto out_reg;
8751
8752         number = strsep(&param, ":");
8753
8754         if (!strlen(number))
8755                 goto out_reg;
8756
8757         /*
8758          * We use the callback data field (which is a pointer)
8759          * as our counter.
8760          */
8761         ret = kstrtoul(number, 0, (unsigned long *)&count);
8762         if (ret)
8763                 return ret;
8764
8765  out_reg:
8766         ret = tracing_alloc_snapshot_instance(tr);
8767         if (ret < 0)
8768                 goto out;
8769
8770         ret = register_ftrace_function_probe(glob, tr, ops, count);
8771
8772  out:
8773         return ret < 0 ? ret : 0;
8774 }
8775
8776 static struct ftrace_func_command ftrace_snapshot_cmd = {
8777         .name                   = "snapshot",
8778         .func                   = ftrace_trace_snapshot_callback,
8779 };
8780
8781 static __init int register_snapshot_cmd(void)
8782 {
8783         return register_ftrace_command(&ftrace_snapshot_cmd);
8784 }
8785 #else
8786 static inline __init int register_snapshot_cmd(void) { return 0; }
8787 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8788
8789 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8790 {
8791         if (WARN_ON(!tr->dir))
8792                 return ERR_PTR(-ENODEV);
8793
8794         /* Top directory uses NULL as the parent */
8795         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8796                 return NULL;
8797
8798         /* All sub buffers have a descriptor */
8799         return tr->dir;
8800 }
8801
8802 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8803 {
8804         struct dentry *d_tracer;
8805
8806         if (tr->percpu_dir)
8807                 return tr->percpu_dir;
8808
8809         d_tracer = tracing_get_dentry(tr);
8810         if (IS_ERR(d_tracer))
8811                 return NULL;
8812
8813         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8814
8815         MEM_FAIL(!tr->percpu_dir,
8816                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8817
8818         return tr->percpu_dir;
8819 }
8820
8821 static struct dentry *
8822 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8823                       void *data, long cpu, const struct file_operations *fops)
8824 {
8825         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8826
8827         if (ret) /* See tracing_get_cpu() */
8828                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8829         return ret;
8830 }
8831
8832 static void
8833 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8834 {
8835         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8836         struct dentry *d_cpu;
8837         char cpu_dir[30]; /* 30 characters should be more than enough */
8838
8839         if (!d_percpu)
8840                 return;
8841
8842         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8843         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8844         if (!d_cpu) {
8845                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8846                 return;
8847         }
8848
8849         /* per cpu trace_pipe */
8850         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8851                                 tr, cpu, &tracing_pipe_fops);
8852
8853         /* per cpu trace */
8854         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8855                                 tr, cpu, &tracing_fops);
8856
8857         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8858                                 tr, cpu, &tracing_buffers_fops);
8859
8860         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8861                                 tr, cpu, &tracing_stats_fops);
8862
8863         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8864                                 tr, cpu, &tracing_entries_fops);
8865
8866 #ifdef CONFIG_TRACER_SNAPSHOT
8867         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8868                                 tr, cpu, &snapshot_fops);
8869
8870         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8871                                 tr, cpu, &snapshot_raw_fops);
8872 #endif
8873 }
8874
8875 #ifdef CONFIG_FTRACE_SELFTEST
8876 /* Let selftest have access to static functions in this file */
8877 #include "trace_selftest.c"
8878 #endif
8879
8880 static ssize_t
8881 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8882                         loff_t *ppos)
8883 {
8884         struct trace_option_dentry *topt = filp->private_data;
8885         char *buf;
8886
8887         if (topt->flags->val & topt->opt->bit)
8888                 buf = "1\n";
8889         else
8890                 buf = "0\n";
8891
8892         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8893 }
8894
8895 static ssize_t
8896 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8897                          loff_t *ppos)
8898 {
8899         struct trace_option_dentry *topt = filp->private_data;
8900         unsigned long val;
8901         int ret;
8902
8903         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8904         if (ret)
8905                 return ret;
8906
8907         if (val != 0 && val != 1)
8908                 return -EINVAL;
8909
8910         if (!!(topt->flags->val & topt->opt->bit) != val) {
8911                 mutex_lock(&trace_types_lock);
8912                 ret = __set_tracer_option(topt->tr, topt->flags,
8913                                           topt->opt, !val);
8914                 mutex_unlock(&trace_types_lock);
8915                 if (ret)
8916                         return ret;
8917         }
8918
8919         *ppos += cnt;
8920
8921         return cnt;
8922 }
8923
8924
8925 static const struct file_operations trace_options_fops = {
8926         .open = tracing_open_generic,
8927         .read = trace_options_read,
8928         .write = trace_options_write,
8929         .llseek = generic_file_llseek,
8930 };
8931
8932 /*
8933  * In order to pass in both the trace_array descriptor as well as the index
8934  * to the flag that the trace option file represents, the trace_array
8935  * has a character array of trace_flags_index[], which holds the index
8936  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8937  * The address of this character array is passed to the flag option file
8938  * read/write callbacks.
8939  *
8940  * In order to extract both the index and the trace_array descriptor,
8941  * get_tr_index() uses the following algorithm.
8942  *
8943  *   idx = *ptr;
8944  *
8945  * As the pointer itself contains the address of the index (remember
8946  * index[1] == 1).
8947  *
8948  * Then to get the trace_array descriptor, by subtracting that index
8949  * from the ptr, we get to the start of the index itself.
8950  *
8951  *   ptr - idx == &index[0]
8952  *
8953  * Then a simple container_of() from that pointer gets us to the
8954  * trace_array descriptor.
8955  */
8956 static void get_tr_index(void *data, struct trace_array **ptr,
8957                          unsigned int *pindex)
8958 {
8959         *pindex = *(unsigned char *)data;
8960
8961         *ptr = container_of(data - *pindex, struct trace_array,
8962                             trace_flags_index);
8963 }
8964
8965 static ssize_t
8966 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8967                         loff_t *ppos)
8968 {
8969         void *tr_index = filp->private_data;
8970         struct trace_array *tr;
8971         unsigned int index;
8972         char *buf;
8973
8974         get_tr_index(tr_index, &tr, &index);
8975
8976         if (tr->trace_flags & (1 << index))
8977                 buf = "1\n";
8978         else
8979                 buf = "0\n";
8980
8981         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8982 }
8983
8984 static ssize_t
8985 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8986                          loff_t *ppos)
8987 {
8988         void *tr_index = filp->private_data;
8989         struct trace_array *tr;
8990         unsigned int index;
8991         unsigned long val;
8992         int ret;
8993
8994         get_tr_index(tr_index, &tr, &index);
8995
8996         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8997         if (ret)
8998                 return ret;
8999
9000         if (val != 0 && val != 1)
9001                 return -EINVAL;
9002
9003         mutex_lock(&event_mutex);
9004         mutex_lock(&trace_types_lock);
9005         ret = set_tracer_flag(tr, 1 << index, val);
9006         mutex_unlock(&trace_types_lock);
9007         mutex_unlock(&event_mutex);
9008
9009         if (ret < 0)
9010                 return ret;
9011
9012         *ppos += cnt;
9013
9014         return cnt;
9015 }
9016
9017 static const struct file_operations trace_options_core_fops = {
9018         .open = tracing_open_generic,
9019         .read = trace_options_core_read,
9020         .write = trace_options_core_write,
9021         .llseek = generic_file_llseek,
9022 };
9023
9024 struct dentry *trace_create_file(const char *name,
9025                                  umode_t mode,
9026                                  struct dentry *parent,
9027                                  void *data,
9028                                  const struct file_operations *fops)
9029 {
9030         struct dentry *ret;
9031
9032         ret = tracefs_create_file(name, mode, parent, data, fops);
9033         if (!ret)
9034                 pr_warn("Could not create tracefs '%s' entry\n", name);
9035
9036         return ret;
9037 }
9038
9039
9040 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9041 {
9042         struct dentry *d_tracer;
9043
9044         if (tr->options)
9045                 return tr->options;
9046
9047         d_tracer = tracing_get_dentry(tr);
9048         if (IS_ERR(d_tracer))
9049                 return NULL;
9050
9051         tr->options = tracefs_create_dir("options", d_tracer);
9052         if (!tr->options) {
9053                 pr_warn("Could not create tracefs directory 'options'\n");
9054                 return NULL;
9055         }
9056
9057         return tr->options;
9058 }
9059
9060 static void
9061 create_trace_option_file(struct trace_array *tr,
9062                          struct trace_option_dentry *topt,
9063                          struct tracer_flags *flags,
9064                          struct tracer_opt *opt)
9065 {
9066         struct dentry *t_options;
9067
9068         t_options = trace_options_init_dentry(tr);
9069         if (!t_options)
9070                 return;
9071
9072         topt->flags = flags;
9073         topt->opt = opt;
9074         topt->tr = tr;
9075
9076         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9077                                         t_options, topt, &trace_options_fops);
9078
9079 }
9080
9081 static void
9082 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9083 {
9084         struct trace_option_dentry *topts;
9085         struct trace_options *tr_topts;
9086         struct tracer_flags *flags;
9087         struct tracer_opt *opts;
9088         int cnt;
9089         int i;
9090
9091         if (!tracer)
9092                 return;
9093
9094         flags = tracer->flags;
9095
9096         if (!flags || !flags->opts)
9097                 return;
9098
9099         /*
9100          * If this is an instance, only create flags for tracers
9101          * the instance may have.
9102          */
9103         if (!trace_ok_for_array(tracer, tr))
9104                 return;
9105
9106         for (i = 0; i < tr->nr_topts; i++) {
9107                 /* Make sure there's no duplicate flags. */
9108                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9109                         return;
9110         }
9111
9112         opts = flags->opts;
9113
9114         for (cnt = 0; opts[cnt].name; cnt++)
9115                 ;
9116
9117         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9118         if (!topts)
9119                 return;
9120
9121         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9122                             GFP_KERNEL);
9123         if (!tr_topts) {
9124                 kfree(topts);
9125                 return;
9126         }
9127
9128         tr->topts = tr_topts;
9129         tr->topts[tr->nr_topts].tracer = tracer;
9130         tr->topts[tr->nr_topts].topts = topts;
9131         tr->nr_topts++;
9132
9133         for (cnt = 0; opts[cnt].name; cnt++) {
9134                 create_trace_option_file(tr, &topts[cnt], flags,
9135                                          &opts[cnt]);
9136                 MEM_FAIL(topts[cnt].entry == NULL,
9137                           "Failed to create trace option: %s",
9138                           opts[cnt].name);
9139         }
9140 }
9141
9142 static struct dentry *
9143 create_trace_option_core_file(struct trace_array *tr,
9144                               const char *option, long index)
9145 {
9146         struct dentry *t_options;
9147
9148         t_options = trace_options_init_dentry(tr);
9149         if (!t_options)
9150                 return NULL;
9151
9152         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9153                                  (void *)&tr->trace_flags_index[index],
9154                                  &trace_options_core_fops);
9155 }
9156
9157 static void create_trace_options_dir(struct trace_array *tr)
9158 {
9159         struct dentry *t_options;
9160         bool top_level = tr == &global_trace;
9161         int i;
9162
9163         t_options = trace_options_init_dentry(tr);
9164         if (!t_options)
9165                 return;
9166
9167         for (i = 0; trace_options[i]; i++) {
9168                 if (top_level ||
9169                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9170                         create_trace_option_core_file(tr, trace_options[i], i);
9171         }
9172 }
9173
9174 static ssize_t
9175 rb_simple_read(struct file *filp, char __user *ubuf,
9176                size_t cnt, loff_t *ppos)
9177 {
9178         struct trace_array *tr = filp->private_data;
9179         char buf[64];
9180         int r;
9181
9182         r = tracer_tracing_is_on(tr);
9183         r = sprintf(buf, "%d\n", r);
9184
9185         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9186 }
9187
9188 static ssize_t
9189 rb_simple_write(struct file *filp, const char __user *ubuf,
9190                 size_t cnt, loff_t *ppos)
9191 {
9192         struct trace_array *tr = filp->private_data;
9193         struct trace_buffer *buffer = tr->array_buffer.buffer;
9194         unsigned long val;
9195         int ret;
9196
9197         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9198         if (ret)
9199                 return ret;
9200
9201         if (buffer) {
9202                 mutex_lock(&trace_types_lock);
9203                 if (!!val == tracer_tracing_is_on(tr)) {
9204                         val = 0; /* do nothing */
9205                 } else if (val) {
9206                         tracer_tracing_on(tr);
9207                         if (tr->current_trace->start)
9208                                 tr->current_trace->start(tr);
9209                 } else {
9210                         tracer_tracing_off(tr);
9211                         if (tr->current_trace->stop)
9212                                 tr->current_trace->stop(tr);
9213                         /* Wake up any waiters */
9214                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9215                 }
9216                 mutex_unlock(&trace_types_lock);
9217         }
9218
9219         (*ppos)++;
9220
9221         return cnt;
9222 }
9223
9224 static const struct file_operations rb_simple_fops = {
9225         .open           = tracing_open_generic_tr,
9226         .read           = rb_simple_read,
9227         .write          = rb_simple_write,
9228         .release        = tracing_release_generic_tr,
9229         .llseek         = default_llseek,
9230 };
9231
9232 static ssize_t
9233 buffer_percent_read(struct file *filp, char __user *ubuf,
9234                     size_t cnt, loff_t *ppos)
9235 {
9236         struct trace_array *tr = filp->private_data;
9237         char buf[64];
9238         int r;
9239
9240         r = tr->buffer_percent;
9241         r = sprintf(buf, "%d\n", r);
9242
9243         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9244 }
9245
9246 static ssize_t
9247 buffer_percent_write(struct file *filp, const char __user *ubuf,
9248                      size_t cnt, loff_t *ppos)
9249 {
9250         struct trace_array *tr = filp->private_data;
9251         unsigned long val;
9252         int ret;
9253
9254         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9255         if (ret)
9256                 return ret;
9257
9258         if (val > 100)
9259                 return -EINVAL;
9260
9261         tr->buffer_percent = val;
9262
9263         (*ppos)++;
9264
9265         return cnt;
9266 }
9267
9268 static const struct file_operations buffer_percent_fops = {
9269         .open           = tracing_open_generic_tr,
9270         .read           = buffer_percent_read,
9271         .write          = buffer_percent_write,
9272         .release        = tracing_release_generic_tr,
9273         .llseek         = default_llseek,
9274 };
9275
9276 static struct dentry *trace_instance_dir;
9277
9278 static void
9279 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9280
9281 static int
9282 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9283 {
9284         enum ring_buffer_flags rb_flags;
9285
9286         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9287
9288         buf->tr = tr;
9289
9290         buf->buffer = ring_buffer_alloc(size, rb_flags);
9291         if (!buf->buffer)
9292                 return -ENOMEM;
9293
9294         buf->data = alloc_percpu(struct trace_array_cpu);
9295         if (!buf->data) {
9296                 ring_buffer_free(buf->buffer);
9297                 buf->buffer = NULL;
9298                 return -ENOMEM;
9299         }
9300
9301         /* Allocate the first page for all buffers */
9302         set_buffer_entries(&tr->array_buffer,
9303                            ring_buffer_size(tr->array_buffer.buffer, 0));
9304
9305         return 0;
9306 }
9307
9308 static void free_trace_buffer(struct array_buffer *buf)
9309 {
9310         if (buf->buffer) {
9311                 ring_buffer_free(buf->buffer);
9312                 buf->buffer = NULL;
9313                 free_percpu(buf->data);
9314                 buf->data = NULL;
9315         }
9316 }
9317
9318 static int allocate_trace_buffers(struct trace_array *tr, int size)
9319 {
9320         int ret;
9321
9322         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9323         if (ret)
9324                 return ret;
9325
9326 #ifdef CONFIG_TRACER_MAX_TRACE
9327         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9328                                     allocate_snapshot ? size : 1);
9329         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9330                 free_trace_buffer(&tr->array_buffer);
9331                 return -ENOMEM;
9332         }
9333         tr->allocated_snapshot = allocate_snapshot;
9334
9335         allocate_snapshot = false;
9336 #endif
9337
9338         return 0;
9339 }
9340
9341 static void free_trace_buffers(struct trace_array *tr)
9342 {
9343         if (!tr)
9344                 return;
9345
9346         free_trace_buffer(&tr->array_buffer);
9347
9348 #ifdef CONFIG_TRACER_MAX_TRACE
9349         free_trace_buffer(&tr->max_buffer);
9350 #endif
9351 }
9352
9353 static void init_trace_flags_index(struct trace_array *tr)
9354 {
9355         int i;
9356
9357         /* Used by the trace options files */
9358         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9359                 tr->trace_flags_index[i] = i;
9360 }
9361
9362 static void __update_tracer_options(struct trace_array *tr)
9363 {
9364         struct tracer *t;
9365
9366         for (t = trace_types; t; t = t->next)
9367                 add_tracer_options(tr, t);
9368 }
9369
9370 static void update_tracer_options(struct trace_array *tr)
9371 {
9372         mutex_lock(&trace_types_lock);
9373         tracer_options_updated = true;
9374         __update_tracer_options(tr);
9375         mutex_unlock(&trace_types_lock);
9376 }
9377
9378 /* Must have trace_types_lock held */
9379 struct trace_array *trace_array_find(const char *instance)
9380 {
9381         struct trace_array *tr, *found = NULL;
9382
9383         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9384                 if (tr->name && strcmp(tr->name, instance) == 0) {
9385                         found = tr;
9386                         break;
9387                 }
9388         }
9389
9390         return found;
9391 }
9392
9393 struct trace_array *trace_array_find_get(const char *instance)
9394 {
9395         struct trace_array *tr;
9396
9397         mutex_lock(&trace_types_lock);
9398         tr = trace_array_find(instance);
9399         if (tr)
9400                 tr->ref++;
9401         mutex_unlock(&trace_types_lock);
9402
9403         return tr;
9404 }
9405
9406 static int trace_array_create_dir(struct trace_array *tr)
9407 {
9408         int ret;
9409
9410         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9411         if (!tr->dir)
9412                 return -EINVAL;
9413
9414         ret = event_trace_add_tracer(tr->dir, tr);
9415         if (ret) {
9416                 tracefs_remove(tr->dir);
9417                 return ret;
9418         }
9419
9420         init_tracer_tracefs(tr, tr->dir);
9421         __update_tracer_options(tr);
9422
9423         return ret;
9424 }
9425
9426 static struct trace_array *trace_array_create(const char *name)
9427 {
9428         struct trace_array *tr;
9429         int ret;
9430
9431         ret = -ENOMEM;
9432         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9433         if (!tr)
9434                 return ERR_PTR(ret);
9435
9436         tr->name = kstrdup(name, GFP_KERNEL);
9437         if (!tr->name)
9438                 goto out_free_tr;
9439
9440         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9441                 goto out_free_tr;
9442
9443         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9444
9445         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9446
9447         raw_spin_lock_init(&tr->start_lock);
9448
9449         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9450
9451         tr->current_trace = &nop_trace;
9452
9453         INIT_LIST_HEAD(&tr->systems);
9454         INIT_LIST_HEAD(&tr->events);
9455         INIT_LIST_HEAD(&tr->hist_vars);
9456         INIT_LIST_HEAD(&tr->err_log);
9457
9458         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9459                 goto out_free_tr;
9460
9461         if (ftrace_allocate_ftrace_ops(tr) < 0)
9462                 goto out_free_tr;
9463
9464         ftrace_init_trace_array(tr);
9465
9466         init_trace_flags_index(tr);
9467
9468         if (trace_instance_dir) {
9469                 ret = trace_array_create_dir(tr);
9470                 if (ret)
9471                         goto out_free_tr;
9472         } else
9473                 __trace_early_add_events(tr);
9474
9475         list_add(&tr->list, &ftrace_trace_arrays);
9476
9477         tr->ref++;
9478
9479         return tr;
9480
9481  out_free_tr:
9482         ftrace_free_ftrace_ops(tr);
9483         free_trace_buffers(tr);
9484         free_cpumask_var(tr->tracing_cpumask);
9485         kfree(tr->name);
9486         kfree(tr);
9487
9488         return ERR_PTR(ret);
9489 }
9490
9491 static int instance_mkdir(const char *name)
9492 {
9493         struct trace_array *tr;
9494         int ret;
9495
9496         mutex_lock(&event_mutex);
9497         mutex_lock(&trace_types_lock);
9498
9499         ret = -EEXIST;
9500         if (trace_array_find(name))
9501                 goto out_unlock;
9502
9503         tr = trace_array_create(name);
9504
9505         ret = PTR_ERR_OR_ZERO(tr);
9506
9507 out_unlock:
9508         mutex_unlock(&trace_types_lock);
9509         mutex_unlock(&event_mutex);
9510         return ret;
9511 }
9512
9513 /**
9514  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9515  * @name: The name of the trace array to be looked up/created.
9516  *
9517  * Returns pointer to trace array with given name.
9518  * NULL, if it cannot be created.
9519  *
9520  * NOTE: This function increments the reference counter associated with the
9521  * trace array returned. This makes sure it cannot be freed while in use.
9522  * Use trace_array_put() once the trace array is no longer needed.
9523  * If the trace_array is to be freed, trace_array_destroy() needs to
9524  * be called after the trace_array_put(), or simply let user space delete
9525  * it from the tracefs instances directory. But until the
9526  * trace_array_put() is called, user space can not delete it.
9527  *
9528  */
9529 struct trace_array *trace_array_get_by_name(const char *name)
9530 {
9531         struct trace_array *tr;
9532
9533         mutex_lock(&event_mutex);
9534         mutex_lock(&trace_types_lock);
9535
9536         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9537                 if (tr->name && strcmp(tr->name, name) == 0)
9538                         goto out_unlock;
9539         }
9540
9541         tr = trace_array_create(name);
9542
9543         if (IS_ERR(tr))
9544                 tr = NULL;
9545 out_unlock:
9546         if (tr)
9547                 tr->ref++;
9548
9549         mutex_unlock(&trace_types_lock);
9550         mutex_unlock(&event_mutex);
9551         return tr;
9552 }
9553 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9554
9555 static int __remove_instance(struct trace_array *tr)
9556 {
9557         int i;
9558
9559         /* Reference counter for a newly created trace array = 1. */
9560         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9561                 return -EBUSY;
9562
9563         list_del(&tr->list);
9564
9565         /* Disable all the flags that were enabled coming in */
9566         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9567                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9568                         set_tracer_flag(tr, 1 << i, 0);
9569         }
9570
9571         tracing_set_nop(tr);
9572         clear_ftrace_function_probes(tr);
9573         event_trace_del_tracer(tr);
9574         ftrace_clear_pids(tr);
9575         ftrace_destroy_function_files(tr);
9576         tracefs_remove(tr->dir);
9577         free_percpu(tr->last_func_repeats);
9578         free_trace_buffers(tr);
9579         clear_tracing_err_log(tr);
9580
9581         for (i = 0; i < tr->nr_topts; i++) {
9582                 kfree(tr->topts[i].topts);
9583         }
9584         kfree(tr->topts);
9585
9586         free_cpumask_var(tr->tracing_cpumask);
9587         kfree(tr->name);
9588         kfree(tr);
9589
9590         return 0;
9591 }
9592
9593 int trace_array_destroy(struct trace_array *this_tr)
9594 {
9595         struct trace_array *tr;
9596         int ret;
9597
9598         if (!this_tr)
9599                 return -EINVAL;
9600
9601         mutex_lock(&event_mutex);
9602         mutex_lock(&trace_types_lock);
9603
9604         ret = -ENODEV;
9605
9606         /* Making sure trace array exists before destroying it. */
9607         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9608                 if (tr == this_tr) {
9609                         ret = __remove_instance(tr);
9610                         break;
9611                 }
9612         }
9613
9614         mutex_unlock(&trace_types_lock);
9615         mutex_unlock(&event_mutex);
9616
9617         return ret;
9618 }
9619 EXPORT_SYMBOL_GPL(trace_array_destroy);
9620
9621 static int instance_rmdir(const char *name)
9622 {
9623         struct trace_array *tr;
9624         int ret;
9625
9626         mutex_lock(&event_mutex);
9627         mutex_lock(&trace_types_lock);
9628
9629         ret = -ENODEV;
9630         tr = trace_array_find(name);
9631         if (tr)
9632                 ret = __remove_instance(tr);
9633
9634         mutex_unlock(&trace_types_lock);
9635         mutex_unlock(&event_mutex);
9636
9637         return ret;
9638 }
9639
9640 static __init void create_trace_instances(struct dentry *d_tracer)
9641 {
9642         struct trace_array *tr;
9643
9644         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9645                                                          instance_mkdir,
9646                                                          instance_rmdir);
9647         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9648                 return;
9649
9650         mutex_lock(&event_mutex);
9651         mutex_lock(&trace_types_lock);
9652
9653         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9654                 if (!tr->name)
9655                         continue;
9656                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9657                              "Failed to create instance directory\n"))
9658                         break;
9659         }
9660
9661         mutex_unlock(&trace_types_lock);
9662         mutex_unlock(&event_mutex);
9663 }
9664
9665 static void
9666 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9667 {
9668         struct trace_event_file *file;
9669         int cpu;
9670
9671         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9672                         tr, &show_traces_fops);
9673
9674         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9675                         tr, &set_tracer_fops);
9676
9677         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9678                           tr, &tracing_cpumask_fops);
9679
9680         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9681                           tr, &tracing_iter_fops);
9682
9683         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9684                           tr, &tracing_fops);
9685
9686         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9687                           tr, &tracing_pipe_fops);
9688
9689         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9690                           tr, &tracing_entries_fops);
9691
9692         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9693                           tr, &tracing_total_entries_fops);
9694
9695         trace_create_file("free_buffer", 0200, d_tracer,
9696                           tr, &tracing_free_buffer_fops);
9697
9698         trace_create_file("trace_marker", 0220, d_tracer,
9699                           tr, &tracing_mark_fops);
9700
9701         file = __find_event_file(tr, "ftrace", "print");
9702         if (file && file->dir)
9703                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9704                                   file, &event_trigger_fops);
9705         tr->trace_marker_file = file;
9706
9707         trace_create_file("trace_marker_raw", 0220, d_tracer,
9708                           tr, &tracing_mark_raw_fops);
9709
9710         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9711                           &trace_clock_fops);
9712
9713         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9714                           tr, &rb_simple_fops);
9715
9716         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9717                           &trace_time_stamp_mode_fops);
9718
9719         tr->buffer_percent = 50;
9720
9721         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9722                         tr, &buffer_percent_fops);
9723
9724         create_trace_options_dir(tr);
9725
9726 #ifdef CONFIG_TRACER_MAX_TRACE
9727         trace_create_maxlat_file(tr, d_tracer);
9728 #endif
9729
9730         if (ftrace_create_function_files(tr, d_tracer))
9731                 MEM_FAIL(1, "Could not allocate function filter files");
9732
9733 #ifdef CONFIG_TRACER_SNAPSHOT
9734         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9735                           tr, &snapshot_fops);
9736 #endif
9737
9738         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9739                           tr, &tracing_err_log_fops);
9740
9741         for_each_tracing_cpu(cpu)
9742                 tracing_init_tracefs_percpu(tr, cpu);
9743
9744         ftrace_init_tracefs(tr, d_tracer);
9745 }
9746
9747 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9748 {
9749         struct vfsmount *mnt;
9750         struct file_system_type *type;
9751
9752         /*
9753          * To maintain backward compatibility for tools that mount
9754          * debugfs to get to the tracing facility, tracefs is automatically
9755          * mounted to the debugfs/tracing directory.
9756          */
9757         type = get_fs_type("tracefs");
9758         if (!type)
9759                 return NULL;
9760         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9761         put_filesystem(type);
9762         if (IS_ERR(mnt))
9763                 return NULL;
9764         mntget(mnt);
9765
9766         return mnt;
9767 }
9768
9769 /**
9770  * tracing_init_dentry - initialize top level trace array
9771  *
9772  * This is called when creating files or directories in the tracing
9773  * directory. It is called via fs_initcall() by any of the boot up code
9774  * and expects to return the dentry of the top level tracing directory.
9775  */
9776 int tracing_init_dentry(void)
9777 {
9778         struct trace_array *tr = &global_trace;
9779
9780         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9781                 pr_warn("Tracing disabled due to lockdown\n");
9782                 return -EPERM;
9783         }
9784
9785         /* The top level trace array uses  NULL as parent */
9786         if (tr->dir)
9787                 return 0;
9788
9789         if (WARN_ON(!tracefs_initialized()))
9790                 return -ENODEV;
9791
9792         /*
9793          * As there may still be users that expect the tracing
9794          * files to exist in debugfs/tracing, we must automount
9795          * the tracefs file system there, so older tools still
9796          * work with the newer kernel.
9797          */
9798         tr->dir = debugfs_create_automount("tracing", NULL,
9799                                            trace_automount, NULL);
9800
9801         return 0;
9802 }
9803
9804 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9805 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9806
9807 static struct workqueue_struct *eval_map_wq __initdata;
9808 static struct work_struct eval_map_work __initdata;
9809 static struct work_struct tracerfs_init_work __initdata;
9810
9811 static void __init eval_map_work_func(struct work_struct *work)
9812 {
9813         int len;
9814
9815         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9816         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9817 }
9818
9819 static int __init trace_eval_init(void)
9820 {
9821         INIT_WORK(&eval_map_work, eval_map_work_func);
9822
9823         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9824         if (!eval_map_wq) {
9825                 pr_err("Unable to allocate eval_map_wq\n");
9826                 /* Do work here */
9827                 eval_map_work_func(&eval_map_work);
9828                 return -ENOMEM;
9829         }
9830
9831         queue_work(eval_map_wq, &eval_map_work);
9832         return 0;
9833 }
9834
9835 subsys_initcall(trace_eval_init);
9836
9837 static int __init trace_eval_sync(void)
9838 {
9839         /* Make sure the eval map updates are finished */
9840         if (eval_map_wq)
9841                 destroy_workqueue(eval_map_wq);
9842         return 0;
9843 }
9844
9845 late_initcall_sync(trace_eval_sync);
9846
9847
9848 #ifdef CONFIG_MODULES
9849 static void trace_module_add_evals(struct module *mod)
9850 {
9851         if (!mod->num_trace_evals)
9852                 return;
9853
9854         /*
9855          * Modules with bad taint do not have events created, do
9856          * not bother with enums either.
9857          */
9858         if (trace_module_has_bad_taint(mod))
9859                 return;
9860
9861         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9862 }
9863
9864 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9865 static void trace_module_remove_evals(struct module *mod)
9866 {
9867         union trace_eval_map_item *map;
9868         union trace_eval_map_item **last = &trace_eval_maps;
9869
9870         if (!mod->num_trace_evals)
9871                 return;
9872
9873         mutex_lock(&trace_eval_mutex);
9874
9875         map = trace_eval_maps;
9876
9877         while (map) {
9878                 if (map->head.mod == mod)
9879                         break;
9880                 map = trace_eval_jmp_to_tail(map);
9881                 last = &map->tail.next;
9882                 map = map->tail.next;
9883         }
9884         if (!map)
9885                 goto out;
9886
9887         *last = trace_eval_jmp_to_tail(map)->tail.next;
9888         kfree(map);
9889  out:
9890         mutex_unlock(&trace_eval_mutex);
9891 }
9892 #else
9893 static inline void trace_module_remove_evals(struct module *mod) { }
9894 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9895
9896 static int trace_module_notify(struct notifier_block *self,
9897                                unsigned long val, void *data)
9898 {
9899         struct module *mod = data;
9900
9901         switch (val) {
9902         case MODULE_STATE_COMING:
9903                 trace_module_add_evals(mod);
9904                 break;
9905         case MODULE_STATE_GOING:
9906                 trace_module_remove_evals(mod);
9907                 break;
9908         }
9909
9910         return NOTIFY_OK;
9911 }
9912
9913 static struct notifier_block trace_module_nb = {
9914         .notifier_call = trace_module_notify,
9915         .priority = 0,
9916 };
9917 #endif /* CONFIG_MODULES */
9918
9919 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9920 {
9921
9922         event_trace_init();
9923
9924         init_tracer_tracefs(&global_trace, NULL);
9925         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9926
9927         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9928                         &global_trace, &tracing_thresh_fops);
9929
9930         trace_create_file("README", TRACE_MODE_READ, NULL,
9931                         NULL, &tracing_readme_fops);
9932
9933         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9934                         NULL, &tracing_saved_cmdlines_fops);
9935
9936         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9937                           NULL, &tracing_saved_cmdlines_size_fops);
9938
9939         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9940                         NULL, &tracing_saved_tgids_fops);
9941
9942         trace_create_eval_file(NULL);
9943
9944 #ifdef CONFIG_MODULES
9945         register_module_notifier(&trace_module_nb);
9946 #endif
9947
9948 #ifdef CONFIG_DYNAMIC_FTRACE
9949         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9950                         NULL, &tracing_dyn_info_fops);
9951 #endif
9952
9953         create_trace_instances(NULL);
9954
9955         update_tracer_options(&global_trace);
9956 }
9957
9958 static __init int tracer_init_tracefs(void)
9959 {
9960         int ret;
9961
9962         trace_access_lock_init();
9963
9964         ret = tracing_init_dentry();
9965         if (ret)
9966                 return 0;
9967
9968         if (eval_map_wq) {
9969                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9970                 queue_work(eval_map_wq, &tracerfs_init_work);
9971         } else {
9972                 tracer_init_tracefs_work_func(NULL);
9973         }
9974
9975         rv_init_interface();
9976
9977         return 0;
9978 }
9979
9980 fs_initcall(tracer_init_tracefs);
9981
9982 static int trace_die_panic_handler(struct notifier_block *self,
9983                                 unsigned long ev, void *unused);
9984
9985 static struct notifier_block trace_panic_notifier = {
9986         .notifier_call = trace_die_panic_handler,
9987         .priority = INT_MAX - 1,
9988 };
9989
9990 static struct notifier_block trace_die_notifier = {
9991         .notifier_call = trace_die_panic_handler,
9992         .priority = INT_MAX - 1,
9993 };
9994
9995 /*
9996  * The idea is to execute the following die/panic callback early, in order
9997  * to avoid showing irrelevant information in the trace (like other panic
9998  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9999  * warnings get disabled (to prevent potential log flooding).
10000  */
10001 static int trace_die_panic_handler(struct notifier_block *self,
10002                                 unsigned long ev, void *unused)
10003 {
10004         if (!ftrace_dump_on_oops)
10005                 return NOTIFY_DONE;
10006
10007         /* The die notifier requires DIE_OOPS to trigger */
10008         if (self == &trace_die_notifier && ev != DIE_OOPS)
10009                 return NOTIFY_DONE;
10010
10011         ftrace_dump(ftrace_dump_on_oops);
10012
10013         return NOTIFY_DONE;
10014 }
10015
10016 /*
10017  * printk is set to max of 1024, we really don't need it that big.
10018  * Nothing should be printing 1000 characters anyway.
10019  */
10020 #define TRACE_MAX_PRINT         1000
10021
10022 /*
10023  * Define here KERN_TRACE so that we have one place to modify
10024  * it if we decide to change what log level the ftrace dump
10025  * should be at.
10026  */
10027 #define KERN_TRACE              KERN_EMERG
10028
10029 void
10030 trace_printk_seq(struct trace_seq *s)
10031 {
10032         /* Probably should print a warning here. */
10033         if (s->seq.len >= TRACE_MAX_PRINT)
10034                 s->seq.len = TRACE_MAX_PRINT;
10035
10036         /*
10037          * More paranoid code. Although the buffer size is set to
10038          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10039          * an extra layer of protection.
10040          */
10041         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10042                 s->seq.len = s->seq.size - 1;
10043
10044         /* should be zero ended, but we are paranoid. */
10045         s->buffer[s->seq.len] = 0;
10046
10047         printk(KERN_TRACE "%s", s->buffer);
10048
10049         trace_seq_init(s);
10050 }
10051
10052 void trace_init_global_iter(struct trace_iterator *iter)
10053 {
10054         iter->tr = &global_trace;
10055         iter->trace = iter->tr->current_trace;
10056         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10057         iter->array_buffer = &global_trace.array_buffer;
10058
10059         if (iter->trace && iter->trace->open)
10060                 iter->trace->open(iter);
10061
10062         /* Annotate start of buffers if we had overruns */
10063         if (ring_buffer_overruns(iter->array_buffer->buffer))
10064                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10065
10066         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10067         if (trace_clocks[iter->tr->clock_id].in_ns)
10068                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10069
10070         /* Can not use kmalloc for iter.temp and iter.fmt */
10071         iter->temp = static_temp_buf;
10072         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10073         iter->fmt = static_fmt_buf;
10074         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10075 }
10076
10077 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10078 {
10079         /* use static because iter can be a bit big for the stack */
10080         static struct trace_iterator iter;
10081         static atomic_t dump_running;
10082         struct trace_array *tr = &global_trace;
10083         unsigned int old_userobj;
10084         unsigned long flags;
10085         int cnt = 0, cpu;
10086
10087         /* Only allow one dump user at a time. */
10088         if (atomic_inc_return(&dump_running) != 1) {
10089                 atomic_dec(&dump_running);
10090                 return;
10091         }
10092
10093         /*
10094          * Always turn off tracing when we dump.
10095          * We don't need to show trace output of what happens
10096          * between multiple crashes.
10097          *
10098          * If the user does a sysrq-z, then they can re-enable
10099          * tracing with echo 1 > tracing_on.
10100          */
10101         tracing_off();
10102
10103         local_irq_save(flags);
10104
10105         /* Simulate the iterator */
10106         trace_init_global_iter(&iter);
10107
10108         for_each_tracing_cpu(cpu) {
10109                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10110         }
10111
10112         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10113
10114         /* don't look at user memory in panic mode */
10115         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10116
10117         switch (oops_dump_mode) {
10118         case DUMP_ALL:
10119                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10120                 break;
10121         case DUMP_ORIG:
10122                 iter.cpu_file = raw_smp_processor_id();
10123                 break;
10124         case DUMP_NONE:
10125                 goto out_enable;
10126         default:
10127                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10128                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10129         }
10130
10131         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10132
10133         /* Did function tracer already get disabled? */
10134         if (ftrace_is_dead()) {
10135                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10136                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10137         }
10138
10139         /*
10140          * We need to stop all tracing on all CPUS to read
10141          * the next buffer. This is a bit expensive, but is
10142          * not done often. We fill all what we can read,
10143          * and then release the locks again.
10144          */
10145
10146         while (!trace_empty(&iter)) {
10147
10148                 if (!cnt)
10149                         printk(KERN_TRACE "---------------------------------\n");
10150
10151                 cnt++;
10152
10153                 trace_iterator_reset(&iter);
10154                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10155
10156                 if (trace_find_next_entry_inc(&iter) != NULL) {
10157                         int ret;
10158
10159                         ret = print_trace_line(&iter);
10160                         if (ret != TRACE_TYPE_NO_CONSUME)
10161                                 trace_consume(&iter);
10162                 }
10163                 touch_nmi_watchdog();
10164
10165                 trace_printk_seq(&iter.seq);
10166         }
10167
10168         if (!cnt)
10169                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10170         else
10171                 printk(KERN_TRACE "---------------------------------\n");
10172
10173  out_enable:
10174         tr->trace_flags |= old_userobj;
10175
10176         for_each_tracing_cpu(cpu) {
10177                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10178         }
10179         atomic_dec(&dump_running);
10180         local_irq_restore(flags);
10181 }
10182 EXPORT_SYMBOL_GPL(ftrace_dump);
10183
10184 #define WRITE_BUFSIZE  4096
10185
10186 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10187                                 size_t count, loff_t *ppos,
10188                                 int (*createfn)(const char *))
10189 {
10190         char *kbuf, *buf, *tmp;
10191         int ret = 0;
10192         size_t done = 0;
10193         size_t size;
10194
10195         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10196         if (!kbuf)
10197                 return -ENOMEM;
10198
10199         while (done < count) {
10200                 size = count - done;
10201
10202                 if (size >= WRITE_BUFSIZE)
10203                         size = WRITE_BUFSIZE - 1;
10204
10205                 if (copy_from_user(kbuf, buffer + done, size)) {
10206                         ret = -EFAULT;
10207                         goto out;
10208                 }
10209                 kbuf[size] = '\0';
10210                 buf = kbuf;
10211                 do {
10212                         tmp = strchr(buf, '\n');
10213                         if (tmp) {
10214                                 *tmp = '\0';
10215                                 size = tmp - buf + 1;
10216                         } else {
10217                                 size = strlen(buf);
10218                                 if (done + size < count) {
10219                                         if (buf != kbuf)
10220                                                 break;
10221                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10222                                         pr_warn("Line length is too long: Should be less than %d\n",
10223                                                 WRITE_BUFSIZE - 2);
10224                                         ret = -EINVAL;
10225                                         goto out;
10226                                 }
10227                         }
10228                         done += size;
10229
10230                         /* Remove comments */
10231                         tmp = strchr(buf, '#');
10232
10233                         if (tmp)
10234                                 *tmp = '\0';
10235
10236                         ret = createfn(buf);
10237                         if (ret)
10238                                 goto out;
10239                         buf += size;
10240
10241                 } while (done < count);
10242         }
10243         ret = done;
10244
10245 out:
10246         kfree(kbuf);
10247
10248         return ret;
10249 }
10250
10251 #ifdef CONFIG_TRACER_MAX_TRACE
10252 __init static bool tr_needs_alloc_snapshot(const char *name)
10253 {
10254         char *test;
10255         int len = strlen(name);
10256         bool ret;
10257
10258         if (!boot_snapshot_index)
10259                 return false;
10260
10261         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10262             boot_snapshot_info[len] == '\t')
10263                 return true;
10264
10265         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10266         if (!test)
10267                 return false;
10268
10269         sprintf(test, "\t%s\t", name);
10270         ret = strstr(boot_snapshot_info, test) == NULL;
10271         kfree(test);
10272         return ret;
10273 }
10274
10275 __init static void do_allocate_snapshot(const char *name)
10276 {
10277         if (!tr_needs_alloc_snapshot(name))
10278                 return;
10279
10280         /*
10281          * When allocate_snapshot is set, the next call to
10282          * allocate_trace_buffers() (called by trace_array_get_by_name())
10283          * will allocate the snapshot buffer. That will alse clear
10284          * this flag.
10285          */
10286         allocate_snapshot = true;
10287 }
10288 #else
10289 static inline void do_allocate_snapshot(const char *name) { }
10290 #endif
10291
10292 __init static void enable_instances(void)
10293 {
10294         struct trace_array *tr;
10295         char *curr_str;
10296         char *str;
10297         char *tok;
10298
10299         /* A tab is always appended */
10300         boot_instance_info[boot_instance_index - 1] = '\0';
10301         str = boot_instance_info;
10302
10303         while ((curr_str = strsep(&str, "\t"))) {
10304
10305                 tok = strsep(&curr_str, ",");
10306
10307                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10308                         do_allocate_snapshot(tok);
10309
10310                 tr = trace_array_get_by_name(tok);
10311                 if (!tr) {
10312                         pr_warn("Failed to create instance buffer %s\n", curr_str);
10313                         continue;
10314                 }
10315                 /* Allow user space to delete it */
10316                 trace_array_put(tr);
10317
10318                 while ((tok = strsep(&curr_str, ","))) {
10319                         early_enable_events(tr, tok, true);
10320                 }
10321         }
10322 }
10323
10324 __init static int tracer_alloc_buffers(void)
10325 {
10326         int ring_buf_size;
10327         int ret = -ENOMEM;
10328
10329
10330         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10331                 pr_warn("Tracing disabled due to lockdown\n");
10332                 return -EPERM;
10333         }
10334
10335         /*
10336          * Make sure we don't accidentally add more trace options
10337          * than we have bits for.
10338          */
10339         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10340
10341         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10342                 goto out;
10343
10344         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10345                 goto out_free_buffer_mask;
10346
10347         /* Only allocate trace_printk buffers if a trace_printk exists */
10348         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10349                 /* Must be called before global_trace.buffer is allocated */
10350                 trace_printk_init_buffers();
10351
10352         /* To save memory, keep the ring buffer size to its minimum */
10353         if (ring_buffer_expanded)
10354                 ring_buf_size = trace_buf_size;
10355         else
10356                 ring_buf_size = 1;
10357
10358         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10359         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10360
10361         raw_spin_lock_init(&global_trace.start_lock);
10362
10363         /*
10364          * The prepare callbacks allocates some memory for the ring buffer. We
10365          * don't free the buffer if the CPU goes down. If we were to free
10366          * the buffer, then the user would lose any trace that was in the
10367          * buffer. The memory will be removed once the "instance" is removed.
10368          */
10369         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10370                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10371                                       NULL);
10372         if (ret < 0)
10373                 goto out_free_cpumask;
10374         /* Used for event triggers */
10375         ret = -ENOMEM;
10376         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10377         if (!temp_buffer)
10378                 goto out_rm_hp_state;
10379
10380         if (trace_create_savedcmd() < 0)
10381                 goto out_free_temp_buffer;
10382
10383         /* TODO: make the number of buffers hot pluggable with CPUS */
10384         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10385                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10386                 goto out_free_savedcmd;
10387         }
10388
10389         if (global_trace.buffer_disabled)
10390                 tracing_off();
10391
10392         if (trace_boot_clock) {
10393                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10394                 if (ret < 0)
10395                         pr_warn("Trace clock %s not defined, going back to default\n",
10396                                 trace_boot_clock);
10397         }
10398
10399         /*
10400          * register_tracer() might reference current_trace, so it
10401          * needs to be set before we register anything. This is
10402          * just a bootstrap of current_trace anyway.
10403          */
10404         global_trace.current_trace = &nop_trace;
10405
10406         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10407
10408         ftrace_init_global_array_ops(&global_trace);
10409
10410         init_trace_flags_index(&global_trace);
10411
10412         register_tracer(&nop_trace);
10413
10414         /* Function tracing may start here (via kernel command line) */
10415         init_function_trace();
10416
10417         /* All seems OK, enable tracing */
10418         tracing_disabled = 0;
10419
10420         atomic_notifier_chain_register(&panic_notifier_list,
10421                                        &trace_panic_notifier);
10422
10423         register_die_notifier(&trace_die_notifier);
10424
10425         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10426
10427         INIT_LIST_HEAD(&global_trace.systems);
10428         INIT_LIST_HEAD(&global_trace.events);
10429         INIT_LIST_HEAD(&global_trace.hist_vars);
10430         INIT_LIST_HEAD(&global_trace.err_log);
10431         list_add(&global_trace.list, &ftrace_trace_arrays);
10432
10433         apply_trace_boot_options();
10434
10435         register_snapshot_cmd();
10436
10437         test_can_verify();
10438
10439         return 0;
10440
10441 out_free_savedcmd:
10442         free_saved_cmdlines_buffer(savedcmd);
10443 out_free_temp_buffer:
10444         ring_buffer_free(temp_buffer);
10445 out_rm_hp_state:
10446         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10447 out_free_cpumask:
10448         free_cpumask_var(global_trace.tracing_cpumask);
10449 out_free_buffer_mask:
10450         free_cpumask_var(tracing_buffer_mask);
10451 out:
10452         return ret;
10453 }
10454
10455 void __init ftrace_boot_snapshot(void)
10456 {
10457 #ifdef CONFIG_TRACER_MAX_TRACE
10458         struct trace_array *tr;
10459
10460         if (!snapshot_at_boot)
10461                 return;
10462
10463         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10464                 if (!tr->allocated_snapshot)
10465                         continue;
10466
10467                 tracing_snapshot_instance(tr);
10468                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10469         }
10470 #endif
10471 }
10472
10473 void __init early_trace_init(void)
10474 {
10475         if (tracepoint_printk) {
10476                 tracepoint_print_iter =
10477                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10478                 if (MEM_FAIL(!tracepoint_print_iter,
10479                              "Failed to allocate trace iterator\n"))
10480                         tracepoint_printk = 0;
10481                 else
10482                         static_key_enable(&tracepoint_printk_key.key);
10483         }
10484         tracer_alloc_buffers();
10485
10486         init_events();
10487 }
10488
10489 void __init trace_init(void)
10490 {
10491         trace_event_init();
10492
10493         if (boot_instance_index)
10494                 enable_instances();
10495 }
10496
10497 __init static void clear_boot_tracer(void)
10498 {
10499         /*
10500          * The default tracer at boot buffer is an init section.
10501          * This function is called in lateinit. If we did not
10502          * find the boot tracer, then clear it out, to prevent
10503          * later registration from accessing the buffer that is
10504          * about to be freed.
10505          */
10506         if (!default_bootup_tracer)
10507                 return;
10508
10509         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10510                default_bootup_tracer);
10511         default_bootup_tracer = NULL;
10512 }
10513
10514 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10515 __init static void tracing_set_default_clock(void)
10516 {
10517         /* sched_clock_stable() is determined in late_initcall */
10518         if (!trace_boot_clock && !sched_clock_stable()) {
10519                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10520                         pr_warn("Can not set tracing clock due to lockdown\n");
10521                         return;
10522                 }
10523
10524                 printk(KERN_WARNING
10525                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10526                        "If you want to keep using the local clock, then add:\n"
10527                        "  \"trace_clock=local\"\n"
10528                        "on the kernel command line\n");
10529                 tracing_set_clock(&global_trace, "global");
10530         }
10531 }
10532 #else
10533 static inline void tracing_set_default_clock(void) { }
10534 #endif
10535
10536 __init static int late_trace_init(void)
10537 {
10538         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10539                 static_key_disable(&tracepoint_printk_key.key);
10540                 tracepoint_printk = 0;
10541         }
10542
10543         tracing_set_default_clock();
10544         clear_boot_tracer();
10545         return 0;
10546 }
10547
10548 late_initcall_sync(late_trace_init);